hex

hex.git
git clone git://git.lenczewski.org/hex.git
Log | Files | Refs

commit 45fe3b840d02a7a3a822acabbc2b11a417ff8480
parent e1412842bf102d7c118e60a191016809bfd6bd94
Author: MikoĊ‚aj Lenczewski <mblenczewski@gmail.com>
Date:   Wed, 16 Aug 2023 23:40:47 +0000

hexes: implement exploration in mcts_node_calc_score

Diffstat:
Magents/hexes/src/agent/mcts.c | 31++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/agents/hexes/src/agent/mcts.c b/agents/hexes/src/agent/mcts.c @@ -87,14 +87,17 @@ mcts_node_calc_score(struct mcts_node *self) */ if (!self->plays) return INFINITY; - s64 exploration_rounds = 300; + s64 exploration_rounds = 3000; f32 beta = MAX(0.0, (exploration_rounds - self->plays) / (f32) exploration_rounds); assert(0.0 <= beta && beta <= 1.0); dbglog(LOG_DEBUG, "beta: %lf, wins: %d, rave_wins: %d, plays: %u, rave_plays: %u\n", beta, self->wins, self->rave_wins, self->plays, self->rave_plays); - f32 exploration = 0; // TODO: implement exploration parameter + struct mcts_node *parent = mcts_node_rel2abs(self, self->parent); + assert(parent); + + f32 exploration = M_SQRT2 * sqrtf(logf(parent->plays) / (f32) self->plays); f32 exploitation = (1 - beta) * ((f32) self->wins / (f32) self->plays); assert(-1.0 <= exploitation && exploitation <= 1.0); @@ -102,6 +105,9 @@ mcts_node_calc_score(struct mcts_node *self) f32 rave_exploitation = beta * ((f32) self->rave_wins / (f32) self->rave_plays); assert(-1.0 <= rave_exploitation && rave_exploitation <= 1.0); + dbglog(LOG_DEBUG, "exploration: %f, exploitation: %f, rave_exploitation: %f\n", + exploration, exploitation, rave_exploitation); + return exploration + exploitation + rave_exploitation; } @@ -175,12 +181,23 @@ agent_mcts_play(struct agent_mcts *self, enum hex_player player, u32 x, u32 y) self->root = mem_pool_alloc(&self->pool, alignof(struct mcts_node), mcts_node_sizeof(moves)); mcts_node_init(self->root, NULL, player, x, y, moves); - // TODO: implement tree compaction and tree reuse, if it improves play + // TODO: implement tree reuse, if it improves play + // // one possible issue is children containing stale board states, - // leading to potentially invalid moves being generated. another - // is the simple fact that walking the tree to compact it takes - // a significant amount of time and potentially outweighs simply - // resetting the pool and performing a few more rounds of MCTS + // leading to potentially invalid moves being generated. + + // TODO: implement tree compaction + // one possible issue is the fact that walking the tree to + // compact it takes a significant amount of time and potentially + // outweighs simply resetting the pool and performing a few more + // rounds of MCTS. + // + // another option would be to implement some form of cyclic + // memory pool, to allow allocations from memory behind the + // current root node, as well as in front of it, but this would + // require tracking stale leaf nodes and reclaiming them (hence + // transforms into a GC, and thus wastes the benefits of a + // simple memory pool) } void