hexes: implement exploration in mcts_node_calc_score - hex

commit 45fe3b840d02a7a3a822acabbc2b11a417ff8480
parent e1412842bf102d7c118e60a191016809bfd6bd94
Author: Mikołaj Lenczewski <mblenczewski@gmail.com>
Date:   Wed, 16 Aug 2023 23:40:47 +0000

hexes: implement exploration in mcts_node_calc_score

Diffstat:
M agents/hexes/src/agent/mcts.c  | 31 ++++++++++++++++++++++++-------

1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/agents/hexes/src/agent/mcts.c b/agents/hexes/src/agent/mcts.c
@@ -87,14 +87,17 @@ mcts_node_calc_score(struct mcts_node *self)
 	 */
 	if (!self->plays) return INFINITY;
 
-	s64 exploration_rounds = 300;
+	s64 exploration_rounds = 3000;
 	f32 beta = MAX(0.0, (exploration_rounds - self->plays) / (f32) exploration_rounds);
 	assert(0.0 <= beta && beta <= 1.0);
 
 	dbglog(LOG_DEBUG, "beta: %lf, wins: %d, rave_wins: %d, plays: %u, rave_plays: %u\n",
 			  beta, self->wins, self->rave_wins, self->plays, self->rave_plays);
 
-	f32 exploration = 0; // TODO: implement exploration parameter
+	struct mcts_node *parent = mcts_node_rel2abs(self, self->parent);
+	assert(parent);
+
+	f32 exploration = M_SQRT2 * sqrtf(logf(parent->plays) / (f32) self->plays);
 
 	f32 exploitation = (1 - beta) * ((f32) self->wins / (f32) self->plays);
 	assert(-1.0 <= exploitation && exploitation <= 1.0);
@@ -102,6 +105,9 @@ mcts_node_calc_score(struct mcts_node *self)
 	f32 rave_exploitation = beta * ((f32) self->rave_wins / (f32) self->rave_plays);
 	assert(-1.0 <= rave_exploitation && rave_exploitation <= 1.0);
 
+	dbglog(LOG_DEBUG, "exploration: %f, exploitation: %f, rave_exploitation: %f\n",
+			exploration, exploitation, rave_exploitation);
+
 	return exploration + exploitation + rave_exploitation;
 }
 
@@ -175,12 +181,23 @@ agent_mcts_play(struct agent_mcts *self, enum hex_player player, u32 x, u32 y)
 	self->root = mem_pool_alloc(&self->pool, alignof(struct mcts_node), mcts_node_sizeof(moves));
 	mcts_node_init(self->root, NULL, player, x, y, moves);
 
-	// TODO: implement tree compaction and tree reuse, if it improves play
+	// TODO: implement tree reuse, if it improves play
+	//
 	//       one possible issue is children containing stale board states,
-	//       leading to potentially invalid moves being generated. another
-	//       is the simple fact that walking the tree to compact it takes
-	//       a significant amount of time and potentially outweighs simply
-	//       resetting the pool and performing a few more rounds of MCTS
+	//       leading to potentially invalid moves being generated.
+
+	// TODO: implement tree compaction
+	//       one possible issue is the fact that walking the tree to
+	//       compact it takes a significant amount of time and potentially
+	//       outweighs simply resetting the pool and performing a few more
+	//       rounds of MCTS.
+	//
+	//       another option would be to implement some form of cyclic
+	//       memory pool, to allow allocations from memory behind the
+	//       current root node, as well as in front of it, but this would
+	//       require tracking stale leaf nodes and reclaiming them (hence
+	//       transforms into a GC, and thus wastes the benefits of a
+	//       simple memory pool)
 }
 
 void

	hex hex.git
	git clone git://git.lenczewski.org/hex.git
	Log \| Files \| Refs