Better greed

2025-11-06 09:16:57 -08:00
parent eb084e1f07
commit 5bd6331ad9
2 changed files with 260 additions and 14 deletions
--- a/agents/greed-v1.rhai
+++ b/agents/greed-v1.rhai
@@ -1,5 +1,3 @@
-
-
 // Return a random valid action on the given board.
 // Used as a last resort.
 fn random_action(board) {
@@ -59,8 +57,8 @@ fn compute_influence(board) {
 	}


-   // Sort by increasing absolute score
-   influence.sort(|a, b| {
+	// Sort by increasing absolute score
+	influence.sort(|a, b| {
 		let a_abs = a[1].abs();
 		let b_abs = b[1].abs();

@@ -85,7 +83,6 @@ fn place_number(board, minimize) {
 		return random_action(board);
 	}

-
 	// Get the most influential position
 	let pos = influence[-1][0];
 	let val = influence[-1][1];
@@ -104,7 +101,7 @@ fn place_number(board, minimize) {
 		if val > 0 {
 			symbol = available_numbers[-1];
 		} else {
-		  	symbol = available_numbers[0];
+			symbol = available_numbers[0];
 		}
 	}

@@ -139,25 +136,20 @@ fn place_op(board, minimize) {
 	return ();
 }

-
 // Main step function (shared between min and max)
 fn greed_step(board, minimize) {
-
 	let action = place_op(board, minimize);
+
+	// We could not place an op, so place a number
 	if action == () {
 		action = place_number(board, minimize);
 	}

-	if board.can_play(action) {
-		return action;
-	}
-
 	// Prevent invalid moves, random fallback
+	if board.can_play(action) { return action; }
 	return random_action(board);
 }

-
-
 // Minimizer step
 fn step_min(board) {
 	greed_step(board, true)
--- a/agents/greed-v2.rhai
+++ b/agents/greed-v2.rhai
@@ -0,0 +1,254 @@
+// SECRET
+
+// Draw random (symbol, position) pairs until the board accepts one.
+// Last-resort fallback for when no better move is known.
+//
+// NOTE: the loop only exits once `board.can_play` returns true,
+// so this never returns if no drawn action is ever accepted.
+fn random_action(board) {
+	loop {
+		let candidate = Action(rand_symb(), rand_int(0, 10));
+
+		if board.can_play(candidate) {
+			return candidate;
+		}
+	}
+}
+
+
+/// Estimates how strongly each empty slot affects the board's value.
+///
+/// All empty slots are first filled with 5 to obtain a baseline value.
+/// Each empty slot is then raised to 6 in turn, and the resulting change
+/// in the board's value is recorded as that slot's influence.
+///
+/// Returns an array of `[idx, delta]` pairs, one per scored empty slot:
+/// - idx is the index of the slot
+/// - delta is the "influence" of the slot (perturbed value minus baseline)
+///
+/// The array is sorted by increasing `delta.abs()`, so the most
+/// influential slot comes last. It is empty when the baseline board
+/// cannot be evaluated. Slots whose perturbed board cannot be evaluated
+/// are left out.
+fn compute_influence(board) {
+	// Fill all empty slots with fives and compute starting value
+	let filled = board;
+	for i in filled.free_spots_idx() {
+		filled[i] = 5;
+	}
+
+	// Compute the value of the filled board
+	let base = filled.evaluate();
+
+	// Exit early if the board is invalid.
+	// This is usually caused by zero-division.
+	if base == () {
+		return [];
+	}
+
+	// Increase each slot's value by 1
+	// and record the effect on the expression's total value.
+	//
+	// `influence` is an array of [slot_idx, value]
+	let influence = [];
+	for i in 0..board.size() {
+		// Ignore slots that are not empty
+		if board[i] != "" {
+			continue;
+		}
+
+		// Don't assign directly to `filled`,
+		// we want to keep it full of fives.
+		// Assigning to `b` makes a copy of the board.
+		let b = filled;
+		b[i] = 6;
+
+		// The perturbed board may be invalid even though the baseline
+		// was fine. Subtracting from `()` would abort the script,
+		// so such slots are skipped instead of scored.
+		let bumped = b.evaluate();
+		if bumped == () {
+			continue;
+		}
+
+		influence.push([i, bumped - base]);
+	}
+
+	// Sort by increasing absolute score
+	influence.sort(|a, b| {
+		let a_abs = a[1].abs();
+		let b_abs = b[1].abs();
+
+		// Returns...
+		//  1 if positive (a_abs > b_abs),
+		// -1 if negative,
+		//  0 if equal
+		return sign(a_abs - b_abs);
+	});
+
+	return influence;
+}
+
+/// Chooses a number placement for the current player.
+///
+/// Targets the last entry returned by `compute_influence` (the slot with
+/// the largest absolute influence) and plays either the first or the last
+/// of the still-available digits, depending on the sign of that influence
+/// and on `minimize`.
+///
+/// Falls back to `random_action` when there is no influence data
+/// or no digit left to play.
+fn place_number(board, minimize) {
+	// Rhai's `retain` hands back the elements it removed, i.e. the digits
+	// for which `board.contains` did not return true.
+	let digits = [0,1,2,3,4,5,6,7,8,9];
+	let unused = digits.retain(|d| board.contains(d));
+
+	let ranking = compute_influence(board);
+
+	// Nothing to rank or nothing to play: give up and go random
+	if ranking.len() == 0 || unused.len() == 0 {
+		return random_action(board);
+	}
+
+	// Most influential slot sits at the end of the ranking
+	let top = ranking[-1];
+	let pos = top[0];
+	let val = top[1];
+
+	// We only ever play one end of the digit list.
+	// The minimizer takes the last digit unless the influence is positive;
+	// the maximizer takes the last digit only if the influence is positive.
+	let take_last = if minimize { !(val > 0) } else { val > 0 };
+
+	let symbol = if take_last { unused[-1] } else { unused[0] };
+
+	return Action(symbol, pos);
+}
+
+
+/// Computes the range of values reachable from `board` by placing
+/// the operators that are not on it yet.
+///
+/// Recursively tries every playable placement of each of "+", "-", "*", "/"
+/// that the board does not contain. Once no operator can be placed, the
+/// remaining free slots are filled with 5 and the board is evaluated.
+///
+/// Returns `[min, max]` over all explored end boards,
+/// or `()` if none of them could be evaluated.
+fn op_value(board) {
+	// Collect every playable placement of an operator not yet on the board
+	let actions = [];
+	for o in ["+", "-", "*", "/"] {
+		if board.contains(o) {
+			continue;
+		}
+
+		for p in 0..=10 {
+			let action = Action(o, p);
+			if board.can_play(action) {
+				actions.push(action);
+			}
+		}
+	}
+
+	// No other operators can be placed, return value of fives
+	if actions.is_empty() {
+		let filled = board;
+		for i in filled.free_spots_idx() {
+			filled[i] = 5;
+		}
+
+		let v = filled.evaluate();
+		if v == () {
+			return ();
+		}
+
+		return [v, v];
+	}
+
+	// Merge the [min, max] ranges of every child board.
+	// Both bounds stay `()` until the first valid value is seen.
+	let min = ();
+	let max = ();
+	for a in actions {
+		// Play on a copy so sibling branches start from the same board
+		let tmp = board;
+		tmp.play(a);
+
+		let vals = op_value(tmp);
+		if vals != () {
+			for v in vals {
+				if min == () || v < min {
+					min = v;
+				}
+
+				if max == () || v > max {
+					max = v;
+				}
+			}
+		}
+	}
+
+	// Every branch was invalid
+	if min == () || max == () {
+		return ();
+	}
+
+	return [min, max];
+}
+
+/// Picks an operator placement for the current player.
+///
+/// Every playable operator action is applied to a copy of the board and
+/// scored with `op_value`. The minimizer takes the action with the lowest
+/// reachable minimum, the maximizer the one with the highest reachable maximum.
+///
+/// Returns `()` when no playable operator action produced a value.
+fn place_op(board, minimize) {
+	// `retain` returns the removed elements: operators not on the board
+	let all_ops = ["+", "-", "*", "/"];
+	let unplaced = all_ops.retain(|x| board.contains(x));
+
+	// Shortcut for when none of the four operators has been placed yet.
+	// The opening move is hard-coded to skip the search below.
+	if unplaced.len() == 4 {
+		let opening = if minimize { Action("+", 3) } else { Action("/", 9) };
+
+		if board.can_play(opening) {
+			return opening;
+		}
+	}
+
+	// Score all possible operator actions as [action, [min, max]]
+	let scored = [];
+	for o in ["+", "-", "*", "/"] {
+		for p in 0..=10 {
+			let candidate = Action(o, p);
+			if !board.can_play(candidate) {
+				continue;
+			}
+
+			// Evaluate on a copy, the real board must stay untouched
+			let tmp = board;
+			tmp.play(candidate);
+
+			let span = op_value(tmp);
+			if span != () {
+				scored.push([candidate, span]);
+			}
+		}
+	}
+
+	if scored.is_empty() {
+		return ();
+	}
+
+	let best = ();
+	if minimize {
+		// Ascending by minimum score, take the first
+		scored.sort(|a, b| sign(a[1][0] - b[1][0]));
+		best = scored[0][0];
+	} else {
+		// Ascending by maximum score, take the last
+		scored.sort(|a, b| sign(a[1][1] - b[1][1]));
+		best = scored[-1][0];
+	}
+
+	debug(best);
+
+	return best;
+}
+
+
+// Shared step logic for both players.
+// Tries an operator first, then a number, and finally
+// falls back to a random action if the chosen move is not playable.
+fn greed_step(board, minimize) {
+	let choice = place_op(board, minimize);
+
+	// No operator could be placed, so place a number instead
+	if choice == () {
+		choice = place_number(board, minimize);
+	}
+
+	// Prevent invalid moves, random fallback
+	if !board.can_play(choice) {
+		return random_action(board);
+	}
+
+	return choice;
+}
+
+
+
+// Entry point for the minimizing player: runs `greed_step` with minimize = true.
+fn step_min(board) {
+	return greed_step(board, true);
+}
+
+// Entry point for the maximizing player: runs `greed_step` with minimize = false.
+fn step_max(board) {
+	return greed_step(board, false);
+}