Better greed

This commit is contained in:
2025-11-06 09:16:57 -08:00
parent eb084e1f07
commit 5bd6331ad9
2 changed files with 260 additions and 14 deletions

View File

@@ -1,5 +1,3 @@
// Return a random valid action on the given board. // Return a random valid action on the given board.
// Used as a last resort. // Used as a last resort.
fn random_action(board) { fn random_action(board) {
@@ -85,7 +83,6 @@ fn place_number(board, minimize) {
return random_action(board); return random_action(board);
} }
// Get the most influential position // Get the most influential position
let pos = influence[-1][0]; let pos = influence[-1][0];
let val = influence[-1][1]; let val = influence[-1][1];
@@ -139,25 +136,20 @@ fn place_op(board, minimize) {
return (); return ();
} }
// Main step function (shared between min and max) // Main step function (shared between min and max)
fn greed_step(board, minimize) { fn greed_step(board, minimize) {
let action = place_op(board, minimize); let action = place_op(board, minimize);
// We could not place an op, so place a number
if action == () { if action == () {
action = place_number(board, minimize); action = place_number(board, minimize);
} }
if board.can_play(action) {
return action;
}
// Prevent invalid moves, random fallback // Prevent invalid moves, random fallback
if board.can_play(action) { return action; }
return random_action(board); return random_action(board);
} }
// Minimizer step // Minimizer step
fn step_min(board) { fn step_min(board) {
greed_step(board, true) greed_step(board, true)

254
agents/greed-v2.rhai Normal file
View File

@@ -0,0 +1,254 @@
// SECRET
// Last-resort fallback: keep drawing random (symbol, position) pairs
// until one of them is accepted by `board.can_play`, then return it.
//
// NOTE(review): this never terminates if the board accepts no action at
// all -- presumably it is only called while a legal move exists; confirm.
fn random_action(board) {
    loop {
        // Symbol is drawn first, then the position, once per attempt.
        let candidate = Action(rand_symb(), rand_int(0, 10));
        if board.can_play(candidate) {
            return candidate;
        }
    }
}
/// Returns an array of `[idx, influence]` pairs, one per empty slot of the board,
/// sorted by increasing absolute influence (the most influential slot is last).
/// - idx is the index of the slot
/// - influence is how much the board's value changes when that slot holds a 6
///   instead of a 5, while every other empty slot holds a 5
///
/// Returns an empty array when the five-filled board cannot be evaluated.
/// Slots whose probe board cannot be evaluated are left out of the result.
fn compute_influence(board) {
    // Fill all empty slots with fives; this is the baseline board.
    // Assigning a board to a new variable makes a copy of it.
    let filled = board;
    for i in filled.free_spots_idx() {
        filled[i] = 5;
    }

    // Value of the baseline board.
    let base = filled.evaluate();

    // Exit early if the baseline is invalid.
    // This is usually caused by zero-division.
    if base == () {
        return [];
    }

    // Raise each empty slot from 5 to 6, one at a time,
    // and record the effect on the expression's total value.
    let influence = [];
    for i in 0..board.size() {
        // Only slots that are empty on the real board are probed.
        if board[i] != "" {
            continue;
        }

        // Work on a copy so `filled` stays full of fives.
        let probe = filled;
        probe[i] = 6;

        // `evaluate` signals an invalid board with `()`; subtracting from
        // that would raise a runtime error, so skip such slots.
        let probed = probe.evaluate();
        if probed == () {
            continue;
        }

        influence.push([i, probed - base]);
    }

    // Sort by increasing absolute influence.
    // The comparer yields 1 if |a| > |b|, -1 if |a| < |b|, 0 if equal.
    influence.sort(|a, b| sign(a[1].abs() - b[1].abs()));

    return influence;
}
// Build a number-placing action for the slot with the largest absolute
// influence (see `compute_influence`), using either the smallest or the
// largest digit that `board.contains` does not report.
//
// - board: the current board
// - minimize: true when playing as the minimizer, false as the maximizer
//
// Falls back to `random_action` when no influence data or no digit is left.
fn place_number(board, minimize) {
    // Digits for which `board.contains` is false, in ascending order
    // (presumably the digits that have not been played yet).
    let digits = [0,1,2,3,4,5,6,7,8,9];
    let unused = digits.filter(|d| !board.contains(d));

    let ranked = compute_influence(board);

    // Stupid edge cases, fall back to random
    if ranked.len() == 0 || unused.len() == 0 {
        return random_action(board);
    }

    // The influence array is sorted ascending, so the last entry
    // is the most influential slot.
    let top = ranked[-1];
    let pos = top[0];
    let val = top[1];

    // The smallest digit is wanted when the goal and the slot's direction
    // agree (minimizing a positively-influencing slot, or maximizing a
    // non-positively-influencing one); otherwise the largest digit is.
    let symbol = if minimize == (val > 0) {
        unused[0]
    } else {
        unused[-1]
    };

    return Action(symbol, pos);
}
// Recursively explore every way of placing the remaining operators on
// `board` and return the range of values that can result.
//
// Returns `[min, max]`, where each leaf of the search is the board with all
// of its free slots filled with fives, or `()` when no explored board could
// be evaluated.
fn op_value(board) {
    // Every operator placement that is legal right now.
    // Operators the board already contains are skipped.
    let actions = [];
    for o in ["+", "-", "*", "/"] {
        if board.contains(o) {
            continue;
        }

        for p in 0..=10 {
            let action = Action(o, p);
            if board.can_play(action) {
                actions.push(action);
            }
        }
    }

    // No other operators can be placed, return value of fives
    if actions.is_empty() {
        let filled = board;
        for i in filled.free_spots_idx() {
            filled[i] = 5;
        }

        let v = filled.evaluate();
        if v == () {
            return ();
        }
        return [v, v];
    }

    // Fold the [min, max] range of every child board into one range.
    // `()` marks "nothing seen yet"; the `||` short-circuits so a number
    // is never compared against `()`.
    let lowest = ();
    let highest = ();
    for a in actions {
        // Assigning the board makes a copy, so `board` itself is untouched.
        let tmp = board;
        tmp.play(a);

        let vals = op_value(tmp);
        if vals == () {
            continue;
        }

        if lowest == () || vals[0] < lowest {
            lowest = vals[0];
        }
        if highest == () || vals[1] > highest {
            highest = vals[1];
        }
    }

    if lowest == () || highest == () {
        return ();
    }

    return [lowest, highest];
}
// Choose an operator-placing action, or return `()` when no operator
// placement yields an evaluable board.
//
// Every legal operator placement is scored with `op_value`, which gives the
// `[min, max]` range of values reachable after it. The minimizer takes the
// placement with the lowest minimum, the maximizer the one with the highest
// maximum.
//
// - board: the current board
// - minimize: true when playing as the minimizer, false as the maximizer
fn place_op(board, minimize) {
    let all_ops = ["+", "-", "*", "/"];

    // Operators for which `board.contains` is false.
    let unused_ops = all_ops.filter(|op| !board.contains(op));

    // Performance optimization if board is empty.
    // This is the move we would pick, hard-coded.
    if unused_ops.len() == 4 {
        let opening = if minimize { Action("+", 3) } else { Action("/", 9) };
        if board.can_play(opening) {
            return opening;
        }
    }

    // All possible operator actions, stored as [action, [min, max]].
    let scored = [];
    for o in all_ops {
        for p in 0..=10 {
            let action = Action(o, p);
            if !board.can_play(action) {
                continue;
            }

            // Play the action on a copy and score the resulting board.
            let tmp = board;
            tmp.play(action);
            let bounds = op_value(tmp);
            if bounds != () {
                scored.push([action, bounds]);
            }
        }
    }

    if scored.is_empty() {
        return ();
    }

    let action = if minimize {
        // Sort by increasing minimum score, take the first entry
        scored.sort(|a, b| sign(a[1][0] - b[1][0]));
        scored[0][0]
    } else {
        // Sort by increasing maximum score, take the last entry
        scored.sort(|a, b| sign(a[1][1] - b[1][1]));
        scored[-1][0]
    };

    debug(action);
    return action;
}
// Shared step logic for both players.
//
// Tries `place_op` first; when that returns `()`, uses `place_number`
// instead. If the resulting action is rejected by `board.can_play`,
// a random valid action is returned instead.
//
// - board: the current board
// - minimize: true for the minimizer, false for the maximizer
fn greed_step(board, minimize) {
    let chosen = place_op(board, minimize);

    // `()` means no operator placement was found
    if chosen == () {
        chosen = place_number(board, minimize);
    }

    // Prevent invalid moves, random fallback
    if !board.can_play(chosen) {
        return random_action(board);
    }

    return chosen;
}
// Minimizer entry point: runs the shared greedy step with `minimize = true`.
fn step_min(board) {
    return greed_step(board, true);
}
// Maximizer entry point: runs the shared greedy step with `minimize = false`.
fn step_max(board) {
    return greed_step(board, false);
}