diff --git a/agents/greed.rhai b/agents/greed-v1.rhai similarity index 94% rename from agents/greed.rhai rename to agents/greed-v1.rhai index 99e33ec..aa0b5eb 100644 --- a/agents/greed.rhai +++ b/agents/greed-v1.rhai @@ -1,5 +1,3 @@ - - // Return a random valid action on the given board. // Used as a last resort. fn random_action(board) { @@ -59,8 +57,8 @@ fn compute_influence(board) { } - // Sort by increasing absolute score - influence.sort(|a, b| { + // Sort by increasing absolute score + influence.sort(|a, b| { let a_abs = a[1].abs(); let b_abs = b[1].abs(); @@ -85,7 +83,6 @@ fn place_number(board, minimize) { return random_action(board); } - // Get the most influential position let pos = influence[-1][0]; let val = influence[-1][1]; @@ -104,7 +101,7 @@ fn place_number(board, minimize) { if val > 0 { symbol = available_numbers[-1]; } else { - symbol = available_numbers[0]; + symbol = available_numbers[0]; } } @@ -139,25 +136,20 @@ fn place_op(board, minimize) { return (); } - // Main step function (shared between min and max) fn greed_step(board, minimize) { - let action = place_op(board, minimize); + + // We could not place an op, so place a number if action == () { action = place_number(board, minimize); } - if board.can_play(action) { - return action; - } - // Prevent invalid moves, random fallback + if board.can_play(action) { return action; } return random_action(board); } - - // Minimizer step fn step_min(board) { greed_step(board, true) diff --git a/agents/greed-v2.rhai b/agents/greed-v2.rhai new file mode 100644 index 0000000..9537777 --- /dev/null +++ b/agents/greed-v2.rhai @@ -0,0 +1,254 @@ +// SECRET + +// Return a random valid action on the given board. +// Used as a last resort. 
+fn random_action(board) {
+    let symb = rand_symb();
+    let pos = rand_int(0, 10);
+    let action = Action(symb, pos);
+
+    // Keep drawing until the board accepts the action.
+    // NOTE(review): this never terminates if no action is playable;
+    // callers are assumed to only ask for a move when one exists.
+    while !board.can_play(action) {
+        action = Action(rand_symb(), rand_int(0, 10));
+    }
+
+    return action;
+}
+
+
+/// Estimates how strongly each empty slot affects the board's value.
+///
+/// Returns an array of `[idx, delta]` pairs, one per empty slot:
+/// - `idx` is the index of the slot
+/// - `delta` is the change in the board's value when that slot holds a 6
+///   instead of a 5, while every other empty slot holds a 5
+///
+/// The array is sorted by increasing `abs(delta)`, so the most influential
+/// slot comes last. Returns `[]` if the all-fives board cannot be evaluated.
+fn compute_influence(board) {
+    // Fill all empty slots with fives and compute the starting value.
+    // Assigning a board makes a copy, so `board` itself is left untouched.
+    let filled = board;
+    for i in filled.free_spots_idx() {
+        filled[i] = 5;
+    }
+
+    // Value of the all-fives board
+    let base = filled.evaluate();
+
+    // Exit early if the board is invalid.
+    // This is usually caused by zero-division.
+    if base == () {
+        return [];
+    }
+
+    // Increase each empty slot's value by 1
+    // and record the effect on the expression's total value.
+    let influence = [];
+    for i in 0..board.size() {
+        // Ignore slots that are not empty
+        if board[i] != "" {
+            continue;
+        }
+
+        // Don't assign directly to `filled`,
+        // we want to keep it full of fives.
+        let bumped = filled;
+        bumped[i] = 6;
+
+        // `evaluate` signals an invalid board with `()`, which cannot be
+        // subtracted from; such a slot gets no influence entry.
+        let value = bumped.evaluate();
+        if value == () {
+            continue;
+        }
+
+        influence.push([i, value - base]);
+    }
+
+    // Sort by increasing absolute score. The comparator returns
+    //  1 if abs(a) > abs(b),
+    // -1 if abs(a) < abs(b),
+    //  0 if they are equal.
+    influence.sort(|a, b| sign(a[1].abs() - b[1].abs()));
+
+    return influence;
+}
+
+/// Picks a number placement: the most influential empty slot, filled with
+/// either the largest or the smallest number that is still available.
+///
+/// `minimize` is true when playing for the lowest board value.
+/// Falls back to `random_action` when there is nothing to choose from.
+fn place_number(board, minimize) {
+    // `retain` keeps the elements matching the predicate inside `numbers`
+    // and returns the removed ones, i.e. the numbers NOT yet on the board.
+    let numbers = [0,1,2,3,4,5,6,7,8,9];
+    let available_numbers = numbers.retain(|x| board.contains(x));
+
+    let influence = compute_influence(board);
+
+    // Stupid edge cases, fall back to random
+    if influence.len() == 0 || available_numbers.len() == 0 {
+        return random_action(board);
+    }
+
+    // The most influential position is last (see the sort order above)
+    let best = influence[-1];
+    let pos = best[0];
+    let val = best[1];
+
+    // `available_numbers` is in ascending order, so [0] is the smallest
+    // and [-1] the largest. A slot whose value rises with its content
+    // (val > 0) wants the largest number when maximizing and the smallest
+    // when minimizing; a slot with val <= 0 wants the opposite.
+    let rises = val > 0;
+    let want_largest = rises != minimize;
+
+    let symbol = if want_largest {
+        available_numbers[-1]
+    } else {
+        available_numbers[0]
+    };
+
+    return Action(symbol, pos);
+}
+
+
+/// Recursively tries every legal placement of the operators that are not
+/// on `board` yet. Boards on which no further operator can be placed are
+/// scored by filling their empty slots with fives and evaluating them.
+///
+/// Returns `[min, max]`, the smallest and largest score found,
+/// or `()` if none of the explored boards could be evaluated.
+fn op_value(board) {
+    // Every operator placement that is still playable on this board
+    let actions = [];
+    for o in ["+", "-", "*", "/"] {
+        if board.contains(o) {
+            continue;
+        }
+
+        for p in 0..=10 {
+            let action = Action(o, p);
+            if board.can_play(action) {
+                actions.push(action);
+            }
+        }
+    }
+
+    // No other operators can be placed, return value of fives
+    if actions.is_empty() {
+        let filled = board;
+        for i in filled.free_spots_idx() {
+            filled[i] = 5;
+        }
+
+        let v = filled.evaluate();
+        if v == () {
+            return ();
+        }
+        return [v, v];
+    }
+
+    // Fold the [min, max] ranges of all child boards into one range
+    let max = ();
+    let min = ();
+    for a in actions {
+        let tmp = board;
+        tmp.play(a);
+
+        let vals = op_value(tmp);
+        if vals == () {
+            continue;
+        }
+
+        for v in vals {
+            // First valid value seen: it is both the minimum and the maximum
+            if max == () || min == () {
+                max = v;
+                min = v;
+            }
+
+            if v > max {
+                max = v;
+            } else if v < min {
+                min = v;
+            }
+        }
+    }
+
+    if min == () || max == () {
+        return ();
+    }
+
+    return [min, max];
+}
+
+/// Picks an operator placement, or returns `()` if no operator placement
+/// yields a board that `op_value` can score.
+///
+/// When minimizing, the placement with the lowest reachable minimum is
+/// chosen; when maximizing, the one with the highest reachable maximum.
+fn place_op(board, minimize) {
+    // `retain` returns the removed elements,
+    // i.e. the operators NOT yet on the board.
+    let ops = ["+", "-", "*", "/"];
+    let available_ops = ops.retain(|x| board.contains(x));
+
+    // Performance optimization if no operator has been placed yet.
+    // This is the move we would pick, hard-coded.
+    if available_ops.len() == 4 {
+        let act = if minimize { Action("+", 3) } else { Action("/", 9) };
+        if board.can_play(act) {
+            return act;
+        }
+    }
+
+    // All possible operator actions, as [action, [min, max]] pairs
+    let actions = [];
+    for o in ["+", "-", "*", "/"] {
+        for p in 0..=10 {
+            let action = Action(o, p);
+            if !board.can_play(action) {
+                continue;
+            }
+
+            let tmp = board;
+            tmp.play(action);
+
+            let v = op_value(tmp);
+            if v != () {
+                actions.push([action, v]);
+            }
+        }
+    }
+
+    if actions.is_empty() {
+        return ();
+    }
+
+    let action = ();
+    if minimize {
+        // Sort by increasing minimum score, take the lowest
+        actions.sort(|a, b| sign(a[1][0] - b[1][0]));
+        action = actions[0][0];
+    } else {
+        // Sort by increasing maximum score, take the highest
+        actions.sort(|a, b| sign(a[1][1] - b[1][1]));
+        action = actions[-1][0];
+    }
+
+    debug(action);
+
+    return action;
+}
+
+
+// Main step function (shared between min and max)
+fn greed_step(board, minimize) {
+    // Operators first; `place_op` returns () when it has nothing to offer
+    let action = place_op(board, minimize);
+
+    // We could not place an op, so place a number
+    if action == () {
+        action = place_number(board, minimize);
+    }
+
+    if board.can_play(action) {
+        return action;
+    }
+
+    // Prevent invalid moves, random fallback
+    return random_action(board);
+}
+
+
+
+// Minimizer step
+fn step_min(board) {
+    greed_step(board, true)
+}
+
+// Maximizer step
+fn step_max(board) {
+    greed_step(board, false)
+}