Better greed
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
|
||||
|
||||
// Return a random valid action on the given board.
|
||||
// Used as a last resort.
|
||||
fn random_action(board) {
|
||||
@@ -59,8 +57,8 @@ fn compute_influence(board) {
|
||||
}
|
||||
|
||||
|
||||
// Sort by increasing absolute score
|
||||
influence.sort(|a, b| {
|
||||
// Sort by increasing absolute score
|
||||
influence.sort(|a, b| {
|
||||
let a_abs = a[1].abs();
|
||||
let b_abs = b[1].abs();
|
||||
|
||||
@@ -85,7 +83,6 @@ fn place_number(board, minimize) {
|
||||
return random_action(board);
|
||||
}
|
||||
|
||||
|
||||
// Get the most influential position
|
||||
let pos = influence[-1][0];
|
||||
let val = influence[-1][1];
|
||||
@@ -104,7 +101,7 @@ fn place_number(board, minimize) {
|
||||
if val > 0 {
|
||||
symbol = available_numbers[-1];
|
||||
} else {
|
||||
symbol = available_numbers[0];
|
||||
symbol = available_numbers[0];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -139,25 +136,20 @@ fn place_op(board, minimize) {
|
||||
return ();
|
||||
}
|
||||
|
||||
|
||||
// Main step function (shared between min and max)
|
||||
fn greed_step(board, minimize) {
|
||||
|
||||
let action = place_op(board, minimize);
|
||||
|
||||
// We could not place an op, so place a number
|
||||
if action == () {
|
||||
action = place_number(board, minimize);
|
||||
}
|
||||
|
||||
if board.can_play(action) {
|
||||
return action;
|
||||
}
|
||||
|
||||
// Prevent invalid moves, random fallback
|
||||
if board.can_play(action) { return action; }
|
||||
return random_action(board);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Minimizer step
|
||||
fn step_min(board) {
|
||||
greed_step(board, true)
|
||||
254
agents/greed-v2.rhai
Normal file
254
agents/greed-v2.rhai
Normal file
@@ -0,0 +1,254 @@
|
||||
// SECRET
|
||||
|
||||
// Draw random (symbol, position) pairs until one of them is playable
// on the given board, and return that action.
// Used as a last resort when no smarter move is available.
//
// NOTE(review): this never returns if the board has no playable action at
// all; callers presumably only invoke it while the game is still running.
fn random_action(board) {
    loop {
        // Symbol is drawn first, then the position.
        let candidate = Action(rand_symb(), rand_int(0, 10));

        if board.can_play(candidate) {
            return candidate;
        }
    }
}
|
||||
|
||||
|
||||
/// Returns an array of `[idx, delta]` pairs for the empty slots in the board.
/// - idx is the index of the slot
/// - delta is the "influence" of the slot: how much the value of the
///   expression changes when this slot goes from 5 to 6 while every other
///   empty slot is held at 5.
///
/// The array is sorted by increasing absolute influence, so the most
/// influential slot comes last.
///
/// Returns an empty array if the board filled with fives cannot be evaluated.
/// Slots whose bumped board cannot be evaluated are left out of the result.
fn compute_influence(board) {
    // Fill all empty slots with fives to get a baseline expression.
    // Assigning to `filled` makes a copy, `board` itself is left untouched.
    let filled = board;
    for i in filled.free_spots_idx() {
        filled[i] = 5;
    }

    // Compute the value of the filled board
    let base = filled.evaluate();

    // Exit early if the board is invalid.
    // This is usually caused by zero-division.
    if base == () {
        return [];
    }

    // Increase each slot's value by 1
    // and record the effect on the expression's total value.
    //
    // `influence` is an array of [slot_idx, value]
    let influence = [];
    for i in 0..board.size() {
        // Ignore slots that are not empty
        if board[i] != "" {
            continue;
        }

        // Don't assign directly to `filled`,
        // we want to keep it full of fives.
        // Assigning to `bumped` makes a copy of the board.
        let bumped = filled;
        bumped[i] = 6;

        let value = bumped.evaluate();

        // Bumping a single slot can make the expression invalid even though
        // the baseline was fine (for instance by turning a divisor into zero).
        // Subtracting from `()` would abort the script, so skip this slot.
        if value == () {
            continue;
        }

        influence.push([i, value - base]);
    }

    // Sort by increasing absolute score.
    // The comparator returns
    //   1 if |a| > |b|,
    //  -1 if |a| < |b|,
    //   0 if they are equal.
    influence.sort(|a, b| sign(a[1].abs() - b[1].abs()));

    return influence;
}
|
||||
|
||||
// Choose a number placement: put the most extreme available number
// into the most influential empty slot.
//
// - board:    the current board
// - minimize: true when playing as the minimizer, false as the maximizer
//
// Returns an Action. Falls back to `random_action` when no influence data
// or no number is available.
fn place_number(board, minimize) {
    let numbers = [0,1,2,3,4,5,6,7,8,9];

    // `retain` keeps the elements matching the filter inside `numbers` and
    // returns the ones it removed. The removed ones are exactly those for
    // which `board.contains` is false, i.e. the numbers still free to play.
    let available_numbers = numbers.retain(|x| board.contains(x));

    let influence = compute_influence(board);

    // Stupid edge cases, fall back to random
    if influence.len() == 0 || available_numbers.len() == 0 {
        return random_action(board);
    }

    // `influence` is sorted by increasing absolute score,
    // so the most influential slot is the last entry.
    let strongest = influence[-1];
    let pos = strongest[0];
    let val = strongest[1];

    // The number to play is always either the largest or the smallest
    // one available: a slot that raises the total when it grows gets the
    // smallest number from the minimizer and the largest from the maximizer,
    // and the other way around for a slot that lowers the total.
    let symbol = if minimize {
        if val > 0 { available_numbers[0] } else { available_numbers[-1] }
    } else {
        if val > 0 { available_numbers[-1] } else { available_numbers[0] }
    };

    return Action(symbol, pos);
}
|
||||
|
||||
|
||||
// Recursively explore every way the remaining operators can be placed on
// `board` and report the range of values that can result.
//
// Returns `[min, max]`, where each leaf is valued by filling the empty slots
// with fives, or `()` when no explored placement yields a valid value.
fn op_value(board) {
    // NOTE(review): this prints on every recursive call; it looks like
    // leftover debug output - confirm before removing.
    print(board);

    // Every playable placement of an operator that is not on the board yet.
    let candidates = [];
    for op in ["+", "-", "*", "/"] {
        if !board.contains(op) {
            for pos in 0..=10 {
                let candidate = Action(op, pos);
                if board.can_play(candidate) {
                    candidates.push(candidate);
                }
            }
        }
    }

    // No other operators can be placed, return value of fives
    if candidates.is_empty() {
        let filled = board;
        for idx in filled.free_spots_idx() {
            filled[idx] = 5;
        }

        let value = filled.evaluate();
        if value == () {
            return ();
        }
        return [value, value];
    }

    // Track the smallest and largest value seen across all sub-trees.
    let lowest = ();
    let highest = ();
    for candidate in candidates {
        // Play on a copy so that sibling candidates start from the same board.
        let next = board;
        next.play(candidate);

        let range = op_value(next);
        if range == () {
            continue;
        }

        for v in range {
            if highest == () || lowest == () {
                // First valid value: it is both the minimum and the maximum.
                highest = v;
                lowest = v;
            } else if v > highest {
                highest = v;
            } else if v < lowest {
                lowest = v;
            }
        }
    }

    if lowest == () || highest == () {
        return ();
    }

    return [lowest, highest];
}
|
||||
|
||||
// Choose an operator placement.
//
// - board:    the current board
// - minimize: true when playing as the minimizer, false as the maximizer
//
// Returns the chosen Action, or `()` when no operator placement
// with a valid value range exists.
fn place_op(board, minimize) {
    let ops = ["+", "-", "*", "/"];

    // `retain` returns the elements it removed, i.e. the operators for which
    // `board.contains` is false: the ones that have not been placed yet.
    let available_ops = ops.retain(|x| board.contains(x));

    // Performance optimization when no operator is on the board yet.
    // This is the move we would pick, hard-coded.
    if available_ops.len() == 4 {
        let opening = if minimize { Action("+", 3) } else { Action("/", 9) };
        if board.can_play(opening) {
            return opening;
        }
    }

    // All possible operator actions, stored as [action, [min, max]]
    let scored = [];
    for op in ["+", "-", "*", "/"] {
        for pos in 0..=10 {
            let candidate = Action(op, pos);
            if !board.can_play(candidate) {
                continue;
            }

            // Evaluate the candidate on a copy of the board.
            let next = board;
            next.play(candidate);

            let range = op_value(next);
            if range != () {
                scored.push([candidate, range]);
            }
        }
    }

    if scored.is_empty() {
        return ();
    }

    let action = if minimize {
        // Sort by increasing minimum score, take the lowest
        scored.sort(|a, b| sign(a[1][0] - b[1][0]));
        scored[0][0]
    } else {
        // Sort by increasing maximum score, take the highest
        scored.sort(|a, b| sign(a[1][1] - b[1][1]));
        scored[-1][0]
    };

    debug(action);

    return action;
}
|
||||
|
||||
|
||||
// Main step function (shared between min and max).
//
// Tries to place an operator first and falls back to placing a number
// when `place_op` returns `()`. The chosen move is validated before it is
// returned; an unplayable move is replaced by a random valid one.
fn greed_step(board, minimize) {
    let chosen = place_op(board, minimize);

    // We could not place an op, so place a number
    if chosen == () {
        chosen = place_number(board, minimize);
    }

    // Prevent invalid moves, random fallback
    if !board.can_play(chosen) {
        return random_action(board);
    }

    return chosen;
}
|
||||
|
||||
|
||||
|
||||
// Minimizer step: runs the shared greedy step with `minimize` set to true.
fn step_min(board) {
    return greed_step(board, true);
}
|
||||
|
||||
// Maximizer step: runs the shared greedy step with `minimize` set to false.
fn step_max(board) {
    return greed_step(board, false);
}
|
||||
Reference in New Issue
Block a user