Better greed

2025-11-06 09:16:57 -08:00
parent eb084e1f07
commit 5bd6331ad9
2 changed files with 260 additions and 14 deletions
--- a/agents/greed-v1.rhai
+++ b/agents/greed-v1.rhai
@@ -1,5 +1,3 @@
 // Return a random valid action on the given board.
 // Used as a last resort.
 fn random_action(board) {
@@ -85,7 +83,6 @@ fn place_number(board, minimize) {
 		return random_action(board);
 	}
 	// Get the most influential position
 	let pos = influence[-1][0];
 	let val = influence[-1][1];
@@ -139,25 +136,20 @@ fn place_op(board, minimize) {
 	return ();
 }
 // Main step function (shared between min and max)
 //
 // - board: the board to play on
 // - minimize: true for the minimizer, false for the maximizer
 //
 // Returns the action to play: an operator if `place_op` finds one,
 // otherwise a number, otherwise a random valid action.
 fn greed_step(board, minimize) {
 	let action = place_op(board, minimize);
 	// We could not place an op, so place a number
 	if action == () {
 		action = place_number(board, minimize);
 	}
 	// Prevent invalid moves, random fallback.
 	// One check is enough: repeating it right after it failed can never succeed.
 	if board.can_play(action) {
 		return action;
 	}
 	return random_action(board);
 }
 // Minimizer step
 fn step_min(board) {
 	greed_step(board, true)
--- a/agents/greed-v2.rhai
+++ b/agents/greed-v2.rhai
@@ -0,0 +1,254 @@
 // SECRET
 // Return a random valid action on the given board.
 // Used as a last resort.
 //
 // Draws a random symbol and a random position over and over until
 // `board.can_play` accepts the resulting action. If no draw is ever
 // accepted, this function does not return.
 fn random_action(board) {
 	loop {
 		let symb = rand_symb();
 		let pos = rand_int(0, 10);
 		let candidate = Action(symb, pos);
 		if board.can_play(candidate) {
 			return candidate;
 		}
 	}
 }
 /// Measures how much each empty slot affects the value of the board.
 ///
 /// Every empty slot is first filled with a 5 to get a baseline value.
 /// Then, one empty slot at a time, that 5 is raised to a 6 and the
 /// resulting change in the board's value is recorded.
 ///
 /// Returns an array of `[idx, delta]` pairs, sorted by increasing
 /// absolute `delta` (so the most influential slot is the last element):
 /// - idx is the index of the slot
 /// - delta is the "influence" of the slot (value with a 6, minus baseline)
 ///
 /// Returns `[]` if the baseline board cannot be evaluated.
 /// Slots whose probe board cannot be evaluated are left out of the result.
 fn compute_influence(board) {
 	// Fill all empty slots with fives and compute starting value.
 	// Assigning makes a copy, so `board` itself is left untouched.
 	let filled = board;
 	for i in filled.free_spots_idx() {
 		filled[i] = 5;
 	}
 	// Compute the value of the filled board
 	let base = filled.evaluate();
 	// Exit early if the board is invalid.
 	// This is usually caused by zero-division.
 	if base == () {
 		return [];
 	}
 	// `influence` is an array of [slot_idx, delta]
 	let influence = [];
 	for i in 0..board.size() {
 		// Ignore slots that are not empty
 		if board[i] != "" {
 			continue;
 		}
 		// Don't assign directly to `filled`,
 		// we want to keep it full of fives.
 		let probe = filled;
 		probe[i] = 6;
 		let value = probe.evaluate();
 		// `evaluate` yields `()` for an invalid board (see the baseline
 		// check above). Subtracting from `()` would be a runtime error,
 		// so such a slot is skipped instead.
 		if value == () {
 			continue;
 		}
 		influence.push([i, value - base]);
 	}
 	// Sort by increasing absolute score.
 	// The comparator returns 1, -1 or 0 via `sign`.
 	influence.sort(|a, b| sign(a[1].abs() - b[1].abs()));
 	return influence;
 }
 // Choose a number to place in the most influential empty slot.
 //
 // - board: the board to play on
 // - minimize: true for the minimizer, false for the maximizer
 //
 // Returns an Action placing either the first or the last unused digit.
 // Falls back to `random_action` when `compute_influence` returns nothing
 // or when no digit is left.
 fn place_number(board, minimize) {
 	let digits = [0,1,2,3,4,5,6,7,8,9];
 	// Rhai's `retain` keeps the matching items in `digits` and RETURNS the
 	// removed ones, so `unused` holds the digits `board.contains` rejected.
 	let unused = digits.retain(|d| board.contains(d));
 	let ranking = compute_influence(board);
 	// Stupid edge cases, fall back to random
 	if unused.len() == 0 || ranking.len() == 0 {
 		return random_action(board);
 	}
 	// `ranking` is sorted by increasing influence: the last entry wins
 	let strongest = ranking[-1];
 	let slot = strongest[0];
 	let delta = strongest[1];
 	// The minimizer takes the first digit when raising the slot raises
 	// the total (delta > 0) and the last digit otherwise;
 	// the maximizer does the exact opposite.
 	let pick_first = if minimize { delta > 0 } else { !(delta > 0) };
 	let digit = if pick_first { unused[0] } else { unused[-1] };
 	return Action(digit, slot);
 }
 // Compute the range of values reachable from `board` by playing
 // operators, trying every playable operator action recursively.
 //
 // When no operator action is playable, every free slot is filled with
 // a five and the board is evaluated as-is.
 //
 // Returns `[min, max]`, or `()` if no reachable board could be evaluated.
 //
 // Note: this recursion visits many boards per move, so it must not
 // print anything (the stray debug `print(board)` was removed).
 fn op_value(board) {
 	// Collect every operator action that can be played right now
 	let actions = [];
 	for o in ["+", "-", "*", "/"] {
 		// Operators the board already contains are not tried
 		if board.contains(o) {
 			continue;
 		}
 		for p in 0..=10 {
 			let action = Action(o, p);
 			if board.can_play(action) {
 				actions.push(action);
 			}
 		}
 	}
 	// No other operators can be placed, return value of fives
 	if actions.is_empty() {
 		let filled = board;
 		for i in filled.free_spots_idx() {
 			filled[i] = 5;
 		}
 		let v = filled.evaluate();
 		if v == () {
 			return ();
 		}
 		return [v, v];
 	}
 	// Merge the ranges of all child boards
 	let max = ();
 	let min = ();
 	for a in actions {
 		// Play on a copy so `board` stays unchanged for the next action
 		let tmp = board;
 		tmp.play(a);
 		let vals = op_value(tmp);
 		// Children that could not be evaluated contribute nothing
 		if vals == () {
 			continue;
 		}
 		for v in vals {
 			if min == () || v < min {
 				min = v;
 			}
 			if max == () || v > max {
 				max = v;
 			}
 		}
 	}
 	// Still unset: none of the children could be evaluated
 	if min == () || max == () {
 		return ();
 	}
 	return [min, max];
 }
 // Choose an operator action to play.
 //
 // - board: the board to play on
 // - minimize: true for the minimizer, false for the maximizer
 //
 // Every playable operator action is scored with `op_value`, which gives
 // a `[min, max]` range. The minimizer takes the action with the lowest
 // `min`, the maximizer the one with the highest `max`.
 //
 // Returns the chosen Action, or `()` if no operator action could be
 // played and scored.
 fn place_op(board, minimize) {
 	let ops = ["+", "-", "*", "/"];
 	// Rhai's `retain` returns the removed items, i.e. the operators
 	// that `board.contains` rejected.
 	let available_ops = ops.retain(|x| board.contains(x));
 	// Performance optimization if no operator was rejected above.
 	// This is the move we would pick, hard-coded.
 	if available_ops.len() == 4 {
 		let opening = if minimize { Action("+", 3) } else { Action("/", 9) };
 		if board.can_play(opening) {
 			return opening;
 		}
 	}
 	// All possible operator actions, as [action, [min, max]]
 	let scored = [];
 	for op in ["+", "-", "*", "/"] {
 		for slot in 0..=10 {
 			let candidate = Action(op, slot);
 			if !board.can_play(candidate) {
 				continue;
 			}
 			// Play on a copy and score the resulting board
 			let next = board;
 			next.play(candidate);
 			let range = op_value(next);
 			if range != () {
 				scored.push([candidate, range]);
 			}
 		}
 	}
 	if scored.is_empty() {
 		return ();
 	}
 	// Minimizer sorts on range[0] (min), maximizer on range[1] (max)
 	let key = if minimize { 0 } else { 1 };
 	scored.sort(|a, b| sign(a[1][key] - b[1][key]));
 	// After an increasing sort: lowest is first, highest is last
 	let chosen = if minimize { scored[0][0] } else { scored[-1][0] };
 	debug(chosen);
 	return chosen;
 }
 // Main step function (shared between min and max)
 //
 // Tries an operator first (`place_op`), then a number (`place_number`).
 // If the result cannot be played, a random valid action is used instead.
 fn greed_step(board, minimize) {
 	let choice = place_op(board, minimize);
 	if choice == () {
 		// No operator could be placed, so place a number
 		choice = place_number(board, minimize);
 	}
 	// Prevent invalid moves, random fallback
 	if !board.can_play(choice) {
 		return random_action(board);
 	}
 	choice
 }
 // Minimizer step: runs `greed_step` with `minimize` set to true.
 fn step_min(board) {
 	return greed_step(board, true);
 }
 // Maximizer step: runs `greed_step` with `minimize` set to false.
 fn step_max(board) {
 	return greed_step(board, false);
 }