Renamed files, added random motion

2023-02-15 22:24:40 -08:00
parent d6452f5ed8
commit fd02c65b41
2 changed files with 399 additions and 203 deletions
--- a/celeste/celeste.py
+++ b/celeste/celeste.py
@@ -0,0 +1,230 @@
 import subprocess
 import time
 import threading
 import math
 class CelesteError(Exception):
 	"""Error raised by the Celeste wrapper, e.g. when status data is missing."""
 class Celeste:
 	"""
 	Wrapper around a PICO-8 process running the `hackcel.p8` cartridge.
 	Input is sent to the PICO-8 window with xdotool. Game state is read
 	from the process's stdout, one line per frame, as `key:value` entries
 	separated by `;`. `update_loop` parses each line, tracks progress
 	through `target_points`, then calls `on_get_state(self)`.
 	"""
 	action_space = [
 		"left",		# move left
 		"right",	# move right
 		"jump",		# jump
 		"dash-u",	# dash up
 		"dash-r",	# dash right
 		"dash-l",	# dash left
 		"dash-ru",	# dash right-up
 		"dash-lu"	# dash left-up
 	]
 	# Every key `act` may hold down; all are released before a new action.
 	_ALL_KEYS = ("x", "c", "Left", "Right", "Down", "Up")
 	# Keys held down for each entry of `action_space`, in press order.
 	_ACTION_KEYS = {
 		"left": ("Left",),
 		"right": ("Right",),
 		"jump": ("c",),
 		"dash-u": ("Up", "x"),
 		"dash-r": ("Right", "x"),
 		"dash-l": ("Left", "x"),
 		"dash-ru": ("Up", "Right", "x"),
 		"dash-lu": ("Up", "Left", "x"),
 	}
 	def __init__(self, on_get_state):
 		"""
 		Start PICO-8, find its window, then load and run the cartridge.
 		on_get_state: callable invoked as `on_get_state(self)` by
 		`update_loop` after every parsed frame.
 		Raises CelesteError if xdotool does not report exactly one
 		window of class `pico8`.
 		"""
 		self.on_get_state = on_get_state
 		# Start pico-8. stderr is merged into stdout, so anything the
 		# process prints ends up in the stream `update_loop` reads.
 		self.process = subprocess.Popen(
 			["bin/pico-8/linux/pico8"],
 			stdout=subprocess.PIPE,
 			stderr=subprocess.STDOUT
 		)
 		# Wait for window to open and get window id
 		time.sleep(2)
 		winid = subprocess.check_output([
 			"xdotool",
 			"search",
 			"--class",
 			"pico8"
 		]).decode("utf-8").strip().split("\n")
 		if len(winid) != 1:
 			raise CelesteError("Could not find unique PICO-8 window id")
 		self.winid = winid[0]
 		# Load cartridge
 		self.keystring("load hackcel.p8")
 		self.keypress("Enter")
 		self.keystring("run")
 		self.keypress("Enter", post = 1000)
 		# Initialize variables
 		self.internal_status = {}
 		self.dead = False
 		# Score system
 		self.frame_counter = 0
 		self.next_point = 0
 		self.dist = 0 # distance to next point
 		# Checkpoints as (x, y), indexed by stage then by `next_point`.
 		self.target_points = [
 			[	# Stage 1
 				(28, 88),		# Start pillar
 				(60, 80),		# Middle pillar
 				(105, 64),		# Right ledge
 				(25, 40),		# Left ledge
 				(110, 16),		# End ledge
 				(110, -2),		# Next stage
 			]
 		]
 	def act(self, action):
 		"""
 		Release every control key, then hold the keys for `action`.
 		action: an entry of `action_space`, or None to hold nothing.
 		Any other value also holds nothing.
 		"""
 		for key in self._ALL_KEYS:
 			self.keyup(key)
 		for key in self._ACTION_KEYS.get(action, ()):
 			self.keydown(key)
 	@property
 	def status(self):
 		"""
 		Typed view of the latest frame plus the checkpoint counters.
 		Raises CelesteError if a required key has not been received yet.
 		"""
 		raw = self.internal_status
 		try:
 			return {
 				# Stage number is looked up by room row (`ry`), then column (`rx`).
 				"stage": (
 					[
 						[0, 1, 2, 3, 4]
 					]
 					[int(raw["ry"])]
 					[int(raw["rx"])]
 				),
 				"xpos": int(raw["px"]),
 				"ypos": int(raw["py"]),
 				"xvel": float(raw["vx"]),
 				"yvel": float(raw["vy"]),
 				"deaths": int(raw["dc"]),
 				"dist": self.dist,
 				"next_point": self.next_point,
 				"frame_count": self.frame_counter
 			}
 		except KeyError as err:
 			raise CelesteError("Not enough data to get status.") from err
 	def _xdotool(self, command, *args):
 		"""Run `xdotool <command> --window <winid> <args...>` and wait for it."""
 		subprocess.run([
 			"xdotool",
 			command,
 			"--window", self.winid,
 			*args
 		])
 	def keypress(self, key: str, *, post = 200):
 		"""Press and release `key`, then sleep `post` milliseconds."""
 		self._xdotool("key", key)
 		time.sleep(post / 1000)
 	def keydown(self, key: str):
 		"""Hold `key` down until a matching `keyup`."""
 		self._xdotool("keydown", key)
 	def keyup(self, key: str):
 		"""Release `key`."""
 		self._xdotool("keyup", key)
 	def keystring(self, string, *, delay = 100, post = 200):
 		"""
 		Type `string` into the window, then sleep `post` milliseconds.
 		`delay` is passed to xdotool's `--delay` option.
 		"""
 		self._xdotool("type", "--delay", str(delay), string)
 		time.sleep(post / 1000)
 	def reset(self):
 		"""Clear per-run tracking and restart the cartridge with `run`."""
 		self.internal_status = {}
 		self.next_point = 0
 		self.frame_counter = 0
 		self.dist = 0
 		self.keypress("Escape")
 		self.keystring("run")
 		self.keypress("Enter", post = 1000)
 		self.dead = False
 	def flush_reader(self):
 		"""Discard output lines up to and including the next `!RESTART`."""
 		# The pipe yields bytes, so end-of-stream is b"" (not "").
 		for raw_line in iter(self.process.stdout.readline, b""):
 			if raw_line.decode("utf-8").strip() == "!RESTART":
 				break
 	def _current_target(self, stage):
 		"""Return the (x, y) checkpoint to reach next, or None if none is left."""
 		if stage >= len(self.target_points):
 			return None
 		points = self.target_points[stage]
 		if self.next_point >= len(points):
 			return None
 		return points[self.next_point]
 	def update_loop(self):
 		"""
 		Read frames until the output stream ends.
 		For each line: update `internal_status`, advance `next_point`
 		when the current checkpoint is reached, refresh `dist`, and call
 		`on_get_state(self)`. Raises CelesteError (from `status`) if a
 		frame arrives before all required keys have been seen.
 		"""
 		# One line => one frame.
 		# The pipe yields bytes, so end-of-stream is b"" (not "").
 		for raw_line in iter(self.process.stdout.readline, b""):
 			line = raw_line.decode("utf-8").strip()
 			# This should only occur at game start
 			if line == "!RESTART":
 				continue
 			self.frame_counter += 1
 			# Parse status string; entries without a `:` are ignored.
 			for entry in line.split(";"):
 				key, sep, val = entry.partition(":")
 				if not sep:
 					continue
 				self.internal_status[key] = val
 			# Update checkpoints
 			status = self.status
 			stage = status["stage"]
 			x = status["xpos"]
 			y = status["ypos"]
 			target = self._current_target(stage)
 			if (
 				target is not None
 				and y == target[1]
 				and math.hypot(x - target[0], y - target[1]) <= 4
 			):
 				self.next_point += 1
 				# Distance is measured to the new point from here on
 				target = self._current_target(stage)
 			if target is None:
 				# No checkpoint left for this stage: nothing to approach.
 				self.dist = 0.0
 			else:
 				self.dist = math.hypot(x - target[0], y - target[1])
 			# Call step callback
 			self.on_get_state(self)
--- a/celeste/main.py
+++ b/celeste/main.py
@@ -1,213 +1,179 @@
-import subprocess
+from collections import namedtuple
-import time
+from collections import deque
-import threading
+import random
 import math
-class Celeste:
+import torch
 	def __init__(self):
-		# Start process
+# Glue layer
-		self.process = subprocess.Popen(
+from celeste import Celeste
-			"bin/pico-8/linux/pico8",
+
-			shell=True,
+
-			stdout=subprocess.PIPE,
+compute_device = torch.device(
-			stderr=subprocess.STDOUT
+	"cuda" if torch.cuda.is_available() else "cpu"
 )
 		# Wait for window to open and get window id
 		time.sleep(2)
 		winid = subprocess.check_output([
 			"xdotool",
 			"search",
 			"--class",
 			"pico8"
 		]).decode("utf-8").strip().split("\n")
 		if len(winid) != 1:
 			raise Exception("Could not find unique PICO-8 window id")
 		self.winid = winid[0]
 		# Load cartridge
 		self.keystring("load hackcel.p8")
 		self.keypress("Enter")
 		self.keystring("run")
 		self.keypress("Enter", post = 1000)
 		# Initialize variables
 		self.internal_status = {}
 		self.dead = False
 		# -1: left
 		#  0: not moving
 		#  1: moving right
 		self.moving = 0
 		# Start state update thread
 		self.update_thread = threading.Thread(target = self._update_loop)
 		self.update_thread.start()
 	def act(self, action):
 		self.keyup("x")
 		self.keyup("c")
 		self.keyup("Down")
 		self.keyup("Up")
 		if self.moving != -1:
 			self.keyup("Left")
 		if self.moving != 1:
 			self.keyup("Right")
 		if action is None:
 			self.moving = 0
 			self.keyup("Left")
 			self.keyup("Right")
 		elif action == "left":
 			if self.moving != -1:
 				self.keydown("Left")
 			self.moving = -1
 		elif action == "right":
 			if self.moving != 1:
 				self.keydown("Right")
 			self.moving = 1
-	@property
+# Epsilon-greedy parameters
-	def status(self):
+#
-		return {
+# Original docs:
-			"stage": (
+# EPS_START is the starting value of epsilon
-				[
+# EPS_END is the final value of epsilon
-					[0, 1, 2, 3, 4]
+# EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
-				]
+EPS_START = 0.9
-				[int(self.internal_status["ry"])]
+EPS_END = 0.05
-				[int(self.internal_status["rx"])]
+EPS_DECAY = 1000
 			),
-			"xpos": int(self.internal_status["px"]),
+
-			"ypos": int(self.internal_status["py"]),
+# Outline our network
-			"xvel": float(self.internal_status["vx"]),
+class DQN(torch.nn.Module):
-			"yvel": float(self.internal_status["vy"])
+	def __init__(self, n_observations: int, n_actions: int):
-		}
+		super(DQN, self).__init__()
 		self.layer1 = torch.nn.Linear(n_observations, 128)
 		self.layer2 = torch.nn.Linear(128, 128)
 		self.layer3 = torch.nn.Linear(128, n_actions)
 	# Can be called with one input, or with a batch.
 	#
 	# Returns tensor(
 	#	[ Q(s, left), Q(s, right) ], ...
 	# )
 	#
 	# Recall that Q(s, a) is the (expected) return of taking
 	# action `a` at state `s`
 	def forward(self, x):
 		x = torch.nn.functional.relu(self.layer1(x))
 		x = torch.nn.functional.relu(self.layer2(x))
 		return self.layer3(x)
-	# Possible actions
+# Celeste env properties
-	@property
+n_observations = 4
-	def action_space(self):
+n_actions = len(Celeste.action_space)
 		return [
 			"left", # move left
 			"rght", # move right
 			"jump", # jump
-			"dshn", # dash north
+policy_net = DQN(
-			"dshe", # dash east
+	n_observations,
-			"dshw", # dash west
+	n_actions
-			"dsne", # dash north-east
+).to(compute_device)
 			"dsnw"  # dash north-west
 		]
-	def keypress(self, key: str, *, post = 200):
+def select_action(state, steps_done):
-		subprocess.run([
+	"""
-			"xdotool",
+	Select an action using an epsilon-greedy policy.
 			"key",
 			"--window", self.winid,
 			key
 		])
 		time.sleep(post / 1000)
-	def keydown(self, key: str):
+	Sometimes use our model, sometimes sample one uniformly.
 		subprocess.run([
 			"xdotool",
 			"keydown",
 			"--window", self.winid,
 			key
 		])
-	def keyup(self, key: str):
+	P(random action) starts at EPS_START and decays to EPS_END.
-		subprocess.run([
+	Decay rate is controlled by EPS_DECAY.
-			"xdotool",
+	"""
 			"keyup",
 			"--window", self.winid,
 			key
 		])
-	def keystring(self, string, *, delay = 100, post = 200):
+	# Random number 0 <= x < 1
-		subprocess.run([
+	sample = random.random()
 			"xdotool",
 			"type",
 			"--window", self.winid,
 			"--delay", str(delay),
 			string
 		])
 		time.sleep(post / 1000)
-	def reset(self):
+	# Calculate random step threshold
-		self.internal_status = {}
+	eps_threshold = (
-		if not self.dead:
+		EPS_END + (EPS_START - EPS_END) *
-			self.keypress("Escape")
+		math.exp(
-		self.keystring("run")
+			-1.0 * steps_done /
-		self.keypress("Enter", post = 1000)
+			EPS_DECAY
-		self.dead = False
+		)
 	def _update_loop(self):
 		# Poll process for new output until finished
 		for line in iter(self.process.stdout.readline, ""):
 			l = line.decode("utf-8")[:-1]
 			if l in ["!RESTART"]:
 				continue
 			for entry in l.split(";"):
 				key, val = entry.split(":")
 				self.internal_status[key] = val
 			# Exit game on death
 			if "dc" in self.internal_status and self.internal_status["dc"] != "0":
 				self.keypress("Escape")
 				self.dead = True
 				# Flush stream reader
 				for k in iter(self.process.stdout.readline, ""):
 					k = k.decode("utf-8")[:-1]
 					if k == "!RESTART":
 						break
 # Stage 1:
 next_point = 0
 target_points = [
 	(28, 88),		# Start pillar
 	(60, 80),		# Middle pillar
 	(105, 64),		# Right ledge
 	(25, 40),		# Left ledge
 	(110, 16),		# End ledge
 	(110, -2),		# Next stage
 ]
 # += 5
 c = Celeste()
 while True:
 	if c.dead:
 		print("\n\nDead, resetting...")
 		c.reset()
 	tx, ty = target_points[next_point]
 	x = c.status["xpos"]
 	y = c.status["ypos"]
 	dist = math.sqrt(
 		(x-tx)*(x-tx) +
 		(y-ty)*(y-ty)
 	)
-	if dist <= 4 and y == ty:
+	if sample > eps_threshold:
-		next_point += 1
+		with torch.no_grad():
 			# t.max(1) will return the largest column value of each row.
 			# second column on max result is index of where max element was
 			# found, so we pick action with the larger expected reward.
 			return policy_net(state).max(1)[1].view(1, 1).item()
-	print(f"Target point: {next_point:02}, Dist: {dist:0.3}")
+	else:
 		return random.randint( 0, n_actions-1 )
 	#print()
 	#print(c.status)
 # Status dict from the previous frame; None until `on_state` has run once.
 last_state = None
 # Record of one step: (state, action, next_state, reward).
 # NOTE(review): not used in the visible code; presumably intended for a
 # replay buffer -- confirm.
 Transition = namedtuple(
 	"Transition",
 	(
 		"state",
 		"action",
 		"next_state",
 		"reward"
 	)
 )
 def on_state(celeste):
 	"""
 	Per-frame callback: score the previous step and send the next action.
 	celeste: the Celeste instance whose `status` was just updated.
 	The first call only records the status in `last_state`. Later calls
 	compute a reward from checkpoint progress, pick an action with
 	`select_action` and apply it through `celeste.act`.
 	"""
 	global last_state
 	current = celeste.status
 	# Nothing to compare against on the very first frame.
 	if last_state is None:
 		last_state = current
 		return
 	previous = last_state
 	# Same checkpoint: reward is how much closer we got.
 	# New checkpoint: flat bonus.
 	if previous["next_point"] == current["next_point"]:
 		reward = previous["dist"] - current["dist"]
 	else:
 		reward = 10
 	# True once the death counter is non-zero.
 	dead = current["deaths"] != 0
 	# NOTE(review): `reward`, `dead` and `tf_last` are computed but not
 	# used yet in the visible code.
 	def as_tensor(snapshot):
 		# Status fields fed to the network, as a 1 x 4 float tensor.
 		return torch.tensor(
 			[snapshot[field] for field in ("xpos", "ypos", "xvel", "yvel")],
 			dtype = torch.float32,
 			device = compute_device
 		).unsqueeze(0)
 	tf_state = as_tensor(current)
 	tf_last = as_tensor(previous)
 	# The frame counter doubles as the step count for epsilon decay.
 	action_index = select_action(
 		tf_state,
 		current["frame_count"]
 	)
 	# Turn number into action string
 	celeste.act(Celeste.action_space[action_index])
 	# Update previous state
 	last_state = current
 # Create the game wrapper with `on_state` as the per-frame callback, then
 # process frames on this thread: `update_loop` reads one line of PICO-8
 # output per frame and calls `on_state` after parsing it.
 c = Celeste(
 	on_state
 )
 c.update_loop()