Fixed bugs
parent ce02009d64
commit 1216378c49
@@ -49,6 +49,8 @@ class Celeste:
         # Initialize variables
         self.internal_status = {}
+        self.before_out = None
+        self.last_point_frame = 0

         # Score system
         self.frame_counter = 0
@@ -166,6 +168,9 @@ class Celeste:
         self.internal_status = {}
         self.next_point = 0
         self.frame_counter = 0
+        self.before_out = None
+        self.resetting = True
+        self.last_point_frame = 0

         self.keypress("Escape")
         self.keystring("run")
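This hunk and the one in __init__ above add the same bookkeeping attributes, so per-episode state lives on the instance instead of in locals: before_out holds the not-yet-consumed output of the step callback, resetting marks that a restart is in flight, and last_point_frame records the frame at which the last checkpoint was reached. A minimal sketch of the assumed reset path, with the attribute roles spelled out in comments (the method name and overall shape are assumptions; the attribute names come from the diff):

def reset(self):
    # Clear per-episode state so values from the previous run
    # cannot leak into the next one.
    self.internal_status = {}
    self.next_point = 0
    self.frame_counter = 0
    self.before_out = None       # output of the "before" callback, not yet consumed
    self.resetting = True        # suppress callbacks until a fresh frame arrives
    self.last_point_frame = 0    # frame at which the last checkpoint was reached

    # Restart the game: Escape to the menu, then type "run".
    self.keypress("Escape")
    self.keystring("run")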
@@ -185,13 +190,12 @@ class Celeste:
         # Get state, call callback, wait for state
         # One line => one frame.

-        before_out = None
-
         it = iter(self.process.stdout.readline, "")


         for line in it:
             l = line.decode("utf-8")[:-1].strip()
+            self.resetting = False

             # This should only occur at game start
             if l in ["!RESTART"]:
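The loop above reads one status line per frame from the game process using the two-argument form of iter(): iter(callable, sentinel) keeps calling the callable until it returns the sentinel. A self-contained illustration of that pattern, using cat as a stand-in child process (so it assumes a Unix-like system); the sentinel is b"" here on the assumption that the pipe is binary, which the decode("utf-8") above suggests:

import subprocess

# Spawn a child process whose stdout we read line by line until EOF.
proc = subprocess.Popen(["cat"], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
proc.stdin.write(b"frame 1\nframe 2\n")
proc.stdin.close()

# readline() returns b"" at EOF, so the loop ends when the child closes stdout.
for raw in iter(proc.stdout.readline, b""):
    line = raw.decode("utf-8").strip()
    print(line)   # -> "frame 1", then "frame 2"

proc.wait()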
@@ -206,7 +210,7 @@ class Celeste:
                 key, val = entry.split(":")
                 self.internal_status[key] = val


             # Update checkpoints
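The two lines above split each status field into internal_status. A tiny stand-alone version of that parsing on a made-up status line; only the key:value split appears in the diff, so the comma separator and the field names (other than dc) are assumptions:

def parse_status(line: str) -> dict:
    # Each entry looks like "key:value"; keep values as strings,
    # mirroring how internal_status is used elsewhere in the diff.
    status = {}
    for entry in line.split(","):
        key, val = entry.split(":")
        status[key] = val
    return status

print(parse_status("px:12,py:45,dc:0"))
# {'px': '12', 'py': '45', 'dc': '0'}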
@@ -221,6 +225,7 @@ class Celeste:
             if dist <= 4 and y == ty:
                 print(f"Got point {self.next_point}")
                 self.next_point += 1
+                self.last_point_frame = self.frame_counter

                 # Recalculate distance to new point
                 tx, ty = self.target_points[self.status["stage"]][self.next_point]
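Recording last_point_frame here is what makes the new timeout in the next hunk work: the stall counter restarts every time a checkpoint is reached. For reference, a stripped-down version of the reach test itself, with hypothetical coordinates (the 4-unit window and the exact-row check come from the diff; everything else is illustrative):

import math

def reached(x, y, tx, ty):
    # A point counts as reached when the player is within 4 units of it
    # and sits exactly on its row.
    dist = math.sqrt((x - tx) ** 2 + (y - ty) ** 2)
    return dist <= 4 and y == ty

points = [(20, 80), (60, 64)]   # hypothetical checkpoint coordinates
next_point = 0
x, y = 18, 80                   # hypothetical player position

if reached(x, y, *points[next_point]):
    print(f"Got point {next_point}")
    next_point += 1             # start chasing the following point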
@@ -229,9 +234,14 @@ class Celeste:
                     (y-ty)*(y-ty)
                 )

+            # Timeout if we spend too long between points
+            elif self.frame_counter - self.last_point_frame > 40:
+                self.internal_status["dc"] = str(int(self.internal_status["dc"]) + 1)
+
             self.dist = dist

-            # Call step callback
-            if before_out is not None:
-                after(self, before_out)
-            before_out = before(self)
+            # Call step callbacks
+            if self.before_out is not None:
+                after(self, self.before_out)
+            if not self.resetting:
+                self.before_out = before(self)
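This is the bug fix the commit message refers to: before_out used to be a local of the read loop, so it was lost whenever the loop was re-entered, and the before callback could fire on the stale frame that arrives while the game is still restarting. Storing it on the instance and gating the call on resetting fixes both. The added elif is a stall timeout: if more than 40 frames pass without reaching a checkpoint, the death counter dc in internal_status is incremented, which presumably ends the episode the same way a real death would. A minimal, runnable sketch of the callback handshake (the callback signatures are an assumption based on the calls in the diff):

class FrameLoop:
    # `before` and `after` stand in for the step callbacks registered by
    # the training loop; this class only models the handshake itself.
    def __init__(self, before, after):
        self.before = before
        self.after = after
        self.before_out = None
        self.resetting = False

    def handle_frame(self):
        # Close out the previous frame: hand the stored `before` output
        # to `after`, which can now see both sides of the transition.
        if self.before_out is not None:
            self.after(self, self.before_out)

        # Open the next frame, but not while the game is restarting:
        # a stale frame from the old episode must not produce an action.
        if not self.resetting:
            self.before_out = self.before(self)

loop = FrameLoop(before=lambda c: "action", after=lambda c, out: print("got", out))
loop.handle_frame()   # stores "action"
loop.handle_frame()   # prints "got action", then stores a new one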
@@ -42,9 +42,9 @@ EPS_DECAY = 1000
 BATCH_SIZE = 128
 # Learning rate of target_net.
 # Controls how soft our soft update is.
 #
 # Should be between 0 and 1.
 # Large values
 # Small values do the opposite.
 #
 # A value of one makes target_net
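For context, the parameter these comments describe is the soft-update coefficient (commonly called TAU): on every optimisation step the target network is nudged toward the policy network by that fraction. A self-contained sketch of the update, target <- TAU * policy + (1 - TAU) * target, applied parameter-wise; the value 0.005 and the tiny networks are illustrative, not taken from the script:

import torch
import torch.nn as nn

TAU = 0.005   # illustrative value

policy_net = nn.Linear(4, 2)
target_net = nn.Linear(4, 2)
target_net.load_state_dict(policy_net.state_dict())

# With TAU = 1 the target_net becomes an exact copy of policy_net;
# small values make it trail the policy network slowly.
with torch.no_grad():
    for t_param, p_param in zip(target_net.parameters(), policy_net.parameters()):
        t_param.mul_(1.0 - TAU).add_(TAU * p_param)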
@@ -174,7 +174,7 @@ def optimize_model():
        raise Exception(f"Not enough elements in memory for a batch of {BATCH_SIZE}")



    # Get a random sample of transitions
    batch = random.sample(memory, BATCH_SIZE)

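An illustrative replay memory behind the sampling call above. The Transition fields are the usual DQN quadruple and the dummy fill is only there so the example runs; the real script's memory layout may differ:

import random
from collections import deque, namedtuple

Transition = namedtuple("Transition", ("state", "action", "next_state", "reward"))

BATCH_SIZE = 128
memory = deque(maxlen=10_000)

# Fill with dummy transitions so the sample below has something to draw from.
for i in range(500):
    memory.append(Transition(state=i, action=0, next_state=i + 1, reward=0.0))

if len(memory) < BATCH_SIZE:
    raise Exception(f"Not enough elements in memory for a batch of {BATCH_SIZE}")

# random.sample draws BATCH_SIZE distinct transitions without replacement,
# which breaks up the correlation between consecutive frames.
batch = random.sample(memory, BATCH_SIZE)
print(len(batch))   # 128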
@@ -238,13 +238,13 @@ def optimize_model():
    # V(s_t+1) = max_a ( Q(s_t+1, a) )
    # = the maximum reward over all possible actions at state s_t+1.
    next_state_values = torch.zeros(BATCH_SIZE, device = compute_device)

    # Don't compute gradient for operations in this block.
    # If you don't understand what this means, RTFD.
    with torch.no_grad():

        # Note the use of non_final_mask here.
        # States that are final do not have their reward set by the line
        # below, so their reward stays at zero.
        #
        # States that are not final get their predicted value
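A self-contained illustration of the masked V(s_{t+1}) computation described in the comments above: terminal states keep a value of zero, non-terminal states get the maximum predicted Q-value from the target network. Names mirror the diff; the stand-in network, shapes and GAMMA are assumptions:

import torch

BATCH_SIZE = 4
compute_device = torch.device("cpu")

target_net = torch.nn.Linear(3, 2)                        # stand-in Q network
non_final_mask = torch.tensor([True, True, False, True])  # False = terminal state
non_final_next_states = torch.randn(3, 3)                 # only the non-final ones

next_state_values = torch.zeros(BATCH_SIZE, device = compute_device)
with torch.no_grad():
    # max over actions of Q(s_{t+1}, a); masked-out (final) entries stay 0.
    next_state_values[non_final_mask] = target_net(non_final_next_states).max(1).values

GAMMA = 0.99
reward_batch = torch.ones(BATCH_SIZE)
expected_state_action_values = reward_batch + GAMMA * next_state_values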
@@ -274,7 +274,7 @@ def optimize_model():
        expected_state_action_values.unsqueeze(1)
    )



    # We can now run a step of backpropagation on our model.

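A sketch of the optimisation step this hunk sits in, following the usual PyTorch DQN recipe: a Smooth L1 (Huber) loss between the predicted Q(s_t, a_t) and the Bellman targets, then one optimiser step. The loss function, optimiser and gradient clipping are assumptions; only the unsqueeze(1) call is visible in the diff:

import torch
import torch.nn as nn

policy_net = nn.Linear(3, 2)
optimizer = torch.optim.AdamW(policy_net.parameters(), lr=1e-4)

state_batch = torch.randn(4, 3)
action_batch = torch.randint(0, 2, (4, 1))
expected_state_action_values = torch.randn(4)

# Q(s_t, a_t) for the actions that were actually taken.
state_action_values = policy_net(state_batch).gather(1, action_batch)

criterion = nn.SmoothL1Loss()
loss = criterion(
    state_action_values,
    expected_state_action_values.unsqueeze(1)
)

# We can now run a step of backpropagation on our model.
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)  # optional clipping
optimizer.step()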
@@ -362,10 +362,18 @@ def on_state_after(celeste, before_out):

    if state["next_point"] == next_state["next_point"]:
        reward = state["dist"] - next_state["dist"]
+
+        if reward > 0:
+            reward = 1
+        elif reward < 0:
+            reward = -1
+        else:
+            reward = 0
    else:
        # Score for reaching a point
        reward = 10


    pt_reward = torch.tensor([reward], device = compute_device)

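A stand-alone restatement of the reward rule this hunk introduces, so the shaping is easy to see at a glance: progress toward the current point is clipped to -1, 0 or +1, and actually reaching a point is worth 10. The function wrapper and the example values are illustrative:

def shape_reward(state, next_state):
    if state["next_point"] == next_state["next_point"]:
        delta = state["dist"] - next_state["dist"]
        if delta > 0:
            return 1      # moved closer to the target point
        elif delta < 0:
            return -1     # moved away from it
        else:
            return 0      # no progress either way
    else:
        # Score for reaching a point
        return 10

print(shape_reward({"next_point": 0, "dist": 12.0},
                   {"next_point": 0, "dist": 10.5}))   # -> 1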