# These methods assume module-level imports: json, csv, utils, and sleep from time.

def startGame(self, env, i):
    print(" ------- New Game ---------- \n")
    # Store the Q-Table as a JSON with the provided name
    print("Saving QTable as JSON")
    with open(self.QTableName, 'w') as fp:
        json.dump(self.qTable, fp)
    # Back up the Q-Table every 10 games
    if (i + 1) % 10 == 0:
        print("Saving QTable BackUp as JSON")
        # Store a Q-Table back-up too every 10 games
        with open('QTableBackUp.json', 'w') as fp:
            json.dump(self.qTable, fp)
    # Initialise the Minecraft environment
    # Add a sleep to ensure connection to the environment
    sleep(2)
    obs = env.reset()
    # Do an initial 'stop' step in order to get info from the environment
    obs, currentReward, done, info = env.step(0)
    # Use the utils module to discretise the info from the game - find the current state
    [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(info['observation'])
    currentState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)
    print("initialState: " + currentState)
    # Return the currentState and the first info
    return currentState, info
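For reference, the Q-Table written here is a plain dictionary mapping each discretised "x:z:yaw:y:pitch" state key to a list with one Q-Value per entry in self.actions. The snippet below only shows how such a file can be reloaded for inspection; the file name is a placeholder and the example values are made up, not output from the agent:

import json

# Reload a saved Q-Table for inspection; the file name matches the QTableName
# passed to the agent (placeholder shown here).
with open('QTable.json') as fp:      # placeholder file name
    qTable = json.load(fp)

# Each key is a discretised "x:z:yaw:y:pitch" state; each value is a list with
# one Q-Value per entry in self.actions, e.g.
#   "4:7:2:64:0" -> [0.0, 1.3, -0.5, 0.0]   (illustrative numbers only)
print(len(qTable), "states stored")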
def startGame(self, env, i):
    print(" ------- New Game ---------- \n")
    # Store the Q-Table as a JSON
    print("Saving mc_QTable as JSON")
    with open(self.QTableName, 'w') as fp:
        json.dump(self.qTable, fp)
    if (i + 1) % 10 == 0:
        print("Saving mc_QTable BackUp as JSON")
        # Store a Q-Table back-up too every 10 games
        with open('mc_QTableBackUp.json', 'w') as fp:
            json.dump(self.qTable, fp)
    # Initialise the Minecraft environment
    obs = env.reset()
    # Do an initial 'stop' step in order to get info from the environment
    obs, currentReward, done, info = env.step(0)
    # Use the utils module to discretise the info from the game
    [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(info['observation'])
    currentState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)
    print("initialState: " + currentState)
    return currentState, info
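Both runAgent methods below select actions with self.act(env, currentState), which is not included in this listing. A minimal epsilon-greedy sketch consistent with how it is called, where the structure and the random import are assumptions rather than the original implementation, could be:

import random  # module-level import assumed

def act(self, env, currentState):
    # env is accepted to match the call site; this sketch does not use it
    # Initialise the Q-Values for an unseen state (assumed convention, mirroring runAgent)
    if currentState not in self.qTable:
        self.qTable[currentState] = [0] * len(self.actions)
    # Explore: with probability epsilon, take a random action
    if random.random() < self.epsilon:
        return random.choice(self.actions)
    # Exploit: otherwise take the action with the highest Q-Value for this state
    qValues = self.qTable[currentState]
    return self.actions[qValues.index(max(qValues))]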
def runAgent(self, env):
    results = []
    states_count = {}
    for i in range(200):
        print("Game " + str(i))
        currentState, info = self.startGame(env, i)
        actionCount = 0
        score = 0
        done = False
        history = []
        while not done:
            # Choose the action, then run it
            action = self.act(env, currentState)
            image, reward, done, info = env.step(action)
            obs = info['observation']
            print(f"Reward of {reward}")
            # Continue counts of actions and scores
            actionCount += 1
            score += reward
            if done:
                if self.training:
                    oldQValueAction = self.qTable[currentState][self.actions.index(action)]
                    self.qTable[currentState][self.actions.index(action)] = \
                        oldQValueAction + self.alpha * (reward - oldQValueAction)
                break
            # Keep the last info for the results
            oldObs = obs
            # Use the utils module to discretise the info from the game
            [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(obs)
            newState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)
            if newState not in states_count:
                states_count[newState] = [0] * len(self.actions)
            history.append([newState, action, reward])
            states_count[newState][self.actions.index(action)] += 1.0
            print('Q-Value for Current State: ')
            print(self.qTable[currentState])
            # If there is no Q-Value entry for this state, initialise it
            if newState not in self.qTable:
                self.qTable[newState] = [0] * len(self.actions)
            # Update the Q-Values over the episode history so far
            for t, [ep_state, ep_action, ep_reward] in enumerate(history):
                # Discounted return from step t onwards
                return_val = sum(x[2] * self.gamma ** k for k, x in enumerate(history[t:]))
                if self.training:
                    oldQValueAction = self.qTable[ep_state][self.actions.index(ep_action)]
                    self.qTable[ep_state][self.actions.index(ep_action)] = \
                        oldQValueAction + \
                        (1 / states_count[ep_state][self.actions.index(ep_action)]) * \
                        (return_val - oldQValueAction)
            # Move to the new current state, ready to take the next action
            currentState = newState
        print(' ------- Game Finished ---------- \n')
        results.append([score, actionCount, oldObs['TotalTime'], self.epsilon])
        # Decay the epsilon until the minimum
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        else:
            self.epsilon = 0
    with open(self.CSVName, "w") as f:
        wr = csv.writer(f)
        wr.writerows(results)
    return results
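To make the return_val calculation above concrete: the discounted return from step t is the gamma-weighted sum of the rewards from t onwards. The small standalone example below is purely illustrative and not part of the agent:

def discountedReturn(rewards, gamma):
    # G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...
    return sum(r * gamma ** k for k, r in enumerate(rewards))

# e.g. rewards [1, 0, 10] with gamma = 0.9 give 1 + 0 + 0.9**2 * 10 = 9.1
print(discountedReturn([1, 0, 10], 0.9))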
def runAgent(self, env):
    results = []
    # Run for the number of episodes provided
    for i in range(self.episodes):
        print("Game: " + str(i + 1))
        print("Epsilon: " + str(self.epsilon))
        print("Training: " + str(self.training))
        # Start the game using the 'startGame' function
        currentState, obs = self.startGame(env, i)
        # Initialise the action count, score and done boolean
        actionCount = 0
        score = 0
        done = False
        # Loop, taking actions until the game is done
        while not done:
            # Choose an action using the 'act' function, then run it
            action = self.act(env, currentState)
            # Play the chosen action in the environment
            image, reward, done, info = env.step(action)
            # Get the observations from the info provided
            obs = info['observation']
            # Continue counts of actions and scores
            actionCount += 1
            score += reward
            # Check if the game is done; if so, update the Q-Table and stop the game
            if done:
                # Update the Q-Value for this action
                if self.training:
                    # Find the Q-Functions of the current state and print to screen
                    currentStateActions = self.qTable[currentState]
                    print('\nCurrentStateActionsQValues: ' + str(currentStateActions))
                    # Find the Q-Function of this current state-action pair
                    oldQValueAction = self.qTable[currentState][self.actions.index(action)]
                    # Update this Q-Function following the Bellman equation
                    # (terminal state, so there is no future value term)
                    self.qTable[currentState][self.actions.index(action)] = \
                        oldQValueAction + self.alpha * (reward - oldQValueAction)
                    print("Reward of %s added to the Q-Table at %s with action %s"
                          % (str(reward), currentState, action))
                    # Find the new Q-Functions for this state and print to screen
                    currentStateActions = self.qTable[currentState]
                    print('Updated CurrentStateActionsQValues: ' + str(currentStateActions))
                    newQValueAction = self.qTable[currentState][self.actions.index(action)]
                    print("Q-Value difference for action %s of %s"
                          % (action, abs(oldQValueAction - newQValueAction)))
                print("\n -------- Final Score: -------- %s" % (score))
                break
            # Keep the last info for the results
            oldObs = obs
            # Use the utils module to discretise the info from the game
            [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(obs)
            # Build the key of this new state
            newState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)
            # If there is no Q-Function for this state in the Q-Table, initialise it
            if newState not in self.qTable:
                self.qTable[newState] = [0] * len(self.actions)
            # Update the Q-Value for this action, if training is set to True
            if self.training:
                # Find the Q-Functions of the current state and print to screen
                currentStateActions = self.qTable[currentState]
                print('\nCurrentStateActionsQValues: ' + str(currentStateActions))
                # Find the Q-Function of this current state-action pair
                oldQValueAction = self.qTable[currentState][self.actions.index(action)]
                # Update this Q-Function following the Bellman equation
                self.qTable[currentState][self.actions.index(action)] = \
                    oldQValueAction + self.alpha * \
                    (reward + self.gamma * max(self.qTable[newState]) - oldQValueAction)
                print("Reward of %s added to the Q-Table at %s with action %s"
                      % (str(reward), currentState, action))
                # Find the new Q-Functions for this state and print to screen
                currentStateActions = self.qTable[currentState]
                print('Updated CurrentStateActionsQValues: ' + str(currentStateActions))
                newQValueAction = self.qTable[currentState][self.actions.index(action)]
                print("Q-Value difference for action %s of %s"
                      % (action, abs(oldQValueAction - newQValueAction)))
            # Move to the new current state, ready to take the next action
            currentState = newState
        print('\n ------- Game Finished ---------- \n')
        # Store the results of this run - if 'oldObs' was never created
        # (the agent died on the first action), presume the time was 0
        try:
            results.append([score, actionCount, oldObs['TotalTime'], self.epsilon])
        except (NameError, KeyError):
            results.append([score, actionCount, 0, self.epsilon])
        # Decay the epsilon until the minimum
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        else:
            # If the epsilon is less than the minimum, set it to 0
            self.epsilon = 0
    # Store the results in the provided CSV file
    with open(self.CSVName, "w") as f:
        wr = csv.writer(f)
        wr.writerows(results)
    # Return the results
    return results
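Both versions of startGame and runAgent also depend on utils.discretiseState, which is not shown in this listing. The sketch below is an assumption only: the observation keys ('XPos', 'YPos', 'ZPos', 'Yaw', 'Pitch') follow Malmo-style observations and the bucket sizes are illustrative, not the original helper:

def discretiseState(obs):
    # Bucket the continuous position into whole blocks (bucket sizes are illustrative)
    xdisc = int(obs['XPos'])
    ydisc = int(obs['YPos'])
    zdisc = int(obs['ZPos'])
    # Bucket the orientation into 45-degree sectors
    yawdisc = int(obs['Yaw'] // 45)
    pitchdisc = int(obs['Pitch'] // 45)
    return [xdisc, ydisc, zdisc, yawdisc, pitchdisc]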