Example #1
    def startGame(self, env, i):
        print(" ------- New Game ----------  \n")
        # Store the Q-Table as a JSON with the provided name
        print("Saving QTable as JSON")
        with open(self.QTableName, 'w') as fp:
            json.dump(self.qTable, fp)

        # Back-Up the Q-Table every 10 games
        if (i+1) % 10 == 0:
            print("Saving QTable BackUp as JSON")
            # Store a QTable BackUp too every 10 games
            with open('QTableBackUp.json', 'w') as fp:
                json.dump(self.qTable, fp)

        # Initialise the Minecraft environment
        # Add a sleep to ensure connection to the environment
        sleep(2)
        obs = env.reset()
        # Do an initial 'stop' step in order to get info from the environment
        obs, currentReward, done, info = env.step(0)

        # Use utils module to discretise the info from the game - Find the current state
        [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(info['observation'])
        currentState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)
        print("initialState: " + currentState)

        # Return the currentState and the first info
        return currentState, info
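Both startGame variants lean on a `utils.discretiseState` helper that is not shown. A minimal sketch of what it might look like, assuming the observation carries continuous `XPos`, `YPos`, `ZPos`, `Yaw` and `Pitch` fields and that positions are binned to whole blocks and angles to 45-degree buckets (the field names and bin sizes here are assumptions, not taken from the original code):

import json
import math

def discretiseState(obs):
    # The environment may hand the observation back as a JSON string
    if isinstance(obs, str):
        obs = json.loads(obs)
    # Bin positions to whole blocks (assumed granularity)
    xdisc = int(math.floor(obs['XPos']))
    ydisc = int(math.floor(obs['YPos']))
    zdisc = int(math.floor(obs['ZPos']))
    # Bin angles into 45-degree buckets (assumed granularity)
    yawdisc = int(obs['Yaw'] % 360) // 45
    pitchdisc = int(obs['Pitch'] % 360) // 45
    return [xdisc, ydisc, zdisc, yawdisc, pitchdisc]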
Example #2
    def startGame(self, env, i):
        print(" ------- New Game ----------  \n")
        #Store the Q-Table as a JSON
        print("Saving mc_QTable as JSON")
        with open(self.QTableName, 'w') as fp:
            json.dump(self.qTable, fp)
        if (i + 1) % 10 == 0:
            print("Saving mc_QTable BackUp as JSON")
            # Store a QTable BackUp too every 10 games
            with open('mc_QTableBackUp.json', 'w') as fp:
                json.dump(self.qTable, fp)
        # Initialise the Minecraft environment
        obs = env.reset()
        # Do an initial 'stop' step in order to get info from env
        obs, currentReward, done, info = env.step(0)

        # Use utils module to discretise the info from the game
        [xdisc, ydisc, zdisc, yawdisc,
         pitchdisc] = utils.discretiseState(info['observation'])
        currentState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc,
                                           pitchdisc)
        print("initialState: " + currentState)
        return currentState, info
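Both variants persist the Q-Table with json.dump, but neither shows how it is read back in when an agent is constructed. A minimal sketch of the loading side, assuming the same file-name attribute and an empty table when no file exists yet (the function name and fallback are assumptions):

import json
import os

def loadQTable(qTableName):
    # Reload a previously saved Q-Table, or start fresh if none exists yet
    if not os.path.exists(qTableName):
        return {}
    with open(qTableName) as fp:
        return json.load(fp)

Because the table is a plain dict mapping state strings to per-action value lists, json.load restores exactly the structure the agents index into with `self.qTable[state][actionIndex]`.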
Example #3
    def runAgent(self, env):
        results = []
        states_count = {}

        for i in range(200):
            print("Game " + str(i))
            currentState, info = self.startGame(env, i)
            actionCount = 0
            score = 0
            done = False
            history = []

            while not done:
                # Choose the action, then run it
                action = self.act(env, currentState)
                image, reward, done, info = env.step(action)
                obs = info['observation']
                print(f"Reward of {reward}")
                # Continue counts of actions and scores
                actionCount += 1
                score += reward

                if done:
                    if self.training:
                        oldQValueAction = self.qTable[currentState][
                            self.actions.index(action)]
                        self.qTable[currentState][self.actions.index(
                            action)] = oldQValueAction + self.alpha * (
                                reward - oldQValueAction)
                    break
                # Keep the last observation so the final results can be recorded
                oldObs = obs
                # Use the utils module to discretise the info from the game
                [xdisc, ydisc, zdisc, yawdisc,
                 pitchdisc] = utils.discretiseState(obs)
                newState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc,
                                               pitchdisc)

                if newState not in states_count:
                    states_count[newState] = ([0] * len(self.actions))

                history.append([newState, action, reward])
                states_count[newState][self.actions.index(action)] += 1.0

                print('Q-Value for Current State: ')
                print(self.qTable[currentState])

                # If no Q-Value for this state yet, initialise it
                if newState not in self.qTable:
                    self.qTable[newState] = ([0] * len(self.actions))

                # Move to the new state before choosing the next action
                currentState = newState

            # Monte Carlo update: move every visited state-action pair towards the
            # discounted return observed from that step to the end of the game
            for t, [ep_state, ep_action, ep_reward] in enumerate(history):
                # Discounted return from step t onwards
                return_val = ep_reward + sum(
                    x[2] * self.gamma ** k
                    for k, x in enumerate(history[t + 1:], start=1))
                if self.training:
                    actionIndex = self.actions.index(ep_action)
                    oldQValueAction = self.qTable[ep_state][actionIndex]
                    # Incremental mean with a 1/N(s,a) step size
                    self.qTable[ep_state][actionIndex] = oldQValueAction + (
                        1 / states_count[ep_state][actionIndex]) * (
                            return_val - oldQValueAction)

            print(' ------- Game Finished ----------  \n')
            try:
                results.append(
                    [score, actionCount, oldObs['TotalTime'], self.epsilon])
            except NameError:  # game ended on the first action, 'oldObs' never set
                results.append([score, actionCount, 0, self.epsilon])
            # Decay the epsilon until the minimum
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            else:
                self.epsilon = 0
            with open(self.CSVName, "w") as f:
                wr = csv.writer(f)
                wr.writerows(results)
        return results
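Example #3 updates its Q-values from whole episodes rather than bootstrapping: each visited state-action pair is moved towards the discounted return observed from that step to the end of the game, averaged with a 1/N(s,a) step size (an every-visit Monte Carlo scheme). A small standalone sketch of that return calculation, using the example's [state, action, reward] history layout with made-up state strings, actions and rewards:

def discountedReturn(history, t, gamma):
    # Sum of rewards from step t onwards, discounted by gamma per step
    return sum(step[2] * gamma ** k for k, step in enumerate(history[t:]))

# Illustrative history only; states and rewards are invented for the example
history = [["0:0:64:0:0", 1, 0.0],
           ["1:0:64:0:0", 1, 0.0],
           ["2:0:64:0:0", 1, 10.0]]
print(discountedReturn(history, 0, 0.9))  # 0 + 0.9 * 0 + 0.81 * 10 = 8.1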
Example #4
    def runAgent(self, env):
        results = []
        # Run for the amount of episodes provided
        for i in range(self.episodes):
            print("Game: " + str(i + 1))
            print("Epsilon: " + str(self.epsilon))
            print("Training: " + str(self.training))
            # Start the game using the 'startGame' function
            currentState, obs = self.startGame(env, i)
            # Initialise actions, score and done boolean
            actionCount = 0
            score = 0
            done = False

            # Loop through taking actions until the game is done
            while not done:
                # Choose an action using the 'act' function, then run it
                action = self.act(env, currentState)

                # Play the given action in the environment
                image, reward, done, info = env.step(action)

                # Get the observations from the info provided
                obs = info['observation']
                # Continue counts of actions and scores
                actionCount += 1
                score += reward

                # Check if game is done, if so, update the Q-Table and stop the game
                if done:
                    # update Q-values for this action
                    if self.training:
                        # Find the Q-Functions of the current state and print to screen
                        currentStateActions = self.qTable[currentState]
                        print('\nCurrentStateActionsQValues: ' + str(currentStateActions))

                        # Find the Q-Function of this current state-action pair
                        oldQValueAction = self.qTable[currentState][self.actions.index(action)]
                        # Update this Q-Function following the Bellman Equation
                        self.qTable[currentState][self.actions.index(action)] = oldQValueAction + self.alpha * (reward - oldQValueAction)
                        print("Reward of %s added to the Q-Table at %s with action %s" % (str(reward), currentState, action))

                        # Find the new Q-Functions for this state and print to screen
                        currentStateActions = self.qTable[currentState]
                        print('Updated CurrentStateActionsQValues: ' + str(currentStateActions))
                        newQValueAction = self.qTable[currentState][self.actions.index(action)]
                        print("Q-Value difference for action %s of %s" % (action, abs(oldQValueAction - newQValueAction)))
                        print("\n -------- Final Score: -------- %s" % (score))

                    break

                # Use this to keep last info for results
                oldObs = obs
                # Use the utils module to discretise the info from the game
                [xdisc, ydisc, zdisc, yawdisc, pitchdisc] = utils.discretiseState(obs)
                # Find the position of this new state
                newState = "%d:%d:%d:%d:%d" % (xdisc, zdisc, yawdisc, ydisc, pitchdisc)


                # If no Q-Function for this state in the Q-Table, initialise it
                if newState not in self.qTable:
                    self.qTable[newState] = ([0] * len(self.actions))


                # Update Q-values for this action, if training is set to True
                if self.training:
                    # Find the Q-Functions of the current state and print to screen
                    currentStateActions = self.qTable[currentState]
                    print('\nCurrentStateActionsQValues: ' + str(currentStateActions))

                    # Find the Q-Function of this current state-action pair
                    oldQValueAction = self.qTable[currentState][self.actions.index(action)]
                    # Update this Q-Function following the Bellman Equation
                    self.qTable[currentState][self.actions.index(action)] = oldQValueAction + self.alpha * (reward + self.gamma * max(self.qTable[newState]) - oldQValueAction)
                    print("Reward of %s added to the Q-Table at %s with action %s" % (str(reward), currentState, action))


                    # Find the new Q-Functions for this state and print to screen
                    currentStateActions = self.qTable[currentState]
                    print('Updated CurrentStateActionsQValues: ' + str(currentStateActions))
                    newQValueAction = self.qTable[currentState][self.actions.index(action)]
                    print("Q-Value difference for action %s of %s"%(action,abs(oldQValueAction-newQValueAction)))

                # Move to the new current state, ready to take the next action
                currentState = newState

            print('\n ------- Game Finished ----------  \n')
            # Store the results of this run - If the 'oldObs' not created (Died on first action), presume the time was 0
            try:
                results.append([score, actionCount, oldObs['TotalTime'], self.epsilon])
            except NameError:
                # 'oldObs' was never set because the game ended on the first action
                results.append([score, actionCount, 0, self.epsilon])

            # Decay the epsilon until the minimum
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            else:
                # If the epsilon less than minimum, set to 0
                self.epsilon = 0
            # Store the results in the provided CSV file
            with open(self.CSVName,"w") as f:
                wr = csv.writer(f)
                wr.writerows(results)

        # Return the results
        return results
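Both runAgent variants call a `self.act(env, currentState)` method that is not listed here. Given the `epsilon`, `actions` and `qTable` attributes they rely on, it is presumably an epsilon-greedy policy over the current state's Q-values; a minimal sketch under that assumption (initialising unseen states inside `act` is also an assumption):

    def act(self, env, currentState):
        # Assumes 'import random' at module level; 'env' is unused in this sketch
        # Make sure the state has a row of Q-values before selecting from it
        if currentState not in self.qTable:
            self.qTable[currentState] = [0] * len(self.actions)
        # Explore with probability epsilon, otherwise exploit the best known action
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        qValues = self.qTable[currentState]
        return self.actions[qValues.index(max(qValues))]

With an action-selection method like this in place, the contrast between the two runAgent variants is just the update rule: Example #4 bootstraps after every step with the Bellman target reward + gamma * max(Q[newState]), while Example #3 waits for the full episode and averages the observed discounted returns.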