Example #1
    def __init__(self, discountFactor, epsilon, initVals=0.0):

        super(MonteCarloAgent, self).__init__()
        # interface to the discrete HFO attacking-player environment
        self.attack = HFOAttackingPlayer()
        # initialise all possible states: a 5x6 grid of pitch cells plus two
        # terminal states
        self.State = [(x, y) for x in range(5) for y in range(6)]
        self.State.append("GOAL")
        self.State.append("OUT_OF_BOUNDS")

        # discount factor
        self.discountFactor = discountFactor
        # exploration rate for the epsilon-greedy policy
        self.epsilon = epsilon
        # current state of the agent
        self.cur = 0
        # three lists that record the episode trajectory:
        # actions taken, rewards received, and states visited
        self.logA = []
        self.logR = []
        self.logS = []
        # cumulative discounted return
        self.G = 0

        # Q table: a dict mapping each state to an inner dict, which in turn
        # maps each action to its value estimate (initialised to initVals)
        self.Q = {}
        # returns table: a dict mapping each (state, action) pair to the list
        # of returns observed for it
        self.returns = {}
        for s in self.State:
            self.Q[s] = {}
            for a in self.possibleActions:
                self.Q[s][a] = initVals
                self.returns[(s, a)] = []
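
The constructor above only builds the tables; the lists `logS`, `logA`, `logR` and the `returns` dict are consumed by the policy-evaluation step. For context, here is a minimal sketch of a first-visit Monte Carlo update over one recorded episode; the method name `learn` and its exact traversal are assumptions, not part of the original example.

    def learn(self):
        # walk the episode backwards, accumulating the discounted return G
        G = 0
        for t in reversed(range(len(self.logS))):
            s, a = self.logS[t], self.logA[t]
            G = self.discountFactor * G + self.logR[t]
            # first-visit check: update (s, a) only at its earliest occurrence
            if (s, a) not in list(zip(self.logS[:t], self.logA[:t])):
                self.returns[(s, a)].append(G)
                # the Q estimate is the mean of all returns observed so far
                self.Q[s][a] = sum(self.returns[(s, a)]) / len(self.returns[(s, a)])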
Example #2
        return self.learningRate, self.epsilon


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--id', type=int, default=0)
    parser.add_argument('--numOpponents', type=int, default=0)
    parser.add_argument('--numTeammates', type=int, default=0)
    parser.add_argument('--numEpisodes', type=int, default=500)

    args = parser.parse_args()

    # Initialize connection with the HFO server
    hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents,
                                numTeammates=args.numTeammates,
                                agentId=args.id)
    hfoEnv.connectToServer()

    # Initialize a Q-Learning Agent
    agent = QLearningAgent(learningRate=0.1, discountFactor=0.99, epsilon=1.0)
    numEpisodes = args.numEpisodes

    # Run training using Q-Learning
    numTakenActions = 0
    for episode in range(numEpisodes):
        status = 0
        observation = hfoEnv.reset()

        while status == 0:
            learningRate, epsilon = agent.computeHyperparameters(
                numTakenActions, episode)  # arguments assumed; the source example is truncated here
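
The example is cut off inside the training loop, and only the final `return` of `computeHyperparameters` appears at the top of the snippet. For context, here is one plausible implementation of that method, a minimal sketch assuming a simple exponential annealing schedule; the decay constant and floors are guesses, not taken from the original.

import math

def computeHyperparameters(self, numTakenActions, episodeNumber):
    # a method on QLearningAgent: anneal exploration and step size as
    # training progresses (the schedule and its constants are assumed
    # values; numTakenActions is unused in this particular sketch)
    decay = math.exp(-5e-3 * episodeNumber)
    self.epsilon = max(0.05, decay)
    self.learningRate = max(0.01, 0.1 * decay)
    return self.learningRate, self.epsilon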
Example #3
from DiscreteHFO.HFOAttackingPlayer import HFOAttackingPlayer
import random
import argparse


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--id', type=int, default=0)
    parser.add_argument('--numOpponents', type=int, default=0)
    parser.add_argument('--numTeammates', type=int, default=0)
    parser.add_argument('--numEpisodes', type=int, default=500)

    args = parser.parse_args()

    # Initialize connection with the HFO server
    hfoEnv = HFOAttackingPlayer(numOpponents=args.numOpponents,
                                numTeammates=args.numTeammates,
                                agentId=args.id)
    hfoEnv.connectToServer()

    # Run the requested number of episodes with a uniformly random policy
    numEpisodes = args.numEpisodes
    for episode in range(numEpisodes):

        status = 0
        observation = hfoEnv.reset()

        # step until the episode terminates (status becomes non-zero)
        while status == 0:
            action = random.choice(hfoEnv.possibleActions)
            nextObservation, reward, done, status = hfoEnv.step(action)
            observation = nextObservation

        # status 5 signals that the HFO server has shut down
        if status == 5:
            hfoEnv.quitGame()
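
The loop above compares `status` against the magic numbers 0 and 5. Assuming the standard `hfo` Python bindings, where `IN_GAME == 0` and `SERVER_DOWN == 5`, the episode loop reads more clearly with the named constants:

from hfo import IN_GAME, SERVER_DOWN

status = IN_GAME
observation = hfoEnv.reset()

while status == IN_GAME:
    action = random.choice(hfoEnv.possibleActions)
    nextObservation, reward, done, status = hfoEnv.step(action)
    observation = nextObservation

# shut the agent down cleanly if the HFO server has gone away
if status == SERVER_DOWN:
    hfoEnv.quitGame()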