    # --- WolfPHCAgent methods (continued; these belong to the agent class
    # defined above this excerpt) ---
    def setWinDelta(self, winDelta):
        # Policy-update step size used when the agent is "winning".
        self.winDelta = winDelta

    def setLoseDelta(self, loseDelta):
        # Policy-update step size used when the agent is "losing".
        self.loseDelta = loseDelta

    def computeHyperparameters(self, numTakenActions, episodeNumber):
        # Return (loseDelta, winDelta, learningRate) for the coming step.
        # A constant schedule is a placeholder here (assuming the constructor
        # stores learningRate); a decaying schedule is sketched after the
        # training loop below.
        return 0.1, 0.01, self.learningRate


from copy import deepcopy  # used for the per-agent observation copies below

if __name__ == '__main__':
    numOpponents = 1
    numAgents = 2
    MARLEnv = DiscreteMARLEnvironment(numOpponents=numOpponents, numAgents=numAgents)

    agents = []
    for i in range(numAgents):
        agent = WolfPHCAgent(learningRate=0.2, discountFactor=0.99)
        agents.append(agent)

    numEpisodes = 4000
    numTakenActions = 0
    for episode in range(numEpisodes):
        status = ["IN_GAME", "IN_GAME", "IN_GAME"]
        observation = MARLEnv.reset()

        while status[0] == "IN_GAME":
            # Refresh every agent's hyperparameters before each step.
            for agent in agents:
                loseDelta, winDelta, learningRate = agent.computeHyperparameters(numTakenActions, episode)
                agent.setLoseDelta(loseDelta)
                agent.setWinDelta(winDelta)
                agent.setLearningRate(learningRate)

            # Each agent acts on its own deep-copied view of the observation.
            actions = []
            perAgentObs = []
            for agentIdx, agent in enumerate(agents):
                obsCopy = deepcopy(observation[agentIdx])
                perAgentObs.append(obsCopy)
                agent.setState(agent.toStateRepresentation(obsCopy))
                actions.append(agent.act())

            nextObservation, reward, done, status = MARLEnv.step(actions)
            numTakenActions += 1

            # Q-value update, then the WoLF-PHC average-policy and policy updates.
            for agentIdx, agent in enumerate(agents):
                agent.setExperience(agent.toStateRepresentation(perAgentObs[agentIdx]),
                                    actions[agentIdx], reward[agentIdx], status[agentIdx],
                                    agent.toStateRepresentation(nextObservation[agentIdx]))
                agent.learn()
                agent.calculateAveragePolicyUpdate()
                agent.calculatePolicyUpdate()

            observation = nextObservation
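
# A possible decaying schedule for computeHyperparameters, kept separate as an
# illustrative sketch: the helper name, the exponential decay rate (5e-4), and
# the delta values are assumptions, not taken from the original code. WoLF
# ("Win or Learn Fast") only requires loseDelta > winDelta, so the policy
# moves faster while the agent is losing.
import math

def wolfHyperparameterSketch(numTakenActions, episodeNumber, initialLearningRate=0.2):
    # Hypothetical helper, matching the (loseDelta, winDelta, learningRate)
    # return order the training loop above unpacks.
    learningRate = initialLearningRate * math.exp(-5e-4 * episodeNumber)
    winDelta = 0.01
    loseDelta = 0.1  # larger than winDelta, per the WoLF principle
    return loseDelta, winDelta, learningRate
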
# --- Runner for IndependentQLearningAgent (a separate script; its header and
# argparse setup are truncated in this excerpt) ---
import argparse
from copy import deepcopy

if __name__ == '__main__':
    # Minimal reconstruction of the parser implied by the usage below; the
    # argument names come from the code, the defaults are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--numOpponents', type=int, default=1)
    parser.add_argument('--numAgents', type=int, default=2)
    parser.add_argument('--numEpisodes', type=int, default=50000)
    args = parser.parse_args()

    MARLEnv = DiscreteMARLEnvironment(numOpponents=args.numOpponents, numAgents=args.numAgents)

    agents = []
    for i in range(args.numAgents):
        agent = IndependentQLearningAgent(learningRate=0.1, discountFactor=0.9, epsilon=1.0)
        agents.append(agent)

    numEpisodes = args.numEpisodes
    numTakenActions = 0
    for episode in range(numEpisodes):
        status = ["IN_GAME", "IN_GAME", "IN_GAME"]
        observation = MARLEnv.reset()
        totalReward = 0.0
        timeSteps = 0

        while status[0] == "IN_GAME":
            # Refresh every agent's hyperparameters before each step.
            for agent in agents:
                learningRate, epsilon = agent.computeHyperparameters(numTakenActions, episode)
                agent.setEpsilon(epsilon)
                agent.setLearningRate(learningRate)

            actions = []
            stateCopies = []
            for agentIdx in range(args.numAgents):
                obsCopy = deepcopy(observation[agentIdx])
                stateCopies.append(obsCopy)
                agents[agentIdx].setState(agents[agentIdx].toStateRepresentation(obsCopy))
            # (the remainder of the episode loop -- acting, stepping the
            # environment, and learning -- is truncated in this excerpt)
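
# An illustrative epsilon/learning-rate schedule for the independent
# Q-learners above. The helper name is hypothetical, and the linear epsilon
# decay, its 0.05 floor, and the learning-rate decay constant are assumptions
# rather than values from the original code.
import math

def iqlHyperparameterSketch(numTakenActions, episodeNumber, numEpisodes=50000,
                            initialLearningRate=0.1):
    # Matches the (learningRate, epsilon) return order the loop above unpacks.
    # Exploration decays linearly over training, with a small floor so the
    # agents never become fully greedy while still learning.
    epsilon = max(0.05, 1.0 - float(episodeNumber) / numEpisodes)
    learningRate = initialLearningRate * math.exp(-5e-4 * episodeNumber)
    return learningRate, epsilon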