Example #1
	def setWinDelta(self, winDelta):
		# Step size for the policy update when the agent is winning.
		self.winDelta = winDelta

	def setLoseDelta(self, loseDelta):
		# Step size for the policy update when the agent is losing.
		self.loseDelta = loseDelta

	def computeHyperparameters(self, numTakenActions, episodeNumber):
		# Return the triple that the training loop below unpacks; a decay
		# schedule for these values can be plugged in here.
		return self.loseDelta, self.winDelta, self.learningRate

if __name__ == '__main__':

	numOpponents = 1
	numAgents = 2
	MARLEnv = DiscreteMARLEnvironment(numOpponents = numOpponents, numAgents = numAgents)

	agents = []
	for i in range(numAgents):
		agent = WolfPHCAgent(learningRate = 0.2, discountFactor = 0.99)
		agents.append(agent)

	numEpisodes = 4000
	numTakenActions = 0
	for episode in range(numEpisodes):	
		status = ["IN_GAME","IN_GAME","IN_GAME"]
		observation = MARLEnv.reset()
		
		while status[0] == "IN_GAME":
			for agent in agents:
				loseDelta, winDelta, learningRate = agent.computeHyperparameters(numTakenActions, episode)
				agent.setLoseDelta(loseDelta)
				agent.setWinDelta(winDelta)
				agent.setLearningRate(learningRate)
			actions = []
			perAgentObs = []
			agentIdx = 0
			for agent in agents:
				obsCopy = deepcopy(observation[agentIdx])
				perAgentObs.append(obsCopy)
				agent.setState(agent.toStateRepresentation(obsCopy))
				actions.append(agent.act())
				agentIdx += 1
			nextObservation, reward, done, status = MARLEnv.step(actions)
			numTakenActions += 1

			agentIdx = 0
			for agent in agents:
				agent.setExperience(agent.toStateRepresentation(perAgentObs[agentIdx]), actions[agentIdx], reward[agentIdx], 
					status[agentIdx], agent.toStateRepresentation(nextObservation[agentIdx]))
				agent.learn()
				agent.calculateAveragePolicyUpdate()
				agent.calculatePolicyUpdate()
				agentIdx += 1
			
			observation = nextObservation
Example #2
#
# 	def setLearningRate(self, learningRate):
# 		raise NotImplementedError
#
# 	def computeHyperparameters(self, numTakenActions, episodeNumber):
# 		raise NotImplementedError

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--numOpponents', type=int, default=1)
    parser.add_argument('--numAgents', type=int, default=2)
    parser.add_argument('--numEpisodes', type=int, default=50000)
    parser.add_argument('--visualize', action='store_true')

    args = parser.parse_args()

    ########### for debugging purposes only ############
    #debug = True
    #if debug:
    #rewards_buffer = []
    #history = [10,500]
    #goals = [0]*max(history)
    #configure("tb/IQL" + str(datetime.now()))
    #####################################################

    MARLEnv = DiscreteMARLEnvironment(numOpponents=args.numOpponents,
                                      numAgents=args.numAgents,
                                      visualize=args.visualize)
    agents = []
    for i in range(args.numAgents):
        agent = IndependentQLearningAgent(learningRate=0.99,
                                          discountFactor=0.9,
                                          epsilon=1.0)
        agents.append(agent)

    numEpisodes = args.numEpisodes
    numTakenActions = 0
    for episode in range(numEpisodes):
        status = ["IN_GAME", "IN_GAME", "IN_GAME"]
        observation = MARLEnv.reset()
        totalReward = 0.0
        timeSteps = 0
Example #4
        return self.loseDelta, self.winDelta, learningRate


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--numOpponents', type=int, default=1)
    parser.add_argument('--numAgents', type=int, default=2)
    parser.add_argument('--numEpisodes', type=int, default=50000)

    args = parser.parse_args()

    numOpponents = args.numOpponents
    numAgents = args.numAgents
    MARLEnv = DiscreteMARLEnvironment(numOpponents=numOpponents,
                                      numAgents=numAgents,
                                      visualize=False)

    agents = []
    for i in range(args.numAgents):
        agent = WolfPHCAgent(learningRate=0.2,
                             discountFactor=0.99,
                             winDelta=0.01,
                             loseDelta=0.1)
        agents.append(agent)

    statusHistory = []

    numEpisodes = args.numEpisodes
    numTakenActions = 0
    for episode in range(numEpisodes):