def _run_partner(self, clients):
    """Drive the role-0 challenge agent in an endless act/step loop.

    Builds a ``PigChaseEnvironment`` for role 0 (symbolic state builder,
    randomized positions) and a ``PigChaseChallengeAgent`` named
    ``ENV_AGENT_NAMES[0]``.  Before every environment reset,
    ``self.agent_type`` is updated to ``AGENT_TYPE_1`` when the challenge
    agent currently wraps a ``RandomAgent`` and to ``AGENT_TYPE_2``
    otherwise.  The loop has no exit condition, so this call only ends if
    one of the calls it makes raises.

    :param clients: client endpoints, passed unchanged to
        ``PigChaseEnvironment``.
    """
    env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                              role=0, randomize_positions=True)
    agent = PigChaseChallengeAgent(ENV_AGENT_NAMES[0])

    def publish_agent_type():
        # Explicit if/else instead of ``cond and A or B``: the and/or form
        # silently falls through to B whenever A happens to be falsy.
        if type(agent.current_agent) == RandomAgent:
            agent_type = PigChaseEnvironment.AGENT_TYPE_1
        else:
            agent_type = PigChaseEnvironment.AGENT_TYPE_2
        self.agent_type.set(agent_type)

    publish_agent_type()
    # The holder object itself (not its value) is what reset() receives.
    obs = env.reset(self.agent_type)
    reward = 0
    agent_done = False

    while True:
        # Select an action.  This deliberately happens before the reset
        # check below, so the agent type is read after act() has run.
        # NOTE(review): presumably act() may switch ``current_agent`` when
        # an episode ends -- confirm against PigChaseChallengeAgent.
        action = agent.act(obs, reward, agent_done, is_training=True)

        # Reset if needed
        if env.done:
            publish_agent_type()
            obs = env.reset(self.agent_type)

        # Take a step
        obs, reward, agent_done = env.do(action)
def run_challenge_agent(clients):
    """Run the challenge agent as role 0 through ``agent_loop``.

    :param clients: client endpoints, passed unchanged to
        ``PigChaseEnvironment``.
    """
    environment = PigChaseEnvironment(
        clients,
        PigChaseSymbolicStateBuilder(),
        role=0,
        randomize_positions=True,
    )
    challenger = PigChaseChallengeAgent(ENV_AGENT_NAMES[0])
    # The third argument of agent_loop is passed as None, as before.
    agent_loop(challenger, environment, None)
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):
    """Create one pig-chase player and run its interaction loop.

    Role 0 runs a ``PigChaseChallengeAgent`` forever.  Any other role runs
    a baseline agent (``FocusedAgent`` when ``type == 'astar'``, otherwise
    ``RandomAgent``) for ``EPOCH_SIZE * max_epochs`` steps and reports the
    rewards of each finished episode through ``visualize_training``.

    :param name: agent name handed to the agent constructor.
    :param role: player index; 0 selects the challenge agent.
    :param type: baseline selector, only read when ``role`` is not 0.
    :param clients: raw client list; needs at least two entries and is
        converted with ``parse_clients_args``.
    :param max_epochs: number of epochs of ``EPOCH_SIZE`` steps to run.
    :param logdir: not used by this function.
    :param visualizer: sink handed to ``visualize_training``.
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                              role=role, randomize_positions=True)

    if role != 0:
        if type == 'astar':
            player = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            player = RandomAgent(name, env.available_actions)

        observation = env.reset()
        last_reward = 0
        finished = False
        episode_rewards = []

        total_steps = EPOCH_SIZE * max_epochs
        for step in range(1, total_steps + 1):
            # A finished episode is reported, then a fresh one is started.
            if env.done:
                visualize_training(visualizer, step, episode_rewards)
                episode_rewards = []
                observation = env.reset()

            chosen = player.act(observation, last_reward, finished,
                                is_training=True)
            observation, last_reward, finished = env.do(chosen)
            episode_rewards.append(last_reward)

            player.inject_summaries(step)
        return

    # Role 0: the challenge agent plays without a step limit.
    player = PigChaseChallengeAgent(name)
    observation = env.reset()
    last_reward = 0
    finished = False

    while True:
        if env.done:
            observation = env.reset()

        chosen = player.act(observation, last_reward, finished,
                            is_training=True)
        observation, last_reward, finished = env.do(chosen)
def agent_factory(name, role, kind, clients, max_episodes, max_actions, logdir, quit):
    """Create one pig-chase player: the challenge agent or a human agent.

    Role 0 runs a ``PigChaseChallengeAgent`` forever and writes the summed
    reward of every finished episode to a ``ConsoleVisualizer``.  Any other
    role builds a ``PigChaseHumanAgent`` on an environment whose actions
    are the values of ``ARROW_KEYS_MAPPING`` and calls its ``show()``.

    :param name: agent name handed to the agent constructor.
    :param role: player index; 0 selects the challenge agent.
    :param kind: not used by this function.
    :param clients: raw client list; needs at least two entries and is
        converted with ``parse_clients_args``.
    :param max_episodes: forwarded to ``PigChaseHumanAgent``.
    :param max_actions: forwarded to ``PigChaseHumanAgent``.
    :param logdir: not used by this function.
    :param quit: forwarded to ``PigChaseHumanAgent``.
    """
    assert len(clients) >= 2, \
        'There are not enough Malmo clients in the pool (need at least 2)'
    clients = parse_clients_args(clients)
    visualizer = ConsoleVisualizer(prefix='Agent %d' % role)

    if role != 0:
        # Human player: reset once with AGENT_TYPE_3, then hand over to
        # the human agent's own show().
        env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                                  actions=list(ARROW_KEYS_MAPPING.values()),
                                  role=role, randomize_positions=True)
        env.reset(PigChaseEnvironment.AGENT_TYPE_3)
        human = PigChaseHumanAgent(name, env,
                                   list(ARROW_KEYS_MAPPING.keys()),
                                   max_episodes, max_actions,
                                   visualizer, quit)
        human.show()
        return

    env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                              actions=ENV_ACTIONS, role=role,
                              human_speed=True, randomize_positions=True)
    challenger = PigChaseChallengeAgent(name)

    def partner_type():
        # AGENT_TYPE_1 while a RandomAgent is wrapped, AGENT_TYPE_2 otherwise.
        return (PigChaseEnvironment.AGENT_TYPE_1
                if type(challenger.current_agent) == RandomAgent
                else PigChaseEnvironment.AGENT_TYPE_2)

    obs = env.reset(partner_type())
    reward = 0
    done = False
    episode_rewards = []
    episodes_finished = 0

    while True:
        # The action is chosen first; the agent type for the next episode
        # is only read afterwards.
        action = challenger.act(obs, reward, done, True)

        if done:
            episodes_finished += 1
            visualizer << (episodes_finished, 'Reward', sum(episode_rewards))
            episode_rewards = []
            obs = env.reset(partner_type())

        obs, reward, done = env.do(action)
        episode_rewards.append(reward)
def agent_factory(name, role, clients, backend, device, max_epochs, logdir, visualizer):
    """Create one pig-chase player: the challenge agent or a DQN learner.

    Role 0 runs a ``PigChaseChallengeAgent`` forever on a symbolic-state
    environment.  Any other role builds a ``PigChaseQLearnerAgent`` on a
    ``MalmoALEStateBuilder`` environment, trains it for
    ``EPOCH_SIZE * max_epochs`` steps, and saves the model at the end of
    every epoch as ``pig_chase-dqn_<epoch>.model``.

    :param name: agent name handed to the agent constructor.
    :param role: player index; 0 selects the challenge agent.
    :param clients: raw client list; needs at least two entries and is
        converted with ``parse_clients_args``.
    :param backend: ``'cntk'`` selects the CNTK model; any other value
        selects the Chainer model.
    :param device: forwarded to the ``QNeuralNetwork`` constructor.
    :param max_epochs: number of epochs of ``EPOCH_SIZE`` steps to train.
    :param logdir: not used by this function.
    :param visualizer: sink for training summaries.
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    def reset_until_observed(env, *reset_args):
        # reset() can return None if the episode ended with the first
        # action of the other agent; keep resetting until it does not.
        obs = env.reset(*reset_args)
        while obs is None:
            print('Warning: received obs == None.')
            obs = env.reset(*reset_args)
        return obs

    if role == 0:
        builder = PigChaseSymbolicStateBuilder()
        env = PigChaseEnvironment(clients, builder, role=role,
                                  randomize_positions=True)
        agent = PigChaseChallengeAgent(name)

        def partner_type():
            # AGENT_TYPE_1 while a RandomAgent is wrapped, else AGENT_TYPE_2.
            if type(agent.current_agent) == RandomAgent:
                return PigChaseEnvironment.AGENT_TYPE_1
            return PigChaseEnvironment.AGENT_TYPE_2

        # The very first reset is not retried on None, matching the
        # behaviour this function always had.
        obs = env.reset(partner_type())
        reward = 0
        agent_done = False

        while True:
            if env.done:
                obs = reset_until_observed(env, partner_type())

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:
        env = PigChaseEnvironment(clients, MalmoALEStateBuilder(), role=role,
                                  randomize_positions=True)
        memory = TemporalMemory(100000, (84, 84))
        input_shape = (memory.history_length, 84, 84)

        # Backend modules are imported lazily so only the selected one
        # has to be installed.
        if backend == 'cntk':
            from malmopy.model.cntk import QNeuralNetwork
            model = QNeuralNetwork(input_shape, env.available_actions, device)
        else:
            from malmopy.model.chainer import QNeuralNetwork, DQNChain
            chain = DQNChain(input_shape, env.available_actions)
            target_chain = DQNChain(input_shape, env.available_actions)
            model = QNeuralNetwork(chain, target_chain, device)

        # NOTE(review): the positional hyper-parameters below are passed
        # through unchanged -- confirm their meaning against the
        # LinearEpsilonGreedyExplorer / PigChaseQLearnerAgent signatures.
        explorer = LinearEpsilonGreedyExplorer(1, 0.1, 1000000)
        agent = PigChaseQLearnerAgent(name, env.available_actions, model,
                                      memory, 0.99, 32, 50000,
                                      explorer=explorer,
                                      visualizer=visualizer)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):
            # check if env needs reset
            if env.done:
                visualize_training(visualizer, step, viz_rewards)
                agent.inject_summaries(step)
                viz_rewards = []
                obs = reset_until_observed(env)

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            if step % EPOCH_SIZE == 0:
                # ``model`` is always bound in this branch, so no
                # ``locals()`` probe is needed.  Floor division keeps the
                # epoch index an int on both Python 2 and 3.
                model.save('pig_chase-dqn_%d.model' % (step // EPOCH_SIZE))
def agent_factory(name, role, baseline_agent, clients, max_epochs,
                  logdir, visualizer):
    """Create one pig-chase player and run it with remember/replay calls.

    Role 0 runs a ``PigChaseChallengeAgent`` forever; any other role runs
    a baseline agent (``FocusedAgent`` for ``'astar'``, otherwise
    ``RandomAgent``) for ``EPOCH_SIZE * max_epochs`` steps.  In both
    branches every transition is handed to ``agent.remember`` and
    ``agent.replay(batch_size)`` is attempted when an episode ends.

    NOTE(review): this relies on the agents exposing ``check_memory``,
    ``remember`` and ``replay`` -- not visible here, confirm they exist on
    every agent class used below.  ``logdir`` is not used.
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)
    batch_size = 32

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)

        if type(agent.current_agent) == RandomAgent:
            agent_type = PigChaseEnvironment.AGENT_TYPE_1
        else:
            agent_type = PigChaseEnvironment.AGENT_TYPE_2

        ## The state has to be modified here so that it matches what the
        ## neural network needs.
        state = env.reset(agent_type)
        reward = 0
        agent_done = False
        num_actions = 0

        while True:
            # take a step
            # reset if needed
            if env.done:
                print(agent.check_memory(batch_size))
                if type(agent.current_agent) == RandomAgent:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_1
                else:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_2

                ## The state would have to be modified again here.
                # Replay only once more than batch_size actions were taken.
                if num_actions > batch_size:
                    print('Entrando a replay 1')
                    agent.replay(batch_size)
                state = env.reset(agent_type)

            # select an action
            #print('Accion del role 1')
            action = agent.act(state, reward, agent_done, is_training=True)
            next_state, reward, agent_done = env.do(action)
            num_actions = num_actions + 1
            next_state2 = adapt_state(next_state)
            # NOTE(review): ``state`` is stored as returned by the
            # environment while the successor is stored after
            # adapt_state() -- confirm this asymmetry is intended.
            agent.remember(state, action, reward, next_state2, agent_done)
            ## Here state = obs (which would be the previous, modified
            ## state).
            state = next_state
            ## Not sure whether this belongs here because of the
            ## ``while True`` (I do not know when it ends).  It should run
            ## when a game ends.
            ## Check whether the replay actually happens.  If it never
            ## does, move the replay inside the ``if env.done`` (which
            ## means one episode has ended and another begins, so that
            ## should be fine).
    else:
        if baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        state = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:
                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                ## Not sure whether this also has to be done here or not;
                ## needs checking.
                if agent.check_memory(batch_size) > batch_size:
                    print('Entrando a replay 2')
                    agent.replay(batch_size)
                state = env.reset()

            # select an action
            #print('Accion del role 2')
            action = agent.act(state, reward, agent_done, is_training=True)
            # take a step
            next_state, reward, agent_done = env.do(action)
            next_state2 = adapt_state(next_state)
            agent.remember(state, action, reward, next_state2, agent_done)
            ## Here state = obs (which would be the previous, modified
            ## state).
            state = next_state
            #obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)
def agent_factory(name, role, baseline_agent, clients, max_epochs, logdir, visualizer):
    """Create one pig-chase player and run its interaction loop.

    Role 0 runs a ``PigChaseChallengeAgent`` forever.  Any other role runs
    a baseline agent (``TabularQLearnerAgent`` for ``'tabq'``,
    ``FocusedAgent`` for ``'astar'``, otherwise ``RandomAgent``) for
    ``EPOCH_SIZE * max_epochs`` steps and, whenever an episode ends, logs
    its rewards and how it ended before resetting.

    :param name: agent name handed to the agent constructor.
    :param role: player index; 0 selects the challenge agent.
    :param baseline_agent: baseline selector, only read when ``role`` is
        not 0.
    :param clients: raw client list; needs at least two entries and is
        converted with ``parse_clients_args``.
    :param max_epochs: number of epochs of ``EPOCH_SIZE`` steps to run.
    :param logdir: not used by this function.
    :param visualizer: sink for training summaries and end-condition
        entries.
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                              role=role, randomize_positions=True)

    if role != 0:
        if baseline_agent == 'tabq':
            player = TabularQLearnerAgent(name, visualizer)
        elif baseline_agent == 'astar':
            player = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            player = RandomAgent(name, env.available_actions)

        tag = "Episode End Conditions"
        # (entry-name template, value of env.end_result that counts for it)
        end_conditions = (
            ('%s/timeouts per episode', "command_quota_reached"),
            ('%s/agent_1 defaults per episode', "Agent_1_defaulted"),
            ('%s/agent_2 defaults per episode', "Agent_2_defaulted"),
            ('%s/pig caught per episode', "caught_the_pig"),
        )

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        total_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, total_steps + 1):
            if env.done:
                # Report and reset; the whole report is repeated for every
                # reset that comes back with a falsy observation.
                awaiting_obs = True
                while awaiting_obs:
                    if not viz_rewards:
                        viz_rewards.append(0)
                    visualize_training(visualizer, step, viz_rewards)
                    for template, outcome in end_conditions:
                        visualizer.add_entry(step, template % tag,
                                             env.end_result == outcome)
                    player.inject_summaries(step)
                    viz_rewards = []
                    obs = env.reset()
                    awaiting_obs = not obs

            chosen = player.act(obs, reward, agent_done, is_training=True)
            obs, reward, agent_done = env.do(chosen)
            viz_rewards.append(reward)
        return

    # Role 0: the challenge agent plays without a step limit.
    player = PigChaseChallengeAgent(name)
    obs = env.reset(get_agent_type(player))
    reward = 0
    agent_done = False

    while True:
        if env.done:
            # Reset at least once, then again until an observation arrives.
            obs = env.reset(get_agent_type(player))
            while not obs:
                obs = env.reset(get_agent_type(player))

        chosen = player.act(obs, reward, agent_done, is_training=True)

        # Second check, after act(): a single reset without retry.
        if env.done:
            obs = env.reset(get_agent_type(player))

        obs, reward, agent_done = env.do(chosen)