def main():
    """Play one rendered Free-For-All game whose fourth seat is an HttpAgent."""
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # The environment takes exactly four agents; the last one is an
    # HttpAgent pointed at localhost:10080.
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.HttpAgent(port=10080, host="localhost"),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)

    # Gym-style loop: reset, then act/step until the env reports done.
    for episode_idx in range(1):
        observations = env.reset()
        while True:
            env.render()
            moves = env.act(observations)
            observations, _, finished, _ = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def __init__(self, name, trainer, model_path, global_episodes, global_steps):
    """Set up one worker: counters, summary writer, local network and game.

    Args:
        name: Worker identifier; the worker's name becomes ``worker_<name>``.
        trainer: Handed to the local ``AC_Network``.
        model_path: Base directory; summaries are written to
            ``<model_path>/train_<name>``.
        global_episodes: Shared counter object providing ``assign_add``.
        global_steps: Shared counter object providing ``assign_add``.
    """
    self.number = name
    self.name = "worker_" + str(name)
    self.trainer = trainer
    self.model_path = model_path

    # Shared counters plus ready-made "+1" ops for each of them.
    self.global_episodes = global_episodes
    self.increment_global_episodes = global_episodes.assign_add(1)
    self.global_steps = global_steps
    self.increment_global_steps = global_steps.assign_add(1)

    # Per-episode statistics.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []

    summary_dir = model_path + "/train_" + str(self.number)
    self.summary_writer = tf.summary.FileWriter(summary_dir)

    # Local copy of the network, and the op that copies the global
    # parameters into it.
    self.local_AC = AC_Network(self.name, trainer)
    self.update_local_ops = update_target_graph('global', self.name)

    print('Initializing environment #{}...'.format(self.number))
    # Seat 0 is the StaticAgent kept as ``self.agent``; the other three
    # seats are SimpleAgents.
    self.agent = custom_agents.StaticAgent()
    opponents = [agents.SimpleAgent() for _ in range(3)]
    self.env = pommerman.make('PommeFFACompetition-v0',
                              [self.agent] + opponents)
def main():
    """Roll out episodes via ``save_episodes`` and dump them as numpy arrays.

    Observations, actions and rewards of all episodes are concatenated and
    written to ``train_data_obs``, ``train_data_labels`` and
    ``train_data_reward`` respectively.
    """
    # Instantiate the environment; ``ddpg_agent`` takes the fourth seat.
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
        ddpg_agent,
    ]
    raw_env = pommerman.make(args.env_name, players)
    raw_env.seed(RANDOM_SEED)  # Random seed
    agent_num = 0
    env = EnvWrapper(raw_env, num_agent=agent_num)

    # Generate training data
    stimulator = save_episodes(env)
    stimulator.stimulate()

    # Snapshot the episodes once, then pull out one field at a time.
    episodes = list(stimulator.episodes)
    observations = [episode.observations for episode in episodes]
    actions = [episode.actions for episode in episodes]
    rewards = [episode.reward for episode in episodes]

    # Merge everything before anything is written to disk.
    merged = [np.concatenate(chunks)
              for chunks in (observations, actions, rewards)]
    targets = (train_data_obs, train_data_labels, train_data_reward)
    for target, array in zip(targets, merged):
        np.save(target, array)
def main():
    '''Play one Free-For-All game with a TFPPOAgent in the fourth seat.

    The chosen actions are printed after every step; rendering is off.
    '''
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four agents; the PPO agent loads its model from a fixed path.
    ppo_model_path = "/home/pangliang/nips/playground_pl/scripts/ppo_model/model"
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.TFPPOAgent(ppo_model_path),
    ]

    env = pommerman.make('PommeFFACompetition-v0', players)
    # The PPO agent is initialised against the environment it plays in.
    players[-1].initialize(env)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        # NOTE(review): the per-step placement of print(actions) is
        # reconstructed from a whitespace-mangled source - confirm.
        while not finished:
            actions = env.act(obs)
            obs, _, finished, _ = env.step(actions)
            print(actions)
        print('Episode {} finished'.format(episode_idx))
    env.close()
def run_game(self, env_name):
    """Play one unrendered episode of ``env_name`` with scripted agents.

    Four agents are created; environments whose name contains ``oneVsOne``
    only receive the first two.
    """
    four_agents = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
    ]
    # Limit the agents for one vs one
    roster = four_agents[:2] if 'oneVsOne' in env_name else four_agents

    env = pommerman.make(env_name, roster)

    for episode_idx in range(1):
        obs = env.reset()
        while True:
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main():
    """Smoke-test a team game: play at most 30 frames with two AgentTT seats.

    The episode is capped at 30 frames and also stops as soon as the
    environment reports ``done``, so a finished game is never stepped again.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        AgentTT(),
        agents.SimpleAgent(),
        AgentTT(),
    ]

    # Make the team-competition environment using the agent list
    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        for _ in range(30):
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            if done:
                # Stepping past the end of an episode is outside the
                # Gym contract; stop here instead.
                break
        print('Episode {} finished'.format(i_episode))
    env.close()
def main():
    '''Play one rendered 'PommeRadio-v2' game and print the agents still alive.

    SimpleAgents sit in seats 0 and 2, MyAgentRadio instances in seats 1 and 3.
    '''
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    roster = [
        agents.SimpleAgent(),
        agents.MyAgentRadio(),
        agents.SimpleAgent(),
        agents.MyAgentRadio(),
    ]

    # Make the "Radio" environment using the agent list
    env = pommerman.make('PommeRadio-v2', roster)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        while not finished:
            env.render()
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
        print('Episode {} finished'.format(episode_idx))

        # Report which agent objects the environment still marks as alive.
        survivors = list(filter(lambda player: player.is_alive, env._agents))
        print(survivors)
    env.close()
def main():
    """Play three rendered Free-For-All games: arrow-key player vs. three bots.

    To play two humans against two bots instead, use
    ``PlayerAgent(agent_control="arrows")`` (arrows = move, space = bomb) and
    ``PlayerAgent(agent_control="wasd")`` (W,A,S,D = move, E = bomb) in two
    of the seats.
    """
    print(pommerman.REGISTRY)

    # Exactly four agents: the human first, then three SimpleAgents.
    human = agents.PlayerAgent(agent_control="arrows")
    roster = [human] + [agents.SimpleAgent() for _ in range(3)]

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', roster)

    for episode_idx in range(3):
        obs = env.reset()
        while True:
            env.render()
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main():
    '''Play one rendered Free-For-All game, grabbing the viewer image first.

    Before the first step the board is rendered once and the viewer's image
    data is fetched into ``img``. The image is not used afterwards; the
    conversion to a numpy RGB array was left disabled in the original script.
    '''
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four agents, alternating simple and random.
    roster = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', roster)

    for episode_idx in range(1):
        obs = env.reset()

        # The viewer is only queried after an initial render call.
        env.render()
        viewer = env._viewer
        img = viewer.get_buffer().get_texture().get_image_data()

        finished = False
        while not finished:
            env.render()
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main():
    """Play one rendered Free-For-All game with a Docker-hosted fourth agent."""
    # Show the Pommerman registry.
    # NOTE(review): this reads the lowercase ``registry`` attribute; confirm
    # that the installed Pommerman version exposes it under that name.
    print(pommerman.registry)

    # Exactly four agents; the last one runs the "pommerman/simple-agent"
    # image and is reached on port 12345.
    docker_player = agents.DockerAgent("pommerman/simple-agent", port=12345)
    roster = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
    ]
    roster.append(docker_player)

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', roster)

    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main(config, render=False):
    """Evaluate a restored TensorForce PPO agent over 100 episodes.

    Args:
        config: Environment id, used for both ``gym.make`` and
            ``pommerman.make``.
        render: When true, render every step.

    After each episode the final reward vector and the running mean over
    all episodes so far are printed; the overall mean is printed at the end.
    """
    # A plain gym environment is used to initialise the agent and is also
    # handed to the trained-agent wrapper.
    bootstrap_env = gym.make(config)
    ppo_agent = agents.TensorForceAgent(algorithm="ppo")
    ppo_agent = ppo_agent.initialize(bootstrap_env)

    # List of four agents: three SimpleAgents and the restored agent.
    roster = [agents.SimpleAgent() for _ in range(3)]
    roster.append(
        TrainedTensorForceAgent(restore_agent(ppo_agent), bootstrap_env))

    # Environment
    env = pommerman.make(config, roster)

    # Run
    rewards = []
    for episode in range(100):
        obs = env.reset()
        finished = False
        while not finished:
            if render:
                env.render()
            moves = env.act(obs)
            obs, reward, finished, info = env.step(moves)
        # NOTE(review): recording one reward vector per episode (rather than
        # per step) is reconstructed from a flattened source - confirm.
        rewards.append(reward)
        print('Episode {} finished'.format(episode), reward,
              np.mean(rewards, axis=0))
    print(np.mean(rewards, axis=0))
    env.close()
def main():
    '''Play one rendered Free-For-All game: three bots vs. an arrow-key player.

    The human-controlled PlayerAgent takes the fourth seat.
    '''
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four agents: three SimpleAgents, then the human.
    roster = [agents.SimpleAgent() for _ in range(3)]
    roster.append(agents.PlayerAgent(agent_control="arrows"))

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', roster)

    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            moves = env.act(obs)
            obs, _, finished, _ = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main():
    '''Replay a saved TensorForce model in seat 0 of one rendered FFA game.

    Command-line options:
        --modelname: passed to ``restore_model`` as ``filename``
            (default: None).
        --directory: passed to ``restore_model`` as ``directory``
            (default: './saved_models/').

    Agent 0's reward is printed when the episode is over.
    '''
    cli = argparse.ArgumentParser()
    cli.add_argument("--modelname", default=None,
                     help="model name of model_name.json")
    cli.add_argument("--directory", default='./saved_models/',
                     help="file path of models folder")

    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)
    args = cli.parse_args()

    test_agent = agents.TensorForceAgent()
    test_agent.set_agent_id(0)

    # Exactly four agents: the agent under test, then three SimpleAgents.
    roster = [test_agent]
    roster.extend(agents.SimpleAgent() for _ in range(3))

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', roster)

    # The model can only be restored once the agent knows its environment.
    test_agent.initialize(env)
    test_agent.restore_model(directory=args.directory,
                             filename=args.modelname)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        while not finished:
            env.render()
            moves = env.act(obs)
            obs, reward, finished, info = env.step(moves)
        # TODO Change indices of arrays to select player info.
        # NOTE(review): printing once per episode (not per step) is
        # reconstructed from a whitespace-mangled source - confirm.
        print(reward[0])
        print('Episode {} finished'.format(episode_idx))
    env.close()
def battle(process_number):
    """Play one team match and report start and result to the Java side.

    The match pits ``MyAgentO`` (seats 0 and 2) against ``MyAgentT``
    (seats 1 and 3). The Java entry point is told when the game starts and
    receives the four final rewards when it ends.

    Args:
        process_number: Caller-supplied number, used only in log output.
    """
    # Remember the process id; it identifies this game towards Java.
    pid = os.getpid()
    print("battle start, process_number={}, pid={}".format(
        process_number, pid))

    # Connect to Java.
    gateway = JavaGateway()
    addition_app = gateway.entry_point

    # Announce the start of the game.
    addition_app.start_game(pid)

    # List of four agents. (Earlier rosters that were unreachable or
    # immediately overwritten have been removed.)
    agent_list = [
        MyAgentO(),
        MyAgentT(),
        MyAgentO(),
        MyAgentT(),
    ]

    env = pommerman.make('PommeTeamCompetition-v0', agent_list)
    try:
        state = env.reset()

        step = 0
        done = False
        while not done:
            step += 1
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            print("pid={} step={} actions={}".format(pid, step, actions))

        # Print the result.
        print("battle finished, process_number={}, pid={}, reward={}".format(
            process_number, pid, reward))

        # Announce the end of the game.
        addition_app.finish_game(pid, reward[0], reward[1], reward[2],
                                 reward[3])
    finally:
        # Close handles that are no longer needed, even if the match failed.
        env.close()
def main():
    """Evaluate a trained ``TestMAC`` controller in a rendered radio team game.

    Four SimpleAgents are seated, but the actions of seats 0 and 2 are
    overridden on every step with the actions chosen by the controller.
    The controller is fed its own previous action as a one-hot vector.
    400 episodes are played; the environment is closed afterwards, also
    when an episode fails.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeRadioCompetition-v2', agent_list)

    env_info = {"board_state_shape": get_board_state_size(),
                "flat_state_shape": get_flat_state_size(),
                "board_obs_shape": get_board_obs_size(),
                "flat_obs_shape": get_flat_obs_size(),
                "n_actions": 6,
                "n_agents": 2,
                "episode_limit": 800}
    # Single source of truth for the size of the one-hot action vectors.
    n_actions = env_info["n_actions"]

    scheme = {
        "board_state": {"vshape": env_info["board_state_shape"]},
        "flat_state": {"vshape": env_info["flat_state_shape"]},
        "board_obs": {"vshape": env_info["board_obs_shape"],
                      "group": "agents"},
        "flat_obs": {"vshape": env_info["flat_obs_shape"],
                     "group": "agents"},
        "actions": {"vshape": (1,), "group": "agents", "dtype": th.long},
        "avail_actions": {"vshape": (n_actions,), "group": "agents",
                          "dtype": th.int},
        "reward": {"vshape": (1,)},
        "terminated": {"vshape": (1,), "dtype": th.uint8},
    }

    rnn_hidden_dim = 256

    # TODO: change
    # mac = TestSeeIdMAC(scheme=scheme, agent_output_type="pi_logits", rnn_hidden_dim=rnn_hidden_dim,
    #                    model_load_path='/home/hiogdong/pymarl_pmm/results/models/coma_pmm__2019-11-01_11-39-31/5016/agent.th')
    mac = TestMAC(scheme=scheme, agent_output_type="pi_logits",
                  rnn_hidden_dim=rnn_hidden_dim,
                  model_load_path='/home/hiogdong/pymarl_pmm/results/models/coma_pmm__2019-10-31_20-10-35/199/agent.th')

    # Seats whose actions are replaced by the controller's choice.
    test_idx_list = [0, 2]

    n_episode = 400
    try:
        for i_episode in range(n_episode):
            obs = env.reset()
            # Start every episode with all-zero "previous action" vectors
            # and a fresh hidden state.
            mac.last_action = [th.zeros(n_actions), th.zeros(n_actions)]
            mac.init_hidden(1, rnn_hidden_dim)
            done = False
            while not done:
                actions = env.act(obs)
                for idx, agent_idx in enumerate(test_idx_list):
                    action_agent = mac.select_actions(
                        obs[agent_idx], idx).item()
                    # Remember the chosen action as a one-hot vector.
                    temp = th.zeros(n_actions)
                    temp[action_agent] = 1
                    mac.last_action[idx] = temp
                    actions[agent_idx] = action_agent

                obs, reward, done, info = env.step(actions)
                env.render()
    finally:
        env.close()
def _thunk():
    """Create and return an ``env_id`` environment seated with four SimpleAgents."""
    players = [agents.SimpleAgent() for _ in range(4)]
    return pommerman.make(env_id, players)
def main():
    '''Collect SimpleAgent play data and fit one generative model per action.

    300 unrendered team games are played. For every step, each agent's
    observation is stored together with a one-hot vector of the action it
    chose. The labeling functions from ``get_lf`` are then evaluated on every
    stored observation, and one ``GenerativeModel`` per action is trained on
    the resulting label matrix and saved as ``snorkel_model<i>``.
    '''
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    # Make the team-competition environment using the agent list
    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    n_actions = 6
    d = []
    # Run the episodes just like OpenAI Gym
    for i_episode in range(300):
        state = env.reset()
        done = False
        while not done:
            cur_obs = env.get_observations()
            actions = env.act(state)
            for ob, act in zip(cur_obs, actions):
                val = np.zeros(n_actions)
                val[act] = 1
                d.append([ob, val])
            state, reward, done, info = env.step(actions)
        print('Episode {} finished'.format(i_episode))
    env.close()

    lf = get_lf()
    rows = len(d)
    # L[action, sample, labeling function]
    L = np.zeros([n_actions, rows, len(lf)])
    for r, (ob, _) in enumerate(d):
        for i, f in enumerate(lf):
            L[:, r, i] = f(ob)

    gms = [GenerativeModel() for _ in range(n_actions)]

    # TODO: add ground labels to training
    filename = 'snorkel_model'
    for i, gm in enumerate(gms):
        # L[i] is already 2-D (samples x labeling functions). Squeezing it
        # would drop an axis whenever there is a single sample or a single
        # labeling function, so it is passed on as is.
        temp_l = L[i].astype(int)
        gm.train(temp_l)
        gm.save(filename + str(i))
def main():
    """Log agent 0's observations and actions from SimpleAgent FFA games.

    Up to 5000 episodes are played. While agent 0 is alive, its processed
    and raw observations (from ``observe``) and its action are recorded on
    every step. An episode whose agent 0 is still alive after step index
    100 is appended to the three CSV log files (one row per file) and then
    abandoned; an episode that ends, or in which agent 0 dies, earlier is
    discarded. The episode number is printed in both cases.
    """
    config = "PommeFFACompetition-v0"
    game_state_file = None
    myAgents = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = make(config, myAgents, game_state_file)

    logFile_states_raw = 'simpleAgentStates_raw.txt'
    logFile_states_obs = 'simpleAgentStates_obs.txt'
    logFile_actions = 'simpleAgentActions_sequence_rawObs.txt'

    # Only one agent is logged.
    logged_agent = 0
    # Value looked for in the first observation field to decide whether
    # agent 0 is still alive.
    # NOTE(review): presumably 10 is agent 0's id and the first key of the
    # observation dict is its alive-list - confirm against the environment.
    alive_marker = 10

    for i_episode in range(5000):
        state = env.reset()
        k = list(state[0].keys())
        raw_states = []
        obs_states = []
        SA_actions = []
        # Sliding window over the agent's six most recent actions.
        action_history = np.zeros(6)

        for t in range(10000):  # Don't infinite loop while learning
            agent_actions = env.act(state)

            # Build the log entry from the current observation, but only
            # while the logged agent is alive.
            if alive_marker in state[logged_agent][k[0]]:
                obs, raw = observe(state[logged_agent], action_history)
                raw_states.append(raw.tolist())
                obs_states.append(obs.tolist())
                SA_actions.append(agent_actions[logged_agent])
                action_history[:-1] = action_history[1:]
                action_history[-1] = agent_actions[logged_agent]

            state, reward, done, _ = env.step(agent_actions)

            still_alive = alive_marker in state[0][k[0]]
            if t == 100 and still_alive:
                _append_csv_row(logFile_states_obs, obs_states)
                _append_csv_row(logFile_states_raw, raw_states)
                _append_csv_row(logFile_actions, SA_actions)
                print(i_episode)
                break

            if done or not still_alive:
                print(i_episode)
                break

    env.close()


def _append_csv_row(path, row):
    """Append ``row`` as one record (excel dialect) to the CSV file ``path``."""
    # newline='' is what the csv module expects for files it writes to;
    # without it, platforms that translate line endings get blank rows.
    with open(path, 'a', newline='') as fp:
        csv.writer(fp, dialect='excel').writerow(row)
def main():
    '''Play rendered FFA games with an AlphaBetaAgent and log who won.

    With ``with_write`` enabled, 100 games are played and one line per game
    is written to a timestamped result file: ``agent <n> won`` when exactly
    one agent is alive at the end, ``tie`` otherwise. Both the file and the
    environment are closed even if a game fails.
    '''
    with_write = True
    num_rounds = 100 if with_write else 1

    log_file = None
    if with_write:
        log_file = open(
            "test_vs_two_simple_depth_str=4_" + str(2) + "__100___" +
            str(time.time()), 'w')

    try:
        # Print all possible environments in the Pommerman registry
        print(pommerman.REGISTRY)

        a = agents.AlphaBetaAgent(evaluation_function=agents.tomer_eval)
        # Create a set of agents (exactly four)
        agent_list = [
            a,
            agents.SimpleAgent(),
            agents.SimpleAgent(),
            agents.SimpleAgent(),
        ]

        # Make the "Free-For-All" environment using the agent list
        env = pommerman.make('PommeFFACompetition-v0', agent_list)
        try:
            agent_list[0].initialize(env)

            # Run the episodes just like OpenAI Gym
            for i_episode in range(num_rounds):
                state = env.reset()
                done = False
                while not done:
                    env.render()
                    actions = env.act(state)
                    state, reward, done, info = env.step(actions)
                if log_file is not None:
                    log_file.write(_describe_outcome(env._agents))
                print('Episode {} finished'.format(i_episode))
        finally:
            env.close()
    finally:
        if log_file is not None:
            log_file.close()


def _describe_outcome(players):
    """Return the result line for a finished game.

    Exactly one living agent is the winner; any other number of survivors
    (several, or none) is reported as a tie.
    """
    alive = [idx for idx, player in enumerate(players) if player.is_alive]
    if len(alive) == 1:
        return "agent {} won\n\n".format(alive[0])
    return "tie\n\n"
def _thunk():
    """Create and return a 'PommeRadioCompetition-v2' env with four SimpleAgents."""
    players = [agents.SimpleAgent() for _ in range(4)]
    return pommerman.make("PommeRadioCompetition-v2", players)
def test(gnet):
    """Play 100 FFA games with an ``A3CAgent`` and plot the win distribution.

    The agent built from ``gnet`` sits in seat 0 against three SimpleAgents,
    with training switched off. Every 20th game is rendered. After all games
    a bar chart shows the share of ties and of wins per seat, and the
    agent's win rate is printed.

    Args:
        gnet: Network handed to ``A3CAgent``.
    """
    John = A3CAgent(gnet)
    John.set_train(False)
    agentList = [
        John,
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', agentList)

    # Winner seat per game, or -1 when the game was not won by anyone.
    wins = []
    for ii in range(100):
        John.reset_lstm()
        state = env.reset()
        done = False
        while not done:
            if ii % 20 == 0:
                env.render()
            with torch.no_grad():
                actions = env.act(state)
            # Feed the new observation back into the loop; otherwise every
            # agent keeps acting on the observation from reset().
            state, reward, done, info = env.step(actions)

        won = info['result'].name == 'Win'
        winner = info['winners'][0] if won else -1
        print(ii, "DONE. Info:", info, "reward:", reward, "You win = ",
              winner == 0)
        wins.append(winner)
    env.close()

    winrate = wins.count(0) / len(wins)

    fig, ax = plt.subplots(num=1, clear=True)
    outcomes = [-1, 0, 1, 2, 3]
    shares = [wins.count(outcome) / len(wins) * 100 for outcome in outcomes]
    bars = plt.bar(outcomes, shares)
    for bar, colour in zip(bars, ['b', 'r', 'g', 'b', 'c']):
        bar.set_facecolor(colour)

    ax.set_xticks(outcomes)
    ax.set_xticklabels([
        'Ties', 'Agent\n(A2C)', 'Agent 1\nSimpleAgent',
        'Agent 2\nSimpleAgent', 'Agent 3\nSimpleAgent'
    ])
    ax.set_ylim([0, 100])
    ax.set_ylabel('Percent')
    ax.set_title('Bomberman. FFA mode.')
    print("Winrate: ", winrate)
    plt.show()
def main():
    """Run ``EPISODE`` rendered FFA games with ``MyAgent`` in the training seat.

    The training seat is held by a placeholder ``BaseAgent``; its action is
    supplied by ``MyAgent`` on every step. Games in which the training agent
    ends with reward -1 count as losses. At the end the loss rate is printed
    and the agent's Q-table is written to ``QTable.csv``.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    train_agent_number = 0
    agent_list.insert(train_agent_number, agents.BaseAgent())

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    # Run the episodes just like OpenAI Gym
    lose_cnt = 0
    for i_episode in range(EPISODE):
        state = env.reset()
        done = False
        step_count = 0
        while not done:
            step_count += 1
            # fresh env
            env.render()
            # env.act only returns actions for the non-training agents
            actions = env.act(state)
            # RL make decision based on present state
            agent_action = my_agent.act(state, ACTIONS, env)
            actions.insert(train_agent_number, agent_action)
            # Advance to the next state; without this every decision is
            # made on the observation from reset().
            state, reward, done, info = env.step(actions)

            agent_reward = reward[train_agent_number]
            if done and agent_reward == -1:
                lose_cnt += 1
    env.close()
    print("lose rate: ", lose_cnt / float(EPISODE))
    my_agent.q_table.to_csv('QTable.csv')
def main():
    '''Train a random forest on snorkel-labelled SimpleAgent observations.

    1500 unrendered Free-For-All games are played. For every agent
    observation the extracted state is stored, and the loaded generative
    models turn the labeling-function outputs into one score per action.
    Each score vector is converted to a 0/1 vector marking its maxima, a
    50-tree ``RandomForestClassifier`` is fitted on states and labels, and
    the model is pickled to ``rf.pickle``.
    '''
    filename = "rf.pickle"

    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four SimpleAgents.
    players = [agents.SimpleAgent() for _ in range(4)]
    env = pommerman.make('PommeFFACompetition-v0', players)

    gms = load_snorkel()

    train_states, train_labels = [], []
    for episode_idx in range(1500):
        state = env.reset()
        finished = False
        while not finished:
            cur_obs = env.get_observations()
            actions = env.act(state)
            for ob in cur_obs:
                train_states.append(merge(extract_state(ob)))

                # One row per labeling function; column i belongs to
                # generative model i.
                lf_outputs = np.array([f(ob) for f in get_lf()])
                probs = np.zeros(6)
                for action_idx, model in enumerate(gms):
                    column = sparse.csr_matrix(lf_outputs[:, action_idx])
                    probs[action_idx] = model.marginals(column)
                train_labels.append(probs)
            state, _, finished, _ = env.step(actions)
        print('Episode {} finished'.format(episode_idx))
    env.close()

    # Mark the maximal entries of every score vector with 1, the rest 0.
    train_labels = np.array(
        [(prob == prob.max()).astype(int) for prob in train_labels])

    rf = RandomForestClassifier(n_estimators=50)
    rf.fit(train_states, train_labels)
    with open(filename, 'wb') as handle:
        pickle.dump(rf, handle, protocol=pickle.HIGHEST_PROTOCOL)
def run(match_num, iteration_limit, mcts_process_num, result_list=None,
        process_id=None, render=False):
    """
    Run matches between one MCTS agent and three simple agents.

    :param match_num: The number of matches
    :param iteration_limit: The maximal iteration of MCTS
    :param mcts_process_num: The number of processes used in MCTS; a value
        of 1 is handed to the agent as ``None``
    :param result_list: A list to record results; a new list is created
        when it is omitted
    :param process_id: The process ID given when you do multiprocessing;
        only used to prefix the printed summary
    :param render: Determine whether to render game
    :return: ``result_list`` with one ``(dead_agents, survivors)`` tuple
        appended per match
    """
    # Always hand a list back, even when no match is played.
    if result_list is None:
        result_list = []

    if mcts_process_num == 1:
        mcts_process_num = None

    agent_list = [
        MCTSAgent([agents.SimpleAgent for _ in range(3)],
                  iteration_limit,
                  process_count=mcts_process_num),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    for i_episode in range(match_num):
        state = env.reset()
        done = False
        initial_agents = state[0]['alive']
        survivors = initial_agents
        # Agents in the order in which they dropped out.
        dead_agents = []
        while not done:
            if render:
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)

            survivors = state[0]['alive']
            for agent in initial_agents:
                if agent not in survivors and agent not in dead_agents:
                    dead_agents.append(agent)

        if process_id is not None:
            print('[Process %d, Episode %d] Dead order: ' %
                  (process_id, i_episode), str(dead_agents),
                  'Survivors:', survivors)
        else:
            print('[Episode %d] Dead order: ' % i_episode, str(dead_agents),
                  'Survivors:', survivors)

        result_list.append((dead_agents, survivors))

    env.close()
    return result_list
def main():
    """Run ``EPISODE`` rendered FFA games with ``MyAgent`` and print its win rate.

    The training seat holds a placeholder ``BaseAgent`` whose action is
    supplied by ``MyAgent`` each step. An episode is cut off before its step
    counter reaches 500. A finished game prints "lose" when the training
    agent's reward is -1 and "win" otherwise; only losses are counted.
    """
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Three scripted opponents plus the placeholder in the training seat.
    agent_list = [agents.SimpleAgent() for _ in range(3)]
    train_agent_number = 0
    agent_list.insert(train_agent_number, agents.BaseAgent())

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    lose_cnt = 0
    for i_episode in range(EPISODE):
        state = env.reset()
        # The step counter runs from 2 up to, but not including, 500.
        for step_cnt in range(2, 500):
            env.render()
            # env.act covers the scripted agents; the training agent's
            # action is inserted at its seat.
            actions = env.act(state)
            agent_action = my_agent.act(state[train_agent_number], ACTIONS)
            actions.insert(train_agent_number, agent_action)

            state, reward, done, info = env.step(actions)
            agent_reward = reward[train_agent_number]
            if not done:
                continue
            if agent_reward == -1:
                lose_cnt += 1
                print("lose")
            else:
                print("win")
            break
    env.close()
    print("win rate: ", 1 - lose_cnt / float(EPISODE))
def main():
    """Train the DDPG agent's transformer, then play games and save rewards.

    After ``train_transformer`` has run, ``args.num_steps`` games are played.
    Agent 0's reward is accumulated per game and the totals are written to
    ``<args.outdir>/result_2simple_2random.csv``.

    Two defects of the earlier version are fixed: a leftover debugging
    statement that divided by zero right after training (so no game was ever
    played), and a reward buffer with a single slot that was indexed by game
    number.
    """
    tf.reset_default_graph()
    sess = tf.Session()

    # Create a set of agents (exactly four)
    ddpg_agent = DdpgAgent(id=3, sess=sess)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
        ddpg_agent,
    ]
    env = pommerman.make(args.env_name, agent_list)
    env.seed(RANDOM_SEED)

    print('HERE0', sess)
    ddpg_agent.train_transformer(sess, env)
    print('her2')

    # One accumulator slot per game.
    r_sum = np.zeros(args.num_steps)

    for i in range(args.num_steps):
        env.reset()
        # Run the episodes just like OpenAI Gym
        for i_episode in range(args.max_episode_length):
            state = env.reset()
            done = False
            while not done:
                actions = env.act(state)
                state, reward, done, info = env.step(actions)
                # NOTE(review): per-step accumulation is reconstructed
                # from a whitespace-mangled source - confirm.
                r_sum[i] += reward[0]
            if i_episode > 300:
                break
        print('Game {} finished'.format(i))

    np.savetxt(args.outdir + '/result_2simple_2random.csv', r_sum,
               fmt='%1.4e')
    env.close()
def generate_data(EPISODES, save_file_nm, shuffle_agents=False):
    """Play FFA games with an ``RNN_Agent`` and store its experience.

    For every episode the agent's processed observations (including the
    final one after the last step) and its actions are added to the dataset,
    which is saved to ``save_file_nm`` at the end. An existing file is
    loaded first so that new episodes are added to it.

    Args:
        EPISODES: Number of episodes to play.
        save_file_nm: Path of the dataset file.
        shuffle_agents: When true, the seating order is shuffled once
            before the environment is created.
    """
    rnn_agent = RNN_Agent()

    # Init dataset
    dset = dataset(rnn_agent.RNN_SEQUENCE_LENGTH, save_file_nm,
                   rnn_agent.utils)
    if os.path.exists(save_file_nm):
        dset.load()

    agent_list = [
        rnn_agent,
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
    ]
    if shuffle_agents:
        shuffle(agent_list)
    # Look the seat up only after the optional shuffle; an index taken
    # before it would point at whichever agent ended up in seat 0.
    rnn_agent_index = agent_list.index(rnn_agent)

    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    for an_episode in range(EPISODES):
        state = env.reset()
        done = False
        episode_obs = []
        episode_acts = []
        while not done:
            actions = env.act(state)
            episode_acts.append(actions[rnn_agent_index])
            episode_obs.append(rnn_agent.utils.input(state[rnn_agent_index]))
            state, reward, done, info = env.step(actions)

        # Final timestep observation
        episode_obs.append(rnn_agent.utils.input(state[rnn_agent_index]))
        dset.add_episode(episode_obs, episode_acts)

    env.close()
    dset.save()
    rnn_agent.sess.close()
    tf.reset_default_graph()
def main():
    '''Play ``NUM_GAMES`` rendered FFA games and tally ties and wins per seat.

    An alpha-beta agent (balanced evaluation) plays against a
    ``SmartRandomAgent`` and two SimpleAgents. ``wins[0]`` counts ties and
    ``wins[k + 1]`` counts the wins of the agent in seat ``k``.
    '''
    print(f"num games: {NUM_GAMES}, 1v1 free-for-all game, DEPTH={DEPTH}")

    alpha_balanced = agents.AlphaBetaAgent(
        evaluation_function=agents.balanced_eval, depth=DEPTH)
    alpha_attacker = agents.AlphaBetaAgent(
        evaluation_function=agents.attacker_eval, depth=DEPTH)
    alpha_coward = agents.AlphaBetaAgent(
        evaluation_function=agents.pacifist_eval, depth=DEPTH)

    # Only the balanced variant is seated; the other two are alternatives
    # that can be swapped in.
    agent_list = [
        alpha_balanced,
        SmartRandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    for agent in agent_list:
        agent.initialize(env)

    # Slot 0 is for ties, slots 1..4 for seats 0..3.
    wins = [0] * (len(agent_list) + 1)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(NUM_GAMES):
        state = env.reset()
        done = False
        turns = 0
        while not done:
            turns += 1
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)

        if info['result'] == Result.Tie:
            wins[0] += 1
        else:
            # Shift by one: the winner's seat k is tallied in wins[k + 1],
            # so seat 0's wins are not mistaken for ties.
            wins[info["winners"][0] + 1] += 1
        print(
            f'Episode {i_episode} finished, info: {info}, took {turns} turns')

    print(
        f"ties : {wins[0]}\n player zero {wins[1]}\n player one : {wins[2]}\n player two : {wins[3]}\n "
        f"player three : {wins[4]}\n overall games: {NUM_GAMES}")
    env.close()
def _thunk():
    """Create and return an ``env_id`` environment seated with four SimpleAgents."""
    simple_players = [agents.SimpleAgent() for _ in range(4)]
    created_env = pommerman.make(env_id, simple_players)
    return created_env
def main():
    '''Play one rendered 'PommeTeamCompetition-v1' game with a Docker agent.

    The fourth seat runs the "multiagentlearning/skynet955" image on port
    12345. The final ``info`` dict is printed when the episode is over.
    '''
    # Show every environment known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four agents: one simple, two random, one Docker-hosted.
    docker_player = agents.DockerAgent("multiagentlearning/skynet955",
                                       port=12345)
    roster = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.RandomAgent(),
    ]
    roster.append(docker_player)

    # Make the team environment using the agent list
    env = pommerman.make('PommeTeamCompetition-v1', roster)

    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            moves = env.act(obs)
            obs, _, finished, info = env.step(moves)
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
        print("Final Result: ", info)
    env.close()