def runner(id, num_episodes, fifo, _args):
    # make args accessible to MCTSAgent
    global args
    args = _args

    # make sure TF does not allocate all memory
    init_tensorflow()

    # make sure agents play at all positions
    agent_id = id % NUM_AGENTS
    agent = MCTSAgent(args.model_file, agent_id=agent_id)

    # create environment with three SimpleAgents
    agents = [
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent(),
    ]
    agents.insert(agent_id, agent)
    env = pommerman.make('PommeFFACompetition-v0', agents)

    for i in range(num_episodes):
        # do rollout
        start_time = time.time()
        length, reward, rewards = agent.rollout(env)
        elapsed = time.time() - start_time

        # add data samples to log
        fifo.put((length, reward, rewards, agent_id, elapsed))
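# A minimal sketch (not from the original source) of how runner() could be
# launched in parallel worker processes, with a multiprocessing.Queue serving
# as the fifo; launch_runners and cli_args are assumptions for illustration.
from multiprocessing import Process, Queue

def launch_runners(num_runners, episodes_per_runner, cli_args):
    fifo = Queue()
    workers = [
        Process(target=runner, args=(i, episodes_per_runner, fifo, cli_args))
        for i in range(num_runners)
    ]
    for w in workers:
        w.start()

    # drain one log entry per expected episode
    for _ in range(num_runners * episodes_per_runner):
        length, reward, rewards, agent_id, elapsed = fifo.get()
        print("agent %d: length=%d reward=%d (%.1fs)"
              % (agent_id, length, reward, elapsed))

    for w in workers:
        w.join()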
def __init__(self, mcts_id=None):
    self.agents = [
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent()
    ]
    if mcts_id is not None:
        self.agents[mcts_id] = None
    self.mcts_id = mcts_id
def __init__(self, agent):
    # Test pool and Env
    self.pool = [
        agent,
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent(),
    ]
    # self.pool = [agent, RandomAgent(), RandomAgent(), RandomAgent(), ]
    self.env = pommerman.make('PommeFFACompetition-v0', self.pool)
def __init__(self, agent_id, args):
    self.args = args

    # Initialize the expert and the pool
    self.expert = MCTSAgent(None, agent_id=agent_id)
    self.pool = [
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent(),
    ]
    # self.pool = [RandomAgent(), RandomAgent(), RandomAgent(), ]
    self.pool.insert(agent_id, self.expert)
    self.env = pommerman.make('PommeFFACompetition-v0', self.pool)
def __init__(
        self,
        n_actions,
        character,
        evaluation_model=None,
        evaluation_model_path=None,
        # Set agent properties to preprocess observations
        use_history=True,     # Use previous observations for predictions
        use_2d=True,          # Use 2d convolutions
        patient=True,         # Wait to make initial observations (you don't need it if you don't use history)
        center_view=True,     # Use centering
        original_view=False,  # Use 11x11 board, if false, use 21x21
        verbose=False         # Comment actions
):
    super(EvaluatorAgent, self).__init__(character=character)

    # Properties
    self.use_history = use_history
    self.use_2d = use_2d
    self.patient = patient
    self.center_view = center_view
    self.original_view = original_view
    self.verbose = verbose

    # Acting history for the evaluation
    self.actions_history = []
    self.observations_history = []
    self.episode_count = 0
    self.steps = 0
    self.n_actions = n_actions
    self.simple_agent = SimpleAgent(character=character)

    # Load any custom model
    self.evaluation_model = None
    if evaluation_model:
        self.evaluation_model = evaluation_model
        if evaluation_model_path:
            try:
                self.evaluation_model.load_weights(evaluation_model_path)
            except:
                print('Weights load failed')
    elif evaluation_model_path:
        try:
            self.evaluation_model = load_model(evaluation_model_path)
        except:
            print('Model load failed')
    else:
        print('Use SimpleAgent')
def env_for_players():
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    agents = [
        DQN(config["agent"](0, config["game_type"])),
        SimpleAgent(config["agent"](1, config["game_type"])),
        SimpleAgent(config["agent"](2, config["game_type"])),
        SimpleAgent(config["agent"](3, config["game_type"]))
    ]
    env.set_agents(agents)
    env.set_training_agent(agents[0].agent_id)  # training_agent is only dqn agent
    env.set_init_game_state(None)
    return env
def _thunk():
    env = pommerman.make(
        'PommeFFACompetition-v0',
        [SimpleAgent(), SimpleAgent(), SimpleAgent(), SimpleAgent()])
    env._agents[0].is_alive = False
    env._agents[2].is_alive = False
    env._agents[3].restart = True
    env._agents[1].restart = True
    env._agents[2].restart = False
    env._agents[0].restart = False
    return env
def f():
    config = ffa_competition_env()
    env = Wrapped_Env(**config["env_kwargs"])
    env.observation_space = spaces.Box(0, 20, shape=(11, 11, 18), dtype=np.float32)

    # Add 3 SimpleAgents
    agents = []
    for agent_id in range(3):
        # if agent_id == env.winner_id:
        #     agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
        # else:
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))

    agent_id += 1
    agents.append(
        TrainingAgent(config["agent"](agent_id, config["game_type"])))

    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)
    return env
def main():
    # Instantiate the environment
    DETERMINISTIC = False
    VISUALIZE = False

    if args.test:
        DETERMINISTIC = True
        VISUALIZE = True

    config = ffa_competition_env()
    env = Wrapped_Env(**config["env_kwargs"])
    # env.seed(0)
    env.observation_space = spaces.Box(0, 20, shape=(11, 11, 18))
    env.num_envs = 1

    # Add 3 SimpleAgents
    agents = []
    for agent_id in range(3):
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))
    agent_id += 1

    # Add the training agent
    agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))

    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    # env = VecFrameStack(make_pommerman_env(env, 8, 0), 2)
    # print(env.reset())

    policy = CnnPolicy
    # Model(policy=policy,
    #       ob_space=env.observation_space,
    #       ac_space=env.action_space,
    #       nbatch_act=1,
    #       nbatch_train=100,
    #       nsteps=1000,
    #       ent_coef=0.01,
    #       vf_coef=0.5,
    #       max_grad_norm=0.5)

    num_timesteps = 10000
    learn(policy=policy,
          env=env,
          nsteps=800,
          nminibatches=4,
          lam=0.95,
          gamma=0.99,
          noptepochs=4,
          log_interval=1,
          ent_coef=.01,
          lr=lambda f: f * 2.5e-4,
          cliprange=lambda f: f * 0.1,
          total_timesteps=int(num_timesteps * 1.1))
def run_episode(agent, config, env, agent_id=0):
    # K.clear_session()

    # Add 3 SimpleAgents and the trained agent
    agents = [
        agent if i == agent_id else SimpleAgent(config["agent"](
            i, config["game_type"])) for i in range(4)
    ]
    env.set_agents(agents)
    env.set_init_game_state(None)

    # Seed and reset the environment
    env.seed(0)
    obs = env.reset()

    # Run the agents until we're done
    done = False
    lens = [None] * 4
    t = 0
    while not done:
        env.render()
        actions = env.act(obs)
        obs, reward, done, info = env.step(actions)
        for j in range(4):
            if lens[j] is None and reward[j] != 0:
                lens[j] = t
        t += 1

    env.render(close=True)
    env.close()
    return info, reward, lens
def _make_env(self):
    """Initialise the gym environment, adding agents to it."""
    agents = [
        self if agent_id == self.agent_id else SimpleAgent()
        for agent_id in range(NUM_AGENTS)
    ]
    return pommerman.make("PommeFFACompetition-v0", agents)
def make_env(self):
    agents = []
    for agent_id in range(NUM_AGENTS):
        if agent_id == self.agent_id:
            agents.append(self)
        else:
            agents.append(SimpleAgent())

    return pommerman.make('PommeFFACompetition-v0', agents)
def init(self, config):
    self.env.seed(0)

    # Add 4 SimpleAgents
    agents = []
    for agent_id in range(4):
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))

    self.env.set_agents(agents)
    self.env.set_init_game_state(None)
def get_env():
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    agent_id = 0
    agents = [
        DQN(config["agent"](0, config["game_type"])),
        SimpleAgent(config["agent"](1, config["game_type"])),
        SimpleAgent(config["agent"](2, config["game_type"])),
        SimpleAgent(config["agent"](3, config["game_type"])),
    ]
    env.set_agents(agents)
    env.set_training_agent(agents[agent_id].agent_id)
    env.set_init_game_state(None)
    return env
def testSimpleAgent():
    game_type = constants.GameType(4)

    board = [[0, 0, 2, 1, 1, 1],
             [0, 0, 0, 0, 0, 0],
             [2, 8, 0, 1, 0, 1],
             [1, 0, 1, 0, 10, 1],
             [1, 0, 3, 0, 0, 1],
             [1, 11, 1, 1, 1, 0]]
    bomb_info = [(0, 1, 2, None)]

    game_state = my_utility.get_gamestate(board, bomb_info)
    game_data = my_utility.get_gamedata(game_state, game_type)

    fm = forward_model.ForwardModel()
    obs = fm.get_observations(game_data.board, game_data.agents,
                              game_data.bombs, game_data.flames, False, None,
                              game_data.game_type, None)

    simple_agent = SimpleAgent()
    print(simple_agent.act(obs[1], spaces.Discrete(6)))
def make_env(self):
    agents = []
    for agent_id in range(4):
        if agent_id == self.agent_id:
            agents.append(self)
        else:
            agents.append(SimpleAgent())

    env = pommerman.make('PommeFFACompetition-v0', agents)
    env.set_training_agent(self.agent_id)
    return env
def makeTrainingObservation():
    env = Pomme(**config["env_kwargs"])

    agents = {}
    for agent_id in range(num_players):
        agent = TrainingAgent(config["agent"](agent_id, config["game_type"]))
        agents[agent_id] = agent

    simple_Agent_id = num_players
    agents[simple_Agent_id] = SimpleAgent(
        config["agent"](simple_Agent_id, config["game_type"]))

    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)
    return env
def run(best_net, num_episodes, result_list=None, process_id=None, render=False):
    best_net = Net(trained_model='best.model')
    agent_list = [
        ZeroAgent(net=best_net,
                  num_simulations=100,
                  is_self_play=False,
                  num_exploration_steps=0),
        SimpleAgent()
    ]
    env = pommerman.make('OneVsOne-v0', agent_list)

    for i_episode in range(num_episodes):
        state = env.reset()
        done = False
        initial_agents = state[0]['alive']
        survivors = initial_agents
        dead_agents = []
        while not done:
            if render:
                env.render()
            actions = env.act(state)
            actions[0] = actions[0].value
            state, reward, done, info = env.step(actions)
            survivors = state[0]['alive']
            for agent in initial_agents:
                if agent not in survivors and agent not in dead_agents:
                    dead_agents.append(agent)

        if process_id is not None:
            print('[Process %d, Episode %d] Dead order: ' % (process_id, i_episode),
                  str(dead_agents), 'Survivors:', survivors)
        else:
            print('[Episode %d] Dead order: ' % i_episode,
                  str(dead_agents), 'Survivors:', survivors)

        if result_list is None:
            result_list = []
        result_list.append((dead_agents, survivors))

    env.close()
    return result_list
def make_env(self, config):
    # Instantiate the environment
    env = Pomme(**config["env_kwargs"])

    # Add agents
    agents = []
    for agent_id in range(NUM_AGENTS):
        if agent_id == self.agent_id:
            agents.append(self)
        else:
            agents.append(
                SimpleAgent(config["agent"](agent_id, config["game_type"])))

    env.set_agents(agents)
    env.set_init_game_state(None)
    return env
def make_agent_env(n_ppo, n_simple, render):
    # Create environment/agents
    config = "PommeFFACompetition-v0"
    agents = [TensorForceAgent(algorithm="ppo") for _ in range(n_ppo)]
    agents += [SimpleAgent() for _ in range(n_simple)]
    n_random = 4 - n_ppo - n_simple
    agents += [RandomAgent() for _ in range(n_random)]
    env = make(config, agents, None)
    training_agent = agents[0]
    env.set_training_agent(training_agent.agent_id)

    # Map to Tensorforce environment/agents
    wrapped_env = WrappedEnv(env, visualize=render)
    agent = training_agent.initialize(env)
    return agent, wrapped_env
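# Possible usage (an assumption, mirroring the Runner setup used in the next
# example): train the PPO agent inside the wrapped environment.
agent, wrapped_env = make_agent_env(n_ppo=1, n_simple=2, render=False)
runner = Runner(agent=agent, environment=wrapped_env)
runner.run(episodes=100, max_episode_timesteps=200)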
def main(max_steps=200, train_for=100, render=False):
    '''CLI interface to bootstrap training'''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    args = parser.parse_args()
    config = args.config

    our_selection = DQN_TensorForce_Agent()
    # our_selection.restore_agent('PPO_Model.file')
    agents = [our_selection, SimpleAgent(), RandomAgent(), RandomAgent()]

    env = make(config, agents)
    training_agent = our_selection
    env.set_training_agent(our_selection.agent_id)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=(args.render or render))

    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(episodes=train_for, max_episode_timesteps=max_steps)

    won = len([x for x in runner.episode_rewards if x == 1])
    tie = len([x for x in runner.episode_timesteps if x == max_steps])
    lost = train_for - won - tie
    # print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
    #       runner.episode_times)
    print(won, tie, lost)
    # our_selection.save_model(file='PPO_Model.file')

    try:
        runner.close()
    except AttributeError:
        pass
def set_pommerman_env(agent_id=0):
    # Instantiate the environment
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    np.random.seed(0)
    env.seed(0)

    # Add 3 Simple Agents and 1 DQN agent
    agents = [
        DQN(config["agent"](agent_id, config["game_type"])) if i == agent_id
        else SimpleAgent(config["agent"](i, config["game_type"]))
        for i in range(4)
    ]
    env.set_agents(agents)
    env.set_training_agent(agents[agent_id].agent_id)  # training_agent is only dqn agent
    env.set_init_game_state(None)
    return env
def runner(id, num_episodes, fifo, _args):
    # make sure agents play at all positions
    agent_id = id % NUM_AGENTS

    for j in range(num_episodes):
        dirname = "./agent" + str(id) + "_episode_" + str(j)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
            print("made dir")

        agent_list = []
        agent = MCTSAgent()
        for i in range(NUM_AGENTS):
            if i == agent_id:
                agent.set_agent_id(agent_id)
                agent_list.append(agent)
            else:
                agent_list.append(SimpleAgent())
        print(agent_list)

        env = pommerman.make('PommeFFACompetition-v0', agent_list)
        env.set_training_agent(agent_id)

        # Run the episode just like OpenAI Gym
        step = 0
        sum_rewards = 0
        obs = env.reset()
        state = env.get_json_info()
        done = False
        start_time = time.time()
        while not done:
            # env.render()
            actions = env.act(obs)
            action = agent.search(state)
            actions.insert(agent_id, action)
            print(actions)
            obs, step_reward, done, info = env.step(actions)
            state = env.get_json_info()
            sum_rewards += step_reward[agent_id]
            step += 1

        env.save_json(dirname)
        elapsed = time.time() - start_time
        env.close()
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add 4 SimpleAgents
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id, config["game_type"]))
    # agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")
    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())

        if 1 in reward:
            winner = reward.index(1)
        else:
            winner = None

        print('Episode {} finished'.format(i_episode))
    env.close()

    # If the game was not tied, save the demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        pickle.dump(demonstration, open("demonstration.p", "wb"))
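# A small sketch (assumption, not part of the original snippet) of reading the
# demonstration pickle written above.
import pickle

with open("demonstration.p", "rb") as f:
    demonstration = pickle.load(f)
print("winner:", demonstration["winner"],
      "recorded states:", len(demonstration["demo"]))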
def _thunk():
    game_type = config['game_type']
    agent_type = config['agent']
    env = config['env'](**config["env_kwargs"])
    env.seed(args.seed + rank)

    if args.how_train == 'simple':
        agents = [
            SimpleAgent(agent_type(game_type=game_type)) for _ in range(3)
        ]
        training_agent_id = rank % 4
        agents.insert(training_agent_id, training_agents[0])
        for agent_id, agent in enumerate(agents):
            agent.set_agent_id(agent_id)
        env.set_agents(agents)
        env.set_training_agents([training_agent_id])
        env.set_init_game_state(args.game_state_file)
    elif args.how_train == 'homogenous':
        # NOTE: We can't use just one agent character here because it needs
        # to track its own state. We do that by instantiating three more
        # copies. There is probably a better way.
        if rank > -1:
            copies = [
                training_agents[0].copy(
                    agent_type(agent_id=agent_id, game_type=game_type))
                for agent_id in range(4)
            ]
        else:
            copies = training_agents * 4
        env.set_agents(copies)
        env.set_training_agents(list(range(4)))
        env.set_init_game_state(args.game_state_file)
    else:
        raise ValueError("Unknown how_train: %s" % args.how_train)

    env = WrapPomme(env, args.how_train)
    # TODO: Add the FrameStack in.
    env = MultiAgentFrameStack(env, args.num_stack)
    return env
def reset(self):
    """Resets the state of the environment and returns an initial observation.

    # Returns
        observation (object): The initial observation of the space.
            Initial reward is assumed to be 0.
    """
    # Add 3 SimpleAgents and the Tensorforce training agent at a random position
    train_agent_pos = np.random.randint(0, 4)
    agents = []
    for agent_id in range(4):
        if agent_id == train_agent_pos:
            agents.append(
                TensorforceAgent(config["agent"](agent_id, config["game_type"])))
        else:
            agents.append(
                SimpleAgent(config["agent"](agent_id, config["game_type"])))

    self.gym.set_agents(agents)
    self.gym.set_training_agent(agents[train_agent_pos].agent_id)

    obs = self.gym.reset()
    agent_obs = self.featurize(obs[self.gym.training_agent])
    return agent_obs
def __init__(self,
             mcts_iters,
             discount=1.0,
             c=1.5,
             temp=1.0,
             tempsteps=None,
             agent_id=0,
             opponent=SimpleAgent(),
             model_save_file=None,
             *args,
             **kwargs):
    super(MCTSAgent, self).__init__(*args, **kwargs)
    self.agent_id = agent_id
    self.env = self.make_env(opponent)
    self.reset_tree()
    self.mcts_iters = mcts_iters
    self.mcts_c_puct = c
    self.discount = discount
    self.init_temp = temp
    self.tempsteps = tempsteps
    self.model_save_file = model_save_file
    self.train_count = 0
# Add 3 SimpleAgents and the trained DQN agent
agents = []
for agent_id in range(4):
    if agent_id == agent_pos:
        # agents.append(Cnn12833Dense1281(env.action_space.n, BOARD_SIZE,
        #                                 character=config["agent"](agent_id, config["game_type"]),
        #                                 save_path=model_path))
        # agents.append(Dense82(env.action_space.n, BOARD_SIZE,
        #                       character=config["agent"](agent_id, config["game_type"]),
        #                       save_path=model_path2))
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE,
        #                        character=config["agent"](agent_id, config["game_type"]),
        #                        save_path=model_path3))
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE,
        #                        character=config["agent"](agent_id, config["game_type"]),
        #                        save_path='./dqn/model/ddgp_dense_128_1_rs/model.h4'))
        agents.append(Cnn12832Dense1281(
            env.action_space.n, BOARD_SIZE,
            character=config["agent"](agent_id, config["game_type"]),
            save_path='./dqn/model/ddgp_cnn128_3_2_dense_128_1_rs/model.h4'))
    else:
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

env.set_agents(agents)
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
env.render(close=True)
def setup(self):
    agents = []

    if self.phase == 0:
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = 2
        config["env_kwargs"]["num_items"] = 2
        config["env_kwargs"]["num_rigid"] = 20
        agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
        agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()

    if self.phase == 1:
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = 2
        config["env_kwargs"]["num_items"] = 2
        config["env_kwargs"]["num_rigid"] = 36
        agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
        agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()

    if self.phase == 2:
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = 2
        config["env_kwargs"]["num_items"] = 2
        config["env_kwargs"]["num_rigid"] = 36
        agents.insert(0, NoDoAgent(config["agent"](0, config["game_type"])))
        agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()

    if self.phase == 3:
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = 2
        config["env_kwargs"]["num_items"] = 2
        config["env_kwargs"]["num_rigid"] = 36
        agents.insert(0, NoDoAgent(config["agent"](0, config["game_type"])))
        agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()

    if self.phase == 4:
        self.agents_index = [1, 3]
        self.enemies_agents_index = [0, 2]
        config = team_v0_fast_env()
        config["env_kwargs"]["num_wood"] = 0
        config["env_kwargs"]["num_items"] = 10
        config["env_kwargs"]["num_rigid"] = 36
        agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
        agents.insert(2, SimpleAgent(config["agent"](2, config["game_type"])))
        print(config["env_kwargs"])
        self.env = Pomme(**config["env_kwargs"])
        self.env.seed()

    for agent_id in self.agents_index:
        agents.insert(
            agent_id,
            BaseLineAgent(config["agent"](agent_id, config["game_type"])))

    self.env.set_agents(agents)
    self.env.set_init_game_state(None)

    self.observation_space = spaces.Dict({
        "boards": spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
        "states": spaces.Box(low=-1, high=20, shape=(9, )),
    })
    self.action_space = self.env.action_space
class EvaluatorAgent(BaseAgent):

    def __init__(
            self,
            n_actions,
            character,
            evaluation_model=None,
            evaluation_model_path=None,
            # Set agent properties to preprocess observations
            use_history=True,     # Use previous observations for predictions
            use_2d=True,          # Use 2d convolutions
            patient=True,         # Wait to make initial observations (you don't need it if you don't use history)
            center_view=True,     # Use centering
            original_view=False,  # Use 11x11 board, if false, use 21x21
            verbose=False         # Comment actions
    ):
        super(EvaluatorAgent, self).__init__(character=character)

        # Properties
        self.use_history = use_history
        self.use_2d = use_2d
        self.patient = patient
        self.center_view = center_view
        self.original_view = original_view
        self.verbose = verbose

        # Acting history for the evaluation
        self.actions_history = []
        self.observations_history = []
        self.episode_count = 0
        self.steps = 0
        self.n_actions = n_actions
        self.simple_agent = SimpleAgent(character=character)

        # Load any custom model
        self.evaluation_model = None
        if evaluation_model:
            self.evaluation_model = evaluation_model
            if evaluation_model_path:
                try:
                    self.evaluation_model.load_weights(evaluation_model_path)
                except:
                    print('Weights load failed')
        elif evaluation_model_path:
            try:
                self.evaluation_model = load_model(evaluation_model_path)
            except:
                print('Model load failed')
        else:
            print('Use SimpleAgent')

    # Featurization
    def featurize(self, obs):
        return featurize(obs, center=self.center_view, crop=self.original_view)

    # Acting
    def act(self, obs, action_space=None):
        # Initialize new episode
        if self.steps == 0:
            self.actions_history.append([])

        # Create observation, merge with the predecessors
        obs_f = self.featurize(obs)

        # If our agent is patient, wait for the first 3 steps to make observations
        if self.patient and len(self.observations_history) < history_length - 1:
            self.observations_history.append(obs_f)
            self.actions_history[self.episode_count].append(0)
            return 0

        if self.use_history:
            obs_history = self.make_observation(obs_f, self.steps, self.use_2d)
        else:
            obs_history = obs_f
        self.observations_history.append(obs_f)  # Append current observation after the merge

        # Predict action
        if self.evaluation_model is not None:
            res = self.evaluation_model.predict(
                obs_history.reshape((1, ) + obs_history.shape))[0]
            res = np.argmax(res)
        else:
            res = self.simple_agent.act(obs, action_space)
        if self.verbose:
            print(res, end='; ')

        # # In the dueling DQN the first output relates to the advantage
        # if len(res) > self.n_actions:
        #     res = res[1:]

        self.actions_history[self.episode_count].append(res)
        if self.verbose:
            print(ACTIONS[res])
        self.steps += 1
        return res

    def make_observation(self, obs, i, use_2d=True):
        if i == 0:
            # If it is the first observation
            res = np.array([obs for _ in range(history_length)])
        elif i < history_length - 1:
            # If there are fewer than 3 observations in the history
            n_first = history_length - 1 - i
            res = np.concatenate(
                [
                    # Repeat the first observation
                    np.array([self.observations_history[0] for _ in range(n_first)]),
                    # Add the next observations
                    np.array(self.observations_history[:i]).reshape(
                        i, view_size, view_size, n_channels),
                    # Current observation
                    obs.reshape(1, view_size, view_size, n_channels)
                ],
                axis=0)
        else:
            res = np.concatenate(
                [
                    # Add the previous observations
                    np.array(self.observations_history[i - history_length + 1:i]).reshape(
                        history_length - 1, view_size, view_size, n_channels),
                    # Current observation
                    obs.reshape(1, view_size, view_size, n_channels)
                ],
                axis=0)

        if use_2d:
            res = np.concatenate(res, axis=-1)
        return res

    # Evaluation
    def end_episode(self):
        self.steps = 0
        self.episode_count += 1
        self.observations_history = []

    def reset_run(self):
        self.actions_history = []
        self.episode_count = 0
        self.steps = 0

    def close(self):
        pass

    def run_episode(self, config, env):
        return run_episode(self, config, env, self.agent_id)

    def plot_statistics(self, info, selected_labels):
        return plot_statistics(self, info, selected_labels)

    def evaluate_agent(self, selected_labels, iterations=100, plot=True):
        return evaluate_agent(self, selected_labels, self.agent_id,
                              iterations, plot)
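# A sketch (an assumption, not from the original project) of wiring
# EvaluatorAgent into an FFA game and evaluating it with the run_episode()
# helper shown earlier in this collection; the weights path is hypothetical.
config = ffa_competition_env()
env = Pomme(**config["env_kwargs"])

evaluator = EvaluatorAgent(
    n_actions=env.action_space.n,
    character=config["agent"](0, config["game_type"]),
    evaluation_model_path='evaluation_weights.h5')  # hypothetical weights file

info, reward, lens = run_episode(evaluator, config, env, agent_id=0)
evaluator.end_episode()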