def __init__(self, action_spec, time_budget):
    self.action_spec = action_spec
    self.sim_env = ConnectFourEnv()
    self.opponent_agent = RandomAgent(action_spec)
    self.time_budget = time_budget
    self.policy = {}
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    # order_generator = NaiveOrderGenerator(num_dcs, num_customers, orders_per_day)
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(
        physical_network, order_generator, generator, num_steps
    )
    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

    obs = env.reset()
    reward = 0
    done = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()
    actions = []
    episode_rewards = []
    # demands_per_k = np.zeros((num_commodities, num_steps))
    # inventory_at_t = np.zeros((num_commodities, num_steps))
    # TODO: fill these in eventually
    while not done:
        # The agent observes the current state and chooses an action.
        action = agent.act(obs, reward, done)
        # print(f"Agent is taking action: {action}")

        # The environment steps with the agent's action and returns the new state and reward.
        obs, reward, done, info = env.step(action)
        # print(f"Got reward {reward} done {done}")

        # Render the current state of the environment
        env.render()
        actions.append(action)
        episode_rewards.append(reward)
        if done:
            print("=========== Environment says we are DONE ===========")
    return actions, episode_rewards
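# Hypothetical invocation of run_with_params above (a sketch for illustration
# only; these parameter values are assumptions, not taken from the original code):
if __name__ == "__main__":
    actions, episode_rewards = run_with_params(
        num_dcs=3,
        num_customers=5,
        dcs_per_customer=2,
        demand_mean=100,
        demand_var=20,
        num_commodities=2,
        orders_per_day=4,
        num_steps=30,
    )
    print("Total reward:", sum(episode_rewards))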
def run_random_vs_qlearning():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)
    agent_one = QLearningAgent((board_length, board_length), action_space,
                               "qlearning", "up", 1.0, 2500, 100000)
    # Pass the shared action_space (the original passed the board shape here,
    # which does not match the RandomAgent signature used elsewhere).
    agent_two = RandomAgent((board_length, board_length), action_space,
                            "Two", "down")
    iterations = 50000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.9999
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_one = 0
            victories_player_two = 0
            for winner in winners:
                if winner == "qlearning":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1
            logging.info("Player One: {}".format(victories_player_one))
            logging.info("Player Two: {}".format(victories_player_two))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
def run_random_vs_random_max():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)
    agent_one = RandomAgentWithMaxValue((board_length, board_length),
                                        action_space, "One", "up")
    agent_two = RandomAgent((board_length, board_length), action_space,
                            "Two", "down")
    iterations = 1000
    for i in range(iterations):
        board = Board(board_length=board_length)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_one = 0
            victories_player_two = 0
            for winner in winners:
                if winner == "One":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1
            logging.info("Player One: {}".format(victories_player_one))
            logging.info("Player Two: {}".format(victories_player_two))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
def post_evaluate(self, config, population, species, best_genome):
    # Track the best network seen in the current cycle and the best fitness ever.
    if self._best_net is None or best_genome.fitness > self._best_fitness:
        self._best_net = FeedForwardNetwork.create(best_genome, config)
        self._best_fitness = best_genome.fitness
    if self._best_ever is None or self._best_fitness > self._best_ever:
        self._best_ever = self._best_fitness

    print("Best fitness so far in this cycle", self._best_fitness,
          ", Best fitness ever", self._best_ever,
          ", Currently used agent:",
          "random" if self._last_fitness is None else self._last_fitness)

    self._generations += 1
    if self._generations >= self._reset_number:
        # Fall back to a random opponent if this cycle did not beat the last one.
        is_random = (self._last_fitness is not None
                     and self._best_fitness < self._last_fitness)
        print("Resetting opponent, last fitness was",
              "random" if self._last_fitness is None else self._last_fitness,
              "new fitness is",
              "random" if is_random else self._best_fitness)
        if is_random:
            self._current_opponent = RandomAgent()
            self._last_fitness = None
        else:
            self._current_opponent = self._best_net
            self._last_fitness = self._best_fitness
        self._best_fitness = None
        self._best_net = None
        if self._save_opponents:
            with open('opponent-net-{}.pkl'.format(self._last_fitness),
                      'wb') as output:
                pickle.dump(self._current_opponent, output, 1)
        self._generations = 0
def test_get_legal_actions(self):
    env = DoudizhuEnv()
    env.set_agents(
        [RandomAgent(env.action_num) for _ in range(env.player_num)])
    env.reset()
    legal_actions = env._get_legal_actions()
    for legal_action in legal_actions:
        self.assertLessEqual(legal_action, env.action_num - 1)
def __init__(self, reset_number=100):
    self._generations = 0
    self._best_net = None
    self._best_fitness = None
    self._reset_number = reset_number
    self._current_opponent = RandomAgent()
    self._last_fitness = None
    self._best_ever = None
    # post_evaluate reads this flag; it was never initialized in the original,
    # so it is assumed here to default to False.
    self._save_opponents = False
def agent(self, team, seed) -> Agent:
    if self.kind == 'gre':
        return GreedyAgent(team, seed=seed)
    if self.kind == 'cls':
        return ClassifierAgent(team, self.filename, seed=seed)
    if self.kind == 'reg':
        return RegressionAgent(team, self.filename, seed=seed)
    return RandomAgent(team)
def testAgainstRandom(self):
    r = AlphaBetaAgent(RED, maxDepth=7, timeLimit=5)
    b = RandomAgent(BLUE)
    board, state = buildScenario('Junction')
    mm = MatchManager('', r, b, board, state, 24)
    while not mm.end:
        mm.nextStep()
class TestRandomAgent(unittest.TestCase):
    def setUp(self):
        number_of_actions = 2
        action_space = spaces.Discrete(number_of_actions)
        self.agent = RandomAgent(action_space)

    def testAction(self):
        action = self.agent.act(state=None, reward=None, done=None)
        # A random policy over Discrete(2) may return either action, so the
        # original `assert action == 0` was flaky; check membership instead.
        self.assertIn(action, (0, 1))
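# A minimal RandomAgent sketch consistent with the interface this test
# exercises (a gym action space passed to the constructor, and an
# act(state, reward, done) method). This is an illustrative reconstruction,
# not the repository's actual class:
from gym import spaces


class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, state=None, reward=None, done=None):
        # Ignore the observation entirely and sample uniformly at random.
        return self.action_space.sample()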
def reset(self):
    self.agents = []
    self.book_reads = {}
    for i in range(1, self.parameters.n_books + 1):
        self.book_reads[i] = 0
    for i in range(self.parameters.n_agents):
        agent = RandomAgent(i, self.book_reads, self.parameters)
        # agent.gossip_protocol = self.parameters.gossip_protocol(agent)
        self.agents.append(agent)
def __set_player(self, player_type, model_path, insight, time):
    if player_type == 'B' and model_path is None:
        raise Exception('Bot (' + player_type + ') has no brain attached.')
    elif player_type == 'B':
        return Bot(model_path, insight=insight, time=time)
    elif player_type == 'R':
        return RandomAgent(insight)
    elif player_type == 'H':
        return Human()
    else:
        raise Exception('Unknown player type: ' + player_type)
    # The trailing `return None` in the original was unreachable and has been removed.
def self_play(n_iterations=10, ben_steps=1000, training_steps=int(1e4),
              n_eval_episodes=100, **kwargs):
    """
    Return an agent that learns by playing against itself, progressing
    from random to optimal play.
    """
    agents = [RLAgent(**kwargs), RandomAgent()]
    for _ in range(n_iterations):
        benchmark(agents[0], agents[1], ben_steps, training_steps,
                  n_eval_episodes)
        # Add the trained agent as the new opponent to exploit.
        agents[1] = opposite_agent(agents[0])
        agents[1].eps = agents[0].original_eps
    return agents[0]
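# Example call of self_play above (a sketch; it assumes RLAgent needs no extra
# keyword arguments beyond its defaults -- an assumption for illustration):
if __name__ == "__main__":
    trained_agent = self_play(n_iterations=5, ben_steps=500,
                              training_steps=5000, n_eval_episodes=50)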
def eval_single(args):
    """
    Evaluate a single agent configuration, averaged over 10 seeded runs.

    :param args: tuple of (run index, parameter dict).
    :return: tuple of the run index followed by the averaged best-score fields.
    """
    # Unpack arguments
    idx, params = args

    # Scores to save: highest score at a certain episode
    scores = []

    # 10 runs, then average
    for i in range(10):
        # Set the environment seed based on the run index
        params['ENV_SEED'] = i

        # Select and configure the agent
        if params['AGENT'] == 'random':
            agent = RandomAgent(params)
        elif params['AGENT'] == 'sarsa':
            agent = SarsaAgent(params)
        elif params['AGENT'] == 'qlearn':
            agent = QAgent(params)
        elif params['AGENT'] == 'doubledqn':
            agent = DoubleDQNAgent(params)
        else:
            raise ValueError('Invalid agent specified!')

        # Run all episodes
        while agent.episode < agent.episode_count:
            agent.do_episode(params)

        # Record the best score
        scores.append(agent.get_best_score())

        # Clean up
        agent.env.close()
        if params['AGENT'] == 'doubledqn':
            agent.sess.close()

    # Average each score field (episode, score) across the 10 runs
    score = (idx, ) + tuple(map(lambda x: sum(x) / float(len(x)),
                                zip(*scores)))
    return score
def __init__(self, numberofPlayer=6, lvl=0, ratio_gun_player=1.2,
             list_agent=None):
    ShowBase.__init__(self)
    # Avoid a mutable default argument (the original built the agent list in
    # the signature, which is evaluated once at definition time).
    if list_agent is None:
        list_agent = [RandomAgent() for _ in range(numberofPlayer)]
    self.init_game_variable(numberofPlayer, lvl, ratio_gun_player, list_agent)
    self.init_game_method()
    self.init_taskMgr()
    # self.active_player = self.numberofPlayer
    self.scores = np.zeros(self.numberofPlayer)
    self.available_actions = list(range(48))
    self.unique_id = ""
    self.numberofGun = 0
    self.guns = []
    self.unique_id_vec = np.zeros(4 + (numberofPlayer - 1) * 3)  # + self.numberofGun * 3
    self.tic = time.time()
def task(round_n, n_games=25):
    env = gym_connectfour.envs.ConnectFourEnv()

    random_agent = RandomAgent(env.action_spec())
    agent = MCTSAgent(env.action_spec(), time_budget=0.001)

    data = []
    for game_n in range(n_games):
        results = play_game(env, agent, opponent_agent=random_agent)
        data.append({"round_n": round_n, "game_n": game_n, **results})
    return data
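# Hypothetical driver that collects results across several rounds (a sketch;
# it only assumes task() returns a list of per-game dicts, as defined above):
if __name__ == "__main__":
    all_data = []
    for round_n in range(10):
        all_data.extend(task(round_n, n_games=25))
    print("Collected {} game records".format(len(all_data)))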
def __init__(self, game_number, numberofPlayer=6, lvl=0,
             ratio_gun_player=1.2, list_agent=None):
    if list_agent is None:
        list_agent = [DeepQLearningAgent(action_space_size=48) if i < 3
                      else RandomAgent() for i in range(6)]
    self.init_game_variable(game_number, numberofPlayer, lvl,
                            ratio_gun_player, list_agent)
    self.init_game_method()
    # self.active_player = self.numberofPlayer
    self.scores = np.zeros(self.numberofPlayer)
    self.available_actions = list(range(48))
    self.numberofGun = 0
    self.guns = []
    self.unique_id = ""
    self.unique_id_vec = np.zeros(4 + (numberofPlayer - 1) * 3)  # + self.numberofGun * 3
    # self.unique_id_vec = np.zeros(7 + (numberofPlayer - 1) * 3)
    print('vec len', len(self.unique_id_vec))
def test_run(self):
    env = DoudizhuEnv()
    env.set_agents(
        [RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 3)
    win = []
    for player_id, payoff in enumerate(payoffs):
        if payoff == 1:
            win.append(player_id)
    if len(win) == 1:
        self.assertEqual(env.game.players[win[0]].role, 'landlord')
    if len(win) == 2:
        self.assertEqual(env.game.players[win[0]].role, 'peasant')
        self.assertEqual(env.game.players[win[1]].role, 'peasant')
def start():
    agent1 = MinimaxAgent()  # red
    agent2 = RandomAgent()   # yellow
    delay = 0.5

    data = GameData()
    screen = pygame.display.set_mode(data.size)
    game = ConnectGame(data, GameRenderer(screen, data))

    game.print_board()
    game.draw()

    pygame.display.update()
    pygame.time.wait(10)

    agent1_turn = 0

    # Process mouse and keyboard events, dispatching them to the event bus.
    # The events are handled by the ConnectGame and GameRenderer classes.
    while not game.game_data.game_over:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                game.quit()

        sleep(delay)
        if data.turn == agent1_turn and not game.game_data.game_over:
            game.make_movement(agent1.get_move(data))
        else:
            game.make_movement(agent2.get_move(data))
        game.update()
        game.draw()

    game.update()
    game.draw()
def evaluate(existing_model_path, num_episodes=100, num_hidden_units=(40,),
             starting_alpha=0.1, starting_lamda=0.9, min_alpha=0.1,
             min_lamda=0.7, alpha_decay=1, lamda_decay=0.96,
             alpha_decay_interval=1, lamda_decay_interval=3e4,
             hidden_activation=nn.Sigmoid(), num_inputs=198,
             opponent="pubeval"):
    """
    Evaluate a saved model against an opponent and print the model's win rate.

    :param existing_model_path: String. Path of the saved model.
    :param num_episodes: Integer. Number of games to play per model.
    :param num_hidden_units: See EvaluationModel class.
    :param starting_alpha: See EvaluationModel class.
    :param starting_lamda: See EvaluationModel class.
    :param min_alpha: See EvaluationModel class.
    :param min_lamda: See EvaluationModel class.
    :param alpha_decay: See EvaluationModel class.
    :param lamda_decay: See EvaluationModel class.
    :param alpha_decay_interval: See EvaluationModel class.
    :param lamda_decay_interval: See EvaluationModel class.
    :param hidden_activation: See EvaluationModel class.
    :param num_inputs: See EvaluationModel class.
    :param opponent: "pubeval" or "random".
    """
    model = EvaluationModel(num_inputs=num_inputs,
                            num_hidden_units=num_hidden_units,
                            starting_alpha=starting_alpha,
                            starting_lamda=starting_lamda,
                            min_alpha=min_alpha,
                            min_lamda=min_lamda,
                            alpha_decay=alpha_decay,
                            lamda_decay=lamda_decay,
                            alpha_decay_interval=alpha_decay_interval,
                            lamda_decay_interval=lamda_decay_interval,
                            hidden_activation=hidden_activation)
    model.load(checkpoint_path=existing_model_path)

    if opponent == "pubeval":
        opponent_agent = PubevalAgent(0)
    else:
        opponent_agent = RandomAgent(0)
    agents = [opponent_agent, TDAgent(1, model)]

    wins = [0, 0]
    for i in range(num_episodes):
        game = Game(agents)
        wins[game.play()] += 1

    print("\n{}: \t{}".format(existing_model_path,
                              float(wins[1]) / float(sum(wins))))
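# Example invocation of evaluate above (a sketch; the checkpoint path is
# hypothetical and only the parameters defined by the function are used):
if __name__ == "__main__":
    evaluate("checkpoints/td_model.tar", num_episodes=500, opponent="random")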
def test_8_by_8_random_agent(self):
    agent = RandomAgent(problem_id=0)
    self.assertEqual(agent.problem_id, 0)
    self.assertEqual(agent.is_stochastic(), True)
    self.assertEqual(agent.env.ncol, 8)
    self.assertEqual(agent.env.nrow, 8)

    agent = RandomAgent(problem_id=1)
    self.assertEqual(agent.problem_id, 1)
    self.assertEqual(agent.is_stochastic(), True)
    self.assertEqual(agent.env.ncol, 8)
    self.assertEqual(agent.env.nrow, 8)
def act(self, gs: GameState) -> int:
    available_actions = gs.get_available_actions(gs.get_active_player())
    if self.agents is None:
        self.agents = [RandomAgent()] * gs.player_count()

    # Score each available action by rolling out games from the resulting state.
    accumulated_scores = np.zeros((len(available_actions),))
    for i, a in enumerate(available_actions):
        gs_clone = gs.clone()
        gs_clone.step(gs.get_active_player(), a)
        if self.determinist_environment:
            max_scores = run_for_n_games_and_return_max(
                self.agents, gs_clone, self.epochs_per_action)
            accumulated_scores[i] = max_scores[gs.get_active_player()]
        else:
            (total_scores, _) = run_for_n_games_and_return_stats(
                self.agents, gs_clone, self.epochs_per_action)
            accumulated_scores[i] = total_scores[gs.get_active_player()]

    # print((accumulated_scores, available_actions[np.argmax(accumulated_scores)]))
    return available_actions[np.argmax(accumulated_scores)]
def main(args):
    """Main program."""
    problem_ids, episodes, grid = parse_args(args)

    print('It was found that seeding the random generator was slow; you can turn it on with seed=True.')
    print('More info in the documentation...')

    # Reset the random generator to a known state (for reproducibility)
    np.random.seed(12)

    for problem_id in problem_ids:
        # This seed doesn't work; if needed, change seed to True below.
        agent = RandomAgent(problem_id=problem_id, map_name_base=grid)
        agent.solve(episodes=episodes, seed=None)
        agent.evaluate(episodes)
def create_random_experiment_runner(num_dcs, num_customers, dcs_per_customer,
                                    demand_mean, demand_var, num_commodities,
                                    orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(
        physical_network, order_generator, generator, num_steps
    )
    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)
    return ExperimentRunner(order_generator, generator, agent, env)
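# Hypothetical usage of the factory above (a sketch; the parameter values are
# illustrative assumptions, not taken from the original experiments):
runner = create_random_experiment_runner(
    num_dcs=3,
    num_customers=5,
    dcs_per_customer=2,
    demand_mean=100,
    demand_var=20,
    num_commodities=2,
    orders_per_day=4,
    num_steps=30,
)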
def main_random():
    env = AgarioEnv(render=RENDER, speed_scale=SPEED_SCALE,
                    display_text=DISPLAY_TEXT, grid_resolution=GRID_RESOLUTION)
    agent = RandomAgent()

    for episode in range(NUM_EPISODES):
        state = env.reset()
        num_steps = 0
        done = False
        while True:
            action = agent.get_action(state)
            # Repeat the chosen action for several frames.
            for _ in range(NUM_SKIP_FRAMES):
                if RENDER:
                    env.render()
                state, reward, done, _ = env.step(action)
            if done or num_steps >= MAX_STEPS:
                print(f'epoch: {episode}, max_mass = {state.mass}')
                agent.max_masses.append(state.mass)
                break
            num_steps += 1

    agent.save_performance(path='random.performance')
    agent.print_final_stats()
    env.close()
        return True

    def play(self):
        '''Play a whole game.'''
        done = False
        while not done:
            done = self.play_action()
        if self.total_reward_a == self.total_reward_b:
            return None
        elif self.total_reward_a > self.total_reward_b:
            return 'a'
        else:
            return 'b'


if __name__ == '__main__':
    env = Environment()
    agent_a = RandomAgent('a')
    agent_b = RandomAgent('b')
    game = Game(env, agent_a, agent_b)
    print('Playing a new game.')
    winner = game.play()
    print('Game has completed.')
    if winner:
        # The original formatted the undefined name `a` here; use `winner`.
        print('Player {} has won.'.format(winner))
    else:
        print('Game is a tie.')
episode_num = 100
memory_init_size = 100
train_every = 1

agent = DQNTransformer(
    scope='DouDiZhuTransformer',
    action_num=env.action_num,
    replay_memory_init_size=memory_init_size,
    train_every=train_every,
    state_shape=env.state_shape,
    mlp_layers=[512, 512],
)

log_dir = Root_Path + './experiment_log/dqn/'
logger = Logger(log_dir)

random_agent = RandomAgent(action_num=env_eval.action_num)
env.set_agents([agent, random_agent, random_agent])
env_eval.set_agents([agent, random_agent, random_agent])

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(env_eval, evaluate_num)[0])
def setUp(self):
    number_of_actions = 2
    action_space = spaces.Discrete(number_of_actions)
    self.agent = RandomAgent(action_space)
from agents import DeepQLearningAgent, RandomAgent
from environments.battle_royale import BattleRoyale
from runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    list_agent = [DeepQLearningAgent(action_space_size=48) if i == 1
                  else RandomAgent() for i in range(6)]
    gs = BattleRoyale(list_agent=list_agent)
    gs.run()
from agents import CommandLineAgent, DeepQLearningAgent, ReinforceMeanBaselineAgent, RandomAgent
from environments.connect4 import Connect4GameState
from runners import run_for_n_games_and_print_stats_1, run_step

if __name__ == "__main__":
    gs = Connect4GameState()
    agent0 = ReinforceMeanBaselineAgent(
        state_space_size=gs.get_vectorized_state().shape[0],
        action_space_size=gs.get_action_space_size())
    agent1 = RandomAgent()

    # for i in range(100):
    #     run_for_n_games_and_print_stats([agent0, agent1], gs, 5000)
    run_for_n_games_and_print_stats_1([agent0, agent1], gs, 10000,
                                      "C4_ReinforceMeanBaseline_10000")