Example No. 1
    def __init__(self, action_spec, time_budget):
        self.action_spec = action_spec

        self.sim_env = ConnectFourEnv()
        self.opponent_agent = RandomAgent(action_spec)

        self.time_budget = time_budget

        self.policy = {}
Example No. 2
def run_with_params(num_dcs, num_customers, dcs_per_customer, demand_mean,
                    demand_var, num_commodities, orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    # order_generator = NaiveOrderGenerator(num_dcs, num_customers, orders_per_day)
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)
    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

    obs = env.reset()
    reward = 0
    done = False
    print("=========== starting episode loop ===========")
    print("Initial environment: ")
    env.render()
    actions = []
    episode_rewards = []
    #demands_per_k = np.zeros((num_commodities,num_steps))
    #inventory_at_t = np.zeros((num_commodities,num_steps))  # TODO: fill these in eventually
    while not done:
        action = agent.act(obs, reward, done)

        # print(f"Agent is taking action: {action}")
        # the agent observes the first state and chooses an action
        # environment steps with the agent's action and returns new state and reward
        obs, reward, done, info = env.step(action)
        # print(f"Got reward {reward} done {done}")

        # Render the current state of the environment
        env.render()
        actions.append(action)
        episode_rewards.append(reward)

        if done:
            print("===========Environment says we are DONE ===========")

    return actions, episode_rewards
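For reference, a call to run_with_params might look like the sketch below; the argument values are illustrative only and are not taken from the original project.

actions, episode_rewards = run_with_params(
    num_dcs=3,
    num_customers=5,
    dcs_per_customer=2,
    demand_mean=100,
    demand_var=20,
    num_commodities=2,
    orders_per_day=1,
    num_steps=10,
)
print("Total reward over the episode:", sum(episode_rewards))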
Example No. 3
def run_random_vs_qlearning():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)

    agent_one = QLearningAgent((board_length, board_length), action_space,
                               "qlearning", "up", 1.0, 2500, 100000)
    agent_two = RandomAgent((board_length, board_length),
                            (board_length, board_length), "Two", "down")
    iterations = 50000
    for i in range(iterations):
        board = Board(board_length=8)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        agent_one.epsilon *= 0.9999
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "qlearning":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1

            logging.info("Player One: {}".format(str(victories_player_one)))
            logging.info("Player Two: {}".format(str(victories_player_two)))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
Example No. 4
def run_random_vs_random_max():
    winners = []
    board_length = 8
    action_space = (board_length, board_length, board_length, board_length)
    agent_one = RandomAgentWithMaxValue((board_length, board_length),
                                        action_space, "One", "up")
    agent_two = RandomAgent((board_length, board_length), action_space, "Two",
                            "down")
    iterations = 1000
    for i in range(iterations):
        board = Board(board_length=8)
        game = Game(agent_one=agent_one, agent_two=agent_two, board=board)
        game.play(verbose=False)
        winners += [game.winner]
        if (i % 5000 == 0 and i > 0) or iterations - 1 == i:
            victories_player_two = 0
            victories_player_one = 0
            for winner in winners:
                if winner == "One":
                    victories_player_one += 1
                if winner == "Two":
                    victories_player_two += 1

            logging.info("Player One: {}".format(str(victories_player_one)))
            logging.info("Player Two: {}".format(str(victories_player_two)))
            logging.info("Mean Rewards Agent One: {}".format(
                agent_one.moving_average_rewards[-1]))
            logging.info("Mean Rewards Agent Two: {}".format(
                agent_two.moving_average_rewards[-1]))
Example No. 5
    def post_evaluate(self, config, population, species, best_genome):
        if self._best_net is None or best_genome.fitness > self._best_fitness:
            self._best_net = FeedForwardNetwork.create(best_genome, config)
            self._best_fitness = best_genome.fitness
            if self._best_ever is None or self._best_fitness > self._best_ever:
                self._best_ever = self._best_fitness
        print("Best fitness so far in this cycle", self._best_fitness,
              ", Best fitness ever", self._best_ever,
              ", Currently used agent:",
              "random" if self._last_fitness is None else self._last_fitness)
        self._generations += 1
        if self._generations >= self._reset_number:
            is_random = self._last_fitness is not None and self._best_fitness < self._last_fitness
            print(
                "Resetting opponent, last fitness was",
                "random" if self._last_fitness is None else self._last_fitness,
                "new fitness is",
                "random" if is_random else self._best_fitness)
            if is_random:
                self._current_opponent = RandomAgent()
                self._last_fitness = None
            else:
                self._current_opponent = self._best_net
                self._last_fitness = self._best_fitness
                self._best_fitness = None
                self._best_net = None
                if self._save_opponents:
                    with open('opponent-net-{}.pkl'.format(self._last_fitness),
                              'wb') as output:
                        pickle.dump(self._current_opponent, output, 1)

            self._generations = 0
Example No. 6
 def test_get_legal_actions(self):
     env = DoudizhuEnv()
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     env.reset()
     legal_actions = env._get_legal_actions()
     for legal_action in legal_actions:
         self.assertLessEqual(legal_action, env.action_num - 1)
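Examples No. 6, No. 18 and No. 27 use an RLCard-style constructor, RandomAgent(action_num). A minimal sketch of that variant, assuming the agent samples uniformly from the legal actions carried in the state dict (this mirrors, but is not copied from, RLCard's implementation):

import random

class RandomAgent:
    """RLCard-style random agent (sketch): pick uniformly among legal actions."""

    def __init__(self, action_num):
        self.action_num = action_num

    def step(self, state):
        # 'legal_actions' is assumed to be a list of valid action ids for this state.
        return random.choice(state['legal_actions'])

    def eval_step(self, state):
        # A random policy behaves the same during training and evaluation.
        return self.step(state), {}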
Example No. 7
 def __init__(self, reset_number=100):
     self._generations = 0
     self._best_net = None
     self._best_fitness = None
     self._reset_number = reset_number
     self._current_opponent = RandomAgent()
     self._last_fitness = None
     self._best_ever = None
Example No. 8
    def agent(self, team, seed) -> Agent:
        if self.kind == 'gre':
            return GreedyAgent(team, seed=seed)
        if self.kind == 'cls':
            return ClassifierAgent(team, self.filename, seed=seed)
        if self.kind == 'reg':
            return RegressionAgent(team, self.filename, seed=seed)

        return RandomAgent(team)
Example No. 9
    def testAgainstRandom(self):
        r = AlphaBetaAgent(RED, maxDepth=7, timeLimit=5)
        b = RandomAgent(BLUE)
        board, state = buildScenario('Junction')

        mm = MatchManager('', r, b, board, state, 24)

        while not mm.end:
            mm.nextStep()
Example No. 10
class TestRandomAgent(unittest.TestCase):
    def setUp(self):
        number_of_actions = 2
        action_space = spaces.Discrete(2)
        self.agent = RandomAgent(action_space)

    def testAction(self):
        action = self.agent.act(state=None, reward=None, done=None)
        # A random policy over spaces.Discrete(2) may return either 0 or 1.
        assert action in (0, 1)
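The gym-style RandomAgent exercised by this test is not shown in the listing. A minimal sketch of such an agent, assuming only the act(state, reward, done) interface and the gym.spaces action space used in setUp above:

class RandomAgent:
    """Random-policy agent (sketch): ignores the observation entirely."""

    def __init__(self, action_space):
        self.action_space = action_space  # e.g. gym.spaces.Discrete(2)

    def act(self, state, reward, done):
        # state, reward and done are ignored; sample a legal action uniformly.
        return self.action_space.sample()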
Example No. 11
    def reset(self):
        self.agents = []
        self.book_reads = {}
        for i in range(1, self.parameters.n_books + 1):
            self.book_reads[i] = 0

        for i in range(0, self.parameters.n_agents):
            agent = RandomAgent(i, self.book_reads, self.parameters)
            # agent.gossip_protocol = self.parameters.gossip_protocol(agent)
            self.agents.append(agent)
Example No. 12
 def __set_player(self, player_type, model_path, insight, time):
     if player_type == 'B' and model_path is None:
         raise Exception('Bot (' + player_type + ') has no brain attached.')
     elif player_type == 'B':
         return Bot(model_path, insight=insight, time=time)
     elif player_type == 'R':
         return RandomAgent(insight)
     elif player_type == 'H':
         return Human()
     else:
         raise Exception('Unknown player type: ' + player_type)
Example No. 13
def self_play(n_iterations=10, ben_steps=1000, training_steps=int(1e4),
              n_eval_episodes=100, **kwargs):
  """
  Returns an agent that learns from playing against himself from random to
  optimal play.
  """
  agents = [RLAgent(**kwargs), RandomAgent()]
  for _ in range(n_iterations):
    benchmark(agents[0], agents[1], ben_steps, training_steps, n_eval_episodes)
    # adding the trained agent as the new opponent to exploit
    agents[1] = opposite_agent(agents[0])
    agents[1].eps = agents[0].original_eps
  return agents[0]
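A hedged usage sketch: RLAgent, benchmark and opposite_agent come from the original project, and RLAgent's keyword arguments depend on that class, so none are passed here.

trained_agent = self_play(n_iterations=3, ben_steps=500,
                          training_steps=5000, n_eval_episodes=50)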
Example No. 14
def eval_single(args):
    """

    :param args:
    :return:
    """

    # Unpack arguments
    idx, params = args

    # Scores to save: highest score at certain episode
    scores = []

    # 10 runs and average
    for i in range(10):

        # Set seed based on run index
        params['ENV_SEED'] = i

        # Select and configure agent
        if params['AGENT'] == 'random':
            agent = RandomAgent(params)
        elif params['AGENT'] == 'sarsa':
            agent = SarsaAgent(params)
        elif params['AGENT'] == 'qlearn':
            agent = QAgent(params)
        elif params['AGENT'] == 'doubledqn':
            agent = DoubleDQNAgent(params)
        else:
            raise ValueError('Invalid agent specified!')

        # Start
        while agent.episode < agent.episode_count:
            # Do episode
            agent.do_episode(params)

        # Get best score
        scores.append(agent.get_best_score())

        # Close
        agent.env.close()
        if params['AGENT'] == 'doubledqn':
            agent.sess.close()

    # Average for episode and score
    score = (idx, ) + tuple(map(lambda x: sum(x) / float(len(x)),
                                zip(*scores)))

    return score
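The (idx, params) tuple argument suggests eval_single is meant to be mapped over a grid of configurations, for example with multiprocessing. A hypothetical driver; note that the params dicts would also need whatever keys the individual agent classes read, which are not visible in this snippet.

from multiprocessing import Pool

configs = [(i, {'AGENT': name}) for i, name in enumerate(['random', 'sarsa', 'qlearn'])]

with Pool() as pool:
    results = pool.map(eval_single, configs)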
Example No. 15
    def __init__(self, numberofPlayer=6, lvl=0, ratio_gun_player=1.2, list_agent=None):
        ShowBase.__init__(self)
        # Avoid a mutable default argument: build the default agent list per instance.
        if list_agent is None:
            list_agent = [RandomAgent() for _ in range(6)]
        self.init_game_variable(numberofPlayer, lvl, ratio_gun_player, list_agent)
        self.init_game_method()
        self.init_taskMgr()

        # self.active_player = self.numberofPlayer
        self.scores = np.zeros(self.numberofPlayer)
        self.available_actions = list(range(48))
        self.unique_id = ""
        self.numberofGun = 0
        self.guns = []
        self.unique_id_vec = np.zeros(4+(numberofPlayer-1)*3)#+self.numberofGun*3)
        self.tic = time.time()
Example No. 16
def task(round_n, n_games=25):
    env = gym_connectfour.envs.ConnectFourEnv()
    random_agent = RandomAgent(env.action_spec())

    agent = MCTSAgent(env.action_spec(), time_budget=0.001)

    data = []

    for game_n in range(n_games):
        results = play_game(env, agent, opponent_agent=random_agent)

        data.append({"round_n": round_n, "game_n": game_n, **results})

    return data
Example No. 17
    def __init__(self, game_number, numberofPlayer=6, lvl=0, ratio_gun_player=1.2, list_agent=None):
        if list_agent is None:
            list_agent = [DeepQLearningAgent(action_space_size=48) if i < 3 else RandomAgent() for i in range(6)]
        self.init_game_variable(game_number, numberofPlayer, lvl, ratio_gun_player, list_agent)
        self.init_game_method()

        # self.active_player = self.numberofPlayer
        self.scores = np.zeros(self.numberofPlayer)
        self.available_actions = list(range(48))
        self.numberofGun = 0
        self.guns = []
        self.unique_id = ""
        self.unique_id_vec = np.zeros(4 + (numberofPlayer - 1) * 3 ) # + self.numberofGun * 3)
        #self.unique_id_vec = np.zeros(7 + (numberofPlayer - 1) * 3)
        print('vec len',len(self.unique_id_vec))
Example No. 18
 def test_run(self):
     env = DoudizhuEnv()
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 3)
     win = []
     for player_id, payoff in enumerate(payoffs):
         if payoff == 1:
             win.append(player_id)
     if len(win) == 1:
         self.assertEqual(env.game.players[win[0]].role, 'landlord')
     if len(win) == 2:
         self.assertEqual(env.game.players[win[0]].role, 'peasant')
         self.assertEqual(env.game.players[win[1]].role, 'peasant')
Example No. 19
def start():
    agent1 = MinimaxAgent()  # red
    agent2 = RandomAgent()  # yellow

    delay = 0.5
    data = GameData()
    screen = pygame.display.set_mode(data.size)
    game = ConnectGame(data, GameRenderer(screen, data))

    game.print_board()
    game.draw()

    pygame.display.update()
    pygame.time.wait(10)

    agent1_turn = 0

    # Processes mouse and keyboard events, dispatching events to the event bus.
    # The events are handled by the ConnectGame and GameRenderer classes.
    while not game.game_data.game_over:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                game.quit()

            sleep(delay)
            if data.turn == agent1_turn and not game.game_data.game_over:
                game.make_movement(agent1.get_move(data))
                game.update()
                game.draw()
            else:
                game.make_movement(agent2.get_move(data))
                game.update()
                game.draw()

            game.update()
            game.draw()
Example No. 20
def evaluate(existing_model_path,
             num_episodes=100,
             num_hidden_units=(40,),
             starting_alpha=0.1, starting_lamda=0.9,
             min_alpha=0.1, min_lamda=0.7,
             alpha_decay=1, lamda_decay=0.96,
             alpha_decay_interval=1, lamda_decay_interval=3e4,
             hidden_activation=nn.Sigmoid(), num_inputs=198,
             opponent="pubeval"):
    """
    Evaluate a saved model against an opponent and print the model's win rate.

    :param existing_model_path: String. Path of the saved model.
    :param num_episodes: Integer. Number of games to play per model.
    :param num_hidden_units: See EvaluationModel class. 
    :param starting_alpha: See EvaluationModel class.
    :param starting_lamda: See EvaluationModel class.
    :param min_alpha: See EvaluationModel class.
    :param min_lamda: See EvaluationModel class.
    :param alpha_decay: See EvaluationModel class.
    :param lamda_decay: See EvaluationModel class.
    :param alpha_decay_interval: See EvaluationModel class.
    :param lamda_decay_interval: See EvaluationModel class.
    :param hidden_activation: See EvaluationModel class.
    :param num_inputs: See EvaluationModel class.
    :param opponent: "pubeval" or "random".
    """

    model = EvaluationModel(num_inputs=num_inputs, num_hidden_units=num_hidden_units,
                            starting_alpha=starting_alpha, starting_lamda=starting_lamda,
                            min_alpha=min_alpha, min_lamda=min_lamda,
                            alpha_decay=alpha_decay, lamda_decay=lamda_decay,
                            alpha_decay_interval=alpha_decay_interval, lamda_decay_interval=lamda_decay_interval,
                            hidden_activation=hidden_activation)

    model.load(checkpoint_path=existing_model_path)

    if opponent == "pubeval":
        opponent_agent = PubevalAgent(0)
    else:
        opponent_agent = RandomAgent(0)
    agents = [opponent_agent, TDAgent(1, model)]
    wins = [0, 0]
    for i in range(num_episodes):
        game = Game(agents)
        wins[game.play()] += 1

    print("\n{}: \t{}".format(existing_model_path, float(wins[1]) / float(sum(wins))))
Example No. 21
    def test_8_by_8_random_agent(self):
        agent = RandomAgent(problem_id=0)
        self.assertEqual(agent.problem_id, 0)
        self.assertEqual(agent.is_stochastic(), True)
        self.assertEqual(agent.env.ncol, 8)
        self.assertEqual(agent.env.nrow, 8)

        agent = RandomAgent(problem_id=1)
        self.assertEqual(agent.problem_id, 1)
        self.assertEqual(agent.is_stochastic(), True)
        self.assertEqual(agent.env.ncol, 8)
        self.assertEqual(agent.env.nrow, 8)
Example No. 22
    def act(self, gs: GameState) -> int:
        available_actions = gs.get_available_actions(gs.get_active_player())
        if self.agents is None:
            self.agents = [RandomAgent()] * gs.player_count()
        accumulated_scores = np.zeros((len(available_actions),))

        for i, a in enumerate(available_actions):
            gs_clone = gs.clone()
            gs_clone.step(gs.get_active_player(), a)
            if self.determinist_environment:
                max_scores = run_for_n_games_and_return_max(self.agents, gs_clone, self.epochs_per_action)
                accumulated_scores[i] = max_scores[gs.get_active_player()]
            else:
                (total_scores, _) = run_for_n_games_and_return_stats(self.agents, gs_clone, self.epochs_per_action)
                accumulated_scores[i] = total_scores[gs.get_active_player()]

        # print((accumulated_scores, available_actions[np.argmax(accumulated_scores)]))
        return available_actions[np.argmax(accumulated_scores)]
Example No. 23
def main(args):
    """Main Program."""     

    problem_ids, episodes, grid = parse_args(args)
    print('Setting the seed for random was found to be slow; you can turn it on with seed=True.')
    print('More info in the documentation...')

    # Reset the random generator to a known state (for reproducibility)
    np.random.seed(12)

    for problem_id in problem_ids:
        # this seed doesn't work... if needed, change seed to True below
        agent = RandomAgent(problem_id=problem_id, map_name_base=grid) 
        agent.solve(episodes=episodes, seed=None)
        agent.evaluate(episodes)
Example No. 24
def create_random_experiment_runner(num_dcs, num_customers, dcs_per_customer,
                                    demand_mean, demand_var, num_commodities,
                                    orders_per_day, num_steps):
    physical_network = PhysicalNetwork(
        num_dcs,
        num_customers,
        dcs_per_customer,
        demand_mean,
        demand_var,
        num_commodities,
    )
    order_generator = ActualOrderGenerator(physical_network, orders_per_day)
    generator = DirichletInventoryGenerator(physical_network)

    environment_parameters = EnvironmentParameters(physical_network,
                                                   order_generator, generator,
                                                   num_steps)

    env = ShippingFacilityEnvironment(environment_parameters)
    agent = RandomAgent(env)

    return ExperimentRunner(order_generator, generator, agent, env)
Example No. 25
def main_random():
    env = AgarioEnv(render=RENDER,
                    speed_scale=SPEED_SCALE,
                    display_text=DISPLAY_TEXT,
                    grid_resolution=GRID_RESOLUTION)
    agent = RandomAgent()
    for episode in range(NUM_EPISODES):
        state = env.reset()
        num_steps = 0
        done = False
        while True:
            action = agent.get_action(state)
            for _ in range(NUM_SKIP_FRAMES):
                if RENDER:
                    env.render()
                state, reward, done, _ = env.step(action)
            if done or num_steps >= MAX_STEPS:
                print(f'epoch: {episode}, max_mass = {state.mass}')
                agent.max_masses.append(state.mass)
                break
            num_steps += 1
    agent.save_performance(path='random.performance')
    agent.print_final_stats()
    env.close()
Example No. 26
        return True

    def play(self):
        '''Play a whole game.'''

        done = False
        while not done:
            done = self.play_action()

        if self.total_reward_a == self.total_reward_b:
            return None
        elif self.total_reward_a > self.total_reward_b:
            return 'a'
        else:
            return 'b'


if __name__ == '__main__':
    env = Environment()
    agent_a = RandomAgent('a')
    agent_b = RandomAgent('b')
    game = Game(env, agent_a, agent_b)

    print('Playing a new game.')
    winner = game.play()
    print('Game has completed.')
    if winner:
        print('Player {} has won.'.format(winner))
    else:
        print('Game is a tie.')
Example No. 27
episode_num = 100

memory_init_size = 100

train_every = 1
agent = DQNTransformer(
    scope='DouDiZhuTransformer',
    action_num=env.action_num,
    replay_memory_init_size=memory_init_size,
    train_every=train_every,
    state_shape=env.state_shape,
    mlp_layers=[512,512]
)
log_dir = Root_Path+'./experiment_log/dqn/'
logger = Logger(log_dir)
random_agent = RandomAgent(action_num=env_eval.action_num)
env.set_agents([agent, random_agent, random_agent])
env_eval.set_agents([agent, random_agent, random_agent])


for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(env_eval, evaluate_num)[0])
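This fragment relies on names defined elsewhere in the original script (env, env_eval, evaluate_every, evaluate_num, Root_Path, and the imports for DQNTransformer, RandomAgent, Logger and tournament). A hedged sketch of the missing setup, assuming an RLCard 0.x-style API; the import paths and values below are guesses, not the original project's configuration.

import rlcard
from rlcard.utils import Logger, tournament

env = rlcard.make('doudizhu')
env_eval = rlcard.make('doudizhu')
evaluate_every = 100    # evaluate the agent every N training episodes (assumed)
evaluate_num = 1000     # number of evaluation games per checkpoint (assumed)
Root_Path = '.'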
Example No. 28
 def setUp(self):
     number_of_actions = 2
     action_space = spaces.Discrete(2)
     self.agent = RandomAgent(action_space)
Example No. 29
from agents import DeepQLearningAgent, RandomAgent
from environments.battle_royale import BattleRoyale
from runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    list_agent = [DeepQLearningAgent(action_space_size=48) if i == 1 else RandomAgent() for i in range(6)]
    gs = BattleRoyale(list_agent=list_agent)
    gs.run()


Example No. 30
from agents import CommandLineAgent, DeepQLearningAgent, ReinforceMeanBaselineAgent, RandomAgent
from environments.connect4 import Connect4GameState
from runners import run_for_n_games_and_print_stats_1, run_step

if __name__ == "__main__":
    gs = Connect4GameState()
    agent0 = ReinforceMeanBaselineAgent(
        state_space_size=gs.get_vectorized_state().shape[0],
        action_space_size=gs.get_action_space_size())
    agent1 = RandomAgent()

    # for i in range(100):
    #     run_for_n_games_and_print_stats([agent0, agent1], gs, 5000)

    run_for_n_games_and_print_stats_1([agent0, agent1], gs, 10000, "C4_ReinforceMeanBaseline_10000")