Exemplo n.º 1
0
def eval_ale_environment(game,
                         model_path,
                         render,
                         num_of_epochs,
                         steps_per_epoch,
                         stochastic,
                         initial_epsilon,
                         log_dir,
                         human_interaction=False):
    """Evaluate a saved A3C model on an ALE game and return the result.

    Args:
        game: Game identifier forwarded to ``ALEEnvironment``.
        model_path: Checkpoint path handed to ``PolicyNetwork`` as
            ``load_model_path``.
        render: Forwarded as ``is_render``; when truthy only one thread
            is used.
        num_of_epochs: Forwarded to ``A3CAgent``.
        steps_per_epoch: Forwarded to ``A3CAgent``.
        stochastic: When falsy, the configuration is created with
            ``stochastic=False`` and the agent is created with
            ``using_e_greedy=True`` and ``initial_epsilon``.
        initial_epsilon: Only used when ``stochastic`` is falsy.
        log_dir: Forwarded to ``A3CAgent``.
        human_interaction: When truthy only one thread is used.

    Returns:
        Whatever ``agent.evaluate()`` returns.
    """
    # One worker when rendering or when a human is involved, otherwise eight.
    num_of_threads = 1 if (render or human_interaction) else 8

    environment = ALEEnvironment(game,
                                 is_render=render,
                                 max_episode_steps=200000)

    # Options shared by both modes; the deterministic mode adds its extras.
    config_options = {}
    agent_options = dict(num_of_threads=num_of_threads,
                         report_frequency=1,
                         num_of_epochs=num_of_epochs,
                         steps_per_epoch=steps_per_epoch,
                         log_dir=log_dir)
    if not stochastic:
        config_options['stochastic'] = False
        agent_options.update(using_e_greedy=True,
                             initial_epsilon=initial_epsilon)

    network_config = AtariA3CConfig(environment, **config_options)
    network = PolicyNetwork(network_config, load_model_path=model_path)
    agent = A3CAgent(network, environment, **agent_options)

    return agent.evaluate()
Exemplo n.º 2
0
def train_ale_environment():
    """Train a prioritized DQN agent on ALE Breakout.

    Builds the environment, a ``PrioritizedAtariDQNConfig``, a
    ``PolicyNetwork`` and a ``DQNAgent`` with fixed hyper-parameters, then
    calls ``train()`` on the agent. Returns nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    # Alternative noted by the original author (Gym instead of ALE):
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    config = PrioritizedAtariDQNConfig(env,
                                       initial_beta=0.4,
                                       initial_learning_rate=0.00025,
                                       debug_mode=True)

    policy_network = PolicyNetwork(config, max_num_of_checkpoints=100)

    agent_options = dict(
        save_frequency=5e5,
        steps_per_epoch=1e6,
        num_of_epochs=50,
        exp_replay_size=2**19,
        importance_sampling=True,
        log_dir="./train/breakout/pdqn_check_point",
        prioritized_alpha=0.6,
        prioritized=True,
    )
    dqn_agent = DQNAgent(policy_network, env, **agent_options)

    dqn_agent.train()
Exemplo n.º 3
0
def eval_ale_environment(model_path, render, num_of_epochs, steps_per_epoch,
                         initial_epsilon, log_dir):
    """Evaluate a saved prioritized DQN model on ALE Riverraid.

    Args:
        model_path: Checkpoint path handed to ``PolicyNetwork`` as
            ``load_model_path``.
        render: Forwarded to ``ALEEnvironment`` as ``is_render``.
        num_of_epochs: Forwarded to ``DQNAgent``.
        steps_per_epoch: Forwarded to ``DQNAgent``.
        initial_epsilon: Forwarded to ``DQNAgent``.
        log_dir: Forwarded to ``DQNAgent``.

    Returns:
        Whatever ``agent.evaluate()`` returns.
    """
    env = ALEEnvironment(ALEEnvironment.RIVERRAID,
                         is_render=render,
                         max_episode_steps=5000)

    config = PrioritizedAtariDQNConfig(env)
    policy_network = PolicyNetwork(config, load_model_path=model_path)

    agent_options = dict(
        initial_epsilon=initial_epsilon,
        report_frequency=1,
        num_of_threads=8,
        num_of_epochs=num_of_epochs,
        steps_per_epoch=steps_per_epoch,
        log_dir=log_dir,
        prioritized=True,
    )
    dqn_agent = DQNAgent(policy_network, env, **agent_options)

    return dqn_agent.evaluate()
Exemplo n.º 4
0
def train_tank_1_player_machine_lstm():
    """Train an A3C-LSTM agent on a single-player, non-rendered TankBattle.

    All settings are hard-coded. The function builds the game engine,
    wraps it in a ``FruitEnvironment``, creates the network and agent, and
    calls ``train()``. Returns nothing.
    """
    engine_options = dict(render=False,
                          player1_human_control=False,
                          player2_human_control=False,
                          two_players=False,
                          speed=1000,
                          frame_skip=5,
                          debug=False)
    tank_game = TankBattle(**engine_options)

    environment = FruitEnvironment(tank_game,
                                   max_episode_steps=10000,
                                   state_processor=AtariProcessor(),
                                   multi_objective=False)

    lstm_config = AtariA3CLSTMConfig(environment, initial_learning_rate=0.004)
    policy_network = PolicyNetwork(lstm_config, num_of_checkpoints=40, using_gpu=True)

    log_dir = './train/nips/TankBattle/a3c_gpu_8_threads_tank_time_based_10_lstm_lr_0004'
    lstm_agent = A3CLSTMAgent(policy_network, environment,
                              num_of_epochs=10,
                              steps_per_epoch=1e6,
                              save_frequency=5e5,
                              update_network_frequency=4,
                              log_dir=log_dir,
                              num_of_threads=8)

    lstm_agent.train()
Exemplo n.º 5
0
def evaluate_tank_1_player_machine():
    """Evaluate a saved A3C model on a rendered, single-player TankBattle.

    All settings, including the checkpoint path and the log directory, are
    hard-coded. A single thread is used.

    Returns:
        Whatever ``agent.evaluate()`` returns. The result is handed back to
        the caller instead of being dropped, in line with the other
        evaluation helpers in this file.
    """
    game_engine = TankBattle(render=True,
                             player1_human_control=False,
                             player2_human_control=False,
                             two_players=False,
                             speed=60,
                             frame_skip=5,
                             debug=False
                             )

    env = FruitEnvironment(game_engine,
                           max_episode_steps=10000,
                           state_processor=AtariProcessor(),
                           multi_objective=False)

    network_config = AtariA3CConfig(env)

    # Load the previously trained checkpoint rather than starting fresh.
    network = PolicyNetwork(network_config,
                            using_gpu=True,
                            load_model_path='./train/nips/TankBattle/a3c_gpu_8_threads_tank_time_based_10_lr_0004_04-10-2018-16-27/model-9500578'
                            )

    agent = A3CAgent(network, env,
                     num_of_epochs=1,
                     steps_per_epoch=100000,
                     report_frequency=1,
                     log_dir='./thi_test/nips/TankBattle/a3c_gpu_8_threads_tank_time_based_30_49_lr_0004',
                     num_of_threads=1)

    # Propagate the evaluation outcome so callers can inspect it.
    return agent.evaluate()
Exemplo n.º 6
0
def train_ale_environment_lstm():
    """Train an A3C-LSTM agent on ALE Pong.

    The environment is created with ``loss_of_life_termination=True``.
    All hyper-parameters and the log directory are hard-coded. Returns
    nothing.
    """
    env = ALEEnvironment(ALEEnvironment.PONG, loss_of_life_termination=True)
    # Alternative noted by the original author (Gym instead of ALE):
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    lstm_config = AtariA3CLSTMConfig(env, initial_learning_rate=0.001)

    # Network shared by the A3C workers.
    shared_network = PolicyNetwork(lstm_config, num_of_checkpoints=40, using_gpu=True)

    agent_options = dict(
        save_time_based=30,
        checkpoint_stop=40,
        log_dir='./train/Pong/a3c_lstm_gpu_8_threads_pong_time_based_30_20',
        num_of_threads=8,
    )
    lstm_agent = A3CLSTMAgent(shared_network, env, **agent_options)

    lstm_agent.train()
Exemplo n.º 7
0
def train_ale_environment():
    """Train an A3C agent on ALE Breakout with eight threads.

    All hyper-parameters and the log directory are hard-coded. Returns
    nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)
    # Alternative noted by the original author (Gym instead of ALE):
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    a3c_config = AtariA3CConfig(env, initial_learning_rate=0.004)

    # Network shared by the A3C workers.
    shared_network = PolicyNetwork(a3c_config, num_of_checkpoints=40, using_gpu=True)

    agent_options = dict(
        save_time_based=30,
        checkpoint_stop=20,
        log_dir='./train/Breakout/a3c_gpu_8_threads_breakout_time_based_30_20',
        num_of_threads=8,
    )
    a3c_agent = A3CAgent(shared_network, env, **agent_options)

    a3c_agent.train()
Exemplo n.º 8
0
def train_ale_environment():
    """Train an A3C agent on ALE Breakout using ``AtariBlackenProcessor``.

    The environment is created with ``loss_of_life_negative_reward=True``.
    All hyper-parameters and the log directory are hard-coded. Returns
    nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         loss_of_life_negative_reward=True,
                         state_processor=AtariBlackenProcessor())

    a3c_config = AtariA3CConfig(env, initial_learning_rate=0.004)

    # Network shared by the A3C workers.
    shared_network = PolicyNetwork(a3c_config, num_of_checkpoints=40, using_gpu=True)

    agent_options = dict(
        save_time_based=30,
        checkpoint_stop=40,
        log_dir='./train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_30_40',
        num_of_threads=8,
    )
    a3c_agent = A3CAgent(shared_network, env, **agent_options)

    a3c_agent.train()
Exemplo n.º 9
0
def train_atari_sea_quest():
    """Train an A3C learner on ALE Seaquest with ``frame_skip=8``.

    The agent is obtained from ``AgentFactory.create``. All settings are
    hard-coded. Returns nothing.
    """
    environment = ALEEnvironment(ALEEnvironment.SEAQUEST, frame_skip=8)
    a3c_config = AtariA3CConfig(environment, initial_learning_rate=0.004)
    policy_network = PolicyNetwork(a3c_config, max_num_of_checkpoints=40)

    training_options = dict(
        num_of_epochs=40,
        steps_per_epoch=1e6,
        checkpoint_frequency=1e6,
        log_dir='./train/sea_quest/a3c_checkpoints',
    )
    learner_agent = AgentFactory.create(A3CLearner, policy_network, environment,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 10
0
def train_tank_1_player_machine():
    """Train an A3C learner on a single-player, non-rendered TankBattle.

    The environment uses ``AtariProcessor`` for states and
    ``TankBattleTotalRewardProcessor`` for rewards. All settings are
    hard-coded. Returns nothing.
    """
    tank_game = TankBattle(
        render=False,
        player1_human_control=False,
        player2_human_control=False,
        two_players=False,
        speed=2000,
        frame_skip=5,
    )

    environment = FruitEnvironment(
        tank_game,
        max_episode_steps=10000,
        state_processor=AtariProcessor(),
        reward_processor=TankBattleTotalRewardProcessor(),
    )

    a3c_config = AtariA3CConfig(environment, initial_learning_rate=0.004)
    policy_network = PolicyNetwork(a3c_config, max_num_of_checkpoints=20)

    training_options = dict(
        num_of_epochs=10,
        steps_per_epoch=1e6,
        checkpoint_frequency=5e5,
        log_dir='./train/tank_battle/a3c_checkpoints',
    )
    learner_agent = AgentFactory.create(A3CLearner, policy_network, environment,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 11
0
def train_ale_environment():
    """Train an A3C learner on ALE Breakout via ``AgentFactory``.

    The configuration is created with ``debug_mode=True``. All settings
    are hard-coded. Returns nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    a3c_config = AtariA3CConfig(env, initial_learning_rate=0.004, debug_mode=True)

    # Network shared by the A3C learners.
    shared_network = PolicyNetwork(a3c_config, max_num_of_checkpoints=40)

    training_options = dict(
        num_of_epochs=40,
        steps_per_epoch=1e6,
        checkpoint_frequency=1e6,
        log_dir='./train/breakout/a3c_checkpoints',
    )
    learner_agent = AgentFactory.create(A3CLearner, shared_network, env,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 12
0
def train_milk_1_milk_1_fix_robots_with_no_status():
    """Train a multi-agent A3C learner on MilkFactory.

    The game is created with one milk robot, one fix robot and
    ``show_status=False``. All settings are hard-coded. Returns nothing.
    """
    factory_options = dict(
        render=False,
        speed=6000,
        max_frames=200,
        frame_skip=1,
        number_of_milk_robots=1,
        number_of_fix_robots=1,
        number_of_milks=1,
        seed=None,
        human_control=False,
        error_freq=0.03,
        human_control_robot=0,
        milk_speed=3,
        debug=False,
        action_combined_mode=False,
        show_status=False,
    )
    milk_game = MilkFactory(**factory_options)

    env = FruitEnvironment(milk_game, max_episode_steps=200,
                           state_processor=AtariProcessor())

    ma_config = MAA3CConfig(env, initial_learning_rate=0.001, beta=0.001)
    policy_network = PolicyNetwork(ma_config, max_num_of_checkpoints=40)

    training_options = dict(
        num_of_epochs=40,
        steps_per_epoch=1e5,
        checkpoint_frequency=1e5,
        log_dir='./train/milk_factory/a3c_ma_2_checkpoints',
    )
    learner_agent = AgentFactory.create(MAA3CLearner, policy_network, env,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 13
0
def composite_agents(main_model_path, auxiliary_model_path, alpha, epsilon):
    """Evaluate a ``DQA3CLearner`` agent on ALE Breakout.

    Args:
        main_model_path: Checkpoint path handed to ``PolicyNetwork`` as
            ``load_model_path``.
        auxiliary_model_path: Forwarded to ``AgentFactory.create``.
        alpha: Forwarded to ``AgentFactory.create``.
        epsilon: Forwarded to ``AgentFactory.create``.

    Returns:
        Whatever ``agent.evaluate()`` returns.
    """
    # Default Breakout environment; no loss-of-life reward options are set.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    dq_config = DQAtariA3CConfig(env)
    shared_network = PolicyNetwork(dq_config, load_model_path=main_model_path)

    evaluation_options = dict(
        num_of_epochs=1,
        steps_per_epoch=10000,
        checkpoint_frequency=1e5,
        learner_report_frequency=1,
        auxiliary_model_path=auxiliary_model_path,
        alpha=alpha,
        epsilon=epsilon,
    )
    composite = AgentFactory.create(DQA3CLearner, shared_network, env,
                                    **evaluation_options)

    return composite.evaluate()
Exemplo n.º 14
0
def evaluate_ale_environment():
    """Evaluate a saved A3C model on a rendered ALE Breakout environment.

    The checkpoint path and log directory are hard-coded, and a single
    learner is used because a GUI is shown.

    Returns:
        Whatever ``agent.evaluate()`` returns. The result is handed back to
        the caller instead of being dropped, in line with the other
        evaluation helpers in this file.
    """
    # Create an ALE for Breakout and enable rendering
    environment = ALEEnvironment(ALEEnvironment.BREAKOUT, is_render=True)

    # Create a network configuration for Atari A3C
    network_config = AtariA3CConfig(environment)

    # Create a shared network for A3C agent, loading the trained checkpoint
    network = PolicyNetwork(network_config,
                            load_model_path='./train/breakout/a3c_checkpoints_10-23-2019-02-13/model-39030506')

    # Create an A3C agent, use only one learner as we want to show a GUI
    agent = AgentFactory.create(A3CLearner, network, environment, num_of_epochs=1, steps_per_epoch=10000,
                                num_of_learners=1, log_dir='./test/breakout/a3c_checkpoints')

    # Evaluate it and propagate the outcome so callers can inspect it
    return agent.evaluate()
Exemplo n.º 15
0
def train_ale_environment():
    """Train a DQN agent on ALE Breakout with library default settings.

    Only the log directory is specified explicitly. Returns nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # Wrap the Atari DQN configuration in a policy network.
    policy_network = PolicyNetwork(AtariDQNConfig(env))

    dqn_agent = DQNAgent(policy_network, env,
                         log_dir="./train/breakout/dqn_breakout")

    dqn_agent.train()
Exemplo n.º 16
0
def train_breakout_with_a3c_remove_immutable_objects():
    """Train an A3C learner on ALE Breakout using ``AtariBlackenProcessor``.

    The environment is created with ``loss_of_life_negative_reward=True``
    and the configuration with ``debug_mode=True``. All settings are
    hard-coded. Returns nothing.
    """
    # The original author notes that the processor blackens the top half
    # of the state.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         loss_of_life_negative_reward=True,
                         state_processor=AtariBlackenProcessor())

    a3c_config = AtariA3CConfig(env, initial_learning_rate=0.004, debug_mode=True)

    # Network shared by the A3C learners.
    shared_network = PolicyNetwork(a3c_config, max_num_of_checkpoints=50)

    training_options = dict(
        num_of_epochs=50,
        steps_per_epoch=1e6,
        checkpoint_frequency=1e6,
        log_dir='./train/breakout/a3c_smc_1_checkpoints',
    )
    learner_agent = AgentFactory.create(A3CLearner, shared_network, env,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 17
0
def train_atari_sea_quest_with_map():
    """Train an ``A3CMapLearner`` on ALE Seaquest with a custom reward.

    The environment uses ``SeaquestMapProcessor`` and ``frame_skip=8``.
    The nested ``update_reward`` callback is handed to the agent as
    ``update_reward_fnc``. All settings are hard-coded. Returns nothing.
    """
    def update_reward(rewards):
        # When element 3 equals 1 only element 2 counts; otherwise the
        # reward is the sum of elements 0 and 1.
        return rewards[2] if rewards[3] == 1 else rewards[0] + rewards[1]

    environment = ALEEnvironment(ALEEnvironment.SEAQUEST,
                                 state_processor=SeaquestMapProcessor(),
                                 frame_skip=8)

    map_config = A3CMapConfig(environment, initial_learning_rate=0.004)
    policy_network = PolicyNetwork(map_config, max_num_of_checkpoints=40)

    training_options = dict(
        num_of_epochs=40,
        steps_per_epoch=1e6,
        checkpoint_frequency=1e6,
        log_dir='./train/sea_quest/a3c_map_checkpoints',
        network_update_steps=12,
        update_reward_fnc=update_reward,
    )
    learner_agent = AgentFactory.create(A3CMapLearner, policy_network, environment,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 18
0
def train_tank_1_player_machine_with_map():
    """Train an ``A3CMapLearner`` on a map-enabled, single-player TankBattle.

    The nested ``update_reward`` callback is handed to the agent as
    ``update_reward_fnc``. All settings are hard-coded. Returns nothing.
    """
    def update_reward(rewards):
        # Only element 2 of the reward sequence is used.
        return rewards[2]

    engine_options = dict(
        render=False,
        player1_human_control=False,
        player2_human_control=False,
        two_players=False,
        speed=1000,
        frame_skip=5,
        debug=False,
        using_map=True,
        num_of_enemies=5,
        multi_target=True,
        strategy=3,
    )
    tank_game = TankBattle(**engine_options)

    environment = FruitEnvironment(tank_game, max_episode_steps=10000,
                                   state_processor=AtariProcessor())

    map_config = A3CMapConfig(environment, initial_learning_rate=0.004)
    policy_network = PolicyNetwork(map_config, max_num_of_checkpoints=20)

    training_options = dict(
        num_of_epochs=10,
        steps_per_epoch=1e6,
        checkpoint_frequency=5e5,
        log_dir='./train/tank_battle/a3c_map_checkpoints',
        network_update_steps=4,
        update_reward_fnc=update_reward,
    )
    learner_agent = AgentFactory.create(A3CMapLearner, policy_network, environment,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 19
0
def train_ale_environment():
    """Train a ``DQNLearner`` agent on ALE Breakout via ``AgentFactory``.

    The configuration is created with ``debug_mode=True``. All settings
    are hard-coded. Returns nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    dqn_config = AtariDQNConfig(env, debug_mode=True)

    # Wrap the configuration in a policy network.
    policy_network = PolicyNetwork(dqn_config, max_num_of_checkpoints=40)

    training_options = dict(
        num_of_epochs=20,
        steps_per_epoch=1e6,
        checkpoint_frequency=5e5,
        log_dir='./train/breakout/dqn_checkpoints',
    )
    learner_agent = AgentFactory.create(DQNLearner, policy_network, env,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 20
0
def train_multi_objective_dqn_agent(is_linear=True, extended_config=True):
    """Train a multi-objective DQN learner on Deep Sea Treasure.

    Args:
        is_linear: When truthy, ``linear_thresholds`` is ``[1, 0]`` and
            ``tlo_thresholds`` is ``None``. Otherwise ``linear_thresholds``
            is ``[10, 1]`` and ``tlo_thresholds`` holds the mean of
            treasures 3 and 4 as reported by the game.
        extended_config: When truthy, the game uses a graphical state, the
            environment uses ``AtariProcessor`` and the configuration is a
            ``MOExDQNConfig`` with ``using_cnn=True`` and
            ``history_length=4``. Otherwise a ``MODQNConfig`` is used with
            a non-graphical state.

    Returns nothing.
    """
    extended = bool(extended_config)

    # Both modes share every game setting except the state representation.
    game = DeepSeaTreasure(graphical_state=extended, width=5, seed=100, render=False,
                           max_treasure=100, speed=1000)

    # Put the game into the fruit wrapper.
    if extended:
        environment = FruitEnvironment(game, max_episode_steps=60, state_processor=AtariProcessor())
    else:
        environment = FruitEnvironment(game, max_episode_steps=60)

    treasures = game.get_treasure()
    if is_linear:
        tlo_thresholds, linear_thresholds = None, [1, 0]
    else:
        tlo_thresholds, linear_thresholds = [(treasures[4] + treasures[3]) / 2], [10, 1]

    config_options = dict(is_linear=is_linear,
                          linear_thresholds=linear_thresholds,
                          tlo_thresholds=tlo_thresholds)
    if extended:
        config = MOExDQNConfig(environment, using_cnn=True, history_length=4, **config_options)
    else:
        config = MODQNConfig(environment, **config_options)

    policy_network = PolicyNetwork(config, max_num_of_checkpoints=10)

    training_options = dict(
        num_of_epochs=2,
        steps_per_epoch=100000,
        checkpoint_frequency=50000,
        log_dir='./train/deep_sea_treasure/mo_dqn_checkpoints',
    )
    learner_agent = AgentFactory.create(MODQNLearner, policy_network, environment,
                                        **training_options)

    learner_agent.train()
Exemplo n.º 21
0
def train_breakout_with_a3c_normal():
    """Train an A3C agent on a default ALE Breakout environment.

    The environment is created with no extra options (no loss-of-life
    arguments are passed). All hyper-parameters and the log directory are
    hard-coded. Returns nothing.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    a3c_config = AtariA3CConfig(env, initial_learning_rate=0.004)

    # Network shared by the A3C workers.
    shared_network = PolicyNetwork(a3c_config, num_of_checkpoints=20)

    agent_options = dict(
        num_of_epochs=70,
        steps_per_epoch=1e6,
        save_frequency=5e6,
        log_dir='./train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_normal',
        num_of_threads=8,
    )
    a3c_agent = A3CAgent(shared_network, env, **agent_options)

    a3c_agent.train()
Exemplo n.º 22
0
def train_ale_environment():
    """Train a DQN agent with a duel network configuration on ALE Pong.

    All hyper-parameters and the log directory are hard-coded. Returns
    nothing.
    """
    env = ALEEnvironment(ALEEnvironment.PONG)
    # Alternative noted by the original author (Gym instead of ALE):
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())

    duel_config = AtariDuelDQNConfig(env)

    # Per the original author: keep a maximum of 40 checkpoints.
    policy_network = PolicyNetwork(duel_config, num_of_checkpoints=40)

    # Per the original author: save a checkpoint every 30 minutes and stop
    # training at the 40th checkpoint.
    agent_options = dict(
        save_time_based=30,
        checkpoint_stop=40,
        log_dir="./train/Pong/dueldqn_pong_time_based_30_40",
    )
    dqn_agent = DQNAgent(policy_network, env, **agent_options)

    dqn_agent.train()
Exemplo n.º 23
0
def train_ale_environment_human_interaction(human_interaction=True):
    """Train an A3C agent on ALE Riverraid, optionally in a rendered mode.

    Args:
        human_interaction: When truthy, the environment is rendered,
            actions 10 through 17 are disabled and a single thread is
            used. Otherwise the environment is not rendered and eight
            threads are used. The log directory also differs by mode.

    Returns nothing.
    """
    if human_interaction:
        # Key notes kept from the original author:
        #   1: fast shoot, 2: slow shoot, 3: right, 4: left
        environment = ALEEnvironment(
            ALEEnvironment.RIVERRAID,
            is_render=True,
            disable_actions=list(range(10, 18)))
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_river_disable_fire_time_based_30_40'
        num_of_threads = 1
    else:
        environment = ALEEnvironment(ALEEnvironment.RIVERRAID, is_render=False)
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_breakout_time_based_30_40'
        num_of_threads = 8

    # Everything below is the same in both modes.
    network_config = AtariA3CConfig(environment,
                                    initial_learning_rate=0.001,
                                    debug_mode=True)

    # Network shared by the A3C workers.
    network = PolicyNetwork(network_config,
                            num_of_checkpoints=40,
                            using_gpu=True)

    agent = A3CAgent(network,
                     environment,
                     save_time_based=30,
                     checkpoint_stop=40,
                     log_dir=log_dir,
                     num_of_threads=num_of_threads)

    agent.train()