Exemplo n.º 1
0
def train_ale_environment_lstm():
    """Train an ``A3CLSTMAgent`` on the ALE Pong game.

    Builds the environment, an ``AtariA3CLSTMConfig``, a ``PolicyNetwork``
    and the agent, then calls ``agent.train()``. Nothing is returned.
    """
    # Pong environment; `loss_of_life_termination=True` is forwarded as-is.
    # Gym alternative noted in the original example:
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())
    env = ALEEnvironment(ALEEnvironment.PONG, loss_of_life_termination=True)

    # Network configuration and the network built from it.
    config = AtariA3CLSTMConfig(env, initial_learning_rate=0.001)
    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    # NOTE(review): the log_dir suffix reads "30_20" while checkpoint_stop is
    # 40 -- confirm which one is intended.
    agent_options = {
        'save_time_based': 30,
        'checkpoint_stop': 40,
        'log_dir': './train/Pong/a3c_lstm_gpu_8_threads_pong_time_based_30_20',
        'num_of_threads': 8,
    }
    trainer = A3CLSTMAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 2
0
def train_ale_environment():
    """Train an ``A3CAgent`` on ALE Breakout with 8 threads.

    Creates the environment, configuration, network and agent in that
    order and then starts training. Nothing is returned.
    """
    # Gym alternative noted in the original example:
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # The configuration is handed straight to the network.
    policy = PolicyNetwork(
        AtariA3CConfig(env, initial_learning_rate=0.004),
        num_of_checkpoints=40,
        using_gpu=True,
    )

    agent_options = {
        'save_time_based': 30,
        'checkpoint_stop': 20,
        'log_dir': './train/Breakout/a3c_gpu_8_threads_breakout_time_based_30_20',
        'num_of_threads': 8,
    }
    trainer = A3CAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 3
0
def eval_ale_environment(model_path, render, num_of_epochs, steps_per_epoch,
                         initial_epsilon, log_dir):
    """Evaluate a saved prioritized-DQN model on ALE Riverraid.

    Args:
        model_path: Forwarded to ``PolicyNetwork`` as ``load_model_path``.
        render: Forwarded to ``ALEEnvironment`` as ``is_render``.
        num_of_epochs: Forwarded to ``DQNAgent``.
        steps_per_epoch: Forwarded to ``DQNAgent``.
        initial_epsilon: Forwarded to ``DQNAgent``.
        log_dir: Forwarded to ``DQNAgent``.

    Returns:
        Whatever ``DQNAgent.evaluate()`` returns.
    """
    # Riverraid environment with the episode length set to 5000 steps.
    env = ALEEnvironment(
        ALEEnvironment.RIVERRAID,
        is_render=render,
        max_episode_steps=5000,
    )

    # Network restored from `model_path` using the prioritized DQN config.
    policy = PolicyNetwork(PrioritizedAtariDQNConfig(env),
                           load_model_path=model_path)

    agent_options = {
        'initial_epsilon': initial_epsilon,
        'report_frequency': 1,
        'num_of_threads': 8,
        'num_of_epochs': num_of_epochs,
        'steps_per_epoch': steps_per_epoch,
        'log_dir': log_dir,
        'prioritized': True,
    }
    evaluator = DQNAgent(policy, env, **agent_options)

    return evaluator.evaluate()
Exemplo n.º 4
0
def composite_agents():
    """Evaluate an ``A3CAgent`` backed by a ``DQPolicyNetwork`` on Breakout.

    Two hard-coded checkpoints are handed to the network together with
    ``alpha=0.4`` and ``epsilon=0.02``. The evaluation result is discarded,
    so nothing is returned.
    """
    # Checkpoints loaded by the network.
    first_model = "./train/Breakout/a3c_breakout_time_based_30_32_02-02-2018-15-53/model-49861769"
    second_model = "./train/Lifetime/Breakout/a3c_gpu_8_threads_breakout_time_based_30_40_03-09-2018-16-24/model-49002411"

    # Rendered Breakout environment.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT,
                         is_render=True,
                         max_episode_steps=10000)

    config = AtariDQA3CConfig(env)
    policy = DQPolicyNetwork(config,
                             load_model_path=first_model,
                             load_model_path_2=second_model,
                             alpha=0.4,
                             epsilon=0.02)

    # Single thread, one epoch of 10000 steps.
    evaluator = A3CAgent(policy,
                         env,
                         num_of_threads=1,
                         report_frequency=1,
                         num_of_epochs=1,
                         steps_per_epoch=10000,
                         log_dir="./thi_test/Lifetime/Breakout/a3c")

    evaluator.evaluate()
Exemplo n.º 5
0
def train_ale_environment():
    """Train a prioritized ``DQNAgent`` on ALE Breakout.

    Builds the environment, a ``PrioritizedAtariDQNConfig``, a
    ``PolicyNetwork`` and the agent, then calls ``agent.train()``.
    Nothing is returned.
    """
    # Gym alternative noted in the original example:
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    config_options = {
        'initial_beta': 0.4,
        'initial_learning_rate': 0.00025,
        'debug_mode': True,
    }
    config = PrioritizedAtariDQNConfig(env, **config_options)

    policy = PolicyNetwork(config, max_num_of_checkpoints=100)

    agent_options = {
        'save_frequency': 5e5,
        'steps_per_epoch': 1e6,
        'num_of_epochs': 50,
        'exp_replay_size': 2 ** 19,
        'importance_sampling': True,
        'log_dir': "./train/breakout/pdqn_check_point",
        'prioritized_alpha': 0.6,
        'prioritized': True,
    }
    trainer = DQNAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 6
0
def train_ale_environment():
    """Train an ``A3CAgent`` on Breakout using ``AtariBlackenProcessor``.

    The environment is created with ``loss_of_life_negative_reward=True``
    and an ``AtariBlackenProcessor`` as its state processor. Nothing is
    returned.
    """
    env = ALEEnvironment(
        ALEEnvironment.BREAKOUT,
        loss_of_life_negative_reward=True,
        state_processor=AtariBlackenProcessor(),
    )

    # Configuration and the network built from it.
    config = AtariA3CConfig(env, initial_learning_rate=0.004)
    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    agent_options = {
        'save_time_based': 30,
        'checkpoint_stop': 40,
        'log_dir': './train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_30_40',
        'num_of_threads': 8,
    }
    trainer = A3CAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 7
0
def train_atari_sea_quest():
    """Train an ``A3CLearner``-based agent on ALE Seaquest.

    The agent is obtained from ``AgentFactory.create`` and trained via
    ``agent.train()``. Nothing is returned.
    """
    # Seaquest environment; `frame_skip=8` is forwarded as-is.
    environment = ALEEnvironment(ALEEnvironment.SEAQUEST, frame_skip=8)

    config = AtariA3CConfig(environment, initial_learning_rate=0.004)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    trainer = AgentFactory.create(
        A3CLearner,
        policy,
        environment,
        num_of_epochs=40,
        steps_per_epoch=1e6,
        checkpoint_frequency=1e6,
        log_dir='./train/sea_quest/a3c_checkpoints',
    )

    trainer.train()
Exemplo n.º 8
0
def eval_ale_environment(game,
                         model_path,
                         render,
                         num_of_epochs,
                         steps_per_epoch,
                         stochastic,
                         initial_epsilon,
                         log_dir,
                         human_interaction=False):
    """Evaluate a saved A3C model on the given ALE game.

    Args:
        game: Forwarded to ``ALEEnvironment`` as its first argument.
        model_path: Forwarded to ``PolicyNetwork`` as ``load_model_path``.
        render: Forwarded to ``ALEEnvironment`` as ``is_render``.
        num_of_epochs: Forwarded to ``A3CAgent``.
        steps_per_epoch: Forwarded to ``A3CAgent``.
        stochastic: When falsy, the config gets ``stochastic=False`` and the
            agent gets ``using_e_greedy=True`` plus ``initial_epsilon``.
            When truthy, none of those three arguments are passed.
        initial_epsilon: Only used when ``stochastic`` is falsy.
        log_dir: Forwarded to ``A3CAgent``.
        human_interaction: In this function it only affects the thread count.

    Returns:
        Whatever ``A3CAgent.evaluate()`` returns.
    """
    # One thread when rendering or interacting, otherwise eight.
    worker_count = 1 if (render or human_interaction) else 8

    env = ALEEnvironment(game,
                         is_render=render,
                         max_episode_steps=200000)

    # `stochastic` is passed to the config only in the non-stochastic case,
    # so the config's own default applies otherwise.
    config_options = {} if stochastic else {'stochastic': False}
    config = AtariA3CConfig(env, **config_options)

    policy = PolicyNetwork(config, load_model_path=model_path)

    # Keyword arguments are assembled in the same order as the original calls.
    agent_options = {'num_of_threads': worker_count}
    if not stochastic:
        agent_options['using_e_greedy'] = True
        agent_options['initial_epsilon'] = initial_epsilon
    agent_options['report_frequency'] = 1
    agent_options['num_of_epochs'] = num_of_epochs
    agent_options['steps_per_epoch'] = steps_per_epoch
    agent_options['log_dir'] = log_dir

    evaluator = A3CAgent(policy, env, **agent_options)

    return evaluator.evaluate()
Exemplo n.º 9
0
def compatible_1():
    """Print state/action information from three environment wrappers.

    The three sections, each framed by a separator line, exercise an
    ``ALEEnvironment``, an ``ArcadeLearningEnvironment`` and a
    ``TFEnvironment`` wrapping the first one. Nothing is returned.
    """
    separator = '+++++++++++++++++++++++++++++++++++++++++++++++++'

    # Section 1: ALEEnvironment created without a state processor.
    print(separator)
    fruit_env = ALEEnvironment(rom_name=ALEEnvironment.BREAKOUT,
                               state_processor=None)
    state_space = fruit_env.get_state_space()
    print(state_space.get_range())
    print(tuple(state_space.get_shape()))
    print(fruit_env.get_action_space().get_range())
    print(fruit_env.reset())
    print(fruit_env.get_state())
    print(separator)

    # Section 2: ArcadeLearningEnvironment loaded from a ROM path.
    print(separator)
    rom_env = ArcadeLearningEnvironment('../envs/roms/breakout.bin')
    print(rom_env.states())
    print(rom_env.actions())
    print(rom_env.reset())
    print(rom_env.get_states())
    print(rom_env.execute(0))
    print(separator)

    # Section 3: TFEnvironment wrapping the first environment. There is no
    # reset() call in this section.
    print(separator)
    wrapped_env = TFEnvironment(fruit_environment=fruit_env)
    print(wrapped_env.states())
    print(wrapped_env.actions())
    print(wrapped_env.get_states())
    print(wrapped_env.execute(0))
    print(separator)
Exemplo n.º 10
0
def train_ale_environment():
    """Train an ``A3CLearner``-based agent on ALE Breakout.

    The agent is obtained from ``AgentFactory.create`` and trained via
    ``agent.train()``. Nothing is returned.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # A3C configuration with debug mode switched on.
    config = AtariA3CConfig(env,
                            initial_learning_rate=0.004,
                            debug_mode=True)

    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    factory_options = {
        'num_of_epochs': 40,
        'steps_per_epoch': 1e6,
        'checkpoint_frequency': 1e6,
        'log_dir': './train/breakout/a3c_checkpoints',
    }
    trainer = AgentFactory.create(A3CLearner, policy, env, **factory_options)

    trainer.train()
Exemplo n.º 11
0
def evaluate_ale_environment():
    """Evaluate a saved A3C checkpoint on a rendered Breakout environment.

    The checkpoint path is hard-coded. The evaluation result is discarded,
    so nothing is returned.
    """
    checkpoint = './train/breakout/a3c_checkpoints_10-23-2019-02-13/model-39030506'

    # Rendering is enabled for this run.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT, is_render=True)

    policy = PolicyNetwork(AtariA3CConfig(env), load_model_path=checkpoint)

    # A single learner is used because a GUI is shown.
    evaluator = AgentFactory.create(
        A3CLearner,
        policy,
        env,
        num_of_epochs=1,
        steps_per_epoch=10000,
        num_of_learners=1,
        log_dir='./test/breakout/a3c_checkpoints',
    )

    evaluator.evaluate()
Exemplo n.º 12
0
def train_ale_environment():
    """Train a ``DQNAgent`` on ALE Breakout.

    Only ``log_dir`` is passed explicitly to the agent. Nothing is returned.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # The DQN configuration is wrapped directly in a policy network.
    policy = PolicyNetwork(AtariDQNConfig(env))

    trainer = DQNAgent(policy, env, log_dir="./train/breakout/dqn_breakout")

    trainer.train()
Exemplo n.º 13
0
def composite_agents(main_model_path, auxiliary_model_path, alpha, epsilon):
    """Evaluate a ``DQA3CLearner``-based agent on Breakout with two models.

    Args:
        main_model_path: Forwarded to ``PolicyNetwork`` as ``load_model_path``.
        auxiliary_model_path: Forwarded to ``AgentFactory.create``.
        alpha: Forwarded to ``AgentFactory.create``.
        epsilon: Forwarded to ``AgentFactory.create``.

    Returns:
        Whatever the created agent's ``evaluate()`` returns.
    """
    # Breakout environment with no extra constructor options.
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    config = DQAtariA3CConfig(env)
    policy = PolicyNetwork(config, load_model_path=main_model_path)

    factory_options = {
        'num_of_epochs': 1,
        'steps_per_epoch': 10000,
        'checkpoint_frequency': 1e5,
        'learner_report_frequency': 1,
        'auxiliary_model_path': auxiliary_model_path,
        'alpha': alpha,
        'epsilon': epsilon,
    }
    evaluator = AgentFactory.create(DQA3CLearner, policy, env,
                                    **factory_options)

    return evaluator.evaluate()
Exemplo n.º 14
0
def train_breakout_with_a3c_remove_immutable_objects():
    """Train an A3C agent on Breakout using ``AtariBlackenProcessor``.

    The environment is created with ``loss_of_life_negative_reward=True``.
    According to the original example, the processor blackens the top half
    of the state. Nothing is returned.
    """
    env = ALEEnvironment(
        ALEEnvironment.BREAKOUT,
        loss_of_life_negative_reward=True,
        state_processor=AtariBlackenProcessor(),
    )

    # A3C configuration with debug mode switched on.
    config = AtariA3CConfig(env,
                            initial_learning_rate=0.004,
                            debug_mode=True)

    policy = PolicyNetwork(config, max_num_of_checkpoints=50)

    factory_options = {
        'num_of_epochs': 50,
        'steps_per_epoch': 1e6,
        'checkpoint_frequency': 1e6,
        'log_dir': './train/breakout/a3c_smc_1_checkpoints',
    }
    trainer = AgentFactory.create(A3CLearner, policy, env, **factory_options)

    trainer.train()
Exemplo n.º 15
0
def train_atari_sea_quest_with_map():
    """Train an ``A3CMapLearner``-based agent on Seaquest.

    The environment uses ``SeaquestMapProcessor`` and the agent receives a
    custom reward-combining callback. Nothing is returned.
    """
    def update_reward(rewards):
        """Collapse an indexable ``rewards`` into a single value.

        Returns ``rewards[2]`` when ``rewards[3] == 1``, otherwise
        ``rewards[0] + rewards[1]``.
        """
        # NOTE(review): rewards[3] looks like a low-oxygen flag (the original
        # local was named `oxy_low`) -- confirm against the learner.
        if rewards[3] == 1:
            return rewards[2]
        return rewards[0] + rewards[1]

    environment = ALEEnvironment(
        ALEEnvironment.SEAQUEST,
        state_processor=SeaquestMapProcessor(),
        frame_skip=8,
    )

    config = A3CMapConfig(environment, initial_learning_rate=0.004)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    factory_options = {
        'num_of_epochs': 40,
        'steps_per_epoch': 1e6,
        'checkpoint_frequency': 1e6,
        'log_dir': './train/sea_quest/a3c_map_checkpoints',
        'network_update_steps': 12,
        'update_reward_fnc': update_reward,
    }
    trainer = AgentFactory.create(A3CMapLearner, policy, environment,
                                  **factory_options)

    trainer.train()
Exemplo n.º 16
0
def train_ale_environment():
    """Train a ``DQNLearner``-based agent on ALE Breakout.

    The agent is obtained from ``AgentFactory.create`` and trained via
    ``agent.train()``. Nothing is returned.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # DQN configuration with debug mode switched on.
    config = AtariDQNConfig(env, debug_mode=True)
    policy = PolicyNetwork(config, max_num_of_checkpoints=40)

    factory_options = {
        'num_of_epochs': 20,
        'steps_per_epoch': 1e6,
        'checkpoint_frequency': 5e5,
        'log_dir': './train/breakout/dqn_checkpoints',
    }
    trainer = AgentFactory.create(DQNLearner, policy, env, **factory_options)

    trainer.train()
Exemplo n.º 17
0
def composite_agents(path_1, path_2, alpha, epsilon):
    """Evaluate an ``A3CAgent`` backed by a ``DQPolicyNetwork`` on Breakout.

    Args:
        path_1: Forwarded to ``DQPolicyNetwork`` as ``load_model_path``.
        path_2: Forwarded to ``DQPolicyNetwork`` as ``load_model_path_2``.
        alpha: Forwarded to ``DQPolicyNetwork``.
        epsilon: Forwarded to ``DQPolicyNetwork``.

    Returns:
        Whatever ``A3CAgent.evaluate()`` returns.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    network_options = {
        'load_model_path': path_1,
        'load_model_path_2': path_2,
        'alpha': alpha,
        'epsilon': epsilon,
    }
    policy = DQPolicyNetwork(AtariDQA3CConfig(env), **network_options)

    # Eight threads, twelve epochs of 10000 steps each.
    agent_options = {
        'num_of_threads': 8,
        'report_frequency': 1,
        'num_of_epochs': 12,
        'steps_per_epoch': 10000,
        'log_dir': "./thi_test/Lifetime/Breakout/a3c",
    }
    evaluator = A3CAgent(policy, env, **agent_options)

    return evaluator.evaluate()
Exemplo n.º 18
0
def train_breakout_with_a3c_normal():
    """Train an ``A3CAgent`` on a plain ALE Breakout environment.

    The environment is created with no extra options; in particular no
    loss-of-life reward argument is passed. Nothing is returned.
    """
    env = ALEEnvironment(ALEEnvironment.BREAKOUT)

    # Configuration and the network built from it.
    policy = PolicyNetwork(
        AtariA3CConfig(env, initial_learning_rate=0.004),
        num_of_checkpoints=20,
    )

    agent_options = {
        'num_of_epochs': 70,
        'steps_per_epoch': 1e6,
        'save_frequency': 5e6,
        'log_dir': './train/smc/Breakout/a3c_gpu_8_threads_breakout_time_based_normal',
        'num_of_threads': 8,
    }
    trainer = A3CAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 19
0
def train_ale_environment():
    """Train a ``DQNAgent`` with a duel-network configuration on ALE Pong.

    Builds the environment, an ``AtariDuelDQNConfig``, a ``PolicyNetwork``
    and the agent, then calls ``agent.train()``. Nothing is returned.
    """
    # Gym alternative noted in the original example:
    #   GymEnvironment("Breakout-v0", state_processor=AtariProcessor())
    env = ALEEnvironment(ALEEnvironment.PONG)

    config = AtariDuelDQNConfig(env)

    # Per the original example: at most 40 checkpoints are created.
    policy = PolicyNetwork(config, num_of_checkpoints=40)

    # Per the original example: a checkpoint is saved every 30 minutes and
    # training stops at the 40th checkpoint.
    agent_options = {
        'save_time_based': 30,
        'checkpoint_stop': 40,
        'log_dir': "./train/Pong/dueldqn_pong_time_based_30_40",
    }
    trainer = DQNAgent(policy, env, **agent_options)

    trainer.train()
Exemplo n.º 20
0
def train_ale_environment_human_interaction(human_interaction=True):
    """Train an ``A3CAgent`` on ALE Riverraid, optionally in interactive mode.

    Args:
        human_interaction: When truthy, the environment is rendered, actions
            10 through 17 are passed as ``disable_actions`` and one thread is
            used. Otherwise rendering is off and eight threads are used. The
            log directory also differs between the two modes.

    Nothing is returned.
    """
    if human_interaction:
        # Legend carried over from the original example:
        #   1: fast shoot, 2: slow shoot, 3: right, 4: left
        # NOTE(review): what these numbers index is not shown here -- confirm.
        env_options = {
            'is_render': True,
            'disable_actions': list(range(10, 18)),
        }
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_river_disable_fire_time_based_30_40'
        thread_count = 1
    else:
        env_options = {'is_render': False}
        log_dir = './train/Human/Riverraid/a3c_gpu_8_threads_breakout_time_based_30_40'
        thread_count = 8

    env = ALEEnvironment(ALEEnvironment.RIVERRAID, **env_options)

    # Everything from here on is the same in both modes.
    config = AtariA3CConfig(env,
                            initial_learning_rate=0.001,
                            debug_mode=True)

    policy = PolicyNetwork(config, num_of_checkpoints=40, using_gpu=True)

    trainer = A3CAgent(policy,
                       env,
                       save_time_based=30,
                       checkpoint_stop=40,
                       log_dir=log_dir,
                       num_of_threads=thread_count)

    trainer.train()