Example #1
def declare_trainer():
    ## Initializing our StarCraft 2 environment
    env = SC2Env(map_name="Simple64", render=False, step_mul=16,
                 player_race="terran", enemy_race="terran", difficulty="very_easy")
    
    #action_wrapper = MOspatialTerranWrapper(10, 10, env.env_instance._interface_formats[0]._raw_resolution)
    action_wrapper = SimpleMOTerranWrapper(10, 10, env.env_instance._interface_formats[0]._raw_resolution)
    #state_builder = Simple64GridState_SimpleTerran(grid_size=7) # This state_builder with grid_size=7 will end up with a total size of 110 ( (7*7)*2 + 12 )
    state_builder = TVTUnitStackingEnemyGridState(grid_size=4) # total size of 92

    helper = ModelBuilder()
    helper.add_input_layer()
    helper.add_fullyconn_layer(nodes=82)
    helper.add_output_layer()

    
    dq_network = DDQNKerasMO(action_wrapper=action_wrapper, state_builder=state_builder,
                             build_model=helper.get_model_layout(), per_episode_epsilon_decay=False,
                             learning_rate=0.001, epsilon_decay=0.99999, epsilon_min=0.005,
                             memory_maxlen=100000, min_memory_size=2000, batch_size=32)
    
    # Terran agent
    agent = SC2Agent(dq_network, KilledUnitsReward())

    # trainer = Trainer(env, agent, save_path='/home/lpdcalves/', file_name="terran_ddqn_mo_v_easy",
    #                 save_every=200, enable_save=True, relative_path=False, reset_epsilon=False,
    #                 max_training_episodes=3000, max_steps_training=1200,
    #                 max_test_episodes=100, max_steps_testing=1200, rolling_avg_window_size=50)

    trainer = Trainer(env, agent, save_path='urnai/models/saved', file_name="terran_ddqn_mostackgridstate_atkgrouping4",
                    save_every=20, enable_save=True, relative_path=True, reset_epsilon=False,
                    max_training_episodes=10, max_steps_training=1200,
                    max_test_episodes=2, max_steps_testing=1200, rolling_avg_window_size=5)
    return trainer
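A quick sanity check on the exploration schedule above: with per_episode_epsilon_decay=False, epsilon is multiplied by epsilon_decay once per agent step. Assuming epsilon starts at 1.0 (a common default, not set explicitly in this snippet), the sketch below estimates how long the schedule takes to anneal.

import math

# Assumed starting value; DDQNKerasMO's default is not shown in the example above.
epsilon_start = 1.0
epsilon_decay = 0.99999   # per-step multiplicative decay from the example
epsilon_min = 0.005

# Steps until epsilon reaches epsilon_min under pure exponential decay.
steps_to_min = math.log(epsilon_min / epsilon_start) / math.log(epsilon_decay)
print(round(steps_to_min))                      # ~530,000 steps

# The active config runs at most 10 episodes x 1,200 steps = 12,000 steps,
# so epsilon barely moves; the commented-out 3,000-episode run is the one
# where this schedule actually anneals.
print(epsilon_start * epsilon_decay ** 12_000)  # ~0.89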
Example #2
def declare_trainer():
    env = SC2Env(map_name="Simple64",
                 render=False,
                 step_mul=16,
                 player_race="terran",
                 enemy_race="terran",
                 difficulty="very_easy")

    action_wrapper = SimpleTerranWrapper(atk_grid_x=4, atk_grid_y=4)
    state_builder = MultipleUnitGridState(4)

    helper = ModelBuilder()
    helper.add_input_layer()
    helper.add_fullyconn_layer(nodes=50)
    helper.add_output_layer()

    dq_network = DoubleDeepQLearning(action_wrapper=action_wrapper,
                                     state_builder=state_builder,
                                     build_model=helper.get_model_layout(),
                                     use_memory=True,
                                     gamma=0.99,
                                     learning_rate=0.001,
                                     memory_maxlen=100000,
                                     min_memory_size=2000,
                                     lib="keras",
                                     epsilon_decay=0.9994,
                                     epsilon_start=0.6,
                                     epsilon_min=0.005,
                                     epsilon_linear_decay=True,
                                     per_episode_epsilon_decay=True)

    agent = SC2Agent(dq_network, KilledUnitsReward())

    # trainer = Trainer(env, agent, save_path='/home/lpdcalves/', file_name="tvt_veasy_newactwrapper_t1",
    #                 save_every=200, enable_save=True, relative_path=False, reset_epsilon=False,
    #                 max_training_episodes=3000, max_steps_training=1500,
    #                 max_test_episodes=100, max_steps_testing=1500, rolling_avg_window_size=50)

    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="terran_ddql_newgridstate7",
                      save_every=6,
                      enable_save=True,
                      relative_path=True,
                      reset_epsilon=False,
                      max_training_episodes=2,
                      max_steps_training=1000,
                      max_test_episodes=2,
                      max_steps_testing=100,
                      rolling_avg_window_size=5)
    return trainer
Example #3
def declare_trainer():
    env = GymEnv(id="Breakout-ram-v0", render=True)

    action_wrapper = env.get_action_wrapper()
    #state_builder = PureState(env.env_instance.observation_space)
    state_builder = GymState(env.env_instance.observation_space.shape[0])

    helper = ModelBuilder()
    # helper.add_input_layer(nodes=32)
    # helper.add_fullyconn_layer(8)
    helper.add_input_layer(nodes=66)
    helper.add_output_layer()

    # dq_network = DQNKeras(action_wrapper=action_wrapper, state_builder=state_builder,
    #                         gamma=0.99, learning_rate=0.001, epsilon_decay=0.9995, epsilon_min=0.01,
    #                         build_model=helper.get_model_layout(), memory_maxlen=5000)

    # dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False,
    #                     gamma=0.99, learning_rate=0.001, epsilon_decay=0.999997, epsilon_min=0.01, memory_maxlen=100000, min_memory_size=2000)

    dq_network = DeepQLearning(action_wrapper=action_wrapper,
                               state_builder=state_builder,
                               build_model=helper.get_model_layout(),
                               gamma=0.99,
                               learning_rate=0.01,
                               epsilon_decay=0.998,
                               epsilon_min=0.01,
                               memory_maxlen=100000,
                               min_memory_size=4000,
                               per_episode_epsilon_decay=True,
                               lib="pytorch")

    #dq_network = PGKeras(action_wrapper, state_builder, learning_rate=0.001, gamma=0.99, build_model=helper.get_model_layout())

    agent = GenericAgent(dq_network, PureReward())

    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="breakout-ram-v0_dql_66_pytorch",
                      save_every=300,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=1200,
                      max_steps_training=1800,
                      max_test_episodes=100,
                      max_steps_testing=1800)
    return trainer
Example #4
def declare_trainer():
    env = GymEnv(id="CartPole-v1", render=False)

    action_wrapper = env.get_action_wrapper()
    state_builder = GymState(env.env_instance.observation_space.shape[0])

    helper = ModelBuilder()
    helper.add_input_layer(nodes=50)
    #helper.add_fullyconn_layer(50)
    helper.add_output_layer()

    dq_network = DeepQLearning(action_wrapper=action_wrapper,
                               state_builder=state_builder,
                               build_model=helper.get_model_layout(),
                               gamma=0.99,
                               use_memory=True,
                               learning_rate=0.001,
                               epsilon_decay=0.9997,
                               epsilon_min=0.01,
                               memory_maxlen=50000,
                               min_memory_size=100,
                               batch_size=32,
                               lib="pytorch")

    # dq_network = DQNPytorch(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(),
    #                     gamma=0.99, learning_rate=0.001, epsilon_decay=0.9997, epsilon_min=0.01, memory_maxlen=50000, min_memory_size=100)

    # dq_network = DDQNKeras(action_wrapper=action_wrapper, state_builder=state_builder, build_model=helper.get_model_layout(), use_memory=False,
    #                 gamma=0.99, learning_rate=0.001, epsilon_decay=0.999997, epsilon_min=0.01, memory_maxlen=100000, min_memory_size=2000)

    agent = GenericAgent(dq_network, PureReward())

    # CartPole-v1 is solved when avg. reward over 100 episodes is greater than or equal to 475
    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="cartpole_v1_dql_pytorch1",
                      save_every=100,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=1000,
                      max_steps_training=1000,
                      max_test_episodes=100,
                      max_steps_testing=1000)
    return trainer
Example #5
def declare_trainer():
    env = GymEnv(id="Breakout-v0", render=True)
    img = env.reset()  # initial observation from the environment (not used further in this example)

    action_wrapper = env.get_action_wrapper()
    state_builder = GymState(env.env_instance.observation_space.shape)

    helper = ModelBuilder()
    helper.add_convolutional_layer(
        filters=8,
        kernel_size=4,
        input_shape=env.env_instance.observation_space.shape)
    helper.add_maxpooling_layer()
    helper.add_flatten_layer()
    helper.add_fullyconn_layer(10)
    helper.add_output_layer()

    dq_network = DeepQLearning(action_wrapper=action_wrapper,
                               state_builder=state_builder,
                               build_model=helper.get_model_layout(),
                               gamma=0.99,
                               learning_rate=0.01,
                               epsilon_decay=0.9999,
                               epsilon_min=0.005,
                               memory_maxlen=50000,
                               min_memory_size=2000,
                               lib="keras")

    agent = GenericAgent(dq_network, PureReward())

    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="breakout-v0_keras",
                      save_every=100,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=50,
                      max_steps_training=800,
                      max_test_episodes=5,
                      max_steps_testing=800)
    return trainer
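Note the difference between Examples #3 and #5 when building the state: Breakout-ram-v0 exposes the 128-byte Atari RAM as a flat vector, so observation_space.shape[0] is enough for a dense input, while Breakout-v0 returns raw RGB frames whose full shape is fed to the convolutional layer. A minimal sketch of that difference using plain Gym (the classic gym package with the Atari extras installed; independent of URNAI):

import gym

ram_env = gym.make("Breakout-ram-v0")   # observations are the 128-byte Atari RAM
img_env = gym.make("Breakout-v0")       # observations are raw RGB frames

print(ram_env.observation_space.shape)  # (128,)        -> flat vector, shape[0] is enough
print(img_env.observation_space.shape)  # (210, 160, 3) -> full shape for the conv input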
Example #6
def declare_trainer():
    env = SC2Env(map_name="Simple64",
                 render=False,
                 step_mul=16,
                 player_race="terran",
                 enemy_race="random",
                 difficulty="very_easy")

    action_wrapper = SimpleTerranWrapper()
    state_builder = Simple64GridState(grid_size=4)

    helper = ModelBuilder()
    helper.add_input_layer(nodes=50)
    helper.add_fullyconn_layer(50)
    helper.add_output_layer()

    dq_network = DDQNKeras(action_wrapper=action_wrapper,
                           state_builder=state_builder,
                           build_model=helper.get_model_layout(),
                           per_episode_epsilon_decay=False,
                           gamma=0.99,
                           learning_rate=0.001,
                           epsilon_decay=0.99999,
                           epsilon_min=0.005,
                           memory_maxlen=100000,
                           min_memory_size=2000)

    agent = SC2Agent(dq_network, KilledUnitsReward())

    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="terran_ddqn_vs_random_v_easy",
                      save_every=50,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=3000,
                      max_steps_training=1200,
                      max_test_episodes=100,
                      max_steps_testing=1200)
    return trainer
Example #7
def declare_trainer():
    env = GymEnv(id="CartPole-v1")

    action_wrapper = env.get_action_wrapper()
    state_builder = GymState(env.env_instance.observation_space.shape[0])

    helper = ModelBuilder()
    helper.add_input_layer(nodes=50)
    helper.add_fullyconn_layer(50)
    helper.add_output_layer()

    dq_network = DDQNKeras(action_wrapper=action_wrapper,
                           state_builder=state_builder,
                           build_model=helper.get_model_layout(),
                           use_memory=False,
                           gamma=0.99,
                           learning_rate=0.001,
                           epsilon_decay=0.9997,
                           epsilon_min=0.01,
                           memory_maxlen=50000,
                           min_memory_size=1000)

    # dq_network = PGKeras(action_wrapper, state_builder, learning_rate=0.001, gamma=0.99, build_model=helper.get_model_layout())

    agent = GenericAgent(dq_network, PureReward())

    # CartPole-v1 is solved when avg. reward over 100 episodes is greater than or equal to 475
    #test_params = TestParams(num_matches=100, steps_per_test=100, max_steps=500, reward_threshold=500)
    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="cartpole_v1_ddqn_50x50_test",
                      save_every=100,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=1000,
                      max_steps_training=500,
                      max_test_episodes=100,
                      max_steps_testing=500)
    return trainer
Example #8
def declare_trainer():
    env = GymEnv(id="CartPole-v0")

    action_wrapper = env.get_action_wrapper()
    state_builder = GymState(env.env_instance.observation_space.shape[0])

    helper = ModelBuilder()
    helper.add_input_layer()
    helper.add_fullyconn_layer(25)
    helper.add_output_layer()

    dq_network = DDQNKeras(action_wrapper=action_wrapper,
                           state_builder=state_builder,
                           build_model=helper.get_model_layout(),
                           use_memory=False,
                           gamma=0.99,
                           learning_rate=0.001,
                           epsilon_decay=0.9997,
                           epsilon_min=0.01,
                           memory_maxlen=50000,
                           min_memory_size=1000)

    agent = GenericAgent(dq_network, PureReward())

    # CartPole-v0 is solved when avg. reward over 100 episodes is greater than or equal to 195
    trainer = Trainer(env,
                      agent,
                      save_path='urnai/models/saved',
                      file_name="cartpole-v0-unified-step-play3",
                      save_every=100,
                      enable_save=True,
                      relative_path=True,
                      max_training_episodes=1000,
                      max_steps_training=500,
                      max_test_episodes=100,
                      max_steps_testing=500)
    return trainer
Example #9
def declare_trainer():
    env = GymEnv(id="FrozenLakeNotSlippery-v0")

    action_wrapper = env.get_action_wrapper()
    state_builder = FrozenLakeState()

    # Timestamp used to build a unique save directory name for this run
    training_date = str(datetime.now()).replace(" ", "_").replace(":", "_").replace(".", "_")

    helper = ModelBuilder()
    helper.add_input_layer()
    helper.add_fullyconn_layer(256)
    helper.add_fullyconn_layer(256)
    helper.add_fullyconn_layer(256)
    helper.add_fullyconn_layer(256)
    helper.add_output_layer()
    dq_network = DDQNKeras(action_wrapper=action_wrapper,
                           state_builder=state_builder,
                           learning_rate=0.005,
                           gamma=0.90,
                           use_memory=False,
                           per_episode_epsilon_decay=True,
                           build_model=helper.get_model_layout())
    agent = GenericAgent(dq_network, FrozenlakeReward())
    trainer = Trainer(env,
                      agent,
                      file_name=training_date + os.path.sep +
                      "frozenlake_test_ddqnKeras",
                      save_every=1000,
                      enable_save=True,
                      max_training_episodes=1000,
                      max_steps_training=500,
                      max_test_episodes=10,
                      max_steps_testing=500)
    return trainer
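Every snippet above only declares a Trainer. In practice it is wired into a small entry point that runs training and then evaluation. A minimal sketch of such a driver, assuming the URNAI Trainer exposes train() and play() methods (verify against the version you have installed):

def main():
    trainer = declare_trainer()   # any of the declare_trainer() variants above
    try:
        trainer.train()           # assumed API: runs up to max_training_episodes
        trainer.play()            # assumed API: runs up to max_test_episodes in test mode
    except KeyboardInterrupt:
        pass                      # allow a long run to be stopped cleanly


if __name__ == "__main__":
    main()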