Example #1
def setup(difficulty_level='default', env_name="AirSimEnv-v42"):
    #parser = argparse.ArgumentParser()
    #parser.add_argument('--mode', choices=['train', 'test'], default='train')
    #parser.add_argument('--env-name', type=str, default='AirSimEnv-v42')
    #parser.add_argument('--weights', type=str, default=None)
    #parser.add_argument('--difficulty-level', type=str, default="default") 
    #args = parser.parse_args()
    #args, unknown = parser.parse_known_args()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.6
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    # Get the environment and extract the number of actions.
    #msgs.algo = "DQN"
    env = gym.make(env_name)
    # Look up the difficulty-specific range dictionary from settings
    env.init_again(getattr(settings, difficulty_level + "_range_dic"))
    env.airgym.unreal_reset()  # must reset so the env accommodates the changes
    time.sleep(5)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    
    WINDOW_LENGTH = 1
    depth_shape = env.depth.shape
    vel_shape = env.velocity.shape
    dst_shape = env.position.shape

    # keras-rl adds an extra window dimension at axis=0 of our observations,
    # so we need to take it into account when defining the image input shape
    img_kshape = (WINDOW_LENGTH,) + depth_shape

    # Sequential model for convolutional layers applied to image
    image_model = Sequential()
    if settings.policy == 'deep':
        image_model.add(Conv2D(128,(3, 3), strides=(3, 3), padding='valid', activation='relu', input_shape=img_kshape,
                           data_format="channels_first"))
        image_model.add(Conv2D(64, (3, 3), strides=(2, 2), padding='valid', activation='relu'))
        image_model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='valid', activation='relu'))
        image_model.add(Conv2D(32, (1, 1), strides=(1, 1), padding='valid', activation='relu'))

        image_model.add(Flatten())

        # plot_model(image_model, to_file="model_conv_depth.png", show_shapes=True)
        # Input and output of the Sequential model
        image_input = Input(img_kshape)
        encoded_image = image_model(image_input)

        # Inputs and reshaped tensors for concatenate after with the image
        velocity_input = Input((1,) + vel_shape)
        distance_input = Input((1,) + dst_shape)

        vel = Reshape(vel_shape)(velocity_input)
        dst = Reshape(dst_shape)(distance_input)

        # Concatenate the encoded image with the velocity and distance inputs,
        # then pass the result through a stack of dense layers (1024, 1024, 512, 128, 64)
        denses = concatenate([encoded_image, vel, dst])
        denses = Dense(1024, activation='relu')(denses)
        denses = Dense(1024, activation='relu')(denses)
        denses = Dense(512, activation='relu')(denses)
        denses = Dense(128, activation='relu')(denses)
        denses = Dense(64, activation='relu')(denses)

    else:
        image_model.add(Conv2D(32, (4, 4), strides=(4, 4), padding='valid', activation='relu', input_shape=img_kshape,
                               data_format="channels_first"))
        image_model.add(Conv2D(64, (3, 3), strides=(2, 2), padding='valid', activation='relu'))
        image_model.add(Conv2D(128, (2, 2), strides=(1, 1), padding='valid', activation='relu'))
        image_model.add(Conv2D(64, (1, 1), strides=(1, 1), padding='valid', activation='relu'))

        image_model.add(Flatten())

        # plot_model(image_model, to_file="model_conv_depth.png", show_shapes=True)
        # Input and output of the Sequential model
        image_input = Input(img_kshape)
        encoded_image = image_model(image_input)

        # Inputs and reshaped tensors for concatenate after with the image
        velocity_input = Input((1,) + vel_shape)
        distance_input = Input((1,) + dst_shape)

        vel = Reshape(vel_shape)(velocity_input)
        dst = Reshape(dst_shape)(distance_input)

        # Concatenate the encoded image with the velocity and distance inputs,
        # then pass the result through 3 dense layers of 256 units
        denses = concatenate([encoded_image, vel, dst])
        denses = Dense(256, activation='relu')(denses)
        denses = Dense(256, activation='relu')(denses)
        denses = Dense(256, activation='relu')(denses)

    # Last dense layer with nb_actions for the output
    predictions = Dense(nb_actions, kernel_initializer='zeros', activation='linear')(denses)
    model = Model(
        inputs=[image_input, velocity_input, distance_input],
        outputs=predictions
    )
    env.set_model(model)
    model.summary()
    # plot_model(model,to_file="model.png", show_shapes=True)
    #train = True
    #train_checkpoint = False

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=WINDOW_LENGTH)  # reduced limit to save memory
    processor = MultiInputProcessor(nb_inputs=3)

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 100k steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing; here it is set to 0.0,
    # so the agent acts fully greedily at test time.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=0.0,
                                  nb_steps=100000)

    dqn = DQNAgent(model=model, processor=processor, nb_actions=nb_actions, memory=memory, nb_steps_warmup=settings.nb_steps_warmup,
                   enable_double_dqn=settings.double_dqn,
                   enable_dueling_network=False, dueling_type='avg',
                   target_model_update=1e-2, policy=policy, gamma=.99)

    dqn.compile(Adam(lr=0.00025), metrics=['mae'])

    # Return the agent and environment; the caller can load checkpoint weights and start training from there
    return dqn, env
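The setup() function returns the compiled agent and environment without starting training. A minimal usage sketch under the same keras-rl API is shown below; the file names, callback, and step counts are illustrative assumptions, not part of the original code.

from rl.callbacks import FileLogger

# Illustrative driver for setup(); file names and step counts are assumptions.
dqn, env = setup(difficulty_level='default', env_name='AirSimEnv-v42')

callbacks = [FileLogger('dqn_airsim_log.json', interval=100)]  # hypothetical log file
dqn.fit(env, callbacks=callbacks, nb_steps=100000, log_interval=10000, verbose=2)
dqn.save_weights('dqn_airsim_weights.h5f', overwrite=True)     # hypothetical weights file
dqn.test(env, nb_episodes=10, visualize=False)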
Example #2
model.add(Activation('linear'))
model.summary()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
processor = AtariProcessor()

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=1.,
                              value_min=.1,
                              value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=policy,
               memory=memory,
               processor=processor,
               nb_steps_warmup=50000,
               # the values below follow the standard keras-rl Atari DQN example
               gamma=.99,
               target_model_update=10000,
               train_interval=4,
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])
Example #3
def training_game():
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.7,
                                  value_test=.0,
                                  nb_steps=1e6)

    # Agent

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   enable_double_dqn=False,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor)

    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Save the weights and reload them when needed

    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)
    callbacks = [ModelIntervalCheckpoint(check_w_file, interval=1000)]
    callbacks += [FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1e7,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
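Example #3 calls a neural_network_model(input_shape, nb_actions) builder that is not shown in the snippet. A minimal sketch of what such a builder could look like is given below, assuming the Keras Sequential API and that the custom SC2Proc processor delivers observations already shaped like input_shape; the layer sizes are illustrative, not the original architecture.

from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense

def neural_network_model(input_shape, nb_actions):
    # Illustrative convolutional Q-network; the layer sizes are assumptions,
    # not the architecture used in the original example.
    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                     input_shape=input_shape))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model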
Example #4
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=1000, window_length=WINDOW_LENGTH)
processor = AtariProcessor()

# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. Here eps is annealed from 0.5 down to 0 over nb_steps, so that the agent
# initially explores the environment (high eps) and then gradually sticks to what it knows (low eps).
# The dedicated test-time eps is also set to 0, so the agent acts fully greedily during testing.
nb_steps = 1000
if args.weights:
    nb_steps = 1
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=.5,
                              value_min=0,
                              value_test=0,
                              nb_steps=nb_steps)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

nb_steps_warmup = 100
if args.weights:
    nb_steps_warmup = 100

# Wire up the memory, policy and processor defined above
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               policy=policy,
               memory=memory,
               processor=processor,
               nb_steps_warmup=nb_steps_warmup)
dqn.compile(Adam(lr=.00025), metrics=['mae'])
Example #5
env.seed(123)

# Next, we build a very simple model.
model = NETWORK(obs_shape, nb_actions)
model.summary()


memory = SequentialMemory(
  limit=MEMORY_SIZE, 
  window_length=1
)

policy = LinearAnnealedPolicy(
  EpsGreedyQPolicy(),
  attr='eps', 
  value_max=EPS_MAX,
  value_min=EPS_MIN,
  value_test=EPS_TEST,
  nb_steps=EPS_DECAY_STEPS
)

dqn = DQNAgent(
  model=model, 
  gamma=GAMMA,
  nb_actions=nb_actions, 
  memory=memory, 
  nb_steps_warmup=1000,
  target_model_update=TARGET_MODEL_UPDATE,
  policy=policy,
  test_policy=policy,
  enable_double_dqn=DOUBLE_DQN
)
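Example #5 stops right after constructing the agent; keras-rl requires dqn.compile() to be called before fit() or test(). A minimal hedged continuation is sketched below, where LEARNING_RATE, TRAIN_STEPS and WEIGHTS_FILE are illustrative constants that are not defined in the original snippet.

from keras.optimizers import Adam

# Hypothetical continuation; LEARNING_RATE, TRAIN_STEPS and WEIGHTS_FILE are
# illustrative constants, not part of the original snippet.
dqn.compile(Adam(lr=LEARNING_RATE), metrics=['mae'])
dqn.fit(env, nb_steps=TRAIN_STEPS, log_interval=10000, verbose=2)
dqn.save_weights(WEIGHTS_FILE, overwrite=True)
dqn.test(env, nb_episodes=10, visualize=False)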