class DQN(BaseAgent):
    def __init__(self, model, processor, policy, test_policy, num_actions):
        # Replay memory
        memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                                  window_length=opt.dqn_window_length)
        self.agent = DQNAgent(model=model,
                              nb_actions=num_actions,
                              policy=policy,
                              test_policy=test_policy,
                              memory=memory,
                              processor=processor,
                              batch_size=opt.dqn_batch_size,
                              nb_steps_warmup=opt.dqn_nb_steps_warmup,
                              gamma=opt.dqn_gamma,
                              target_model_update=opt.dqn_target_model_update,
                              enable_double_dqn=opt.enable_double_dqn,
                              enable_dueling_network=opt.enable_dueling_network,
                              train_interval=opt.dqn_train_interval,
                              delta_clip=opt.dqn_delta_clip)
        self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate),
                           metrics=['mae'])

    def fit(self, env, num_steps, weights_path=None, visualize=False):
        callbacks = []
        if weights_path is not None:
            callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
        self.agent.fit(env=env,
                       nb_steps=num_steps,
                       action_repetition=opt.dqn_action_repetition,
                       callbacks=callbacks,
                       log_interval=opt.log_interval,
                       test_interval=opt.test_interval,
                       test_nb_episodes=opt.test_nb_episodes,
                       test_action_repetition=opt.dqn_action_repetition,
                       visualize=visualize,
                       test_visualize=visualize,
                       verbose=1)

    def test(self, env, num_episodes, visualize=False):
        self.agent.test(env=env,
                        nb_episodes=num_episodes,
                        action_repetition=opt.dqn_action_repetition,
                        verbose=2,
                        visualize=visualize)

    def save(self, out_dir):
        self.agent.save_weights(out_dir, overwrite=True)

    def load(self, out_dir):
        self.agent.load_weights(out_dir)
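# A minimal usage sketch for the wrapper above; the `opt` config object, the model,
# the processor, and the env are placeholders assumed to be defined elsewhere in the repo.
agent = DQN(model=model, processor=processor,
            policy=LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                        value_min=.1, value_test=.05, nb_steps=1000000),
            test_policy=GreedyQPolicy(),
            num_actions=env.action_space.n)
agent.fit(env, num_steps=1000000, weights_path='checkpoints/dqn_{step}.h5f')
agent.save('weights/dqn_final.h5f')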
log_filename = model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_REWARD_DATA.txt'
callbacks = [
    TrainEpisodeLogger(log_filename),
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000000)
]

if args.mode == 'train':
    dqn.fit(env, callbacks=callbacks, nb_steps=4250000, verbose=0, nb_max_episode_steps=1500)
    dqn.save_weights(weights_filename, overwrite=True)

if args.mode == 'test':
    if args.weights_file:
        dqn.load_weights(args.weights_file)
    else:
        dqn.load_weights(model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_weights.h5f')
    dqn.test(env, nb_episodes=100, visualize=False, verbose=2, nb_max_start_steps=30)

if args.mode == 'demonstrate':
    dqn.load_weights(model_saves + filename_append + "_" + datestr + "_" + 'expert_' + environment_name + '_weights.h5f')
    demonstrate(dqn, env, 75000, model_saves + demonstrations_file)
               memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.0015), metrics=['mae'])

""" Train for 150000 steps """
a = dqn.fit(env, nb_steps=150000, visualize=False, verbose=2)

""" Load weights; careful, this wipes out the current training """
weights_filename = 'dqn64_{}_weights.h5f'.format('LunarLander-v2')
dqn.load_weights(weights_filename)

""" Test for 20 episodes """
dqn.test(env, nb_episodes=20, visualize=False)

import matplotlib.pyplot as plt
plt.plot([199.09, 217.98, 233.922, 225.90, 220.99,
          245.82, 236.89, 262.95, 221.20, 241.72], label='Chosen')
plt.plot([235.44, 244.76, -94.505, 248.86, 265.25,
          228.75, 202.80, 256.86, 239.59, -85.32
def training_game():
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=3500, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.7, value_test=.0, nb_steps=GLOBAL_STEPS)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=False, nb_steps_warmup=GLOBAL_STEPS_WARMUP,
                   target_model_update=1e-2, policy=policy, batch_size=150,
                   processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    # dqn.fit(env, callbacks=callbacks, nb_steps=GLOBAL_STEPS, action_repetition=2, log_interval=1e4, verbose=2)
    dqn.fit(env, nb_steps=GLOBAL_STEPS, action_repetition=2, log_interval=1000, verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
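# SC2Proc is referenced above but not defined in this snippet. A minimal sketch of what
# such a keras-rl Processor might look like; the class body here is an assumption, not
# the original implementation.
from rl.core import Processor
import numpy as np

class SC2Proc(Processor):
    def process_observation(self, observation):
        # Assumed: reduce the PySC2 observation to a single uint8 screen layer.
        return np.array(observation, dtype='uint8')

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], a common DQN stabilization trick.
        return np.clip(reward, -1., 1.)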
               delta_clip=1., nb_steps_warmup=50000)
lr = .00025
dqn.compile(Adam(lr), metrics=['mae'])

weights_filename = model_saves + 'expert_lander_weights.h5f'
checkpoint_weights_filename = model_saves + 'expert_lander_weights{step}.h5f'
log_filename = model_saves + 'expert_lander_REWARD_DATA.txt'
callbacks = [
    TrainEpisodeLogger(log_filename),
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000000)
]

if args.mode == 'train':
    dqn.fit(env, callbacks=callbacks, nb_steps=4250000, verbose=0, nb_max_episode_steps=1500)
    dqn.save_weights(weights_filename, overwrite=True)

if args.mode == 'test':
    dqn.load_weights(model_saves + 'expert_lander_weights.h5f')
    dqn.test(env, nb_episodes=5, visualize=True, verbose=2, nb_max_start_steps=30)

if args.mode == 'demonstrate':
    dqn.load_weights(model_saves + 'expert_lander_weights.h5f')
    demonstrate(dqn, env, 75000, model_saves + 'demos.npy')
model = keras.layers.Flatten()(model)
model = keras.layers.Dense(512, activation='relu')(model)
model = keras.layers.Dense(4, activation='linear')(model)
model = keras.Model(inputs=input, outputs=model)
model.summary()

print(model.output)
# Workaround: keras-rl inspects `_keras_shape` on the output tensor, which some Keras
# versions no longer set; patch it in manually so the agent's shape check passes.
model.output._keras_shape = (None, 4)
print(model.output._keras_shape)

game = gym.make('Breakout-v0')
agent = DQNAgent(model, policy, nb_actions=game.action_space.n, nb_steps_warmup=50000,
                 memory=memory, processor=AtariProcessor(), train_interval=4, delta_clip=1.)
agent.compile(keras.optimizers.Adam(lr=.00025), metrics=['mae'])

callbacks = [rl.callbacks.ModelIntervalCheckpoint('ckpt.h5f', interval=250000)]
callbacks += [FileLogger('log.json', interval=100)]

if False:  # flip to True to resume from saved weights
    agent.load_weights('weights.h5f')

agent.fit(game, nb_steps=1750000, visualize=False, log_interval=10000, callbacks=callbacks)
agent.save_weights('weights.h5f', overwrite=True)

game.reset()
agent.test(game, nb_episodes=10, visualize=True)
nb_actions = env.action_space.n
input_shape = env.observation_space.shape
window = 4

model = create_(nb_actions)
memory = SequentialMemory(limit=1000000, window_length=4)
processor = AtariProcessor()
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=1000000)
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99,
               target_model_update=10000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

dqn.load_weights('policy.h5')
dqn.test(env, nb_episodes=10, visualize=False)
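# AtariProcessor is used here (and in the Breakout snippet above) but never defined.
# A minimal sketch in the style of the canonical keras-rl dqn_atari example; the 84x84
# frame size is an assumption.
from PIL import Image
import numpy as np
from rl.core import Processor

INPUT_SHAPE = (84, 84)

class AtariProcessor(Processor):
    def process_observation(self, observation):
        img = Image.fromarray(observation)
        img = img.resize(INPUT_SHAPE).convert('L')  # resize and convert to grayscale
        return np.array(img).astype('uint8')        # store compactly in replay memory

    def process_state_batch(self, batch):
        return batch.astype('float32') / 255.       # rescale lazily, at training time

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)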
input_dim = env.input_dim

model = Sequential()
model.add(Flatten(input_shape=(1, input_dim)))  # window_length=1, so states are (1, input_dim)
model.add(Dense(256, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

memory = SequentialMemory(limit=2000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy, gamma=0.99)
dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0), metrics=['mae'])

# history = dqn.fit(env, nb_steps=100, action_repetition=1, visualize=False, verbose=2)
# dqn.save_weights('dqn_weights_%s.h5f' % (100), overwrite=True)
dqn.load_weights('dqn_weights_%s.h5f' % (3000))

# for perc_av in percent_av:
perc_av = 1

print('Fleet size is {f}'.format(f=fleet_size))
print('Surge is {}'.format(surge))
print('Percentage knowing fares is {}'.format(perc_k))
print('Percentage of professional drivers {}'.format(pro_s))

m = Model(ZONE_IDS, DEMAND_SOURCE, WARMUP_TIME_HOUR, ANALYSIS_TIME_HOUR,
          FLEET_SIZE=fleet_size, PRO_SHARE=pro_s, SURGE_MULTIPLIER=surge, BONUS=bonus,
          percent_false_demand=percent_false_demand,
          percentage_know_fare=perc_k,
# callbacks += [ModelCheckpoint(model_filename)]
callbacks += [FileLogger(log_filename, interval=250000)]
callbacks += [TensorBoard(log_dir=run_id)]

# class TestCallback(Callback):
#     def on_epoch_end(self, epoch, logs=None):
#         test_env = gym.make(args.env_name)
#         test_env.setMapSize(MAP_X, MAP_Y)
#         dqn.test(test_env, nb_episodes=1, visualize=True, nb_max_start_steps=100)
#         test_env.win1.destroy()
#         test_env.close()
#         del test_env
# callbacks += [TestCallback()]

# if args.loadmodel:
#     dqn.model.load(args.loadmodel)
if args.weights:
    dqn.load_weights(args.weights)

dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=True)
# dqn.save_model(model_filename)

# Finally, evaluate our algorithm for 10 episodes.
dqn.test(env, nb_episodes=10, visualize=True)
# gtk.main()
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
               target_model_update=1e-2, policy=policy, gamma=.98)
dqn.compile(Adam(lr=0.00025), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
log_filename = 'dqn_{}_log.json'.format(args.env_name)
callbacks = [FileLogger(log_filename, interval=1)]
dqn.fit(env, callbacks=callbacks, nb_steps=150000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(args.env_name), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
# dqn.test(env, nb_episodes=5, visualize=False)
else:
    dqn.load_weights('dqn_{}_weights.h5f'.format(args.env_name))
    dqn.test(env, nb_episodes=10, visualize=False)
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(16))
# model.add(Activation('relu'))
# model.add(Dense(nb_actions))
# model.add(Activation('linear'))

memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# history = dqn.fit(env, nb_steps=50000, visualize=True, verbose=2, nb_max_episode_steps=1000)
# dqn.save_weights(os.path.join("model", "dqn_multi_cls_{}_weights.h5f".format(ENV_NAME)), overwrite=True)
dqn.load_weights(
    os.path.join(
        "model",
        "dqn_multi_cls_ralenv_multi_classify_{}-v0_weights.h5f".format(args[1])))

# env_1.seed(123)
env_1.weight_num = args[1]
dqn.test(env_1, nb_episodes=5, visualize=True)

# env_2.seed(123)
env_2.weight_num = args[1]
dqn.test(env_2, nb_episodes=5, visualize=True)
trade_cost = 0.03
env.init_file(output_file, feature_list, trade_cost, False)

model = create_model(env)
memory = SequentialMemory(limit=5000, window_length=1)
policy = GreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=env.action_size, memory=memory,
               nb_steps_warmup=50, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])
dqn.load_weights(w_file_name)

dqn.test(env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=True,
         nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None,
         verbose=1)

fig = plt.figure()
gs = gridspec.GridSpec(2, 1, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])
ax1.plot(env.df['close'], '-b', linewidth=0.5)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(NODES))
model.add(PReLU())
model.add(Dense(NODES * 2))
model.add(PReLU())
model.add(Dense(NODES * 4))
model.add(PReLU())
model.add(Dense(NODES * 2))
model.add(PReLU())
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=memoria, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               batch_size=batch_size, target_model_update=1e-2, policy=policy,
               enable_double_dqn=True)
dqn.compile(Adam(lr=learning_rate), metrics=['mae'])

if not teste:
    dqn.fit(env, nb_steps=epocas, visualize=False, verbose=1)
    dqn.save_weights('dqn_weights.h5f', overwrite=True)
else:
    dqn.load_weights('dqn_weights_1.h5f')
    dqn.test(env, nb_episodes=50, visualize=False)
                target_model_update=1e-2, policy=policy, enable_double_dqn=False)
dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, enable_double_dqn=False)

# Pass optimizer instances, not the SGD class itself.
dqn1.compile(SGD(), metrics=['mae'])
dqn2.compile(SGD(), metrics=['mae'])
dqn3.compile(SGD(), metrics=['mae'])
dqn4.compile(SGD(), metrics=['mae'])

dqn1.load_weights('save/dqn1_{}_weights.h5f'.format(ENV_NAME))
dqn2.load_weights('save/dqn2_{}_weights.h5f'.format(ENV_NAME))
dqn3.load_weights('save/dqn3_{}_weights.h5f'.format(ENV_NAME))
dqn4.load_weights('save/dqn4_{}_weights.h5f'.format(ENV_NAME))
print('Weights loaded!')

test1 = dqn1.test(env, nb_episodes=50, visualize=True)
test2 = dqn2.test(env, nb_episodes=50, visualize=True)
test3 = dqn3.test(env, nb_episodes=50, visualize=True)
test4 = dqn4.test(env, nb_episodes=50, visualize=True)

# pyplot.subplot(2, 1, 1)
# pyplot.plot(test1.history['episode_reward'], 'r--', test2.history['episode_reward'], 'g',
#             test3.history['episode_reward'], 'b--', test4.history['episode_reward'], 'y')
'''
with open('save/history1_2018-06-08 14:53:59', 'r') as f:
    pp1_1 = json.load(f)
def training_game():
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.2, value_test=.0, nb_steps=1e2)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=True, enable_dueling_network=True,
                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
                   batch_size=150, processor=processor, delta_clip=1)
    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = "HallDebbugeed"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=[callbacks], nb_steps=1e7, action_repetition=2,
            log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
# train_policy = BoltzmannQPolicy(tau=0.05)
train_policy = EpsGreedyQPolicy()
test_policy = GreedyQPolicy()

if DUEL_DQN:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   enable_dueling_network=True, dueling_type='avg',
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Load the model weights
dqn.load_weights(FILENAME)

# dqn.fit(env, nb_steps=1000, visualize=False, verbose=1, nb_max_episode_steps=500)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    re = []
    for key in dc:
        re.append(dc[key])
    return re

tt = dict_to_list(tpl.rewards_mean)
mm = np.array(tt[:-1])
kk = dict_to_list(tpl.metrics_at_end)
jj = np.array(kk[:-1])
metrics = np.column_stack((mm, jj))

import pickle
pickle.dump(metrics, open('duel_dqn_%d_%s_metrics.p' % (scale, ENV_NAME), "wb"))

# load model for testing
dqn.load_weights('/home/am/Desktop/set_tests/final/duel_dqn_%d_%s_weights.h5f' % (scale, ENV_NAME))

# setting up monitoring tools to record the testing episodes
from gym import monitoring
from gym.wrappers import Monitor

def episode5(episode_id):
    # Record only the first episode.
    return episode_id < 1

# rec = StatsRecorder(env, "sarsa_1")
# rec.capture_frame()
temp = '/home/am/Desktop/set_tests/final/duel_dqn_%d_%s' % (scale, ENV_NAME)
env = Monitor(env, temp, force=True, video_callable=episode5)
                 test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=50,
                     nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
    agent.compile()
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

# Load the model weights
agent.load_weights(WEIGHT_FILENAME)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # , nb_max_episode_steps=500)
def main(shape=10, winsize=4, test=False, num_max_test=200):
    INPUT_SHAPE = (shape, shape)
    WINDOW_LENGTH = winsize

    class SnakeProcessor(Processor):
        def process_observation(self, observation):
            # assert observation.ndim == 1, str(observation.shape)  # (height, width, channel)
            assert observation.shape == INPUT_SHAPE
            return observation.astype('uint8')  # saves storage in experience memory

        def process_state_batch(self, batch):
            # We could perform this processing step in `process_observation`. In this case, however,
            # we would need to store a `float32` array instead, which is 4x more memory intensive
            # than a `uint8` array. This matters if we store 1M observations.
            processed_batch = batch.astype('float32') / 255.
            return processed_batch

        def process_reward(self, reward):
            return reward

    env = gym.make('snakenv-v0')
    np.random.seed(123)
    env.seed(123)

    input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
    model = make_model(input_shape, 5)

    memory = SequentialMemory(limit=100000, window_length=WINDOW_LENGTH)
    processor = SnakeProcessor()

    # policy = LinearAnnealedPolicy(
    #     EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
    #     value_test=0, nb_steps=500000)
    policy = BoltzmannQPolicy()
    interval = 20000

    dqn = DQNAgent(model=model, nb_actions=5, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=20000, gamma=.99,
                   target_model_update=interval, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=0.0005), metrics=['mae'])

    weights_filename = 'dqn_snake_weights.h5f'

    if not test:
        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = 'dqn_{}_weights.h5f'.format('snake')
        checkpoint_weights_filename = 'dqn_' + 'snake' + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format('snake')
        callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=interval)]
        callbacks += [ModelIntervalCheckpoint(weights_filename, interval=interval)]
        callbacks += [FileLogger(log_filename, interval=500)]
        dqn.fit(env, callbacks=callbacks, nb_steps=10000000,
                log_interval=10000, visualize=False)

        # After training is done, we save the final weights one more time.
        # dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        # dqn.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=100)
    else:
        while True:
            try:
                dqn.load_weights(weights_filename)
            except Exception:
                # Weights are written by the training process; wait and retry rather
                # than testing an uninitialized network.
                print("weights not found, waiting")
                time.sleep(5)
                continue
            dqn.test(env, nb_episodes=3, visualize=True,
                     nb_max_episode_steps=num_max_test)
            time.sleep(5)
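# The uint8-versus-float32 tradeoff described in SnakeProcessor's comment is easy to
# quantify. A back-of-the-envelope sketch for the settings above (10x10 observations,
# 100000-entry memory):
limit, h, w = 100000, 10, 10
uint8_bytes = limit * h * w       # 1 byte per pixel  -> ~10 MB
float32_bytes = uint8_bytes * 4   # 4 bytes per pixel -> ~40 MB
print(uint8_bytes / 1e6, "MB as uint8;", float32_bytes / 1e6, "MB as float32")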
model = model_from_json(loaded_model_json)
print(model.summary())

# Unclear whether the declarations below are needed just to run a post-training test;
# set up the dqn the same way as during training to be safe.
memory = SequentialMemory(limit=2000000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.001))

# Load the weights
dqn.load_weights(sys.argv[4])

cb_ep = EpisodeLogger()

# Run the test: have the agent trade through the whole database.
# This takes a while, so it may be worth printing progress to the console
# inside the step method.
dqn.test(env, nb_episodes=1, visualize=False, callbacks=[cb_ep])

# Visualize the results
print("COUNT BUY : " + str(list(cb_ep.actions.values())[0].count(0)))
print("COUNT SELL : " + str(list(cb_ep.actions.values())[0].count(1)))
print("COUNT STAY : " + str(list(cb_ep.actions.values())[0].count(2)))

plt.subplot(211)
plt.plot(env.get_midprice_list(), linewidth=0.1)
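# EpisodeLogger is referenced above but not defined in this snippet. A minimal sketch of
# a keras-rl callback that records the action taken at every step, keyed by episode; the
# `actions` attribute matches how it is read above, but the body is a reconstruction.
from rl.callbacks import Callback

class EpisodeLogger(Callback):
    def __init__(self):
        self.actions = {}

    def on_episode_begin(self, episode, logs={}):
        self.actions[episode] = []

    def on_step_end(self, step, logs={}):
        self.actions[logs['episode']].append(logs['action'])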
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in tensorflow.keras
# optimizer and even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
history = dqn.fit(env, nb_steps=2000, visualize=False, verbose=2)  # , callbacks=[WandbLogger()]

# After training is done, we save the final weights.
dqn.save_weights('weights/dqn_{}_weights.h5'.format(ENV_NAME), overwrite=True)
dqn.load_weights('weights/dqn_{}_weights.h5'.format(ENV_NAME))

# Finally, evaluate our algorithm for 5 episodes.
history_test = dqn.test(env, nb_episodes=5, visualize=False)
class DDQN:
    def __init__(
        self,
        env,
        name,
        memory_limit=10000,
        nb_eps=10000,
        nb_warmup=100,
        dueling=True,
        double=True,
    ):
        # Set a fixed seed for the environment
        self.env = env
        self.env.seed(123)
        np.random.seed(123)
        random.seed(123)

        self.name = name
        self.log_filename = "./logs/{}_log.json".format(self.name)
        self.weights_filename = "./results/{}_weights.h5f".format(self.name)
        self.result_filename = "./results/{}_result.csv".format(self.name)

        # Extract the number of actions from the environment
        nb_action = self.env.action_space.spaces[0].n
        nb_actions = nb_action ** len(self.env.action_space.spaces)
        nb_states = self.env.observation_space.shape

        # Next, we build a very simple model.
        model = self._build_nn(nb_states, nb_actions)

        # Next, we define the replay memory
        memory = SequentialMemory(limit=memory_limit, window_length=1)
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),
            attr="eps",
            nb_steps=nb_eps,
            value_max=1.0,   # Start fully random
            value_min=0.1,   # After nb_steps, arrive at 10% random
            value_test=0.0,  # Don't pick random actions when testing
        )

        # Configure and compile our agent:
        # You can use every built-in Keras optimizer and even the metrics!
        self.dqn = DQNAgent(
            model=model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=nb_warmup,
            enable_dueling_network=dueling,  # Enable dueling network
            dueling_type="avg",
            enable_double_dqn=double,  # Enable double DQN
            target_model_update=1e-2,
            policy=policy,
        )
        self.dqn.compile(Adam(lr=1e-2), metrics=["mae"])

    def _build_nn(self, nb_states, nb_actions):
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + nb_states))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))
        return model

    def run(self, steps):
        callbacks = [FileLogger(self.log_filename)]
        self.dqn.fit(
            self.env,
            callbacks=callbacks,
            nb_steps=steps,
            visualize=False,
            verbose=1,
            log_interval=10000,
        )
        # After training is done, we save the final weights.
        self.dqn.save_weights(self.weights_filename, overwrite=True)

    def test(self):
        self.dqn.load_weights(self.weights_filename)
        self.dqn.test(self.env, nb_episodes=1, visualize=False)
        self.env.save_results(self.result_filename)
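# A usage sketch for the DDQN wrapper above, assuming an environment with a Tuple action
# space and a save_results() method; make_my_env() is a placeholder constructor.
env = make_my_env()
agent = DDQN(env, name="experiment_1", nb_eps=10000)
agent.run(steps=50000)  # train, then write ./results/experiment_1_weights.h5f
agent.test()            # reload the weights, run one episode, save the CSV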
                 nb_actions=n_actions,
                 policy=policy,
                 memory=memory,
                 nb_steps_warmup=args.warmup_steps,
                 gamma=.99,
                 target_model_update=args.target_model_update,
                 train_interval=args.train_interval,
                 delta_clip=1.,
                 enable_dueling_network=True)
agent.compile(Adam(lr=args.learning_rate), metrics=['mae'])

if args.load_weights_from is not None:
    print(f"Loading Weights From: {args.load_weights_from}")
    weights_filename = f'{args.load_weights_from}/' + 'dqn_{}_weights.h5f'.format(env_name)
    agent.load_weights(weights_filename)

if args.mode == 'train':
    import os
    current_directory = os.getcwd()
    model_weight_dir = os.path.join(current_directory, MODEL_NAME)
    if not os.path.exists(model_weight_dir):
        os.makedirs(model_weight_dir)

    weights_filename = f'{MODEL_NAME}/dqn_{env_name}_weights.h5f'
    checkpoint_weights_filename = f'{MODEL_NAME}/dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = f'{MODEL_NAME}/' + 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000),
        FileLogger(log_filename, interval=100),
        TensorboardCallback(log_dir=tb_logs)
def training_game():
    env = Environment(
        map_name="ForceField",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.2, value_test=.0, nb_steps=1e2)

    # Agent
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=True,
        enable_dueling_network=True,
        # 2019-07-12 GU Zhan (Sam) when value shape problem, reduce nb_steps_warmup:
        # nb_steps_warmup=300, target_model_update=1e-2, policy=policy,
        nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
        batch_size=150,
        processor=processor,
        delta_clip=1)
    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    timestamp = f"{datetime.datetime.now():%Y-%m-%d %I:%M%p}"
    # 2019-07-12 GU Zhan (Sam) folder name for Linux:
    # callbacks = keras.callbacks.TensorBoard(log_dir='./Graph/' + timestamp, histogram_freq=0,
    #                                         write_graph=True, write_images=False)
    # 2019-07-12 GU Zhan (Sam) folder name for Windows (raw string avoids escape issues):
    callbacks = keras.callbacks.TensorBoard(log_dir=r'.\Graph\issgz', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    dqn.fit(env, callbacks=[callbacks, s, logs], nb_steps=600,
            action_repetition=2, log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
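# keras-rl ships a step-based alternative to the episode-based Saver above; a sketch
# (the filename pattern and interval here are illustrative, not from the original code):
from rl.callbacks import ModelIntervalCheckpoint

checkpoint = ModelIntervalCheckpoint('dqn_agent_weights_{step}.h5f',
                                     interval=10000, verbose=1)
# dqn.fit(env, callbacks=[checkpoint, logs], nb_steps=600, ...)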
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

import threading

def thread_gen(n):
    env = gym.make('CartPole-v0')
    actions = []
    for i in range(50):
        obs = env.reset()
        for j in range(200):
            t = env.render()
            act = dqn.forward(obs)  # Raw image
model.add(Activation('linear'))
# model.summary()

memory = SequentialMemory(limit=env.dataLength, window_length=WINDOW_SIZE)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
               nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=0.001), metrics=['mae'])

if os.path.exists(WEIGHTS_NAME):
    dqn.load_weights(WEIGHTS_NAME)
    print("saved weight loaded")

def getPredictionAt(index=0):
    state = env.getLatestState(index, window=10)
    state = np.reshape(state, (-1, 1))
    state = np.expand_dims(state, axis=0)
    prediction = model.predict(state)[0]
    index_of_maximum = np.where(prediction == np.max(prediction))
    return index_of_maximum[0]

# dqn.test(env, nb_episodes=1, visualize=False)
prediction = getPredictionAt(0)
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
model.summary()

train_mode = len(sys.argv) > 1 and sys.argv[1] == 'train'

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=20000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy,
               enable_dueling_network=True, dueling_type='avg')
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

if os.path.isfile(WEIGHTS_PATH) and os.access(WEIGHTS_PATH, os.R_OK):
    dqn.load_weights(WEIGHTS_PATH)

if train_mode:
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    dqn.save_weights(WEIGHTS_PATH, overwrite=True)
    print('save')
else:
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
random.seed(123)
np.random.seed(123)
env.seed(123)

PREPROC = "onehot2steps"
processor = OneHotNNInputProcessor(num_one_hot_matrices=16)

model = Sequential()
model.add(Flatten(input_shape=(1, 4 + 4 * 4, 16,) + (4, 4)))
model.add(Dense(units=1024, activation='relu'))
model.add(Dense(units=512, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=4, activation='linear'))

memory = SequentialMemory(limit=6000, window_length=1)
TRAIN_POLICY = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=0.05,
                                    value_min=0.05, value_test=0.01, nb_steps=100000)
TEST_POLICY = EpsGreedyQPolicy(eps=.01)

dqn = DQNAgent(model=model, nb_actions=4, test_policy=TEST_POLICY, policy=TRAIN_POLICY,
               memory=memory, processor=processor, nb_steps_warmup=5000, gamma=.99,
               target_model_update=1000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mse'])

weights_filepath = data_filepath + 'train/weights_steps_' + str(NB_STEPS_TRAINING) + '.h5f'
dqn.load_weights(weights_filepath)

with open(csv_filepath, 'w', newline='') as file:
    writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC, delimiter=';')
    writer.writerow(['episode', 'episode_steps', 'highest_score', 'max_tile'])

_callbacks = [TestCall2048(csv_filepath)]
dqn.test(env, nb_episodes=500, visualize=False, verbose=1, callbacks=_callbacks)
policy = MaxBoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_neuron_output, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae', 'accuracy'])

metrics = Metrics(dqn, env)

# fileName = '1D_advanced_Sequential1000_BoltzmannQ_10000steps(7)'
# fileName = '1D_advanced_Sequential1000_EpsGreedyQ_10000steps(7)'
# fileName = '1D_advanced_Sequential1000_MaxBoltzmannQ_10000steps(7)'
# fileName = '1D_advanced_Sequential50000_BoltzmannQPolicy_10000steps(7)'
# fileName = '1D_advanced_Sequential50000_MaxBoltzmannQ_1000000steps(0)'
fileName = '1D__Sequential50000_BoltzmannQ_1000000steps(0)'

dqn.load_weights('./output/' + fileName + '.h5f')
dqn.test(env, nb_episodes=1, visualize=False, callbacks=[metrics])
metrics.export_figs(fileName)
cumulated_reward = metrics.cumulated_reward()

import matplotlib.pyplot as plt
plt.figure()
plt.plot(cumulated_reward, alpha=.6)
plt.title('cumulated_reward for ' + fileName)
plt.ylabel('cumulated_reward')
plt.xlabel('steps')
plt.savefig('./metrics/' + fileName + '_cumulated_reward.png')
model.add(Dense(3, input_dim=1, activation='tanh'))
model.add(Dense(nb_actions))
model.add(Activation('sigmoid'))

# Note: value_min=-1 drives eps negative, so the policy becomes purely greedy once the
# annealing schedule passes zero.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=-1, value_test=.05, nb_steps=1000000)
memory = SequentialMemory(limit=10000000, window_length=1)
dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                target_model_update=1e-2, policy=policy,
                enable_double_dqn=True, enable_dueling_network=False)
dqn2.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])

import os.path
file_path = 'Double_DQN_Taxi.h5f'
if os.path.exists(file_path):
    dqn2.load_weights(file_path)

class Saver(Callback):
    def on_episode_end(self, episode, logs={}):
        print('episode callback')
        if episode % 1 == 0:
            self.model.save_weights('Double_DQN_Taxi.h5f', overwrite=True)

logs = FileLogger('Double_DQN_Taxi.csv', interval=1)
s = Saver()
dqn2.fit(env, nb_steps=2e8, callbacks=[s, logs], visualize=False, verbose=2)
# dqn2.test(env, nb_episodes=10, visualize=True)
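# If the intent was a standard epsilon-greedy schedule, a more conventional setup keeps
# eps within [0, 1]; a sketch (the values are common Atari-style defaults, an assumption
# here, not taken from the original code):
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0, value_min=0.1,
                              value_test=0.05, nb_steps=1000000)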
class Agent(object):
    name = 'DQN'

    def __init__(self, number_of_training_steps=1e5, gamma=0.999, load_weights=False,
                 visualize=False, dueling_network=True, double_dqn=True, nn_type='mlp',
                 **kwargs):
        """
        Agent constructor

        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param load_weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        # Agent arguments
        # self.env_name = id
        self.neural_network_type = nn_type
        self.load_weights = load_weights
        self.number_of_training_steps = number_of_training_steps
        self.visualize = visualize

        # Create environment
        self.env = gym.make(**kwargs)
        self.env_name = self.env.env.id

        # Create agent
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1  # Number of frames to stack e.g., 1.
        self.model = self.create_model(name=self.neural_network_type)
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = self.env.env.training
        self.cwd = os.path.dirname(os.path.realpath(__file__))

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=float("3e-4")), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self, name: str = 'cnn') -> Sequential:
        """
        Helper function to create and return the default MLP or CNN model.

        :param name: Neural network type ['mlp' or 'cnn']
        :return: neural network
        """
        LOGGER.info("creating model for {}".format(name))
        if name == 'cnn':
            return self._create_cnn_model()
        elif name == 'mlp':
            return self._create_mlp_model()

    def _create_cnn_model(self) -> Sequential:
        """
        Create a convolutional neural network with a dense layer at the end.
        :return: keras model
        """
        features_shape = (self.memory_frame_stack, *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D
        model.add(conv(input_shape=features_shape, filters=5, kernel_size=[10, 1],
                       padding='same', activation='relu', strides=[5, 1],
                       data_format='channels_first'))
        model.add(conv(filters=5, kernel_size=[5, 1], padding='same', activation='relu',
                       strides=[2, 1], data_format='channels_first'))
        model.add(conv(filters=5, kernel_size=[4, 1], padding='same', activation='relu',
                       strides=[2, 1], data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        # Note: a softmax output is unusual for Q-value heads (linear is typical).
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        LOGGER.info(model.summary())
        return model

    def _create_mlp_model(self) -> Sequential:
        """
        Create a dense neural network with a dense layer at the end.

        :return: keras model
        """
        features_shape = (self.memory_frame_stack, *self.env.observation_space.shape)
        model = Sequential()
        model.add(Dense(units=256, input_shape=features_shape, activation='relu'))
        model.add(Dense(units=256, activation='relu'))
        model.add(Flatten())
        model.add(Dense(self.env.action_space.n, activation='softmax'))
        LOGGER.info(model.summary())
        return model

    def start(self) -> None:
        """
        Entry point for agent training and testing

        :return: (void)
        """
        output_directory = os.path.join(self.cwd, 'dqn_weights')
        if not os.path.exists(output_directory):
            LOGGER.info('{} does not exist. Creating Directory.'.format(output_directory))
            os.mkdir(output_directory)

        weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                     self.neural_network_type)
        weights_filename = os.path.join(output_directory, weight_name)
        LOGGER.info("weights_filename: {}".format(weights_filename))

        if self.load_weights:
            LOGGER.info('...loading weights for {} from\n{}'.format(
                self.env_name, weights_filename))
            self.agent.load_weights(weights_filename)

        if self.train:
            step_chkpt = '{step}.h5f'
            step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
            checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights', step_chkpt)
            LOGGER.info("checkpoint_weights_filename: {}".format(checkpoint_weights_filename))
            log_filename = os.path.join(self.cwd, 'dqn_weights',
                                        'dqn_{}_log.json'.format(self.env_name))
            LOGGER.info('log_filename: {}'.format(log_filename))

            callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                                 interval=250000)]
            callbacks += [FileLogger(log_filename, interval=100)]

            LOGGER.info('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            LOGGER.info("training over.")
            LOGGER.info('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
            LOGGER.info("AGENT weights saved.")
        else:
            LOGGER.info('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
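# A usage sketch for the Agent wrapper above; the environment id and the kwargs forwarded
# to gym.make() are placeholders, not names from the original repo.
agent = Agent(id='trading-v0', number_of_training_steps=100000,
              nn_type='mlp', load_weights=False)
print(agent)   # Agent = DQN | env = trading-v0 | number_of_training_steps = 100000
agent.start()  # trains when env.training is True, otherwise runs a 2-episode test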
# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=False)

# Finally, evaluate our algorithm for 10 episodes.
dqn.test(env, nb_episodes=10, nb_max_start_steps=30, action_repetition=1,
         start_step_policy=start_step_policy, visualize=True)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, nb_max_start_steps=30, action_repetition=1,
             nb_max_episode_steps=1800, start_step_policy=start_step_policy,
             visualize=True, starting_checkpoints=[i for i in range(17)])
elif args.mode == 'batch_test':
    # Test a batch of methods with their corresponding weights and write the output to a log.
    # This mode expects a directory structure consisting of a folder with the different
    # methods as subdirectories; the optional --methods parameter takes the name of that
    # folder (default: 'methods'). Each method folder should contain the weights to be
    # tested. Method folders and weights starting with '__' are omitted.
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

# Note: window_length, delta_range, and reward_range reflect an older keras-rl API;
# newer versions take window_length on the memory and delta_clip on the agent.
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
               window_length=WINDOW_LENGTH, memory=memory, processor=processor,
               nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               reward_range=(-1., 1.), target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
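# For comparison, a sketch of the same agent against the newer keras-rl API, where
# window_length lives on the memory and delta_range collapses to delta_clip (this mirrors
# the current dqn_atari example; treat it as an approximation of this snippet's intent):
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99,
               target_model_update=10000, train_interval=4,
               delta_clip=1.)  # replaces delta_range=(-1., 1.)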
                 enable_dueling_network=True, dueling_type='avg',
                 target_model_update=1e-2, policy=train_policy,
                 test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'DQN':
    memory = SequentialMemory(limit=NUM_STEPS, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=100, target_model_update=1e-2,
                     policy=train_policy, test_policy=test_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'SARSA':
    # SARSA does not require a memory.
    agent = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10,
                       policy=train_policy)
    agent.compile(Adam(lr=1e-3, clipnorm=1.0), metrics=['mae'])
elif METHOD.upper() == 'CEM':
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=50,
                     nb_steps_warmup=2000, train_interval=50, elite_frac=0.05)
    agent.compile()
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

# Load the model weights
agent.load_weights(WEIGHT_FILENAME)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # , nb_max_episode_steps=500)