Example #1
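This example sets up a DDPG agent with keras-rl for a continuous-control task; the saved-weights name suggests a stormwater-control environment with an 11-dimensional observation and `nb_actions` continuous outputs. The actor maps observations to actions through a sigmoid output layer, and the critic scores (action, observation) pairs with a linear output. The opening lines of the actor definition were missing from the snippet and are filled in below to match the critic's observation shape.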
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# `env` and `nb_actions` are assumed to come from the Gym-style environment
# set up earlier (nb_actions = env.action_space.shape[0]).

# Actor network: observation -> action. The first two lines of the model were
# missing from the snippet and are reconstructed to match the critic's
# observation input shape.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + (11,)))
actor.add(Dense(8))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))


# Critic network: takes both the action and the observation, outputs a single Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + (11,), name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Replay buffer and Ornstein-Uhlenbeck exploration noise for the continuous actions.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=10,
                  random_process=random_process, gamma=.995, target_model_update=1e-3)

agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=10000, visualize=False, verbose=0, nb_max_episode_steps=95)   
#agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=15, visualize=False, nb_max_episode_steps=95)
Example #2
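This fragment begins part-way through the constructor call of a custom, goal-conditioned DDPG-style agent: the `critic_goal_input`, the `(memory, episode_mem)` pair, and keyword arguments such as `val_goal`, `vectorized`, `reward_type`, and `max_tau` are not part of stock keras-rl. It compiles two separate Adam optimizers (one per network), trains for 200,000 steps, saves the weights, and finally evaluates the agent for 5 episodes before exiting.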
                      env=env,
                      memory=(memory, episode_mem),
                      critic_goal_input=goal_input,
                      delta_clip=1.0,
                      nb_steps_warmup_critic=1000,
                      nb_steps_warmup_actor=1000,
                      random_process=random_process,
                      gamma=0.98,
                      target_model_update=0.001,
                      val_goal=args.val_goal,
                      vectorized=args.vectorized)

    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

    agent.fit(env,
              nb_steps=200000,
              folder_to_save=folder_to_save,
              visualize=False,
              verbose=1,
              reward_type=reward_type,
              nb_max_episode_steps=50,
              max_tau=12)

    agent.save_weights(os.path.join(
        folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME,
                                                        reward_type)),
                       overwrite=True)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=50)
    sys.exit()