Example #1
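# Reconstructed preamble -- not part of the original snippet. The imports and
# the start of the argparse setup are inferred from how they are used below;
# `logz` and `DDPGAgent` are assumed to be project-local modules.
import argparse
import pickle

import gym
import numpy as np
import tensorflow as tf  # TensorFlow 1.x APIs (ConfigProto, Session) are used below

import logz
from ddpg import DDPGAgent  # assumed import path


def main():
    p = argparse.ArgumentParser()
    p.add_argument('--envname', type=str)  # definition assumed; read below as args.envname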
    p.add_argument('--replay_size', type=int, default=1000000)

    # Training hyperparameters and run options
    p.add_argument('--batch_size', type=int, default=64)
    p.add_argument('--do_not_save', action='store_true')
    p.add_argument('--n_iter', type=int, default=1000)
    p.add_argument('--seed', type=int, default=0)
    args = p.parse_args()

    # Handle the log directory and save the arguments.
    logdir = 'outputs/' + args.envname + '/seed' + str(args.seed).zfill(2)
    if args.do_not_save:
        logdir = None
    logz.configure_output_dir(logdir)
    if logdir is not None:
        with open(logdir + '/args.pkl', 'wb') as f:
            pickle.dump(args, f)
    print("Saving in logdir: {}".format(logdir))

    # Seed the RNGs and set up the environment and the TF session.
    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)
    env = gym.make(args.envname)
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                               intra_op_parallelism_threads=1)
    sess = tf.Session(config=tf_config)

    # Construct the DDPG agent, then train and evaluate it.
    ddpg = DDPGAgent(sess, env, args)
    ddpg.train()
    ddpg.test()
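
if __name__ == '__main__':
    main()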
Example #2
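# Reconstructed preamble -- not part of the original snippet. The imports, the
# environment setup, and the first actor layers are inferred from the code
# below. `make_stormwater_env` is a hypothetical constructor for the custom
# stormwater-control environment (the commented-out weight file is named
# "stormwater"); the layer sizes before the first visible line are assumed.
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

env = make_stormwater_env()
nb_actions = env.action_space.shape[0]

# Actor network: flattened 11-dim observation in, sigmoid-squashed actions out.
actor = Sequential()
actor.add(Flatten(input_shape=(1, 11)))
actor.add(Dense(32))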
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))


# Critic network: concatenates the action with the flattened observation and
# outputs a scalar Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1, 11), name='observation_input')  # window_length=1, 11-dim observation
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Replay buffer and Ornstein-Uhlenbeck exploration noise for DDPG.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=10,
                  random_process=random_process, gamma=.995, target_model_update=1e-3)

agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=10000, visualize=False, verbose=0, nb_max_episode_steps=95)   
#agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=15, visualize=False, nb_max_episode_steps=95)
Example #3
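# Reconstructed preamble -- not part of the original snippet. The imports,
# environment, actor network, and the critic's input graph are assumed to
# follow the same pattern as Example #2; `make_stormwater_env` is hypothetical
# and the hidden-layer sizes are assumptions.
import os

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

env = make_stormwater_env()
nb_actions = env.action_space.shape[0]
os.makedirs('weights', exist_ok=True)  # save_weights below does not create directories
os.makedirs('plots', exist_ok=True)    # env.graph below writes under plots/

# Actor: observations in, actions in [0, 1] out.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))

# Critic input graph; `x` is consumed by the Model() call on the next line.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)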
critic = Model(inputs=[action_input, observation_input], outputs=x)

memory = SequentialMemory(limit=1000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=300,
                  random_process=random_process,
                  gamma=.995,
                  target_model_update=2e-3)
agent.compile(Adam(lr=.005, clipnorm=1.), metrics=['mae'])

agent.fit(env,
          nb_steps=555,
          visualize=False,
          verbose=0,
          nb_max_episode_steps=95)

agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"),
                   overwrite=True)

agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=95)

env.graph("plots/test_plot_")  # custom plotting helper on this env, not a gym API