# Remaining command-line options.
p.add_argument('--replay_size', type=int, default=1000000)
# Other stuff
p.add_argument('--batch_size', type=int, default=64)
p.add_argument('--do_not_save', action='store_true')
p.add_argument('--n_iter', type=int, default=1000)
p.add_argument('--seed', type=int, default=0)
args = p.parse_args()

# Handle the log directory and save the arguments.
# A None logdir disables saving entirely.
logdir = 'outputs/' + args.envname + '/seed' + str(args.seed).zfill(2)
if args.do_not_save:
    logdir = None
logz.configure_output_dir(logdir)
if logdir is not None:
    # Persist the parsed arguments so the run can be reproduced later.
    with open(logdir + '/args.pkl', 'wb') as arg_file:
        pickle.dump(args, arg_file)
print("Saving in logdir: {}".format(logdir))

# Seed both TensorFlow and NumPy for reproducibility, build the environment,
# then run training followed by a final evaluation.
tf.set_random_seed(args.seed)
np.random.seed(args.seed)
env = gym.make(args.envname)
# Single-threaded session config keeps runs deterministic-ish and light.
session_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                intra_op_parallelism_threads=1)
sess = tf.Session(config=session_config)
ddpg = DDPGAgent(sess, env, args)
ddpg.train()
ddpg.test()
# Finish the actor head: one more ReLU, then a sigmoid output per action dim.
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))

# Critic Q(s, a): consumes the action vector plus the flattened observation.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + (11,), name='observation_input')
obs_flat = Flatten()(observation_input)
h = Concatenate()([action_input, obs_flat])
# Three identical 32-unit hidden layers; kept as separate Dense+Activation
# layers (not Dense(activation=...)) to preserve the original layer layout.
for _ in range(3):
    h = Dense(32)(h)
    h = Activation('relu')(h)
h = Dense(1)(h)
h = Activation('linear')(h)
critic = Model(inputs=[action_input, observation_input], outputs=h)

# Replay buffer and Ornstein-Uhlenbeck exploration noise.
replay_buffer = SequentialMemory(limit=100000, window_length=1)
exploration_noise = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15,
                                             mu=0., sigma=.1)

# Assemble, compile, train, and evaluate the DDPG agent.
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=replay_buffer,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=10,
                  random_process=exploration_noise, gamma=.995,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=10000, visualize=False, verbose=0,
          nb_max_episode_steps=95)
# agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=15, visualize=False, nb_max_episode_steps=95,
           plt="")
# Build the critic from the graph assembled earlier in the file
# (x, action_input, observation_input are defined above this chunk).
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Small replay buffer and Ornstein-Uhlenbeck exploration noise for this
# short (555-step) training run.
replay_buffer = SequentialMemory(limit=1000, window_length=1)
exploration_noise = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15,
                                             mu=0., sigma=.3)

# Assemble and compile the DDPG agent, train briefly, checkpoint, then
# run a single rendered evaluation episode and dump the env's plots.
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=replay_buffer,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=300,
                  random_process=exploration_noise, gamma=.995,
                  target_model_update=2e-3)
agent.compile(Adam(lr=.005, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=555, visualize=False, verbose=0,
          nb_max_episode_steps=95)
agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"),
                   overwrite=True)
agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=95)
env.graph("plots/test_plot_")