def main():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True)
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    action_shape = env.action_space.shape[0]
    observation_shape = env.observation_space.shape

    actor = create_actor(observation_shape, action_shape)

    action_input = Input(shape=(action_shape,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    critic = create_critic(observation_input, action_input)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=action_shape, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=action_shape, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99,
                      target_model_update=1e-3, processor=BipedalProcessor())
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

    # Resume from previously saved weights, then continue training.
    agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))
    agent.fit(env, nb_steps=3000000, visualize=False, verbose=2)
    agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
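# create_actor, create_critic, and BipedalProcessor are not defined in this
# snippet. A minimal sketch of what they might look like, assuming plain
# keras-rl conventions; the layer sizes and the action-clipping processor are
# guesses, not the original implementations.
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Concatenate
from rl.core import Processor


def create_actor(observation_shape, action_shape):
    # Maps a (window_length,) + observation input to one value per action dimension.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + observation_shape))
    actor.add(Dense(64, activation='relu'))
    actor.add(Dense(64, activation='relu'))
    actor.add(Dense(action_shape, activation='tanh'))
    return actor


def create_critic(observation_input, action_input):
    # Q(s, a): concatenate the action with the flattened observation, output a scalar.
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    return Model(inputs=[action_input, observation_input], outputs=x)


class BipedalProcessor(Processor):
    # Hypothetical placeholder: clip actions to BipedalWalker's [-1, 1] range.
    def process_action(self, action):
        return np.clip(action, -1., 1.)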
def visualize(session_name):
    kwargs = {'viewer': True}
    ENV_NAME = 'singlePendulum-v0'
    env = gym.make(ENV_NAME, **kwargs)
    np.random.seed(7)
    env.seed(7)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    actor, critic, action_input = create_networks(env)

    memory = SequentialMemory(limit=400, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory)
    agent.compile(Adam(lr=.0005, clipnorm=1., epsilon=1.e-7,
                       beta_1=0.9, beta_2=0.999), metrics=['mae'])

    checkpoint_filepath = 'checkpoint/ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    filepath = 'ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    agent.load_weights(filepath=filepath)

    env.viewer = True
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=400)
    env.close()
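# create_networks(env) is not shown above. A plausible sketch returning the
# (actor, critic, action_input) triple the caller expects; layer sizes are
# assumptions.
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Concatenate


def create_networks(env):
    nb_actions = env.action_space.shape[0]
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(32, activation='relu'))
    actor.add(Dense(32, activation='relu'))
    actor.add(Dense(nb_actions, activation='linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    return actor, critic, action_input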
class DDPG():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]

        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        actor.add(Dense(8))
        actor.add(Activation('relu'))
        actor.add(Dense(5))
        actor.add(Activation('relu'))
        # actor.add(Dense(16))
        # actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('softmax'))
        # print(actor.summary())

        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + Env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation], name='concatenate')
        x = Dense(5)(x)
        x = Activation('relu')(x)
        x = Dense(8)(x)
        x = Activation('relu')(x)
        x = Dense(5)(x)
        x = Activation('relu')(x)
        # x = Dense(32)(x)
        # x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        # print(critic.summary())

        memory = SequentialMemory(limit=100000, window_length=1)
        # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        random_process = None
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=32, nb_steps_warmup_actor=32,
                               random_process=random_process, gamma=0,
                               target_model_update=0.001)
        self.agent.processor = ShowActionProcessor(self.agent, self.env)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def fit(self):
        history = self.agent.fit(self.env, action_repetition=1, nb_steps=20000,
                                 visualize=False, verbose=1, nb_max_episode_steps=10)
        return history

    def save_weights(self):
        self.agent.save_weights('./store/ddpg_{}_weights2.h5f'.format('portfolio'),
                                overwrite=True)

    def test(self):
        history = self.agent.test(self.env, nb_episodes=1, visualize=False,
                                  nb_max_episode_steps=10)
        return history

    def load_weights(self):
        self.agent.load_weights('./store/ddpg_{}_weights2.h5f'.format('portfolio'))
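# ShowActionProcessor is not defined here. A minimal sketch, assuming it is a
# keras-rl Processor that logs each action before it reaches the environment;
# the exact behavior in the source is unknown.
from rl.core import Processor


class ShowActionProcessor(Processor):
    def __init__(self, agent, env):
        self.agent = agent
        self.env = env

    def process_action(self, action):
        # Print the raw action for inspection, then pass it through unchanged.
        print('action:', action)
        return action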
def load_network(env):
    ENV_NAME = 'Carom-v0'
    gym.undo_logger_setup()
    # Get the environment and extract the number of actions.
    np.random.seed(323)
    env.seed(323)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=50000, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      gamma=.99, target_model_update=1e3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))
    return agent
def _train(self):
    env = CrazyflieEnvironment(self._cf)
    atexit.register(teardown_env, env, self._cf)
    np.random.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = self.actor_model(env, nb_actions)
    action_input, critic = self.critic_model(env, nb_actions)

    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    model_name = 'ddpg_{}_weights.h5f'.format('crazyflie')
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99,
                      target_model_update=1e-3)
    # Compile before loading weights: DDPGAgent.load_weights updates the target
    # networks, which only exist after compile(). Note that keras-rl saves the
    # actor and critic as separate '..._actor'/'..._critic' files, so the
    # existence check below only succeeds if a file with the base name is also
    # written.
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    if os.path.exists(model_name):
        agent.load_weights(model_name)
    try:
        agent.fit(env, nb_steps=50000, verbose=2)
        agent.test(env, nb_episodes=1)
    finally:
        agent.save_weights(model_name, overwrite=True)
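# actor_model and critic_model are methods defined elsewhere on this class.
# A plausible sketch of their shape, given how they are called above; written
# here as standalone functions, with assumed layer sizes.
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Concatenate


def actor_model(env, nb_actions):
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(32, activation='relu'))
    actor.add(Dense(nb_actions, activation='tanh'))
    return actor


def critic_model(env, nb_actions):
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    # Returned in the (action_input, critic) order the caller unpacks.
    return action_input, Model(inputs=[action_input, observation_input], outputs=x)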
def evaluate_model(model_path=None, interactive=False, seed=12345):
    np.random.seed(seed)
    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = GaussianWhiteNoiseProcess(mu=0, sigma=0, sigma_min=0,
                                               n_steps_annealing=1)
    agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=500, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.95,
                      target_model_update=0.0001, batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])
    if model_path is not None:
        agent.load_weights(model_path)

    # Evaluation on the training split
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=False, interactive=interactive)
    env.seed(seed)
    res = agent.test(env, nb_episodes=500, nb_max_episode_steps=100,
                     verbose=0, visualize=False)
    train_mean_reward = np.mean(res.history['episode_reward'])
    before_train_position_error = np.mean(np.abs(env.init_position_error_pixels))
    before_train_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_train_position_error = np.mean(np.abs(env.final_position_error_pixels))
    after_train_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Training evaluation: ")
    print("Mean reward: ", train_mean_reward)
    print("Position: ", before_train_position_error, " -> ", after_train_position_error)
    print("Zoom: ", before_train_zoom_error, " -> ", after_train_zoom_error)

    # Evaluation on the testing split
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=True, interactive=interactive)
    env.seed(seed)
    res = agent.test(env, nb_episodes=500, nb_max_episode_steps=100,
                     verbose=0, visualize=False)
    test_mean_reward = np.mean(res.history['episode_reward'])
    before_test_position_error = np.mean(np.abs(env.init_position_error_pixels))
    before_test_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_test_position_error = np.mean(np.abs(env.final_position_error_pixels))
    after_test_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Testing evaluation: ")
    print("Mean reward: ", test_mean_reward)
    print("Position: ", before_test_position_error, " -> ", after_test_position_error)
    print("Zoom: ", before_test_zoom_error, " -> ", after_test_zoom_error)
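# define_actor_critic_models is not included in this snippet. A minimal sketch,
# assuming a hypothetical obs_shape parameter for the observation shape; the
# real layer sizes and shapes are unknown.
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Concatenate


def define_actor_critic_models(actions, obs_shape=(8,)):
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + obs_shape))
    actor.add(Dense(64, activation='relu'))
    actor.add(Dense(actions, activation='tanh'))

    action_input = Input(shape=(actions,), name='action_input')
    observation_input = Input(shape=(1,) + obs_shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    return actor, critic, action_input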
## Initialize Replay Buffer ##
memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)
# window_length: useful for Atari-style games (how many consecutive frames to
# analyze, e.g. to recover ball velocity).

## Random process (exploration) ##
random_process = OrnsteinUhlenbeckProcess(theta=THETA, mu=MEAN, sigma=SIGMA,
                                          size=action_size)

## DDPG agent parameters ##
agent = DDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  random_process=random_process, gamma=DISC_FACT,
                  target_model_update=TARGET_MODEL_UPDATE, batch_size=BATCH_SIZE)
# keras-rl unpacks a two-element optimizer list as [actor, critic].
agent.compile(optimizer=[opti_actor, opti_critic], metrics=['mae'])

##### TRAIN #####
if args.train:
    check_overwrite(args.model)
    history = agent.fit(env, nb_steps=N_STEPS_TRAIN, visualize=args.visualize,
                        verbose=VERBOSE, log_interval=LOG_INTERVAL)
    agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)
    save_plot_reward(history, args.model, params)

##### TEST #####
if not args.train:
    agent.load_weights(FILES_WEIGHTS_NETWORKS)
    history = agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
    save_result(history, args.model, params)
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something!
mode = 'test'
if mode == 'train':
    hist = agent.fit(env, nb_steps=1000000, visualize=False, verbose=2,
                     nb_max_episode_steps=1000)
    filename = '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'
    # We save the training history; it can later be used to plot reward evolution.
    with open('_experiments/history_ddpg__redetorcs' + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2_action_lim_1'),
        overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)
elif mode == 'test':
    env.set_test_performace()  # Define the initialization as performance test
    env.set_save_experice()    # Save the test to plot the results after
    agent.load_weights('h5f_files/ddpg_{}_weights.h5f'.format(
        '600kit_rn4_maior2_mem20k_target01_theta3_batch32_adam2'))
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)
#                   gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

prefix = args.output if args.output else "%s_s%f_t%f" % (args.env, float(args.sigma), float(args.theta))

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000,
              prefix=prefix)
    # After training is done, we save the final weights.
    agent.save_weights("%s.h5f" % args.output, overwrite=True)

if not args.train:
    agent.load_weights("%s.h5f" % args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("\n\nTarget shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
            obs = env.get_observation()
            print("Actual shoulder = %f, elbow = %f\r" % (obs[2], obs[3]), end='')
            env.step(agent.forward(obs))
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_range=(-100., 100.))  # delta_range is deprecated in keras-rl; delta_clip is the replacement
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)

if not args.train:
    agent.load_weights(args.output)
    # Finally, evaluate our algorithm for 5 episodes.
    if args.env != "Arm":
        agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=500)
    else:
        for i in range(10000):
            if i % 300 == 0:
                env.new_target()
                print("Target shoulder = %f, elbow = %f" % (env.shoulder, env.elbow))
            env.step(agent.forward(env.get_observation()))
    # (continues a per-file training loop; the matching `if` branch opens above this excerpt)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(critic.summary())

    memory = SequentialMemory(limit=1000000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')  # added to continue training
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()
else:
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights/ddpg_swmm_weights.h5f', overwrite=True)
    env.close()

if file_num % 10 == 0:
    print("finished training on ", file_num, " files")
file_num += 1

# loop through testing envs
                                          mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

try:
    agent.load_weights('ddpg_{}_nomad_v3_weights.h5f'.format(ENV_NAME))
except (OSError, IOError):
    logger.warning("File not found")

n = 0
while True:
    n += 1
    logger.info('Iteration #{}'.format(n))
    # train
    train_history = agent.fit(env, nb_steps=nb_stepis, visualize=False,
                              verbose=1, nb_max_episode_steps=nb_stepis)
def main_function(args, data):
    #### CONSTANT INITIALIZATION #####
    ## Model ##
    SIZE_HIDDEN_LAYER_ACTOR = data['SIZE_HIDDEN_LAYER_ACTOR'][0]
    LR_ACTOR = data['LR_ACTOR'][0]
    SIZE_HIDDEN_LAYER_CRITIC = data['SIZE_HIDDEN_LAYER_CRITIC'][0]
    LR_CRITIC = data['LR_CRITIC'][0]
    DISC_FACT = data['DISC_FACT'][0]
    TARGET_MODEL_UPDATE = data['TARGET_MODEL_UPDATE'][0]
    BATCH_SIZE = data['BATCH_SIZE'][0]
    REPLAY_BUFFER_SIZE = data['REPLAY_BUFFER_SIZE'][0]

    ## Exploration ##
    THETA = data['THETA'][0]
    SIGMA = data['SIGMA'][0]
    SIGMA_MIN = data['SIGMA_MIN'][0]
    N_STEPS_ANNEALING = data['N_STEPS_ANNEALING'][0]

    ## Acceleration ##
    ACTION_REPETITION = data['ACTION_REPETITION'][0]
    INTEGRATOR_ACCURACY = data['INTEGRATOR_ACCURACY'][0]

    ## Simulation ##
    N_STEPS_TRAIN = int(args.step)
    N_EPISODE_TEST = 100
    if args.visualize:
        N_EPISODE_TEST = 3
    VERBOSE = 1  # 0: no logging; 1: log every LOG_INTERVAL steps; 2: log every episode
    LOG_INTERVAL = 500

    ## Save weights ##
    if not os.path.exists('weights'):
        os.mkdir('weights')
        print("Directory ", 'weights', " Created ")
    FILES_WEIGHTS_NETWORKS = './weights/' + args.model + '.h5f'

    #### ENVIRONMENT LOADING #####
    if args.prosthetic:
        env = ProsContinueRewardWrapper(
            ProstheticsEnv(visualize=args.visualize,
                           integrator_accuracy=INTEGRATOR_ACCURACY))
    if not args.prosthetic:
        env = CustomDoneOsimWrapper(
            CustomRewardWrapper(
                RelativeMassCenterObservationWrapper(
                    NoObstacleObservationWrapper(
                        L2RunEnv(visualize=args.visualize,
                                 integrator_accuracy=0.005)))))
    env.reset()

    ## Examine the action space ##
    action_size = env.action_space.shape[0]
    # action_size = int(env.action_space.shape[0] / 2)  # for symmetry
    print('Size of each action:', action_size)

    ## Examine the state space ##
    state_size = env.observation_space.shape[0]
    print('Size of state:', state_size)

    #### ACTOR / CRITIC #####
    ## Actor (mu) ##
    # The input shape is the same for both environment variants.
    input_shape = (1, env.observation_space.shape[0])
    observation_input = Input(shape=input_shape, name='observation_input')
    x = Flatten()(observation_input)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_ACTOR)(x)
    x = Activation('relu')(x)
    x = Dense(action_size)(x)
    x = Activation('sigmoid')(x)
    actor = Model(inputs=observation_input, outputs=x)
    opti_actor = Adam(lr=LR_ACTOR)

    ## Critic (Q) ##
    action_input = Input(shape=(action_size,), name='action_input')
    x = Flatten()(observation_input)
    x = concatenate([action_input, x])
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(SIZE_HIDDEN_LAYER_CRITIC)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    opti_critic = Adam(lr=LR_CRITIC)

    #### SET UP THE AGENT #####
    ## Initialize Replay Buffer ##
    memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)

    ## Random process (exploration) ##
    random_process = OrnsteinUhlenbeckProcess(
        theta=THETA, mu=0, sigma=SIGMA, sigma_min=SIGMA_MIN,
        size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    # random_process_l = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA, sigma_min=SIGMA_MIN,
    #                                             size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    # random_process_r = OrnsteinUhlenbeckProcess(theta=THETA, mu=0, sigma=SIGMA, sigma_min=SIGMA_MIN,
    #                                             size=action_size, n_steps_annealing=N_STEPS_ANNEALING)
    ## DDPG agent parameters ##
    # agent = SymmetricDDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
    #                            critic_action_input=action_input,
    #                            memory=memory, random_process_l=random_process_l, random_process_r=random_process_r,
    #                            gamma=DISC_FACT, target_model_update=TARGET_MODEL_UPDATE,
    #                            batch_size=BATCH_SIZE)
    agent = DDPGAgent(nb_actions=action_size, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      random_process=random_process, gamma=DISC_FACT,
                      target_model_update=TARGET_MODEL_UPDATE,
                      batch_size=BATCH_SIZE)
    # keras-rl unpacks a two-element optimizer list as [actor, critic].
    agent.compile(optimizer=[opti_actor, opti_critic])

    #### TRAIN #####
    logdir = "keras_logs/" + datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
    robustensorboard = RobustTensorBoard(log_dir=logdir, hyperparams=data)
    saveBest = SaveBestEpisode()
    if args.train:
        if args.resume:
            agent.load_weights(FILES_WEIGHTS_NETWORKS)
        else:
            check_overwrite(args.model)
        agent.fit(env, nb_steps=N_STEPS_TRAIN, visualize=args.visualize,
                  verbose=VERBOSE, log_interval=LOG_INTERVAL,
                  callbacks=[robustensorboard, saveBest],
                  action_repetition=ACTION_REPETITION)
        agent.save_weights(FILES_WEIGHTS_NETWORKS, overwrite=True)

    #### TEST #####
    if not args.train:
        agent.load_weights(FILES_WEIGHTS_NETWORKS)
        agent.test(env, nb_episodes=N_EPISODE_TEST, visualize=args.visualize)
env.seed(123)
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

n = DroneNetwork(nb_actions=nb_actions, observation_shape=env.observation_space.shape)

# Next, we build a very simple model.
actor = n.create_actor()
critic = n.create_critic()
action_input = n.get_action_input()
actor.summary()
critic.summary()
print(action_input)

memory = SequentialMemory(limit=100000, window_length=1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.load_weights('ddpg_{}_weights.h5f'.format('drone'))

agent.test(env, nb_episodes=100000, visualize=True)
# agent.test(env, nb_episodes=20, visualize=True, nb_max_episode_steps=50)
env.close()
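# DroneNetwork is defined elsewhere. A minimal sketch consistent with the three
# methods used above; the layer sizes are assumptions, not the original design.
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Input, Concatenate


class DroneNetwork:
    def __init__(self, nb_actions, observation_shape):
        self.nb_actions = nb_actions
        self.observation_shape = observation_shape
        self.action_input = Input(shape=(nb_actions,), name='action_input')

    def create_actor(self):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + self.observation_shape))
        actor.add(Dense(64, activation='relu'))
        actor.add(Dense(self.nb_actions, activation='tanh'))
        return actor

    def create_critic(self):
        observation_input = Input(shape=(1,) + self.observation_shape,
                                  name='observation_input')
        x = Concatenate()([self.action_input, Flatten()(observation_input)])
        x = Dense(64, activation='relu')(x)
        x = Dense(1, activation='linear')(x)
        return Model(inputs=[self.action_input, observation_input], outputs=x)

    def get_action_input(self):
        return self.action_input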
x = Dense(LAYER_SIZE)(x)
x = Activation('relu')(x)
# Output layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2 * NUM_STEPS, window_length=1)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt=env.tau,
                                          theta=0.6, mu=0.0, sigma=0.5,
                                          sigma_min=0.15, n_steps_annealing=NUM_STEPS)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.999,
                  target_model_update=1e-3, delta_clip=1.0)
agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae'])

# Load the model weights - this method will automatically load the weights for
# both the actor and the critic.
agent.load_weights(FILENAME)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # nb_max_episode_steps=500,
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
# print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

try:
    agent.load_weights(os.path.dirname(__file__) +
                       '/weights/trained_weight/ddpg_{}_weights.h5f'.format(ENV_NAME))
    print("found weights file")
except (IOError, OSError):
    print("weights file not found")

rospy.init_node('pub_drive', anonymous=True)
rospy.Subscriber("/observe", Float32MultiArray, callback_observe)
pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
r = rospy.Rate(10)  # 10 Hz

fig, ax = plt.subplots(1, 1)
ax.set_ylim(math.radians(-50), math.radians(50))
x = []
y = []
step_count = 0
                                          mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

train_steps = 197000
# These weights are from training XXX, but are reused for the forecast models.
# (The filename has no '{}' placeholder, so no .format() call is needed.)
agent.load_weights(
    'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights_100000_depth_4.61.h5f')
# agent.load_weights('ddpg_swmm_weights_{}.h5f'.format(train_steps))
agent.fit(env, nb_steps=train_steps, verbose=1)
agent.save_weights(
    'swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights2_{}_depth_{}.h5f'
    .format(train_steps, depth), overwrite=True)
print("training finished at: ", datetime.now())

# # get agent weights and names
# actor_weights = agent.actor.get_weights()
# critic_weights = agent.critic.get_weights()
# actor_names = [weight.name for layer in agent.actor.layers for weight in layer.weights]
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3, delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=200, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=1000)
def train_with_params(sigma_v=0., sigma_o=0., test=False):
    ENV_NAME = 'PongSolo'
    conf_name = '{}_sv_{}_so_{}'.format(ENV_NAME, sigma_v, sigma_o)  # sv, so = sigma_v and sigma_orientation

    # Get the environment and extract the number of actions.
    env = EnvPongSolo(sigma_v=sigma_v, sigma_o=sigma_o)
    np.random.seed(123)
    # assert len(env.action_space.shape) == 1
    nb_actions = 1
    leaky_alpha = 0.2

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(100))
    actor.add(LeakyReLU(leaky_alpha))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(200)(x)
    x = LeakyReLU(leaky_alpha)(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    n_steps = 5000000
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=1., mu=0., sigma=.3,
                                              sigma_min=0.01, n_steps_annealing=n_steps)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    directory_weights = "weights/ddpg/{}".format(conf_name)
    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)

    if not test:
        perfCheckPoint = ModelPerformanceCheckpoint(
            '{}/checkpoint_avg{}_steps{}'.format(directory_weights, '{}', '{}'), 800)
        agent.fit(env, nb_steps=n_steps, visualize=False, verbose=2,
                  nb_max_episode_steps=200, callbacks=[perfCheckPoint])
        # After training is done, we save the final weights.
        agent.save_weights('{}/final.h5f'.format(directory_weights), overwrite=True)
        # Finally, evaluate our algorithm for 100 episodes.
        agent.test(env, nb_episodes=100, visualize=False, nb_max_episode_steps=200)
    else:
        agent.load_weights('{}/final.h5f'.format(directory_weights))
        agent.test(env, nb_episodes=1000, visualize=False, nb_max_episode_steps=200)
                  random_process=random_process, gamma=GAMMA, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = build_callbacks(ENV_NAME)
test_callbacks = build_test_callbacks(ENV_NAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# agent.fit(env, nb_steps=500000, visualize=False, callbacks=callbacks, verbose=1,
#           gamma=GAMMA, nb_max_episode_steps=30)
# After training is done, we save the final weights.
# agent.save_weights('results/InvertedPendulum/exp_6/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

agent.load_weights('results/InvertedPendulum/exp_6/ddpg_{}_weights.h5f'.format(ENV_NAME))

# Finally, evaluate our algorithm for one episode.
# (initial_state, std_dev_noise, and gamma appear to be extensions to
# agent.test in a modified keras-rl; stock keras-rl does not accept them.)
history, state_history_nominal, episode_reward_nominal = agent.test(
    env, nb_episodes=1, visualize=True, action_repetition=1,
    callbacks=test_callbacks, nb_max_episode_steps=30,
    initial_state=[0, np.pi, 0, 0], std_dev_noise=0, gamma=GAMMA)

u_max = 12
print(episode_reward_nominal, state_history_nominal)

'''
f = open("results/InvertedPendulum/exp_3/data.txt", "a")
for i in frange(0.0, 1.0, 0.05):
    episode_reward_n = 0
    Var_n = 0
    terminal_mse = 0
    Var_terminal_mse = 0
    for j in range(n_samples):
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=10, nb_steps_warmup_actor=10,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(optimizer=Adam(lr=1e-3, beta_1=0.9, beta_2=0.999,
                             epsilon=None, decay=0.0, amsgrad=False))

if __name__ == '__main__':
    # Load
    agent.load_weights('OsmoEnv.h5f')

    # Train
    # agent.fit(env, nb_steps=50000, verbose=1, nb_max_episode_steps=200)
    #
    # # Weights
    # agent.save_weights('OsmoEnv.h5f', overwrite=True)
    #
    # # Load
    # agent.load_weights('OsmoEnv.h5f')

    # Test
    # agent.test(env, visualize=False, nb_episodes=50, nb_max_episode_steps=200)

    # Play
    for _ in range(10):
class KerasDDPGAgent(object):
    '''classdocs'''

    def __init__(self, opts):
        self.metadata = {'discrete_actions': False}
        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        # actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + observation_space_shape))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('linear'))
        print(actor.summary())

        # critic network
        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + observation_space_shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        self.agent.fit(env, nb_steps=nb_steps, visualize=visualize,
                       verbose=verbosity, nb_max_episode_steps=200)

    def test(self, env, nb_episodes, visualize):
        self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize,
                        nb_max_episode_steps=200)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        self.agent.save_weights(save_file, overwrite=overwrite)
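# A hypothetical usage sketch for the wrapper above; the environment name and
# step count are placeholders, not from the source.
if __name__ == '__main__':
    env = gym.make('Pendulum-v0')
    wrapper = KerasDDPGAgent(opts={})
    wrapper.configure(env.observation_space.shape, env.action_space.shape[0])
    wrapper.train(env, nb_steps=50000, visualize=False, verbosity=1)
    wrapper.save_weights('ddpg_pendulum_weights.h5f', overwrite=True)
    wrapper.test(env, nb_episodes=5, visualize=True)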
agent.test(env, nb_episodes=2, visualize=True, nb_max_episode_steps=1000)

mode = 'train'
if mode == 'train':
    filename = 'test'
    # We save the training history; it can later be used to plot reward evolution.
    # with open('_experiments/history_ddpg__redetorcs' + filename + '.pickle', 'wb') as handle:
    #     pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    # agent.save_weights('h5f_files/ddpg_{}_weights.h5f'.format('test'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
elif mode == 'test':
    # env.set_test_performace()  # Define the initialization as performance test
    # env.set_save_experice()    # Save the test to plot the results after
    agent.load_weights('h5f_files/ddpg_{}_weights.h5f'.format('test'))
    agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)

# env.reset()
# # env.render()
# done = False
# print(done)
# count = 0
# while done == False:
#     if count < 15:
#         observation, reward, done, info = env.step([0, 0, 0, 0])
#     elif count < 30:
#         observation, reward, done, info = env.step([0, 0.5, 0, 0])
#     elif count < 60:
#         observation, reward, done, info = env.step([0, 0, 0, 0])
#     else:
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()
else:
    agent.load_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('swmm_rl_multi_inp_forecast/agent_weights/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()

if file_num % 100 == 0:
    print("finished training on ", file_num, " files")
file_num += 1

# loop through testing envs
for file in os.scandir(os.path.join(data_dir, "syn_inp_test")):
    if file.name.endswith('.inp'):
        print('testing ', file.name)
class DDPG:
    """Deep Deterministic Policy Gradient class.

    This is an implementation of DDPG for continuous control tasks built on the
    high-level keras-rl library.

    Args:
        env_name (str): Name of the gym environment
        weights_dir (str): Directory for storing model weights (the actor and
            critic are saved as separate files)
        actor_layers (list(int)): Neurons in each successive hidden layer of the actor
        critic_layers (list(int)): Neurons in each successive hidden layer of the critic
        n_episodes (int): Maximum steps per training episode (passed to
            nb_max_episode_steps in fit)
        visualize (bool): Whether a popup window with the environment view is required
    """

    def __init__(self,
                 env_name='MountainCarContinuous-v0',
                 weights_dir="model_weights",
                 actor_layers=[64, 64, 32],
                 critic_layers=[128, 128, 64],
                 n_episodes=200,
                 visualize=True):
        self.env_name = env_name
        self.env = gym.make(env_name)
        np.random.seed(123)
        self.env.seed(123)
        self.actor_layers = actor_layers
        self.critic_layers = critic_layers
        self.n_episodes = n_episodes
        self.visualize = visualize
        self.n_actions = self.env.action_space.shape[0]
        self.n_states = self.env.observation_space.shape
        self.weights_file = os.path.join(
            weights_dir, 'ddpg_{}_weights.h5f'.format(self.env_name))
        self.actor = None
        self.critic = None
        self.agent = None
        self.action_input = None

    def _make_actor(self):
        """Internal helper function to create the actor model."""
        self.actor = Sequential()
        self.actor.add(Flatten(input_shape=(1,) + self.n_states))
        for size in self.actor_layers:
            self.actor.add(Dense(size, activation='relu'))
        self.actor.add(Dense(self.n_actions, activation='linear'))
        self.actor.summary()

    def _make_critic(self):
        """Internal helper function to create the critic model."""
        action_input = Input(shape=(self.n_actions,), name='action_input')
        observation_input = Input(shape=(1,) + self.n_states,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        input_layer = Concatenate()([action_input, flattened_observation])
        hidden_layers = Dense(self.critic_layers[0], activation='relu')(input_layer)
        for size in self.critic_layers[1:]:
            hidden_layers = Dense(size, activation='relu')(hidden_layers)
        output_layer = Dense(1, activation='linear')(hidden_layers)
        self.critic = Model(inputs=[action_input, observation_input],
                            outputs=output_layer)
        self.critic.summary()
        self.action_input = action_input

    def _make_agent(self):
        """Internal helper function to create the actor-critic agent."""
        if self.actor is None:
            self._make_actor()
        if self.critic is None:
            self._make_critic()
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=self.n_actions,
                                                  theta=.15, mu=0., sigma=.3)
        self.agent = DDPGAgent(nb_actions=self.n_actions, actor=self.actor,
                               critic=self.critic,
                               critic_action_input=self.action_input,
                               memory=memory, nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    def _load_or_make_agent(self):
        """Internal helper function to load an agent model; creates a new one if no weights exist."""
        if self.agent is None:
            self._make_agent()
        if os.path.exists(self.weights_file):
            logger.info(
                "Found existing weights for the model for this environment. Loading...")
            self.agent.load_weights(self.weights_file)

    def train(self):
        """Train the DDPG agent."""
        self._load_or_make_agent()
        self.agent.fit(self.env,
                       nb_steps=50000,
                       visualize=self.visualize,
                       verbose=1,
                       nb_max_episode_steps=self.n_episodes)
        self.agent.save_weights(self.weights_file, overwrite=True)

    def test(self, nb_episodes=5):
        """Test the DDPG agent."""
        logger.info("Testing the agent with {} episodes...".format(nb_episodes))
        self.agent.test(self.env,
                        nb_episodes=nb_episodes,
                        visualize=self.visualize,
                        nb_max_episode_steps=200)
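# A hypothetical usage sketch for the class above (not from the source):
if __name__ == '__main__':
    ddpg = DDPG(env_name='MountainCarContinuous-v0', visualize=False)
    ddpg.train()
    ddpg.test(nb_episodes=5)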
class Agent:
    def __init__(self, env):
        self.nb_actions = env.action_space.shape[0]
        self.nb_states = env.observation_space.shape[0]
        self.env = env
        self.actor = self.build_actor(env)
        self.actor.compile('Adam', 'mse')
        self.critic, action_input = self.build_critic(env)
        self.loss = self.build_loss()
        self.processor = WhiteningNormalizerProcessor()
        self.memory = SequentialMemory(limit=5000000, window_length=1)
        self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions,
                                                       theta=0.75, mu=0.5, sigma=0.25)
        self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.actor,
                               critic=self.critic, critic_action_input=action_input,
                               memory=self.memory, nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=self.random_process, gamma=.99,
                               target_model_update=1e-3, processor=self.processor)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
        self.sym_actor = self.build_sym_actor()
        self.sym_actor.compile(optimizer='Adam', loss='mse')

    def build_loss(self):
        return ['mse']

    def build_actor(self, env):
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(64, activation='tanh'))
        actor.add(GaussianNoise(0.05))
        actor.add(Dense(self.nb_actions, activation='hard_sigmoid'))
        actor.summary()
        inD = Input(shape=(1,) + env.observation_space.shape)
        out = actor(inD)
        return Model(inD, out)

    def build_critic(self, env):
        action_input = Input(shape=(self.nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Dense(64, activation='relu')(flattened_observation)
        x = Concatenate()([x, action_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(1)(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        critic.summary()
        return critic, action_input

    def build_sym_actor(self):
        # Build a model that maps a state to the action the actor would take in
        # the left/right mirrored state, by swapping paired observation and
        # muscle indices.
        stateSwap = []
        actionSwap = []
        state_desc = self.env.get_state_desc()
        for x in state_desc.keys():
            keys = list(state_desc[x].keys())
            for (k, key) in enumerate(keys):
                if '_r' in key:
                    i = keys.index(key.replace('_r', '_l'))
                    if i != -1:
                        stateSwap += [(k, i), (i, k)]
        muscle_list = []
        for i in range(self.env.osim_model.muscleSet.getSize()):
            muscle_list.append(self.env.osim_model.muscleSet.get(i).getName())
        for (k, key) in enumerate(muscle_list):
            if '_r' in key:
                i = muscle_list.index(key.replace('_r', '_l'))
                if i != -1:
                    actionSwap += [(k, i), (i, k)]
        stateSwapMat = np.zeros((self.nb_states, self.nb_states))
        actionSwapMat = np.zeros((self.nb_actions, self.nb_actions))
        for (i, j) in stateSwap:
            stateSwapMat[i, j] = 1
        for (i, j) in actionSwap:
            actionSwapMat[i, j] = 1

        def ssT(shape, dtype=None):
            if shape != stateSwapMat.shape:
                raise Exception("State Swap Tensor Shape Error")
            return K.variable(stateSwapMat, dtype=dtype)

        def asT(shape, dtype=None):
            if shape != actionSwapMat.shape:
                raise Exception("Action Swap Tensor Shape Error")
            return K.variable(actionSwapMat, dtype=dtype)

        model1 = Sequential()
        model1.add(Dense(self.nb_states,
                         input_shape=(1,) + self.env.observation_space.shape,
                         trainable=False,
                         kernel_initializer=ssT,
                         bias_initializer='zeros'))
        inD = Input(shape=(1,) + self.env.observation_space.shape)
        symState = model1(inD)
        symPol = self.actor(symState)
        model2 = Sequential()
        model2.add(Dense(self.nb_actions,
                         input_shape=(1, self.nb_actions),
                         trainable=False,
                         kernel_initializer=asT,
                         bias_initializer='zeros'))
        out = model2(symPol)
        return Model(inD, out)

    def fit(self, **kwargs):
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        out = self.agent.fit(self.env, **kwargs)
        print("\n\ndo symmetric loss back propagation\n\n")
        states = np.random.normal(0, 10, (kwargs['nb_steps'] // 200, 1, self.nb_states))
        actions = self.actor.predict_on_batch(states)
        self.sym_actor.train_on_batch(states, actions)
        return out

    def test(self, **kwargs):
        print("testing")
        print("VA:", self.env.get_VA())
        if 'nb_max_episode_steps' in kwargs.keys():
            self.env.spec.timestep_limit = kwargs['nb_max_episode_steps']
        else:
            self.env.spec.timestep_limit = self.env.time_limit
        return self.agent.test(self.env, **kwargs)

    def test_get_steps(self, **kwargs):
        return self.test(**kwargs).history['nb_steps'][-1]

    def save_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.save_weights(filename.format("opensim"), overwrite=True)
        self.save_processor()

    def load_weights(self, filename='osim-rl/ddpg_{}_weights.h5f'):
        self.agent.load_weights(filename.format("opensim"))
        self.load_processor()

    def search_VA(self):
        # 1-D line search
        state = self.env.get_VA()
        goal = 0.0
        if abs(state - goal) < 0.01:
            self.env.upd_VA(goal)
            return
        steps = self.test_get_steps(nb_episodes=1, visualize=False,
                                    nb_max_episode_steps=1000)
        dv = 0.0
        dsteps = steps
        while (state - dv > goal and dsteps > 0.8 * steps):
            dv += 0.02
            self.env.upd_VA(state - dv)
            dsteps = self.test_get_steps(nb_episodes=1, visualize=False,
                                         nb_max_episode_steps=1000)
        if abs((state - dv) - goal) < 0.01:
            self.env.upd_VA(goal)
        else:
            dv -= 0.02
            self.env.upd_VA(state - dv)

    def save_processor(self):
        np.savez('osim-rl/processor.npz',
                 _sum=self.processor.normalizer._sum,
                 _count=np.array([self.processor.normalizer._count]),
                 _sumsq=self.processor.normalizer._sumsq,
                 mean=self.processor.normalizer.mean,
                 std=self.processor.normalizer.std)

    def load_processor(self):
        f = np.load('osim-rl/processor.npz')
        dtype = f['_sum'].dtype
        if self.processor.normalizer is None:
            self.processor.normalizer = WhiteningNormalizer(
                shape=(1,) + self.env.observation_space.shape, dtype=dtype)
        self.processor.normalizer._sum = f['_sum']
        self.processor.normalizer._count = int(f['_count'][0])
        self.processor.normalizer._sumsq = f['_sumsq']
        self.processor.normalizer.mean = f['mean']
        self.processor.normalizer.std = f['std']
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3, delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.load_weights('aviral_jump_new.h5f')
    print('weights loaded')
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=1000, log_interval=1000)
    print('TRAINED THE MODELS')
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    print(args.model)
    agent.load_weights(args.model)
    # sys.exit(0)
    # Finally, evaluate our algorithm for 10 episodes.
    h = Histories()
    agent.test(env, nb_episodes=10, visualize=False, nb_max_episode_steps=1000,
               action_repetition=2, callbacks=[h])
    # print(h.action_list)
    f = open('values_jump_new.txt', 'w')
# random process for exploration noise
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=theta, dt=0.01,
                                          mu=0., sigma=.25)

# define the DDPG agent
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=GAMMA, target_model_update=1e-3)

# compile the model
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = common_func.build_callbacks(ENV_NAME, log_filename_pre, filename_exp)

# ---------------------------------------------------------------------------
# Training phase
# fitting the agent
# agent.fit(env, nb_steps=800000, visualize=False, callbacks=callbacks, verbose=1,
#           gamma=GAMMA, nb_max_episode_steps=900)
# After training is done, we save the final weights.
# agent.save_weights('../results/Swimmer6/exp_1/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# ---------------------------------------------------------------------------
# Testing phase
agent.load_weights(log_filename_pre + filename_exp + '/ddpg_{}_weights.h5f'.format(ENV_NAME))
# (initial_state, std_dev_noise, gamma, and process_noise_std appear to be
# extensions to agent.test in a modified keras-rl.)
history, state_history_nominal, episode_reward_nominal, action_history = agent.test(
    env, nb_episodes=1, visualize=True, action_repetition=1,
    nb_max_episode_steps=STEPS_PER_EPISODE,
    initial_state=np.zeros((16,)), std_dev_noise=20, gamma=GAMMA,
    process_noise_std=process_noise_std)

# np.savetxt(log_filename_pre+filename_exp+'/s3_nominal_action.txt', action_history)
# np.savetxt(log_filename_pre+filename_exp+'/s3_nominal_state.txt', state_history_nominal)
print(state_history_nominal, action_history)
# ---------------------------------------------------------------------------
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

memory = SequentialMemory(limit=50000, window_length=1)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  gamma=.99, target_model_update=1e3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))

# def B(b):
#     env.render = False
#     state = env.reset()
#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False,
#                                                      nb_max_episode_steps=200, modif=True, pos=pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand=optimal_action, a=a, b=b, theta=theta)

# state = env.reset()
# pos = env.arraystate2pos(state)
# optimal_action = np.zeros(2)
def main(): """Create environment, build models, train.""" env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=3) #env = MarketEnv(("EUR", "CASH", "IDEALPRO", "USD"), max_quantity=20000, quantity_increment=20000, obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=5, afterhours=False) obs_size = np.product(env.observation_space.shape) # Actor model dropout = 0.1 actor = Sequential([ Flatten(input_shape=(1, ) + env.observation_space.shape), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(obs_size, activation='relu'), GaussianDropout(dropout), BatchNormalization(), Dense(1, activation='tanh'), ]) print('Actor model') actor.summary() action_input = Input(shape=(1, ), name='action_input') observation_input = Input(shape=(1, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation]) x = BatchNormalization()(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(obs_size + 1, activation='relu')(x) x = GaussianDropout(dropout)(x) x = Dense(1, activation='linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print('\nCritic Model') critic.summary() memory = SequentialMemory(limit=EPISODES * STEPS_PER_EPISODE, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.5, mu=0., sigma=.5) agent = DDPGAgent( nb_actions=1, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=STEPS_PER_EPISODE * WARMUP_EPISODES, nb_steps_warmup_actor=STEPS_PER_EPISODE * WARMUP_EPISODES, random_process=random_process, gamma=0.95, target_model_update=0.01) agent.compile('rmsprop', metrics=['mae']) weights_filename = 'ddpg_{}_weights.h5f'.format(env.instrument.symbol) try: agent.load_weights(weights_filename) print( 'Using weights from {}'.format(weights_filename) ) # DDPGAgent actually uses two separate files for actor and critic derived from this filename except IOError: pass agent.fit(env, nb_steps=EPISODES * STEPS_PER_EPISODE, visualize=True, verbose=2, nb_max_episode_steps=STEPS_PER_EPISODE) agent.save_weights(weights_filename, overwrite=True)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.2, mu=0.0,
                                          sigma=0.25, sigma_min=0.01,
                                          n_steps_annealing=500000)
# agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
#                   random_process=random_process, gamma=.99, target_model_update=1E-3,
#                   memory=memory, nb_steps_warmup_critic=10000, nb_steps_warmup_actor=100000)
# Note: the random process defined above is not passed in; exploration noise is disabled here.
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=25000, nb_steps_warmup_actor=25000,
                  random_process=None, gamma=.99, target_model_update=1E-3)
agent.compile(Adam(lr=0.001, clipnorm=1.))  # was 1

# agent.fit(env, nb_steps=500000, visualize=False, verbose=1, nb_max_episode_steps=10000,
#           log_interval=10000, action_repetition=10)
# agent.save_weights('ddpg_{}_SimpleSimFuelReward.h5f'.format(ENV_NAME), overwrite=True)

agent.load_weights('ddpg_{}_SimpleSimFuelReward.h5f'.format(ENV_NAME))
agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=10000)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3, delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.model, overwrite=True)

if not args.train:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    # agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')  # added to continue training
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()
else:
    agent.load_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f')
    agent.fit(env, nb_steps=train_steps, verbose=0)
    agent.save_weights('/home/bdb3m/swmm_rl/agent_weights_gated/ddpg_swmm_weights.h5f',
                       overwrite=True)
    env.close()

if file_num % 1000 == 0:
    print("finished training on ", file_num, " files")
file_num += 1

# loop through testing envs
for file in os.scandir("/home/bdb3m/swmm_rl/syn_inp_test_gated"):
    if file.name.endswith('.inp'):
        print('testing ', file.name)