def main():
    # Parse command line args
    parser = arg_parser()
    parser.add_argument("-hw", "--use-hardware", action="store_true")
    parser.add_argument("-l", "--load", type=str, default=None)
    args = parser.parse_args()

    def make_env():
        env_out = QubeSwingupEnv(use_simulator=not args.use_hardware, frequency=250)
        return env_out

    try:
        env = DummyVecEnv([make_env])
        policy = MlpPolicy
        model = PPO2(policy=policy, env=env)
        model.load_parameters(args.load)

        print("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:], reward, done, _ = env.step(actions)
            if not args.use_hardware:
                env.render()
            if done:
                print("done")
                obs[:] = env.reset()
    finally:
        env.close()
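# Illustrative sketch (not part of the original script): the command line parsing above passes
# args.load straight to model.load_parameters(), which fails if --load was omitted. A guard like
# the one below, using only standard argparse, would fail early with a clear message. The helper
# name and the use of argparse.ArgumentParser instead of arg_parser() are assumptions.
def _parse_args_with_required_load():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-hw", "--use-hardware", action="store_true")
    parser.add_argument("-l", "--load", type=str, default=None)
    args = parser.parse_args()
    if args.load is None:
        # fail before constructing the model rather than passing None to load_parameters()
        parser.error("--load must point to a saved PPO2 parameter file")
    return args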
def neuron_values_generator(args, save_dir, pi_theta, eval_timesteps):
    # logger.log(f"#######EVAL: {args}")
    neuron_values_list = []

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # policy = MlpPolicy
    # model = PPO2(policy=policy, env=env, n_steps=args.n_steps, nminibatches=args.nminibatches,
    #              lam=0.95, gamma=0.99, noptepochs=10, ent_coef=0.0, learning_rate=3e-4,
    #              cliprange=0.2, optimizer=args.optimizer)
    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    env.render()
    ep_infos = []
    while True:
        neuron_values, actions, _, _, _ = model.step_with_neurons(obs)
        # neuron_values = model.give_neuron_values(obs)
        # neuron_values_list.append(neuron_values)
        yield neuron_values

        obs, rew, done, infos = env.step(actions)
        env.render()
        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()
def _make_warmstart_cartpole():
    """Warm-start VecNormalize by stepping through CartPole"""
    venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
    venv = VecNormalize(venv)
    venv.reset()
    venv.get_original_obs()

    for _ in range(100):
        actions = [venv.action_space.sample()]
        venv.step(actions)
    return venv
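# Hedged sketch: other functions in this file restore normalization statistics via
# env.load_running_average(save_dir), so a warm-started VecNormalize like the one above could
# plausibly be persisted the same way. The save_running_average() call and the default
# directory name are assumptions about this stable-baselines version, not taken from the source.
def _warmstart_and_save(save_dir="./vecnormalize_stats"):
    os.makedirs(save_dir, exist_ok=True)
    venv = _make_warmstart_cartpole()    # warm-started VecNormalize from above
    venv.save_running_average(save_dir)  # assumed counterpart to load_running_average()
    return venv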
def test(env_id, seed, policy):
    """
    Load and run a trained PPO2 model on the Pad environment, for testing purposes

    :param env_id: (str) the environment id string
    :param seed: (int) Used to seed the random generator.
    :param policy: (Object) The policy model to use (MLP, CNN, LSTM, ...)
    """
    # if 'lstm' in policy:
    #     print('LSTM policies not supported for drawing')
    #     return 1
    env = DummyVecEnv([PadEnvRender for _ in range(1)])  # needed for LSTM policies
    # else:
    #     env = PadEnvRender()

    env = VecFrameStack(env, 8)
    model = PPO2.load('./pad_5combo_ppo2.pkl', env)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0

        while not done:
            env.render()
            action, _ = model.predict(obs)
            obs, rew, done, _ = env.step(action)
            done = done.any()
            episode_rew += rew
            time.sleep(1 / 24.)
        if done:
            print('Episode reward:', episode_rew)
def test_identity_multibinary(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multibinary action space

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiBinary(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000, seed=0)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    assert np.prod(model.action_probability(obs, actions=env.action_space.sample()).shape) == 1, \
        "Error: not scalar probability"
def visualize_neurons(args, save_dir, pi_theta, eval_timesteps):
    # logger.log(f"#######EVAL: {args}")

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = True
        env_out.env.visualize = False
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    if pi_theta is not None:
        model.set_pi_from_flat(pi_theta)

    if args.normalize:
        env.load_running_average(save_dir)

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    ep_infos = []
    for _ in range(eval_timesteps):
        actions = model.step(obs)[0]
        neuron_values = model.give_neuron_values(obs)

        obs, rew, done, infos = env.step(actions)

        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        # env.render()
        done = done.any()
        if done:
            if pi_theta is None:
                episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
                print(f'episode_rew={episode_rew}')
            obs = env.reset()

    return safe_mean([ep_info['r'] for ep_info in ep_infos])
def run_model(save_name, nw_type, log_dir='./Logs/', log_name=None, env_name='CartPole-v2',
              runs=100, save_results=False):
    # Set up an environment and a model
    env = DummyVecEnv([lambda: gym.make(env_name)])
    model = load_model(nw_type=nw_type, log_dir=log_dir, env_name=env_name,
                       log_name=log_name, save_name=save_name)

    # Run the environment with the loaded model "runs" times
    max_reward = 0
    max_steps = 0
    rew_vec = []
    header = 'theta1,alpha1,dtheta1,dalpha1,theta2,alpha2,dtheta2,dalpha2'
    for i in range(runs):
        # Reset the environment
        obs, done = env.reset(), False
        episode_rew = 0
        ep_steps = 0
        obs_vec = obs.reshape(-1, 1)

        # This loop runs the environment until a terminal state is reached
        while not done:
            action, _states = model.predict(obs)
            obs, rewards, done, info = env.step(action)
            env.render()
            episode_rew += rewards[-1]
            ep_steps += 1
            obs_vec = np.append(obs_vec, obs.reshape(-1, 1) * 180 / np.pi, axis=1)

        # Save the reached reward and check whether it set a new record
        rew_vec.append(episode_rew)
        print("Ep reward: ", '{0:.2f}'.format(episode_rew),
              '\tRecord: ', '{0:.2f}'.format(max_reward),
              '\tEp steps: ', ep_steps,
              '\tSteps record: ', max_steps)
        np.savetxt('rew_vec.csv', rew_vec, delimiter=',')

        if episode_rew > max_reward:
            max_reward = episode_rew
            if save_results:
                np.savetxt('obs_vec.csv', obs_vec.T, delimiter=',', header=header,
                           fmt='%1.3f', comments='')
        if ep_steps > max_steps:
            max_steps = ep_steps
def test_vec_env():
    """Test VecNormalize Object"""

    def make_env():
        return gym.make(ENV_ID)

    env = DummyVecEnv([make_env])
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    _, done = env.reset(), [False]
    obs = None
    while not done[0]:
        actions = [env.action_space.sample()]
        obs, _, done, _ = env.step(actions)
    assert np.max(obs) <= 10
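# Companion sketch to test_vec_env(): VecNormalize is created above with clip_reward=10., so the
# normalized rewards returned by step() should also stay inside [-10, 10]. This check is written
# here for illustration and is not taken from the original test suite; it reuses the same ENV_ID.
def test_vec_env_reward_clipping():
    env = DummyVecEnv([lambda: gym.make(ENV_ID)])
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    env.reset()
    for _ in range(100):
        _, reward, done, _ = env.step([env.action_space.sample()])
        assert np.abs(reward[0]) <= 10.
        if done[0]:
            env.reset()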
def test_identity_multidiscrete(model_func):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multidiscrete action space

    :param model_func: (lambda (Gym Environment): BaseRLModel) the model generator
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    model = model_func(env)
    model.learn(total_timesteps=1000, seed=0)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward
def test_identity_multibinary(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multibinary action space

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiBinary(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000, seed=0)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward
def test_identity(learn_func):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param learn_func: (lambda (Gym Environment): A2CPolicy) the policy generator
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = learn_func(env)

    n_trials = 1000
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward

    assert reward_sum > 0.9 * n_trials
    # Free memory
    del model, env
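# The identity tests above all repeat the same predict/step/accumulate loop. A small helper like
# this one (illustrative only, not part of the original test module) captures that pattern once;
# it relies only on model.predict() and env.step() exactly as used above.
def _average_reward(model, env, n_trials=1000):
    reward_sum = 0
    obs = env.reset()
    for _ in range(n_trials):
        action, _ = model.predict(obs)
        obs, reward, _, _ = env.step(action)
        reward_sum += reward
    # DummyVecEnv returns one reward per sub-environment; report the per-step mean
    return float(np.sum(reward_sum)) / n_trials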
def test(model):
    env = DummyVecEnv([make_env] * n_env)
    # env = VecNormalize.load("models/machine_snap_env.bin", venv=env)
    # env.training = False
    for trial in range(1):
        obs = env.reset()
        running_reward = 0.0
        alpha = 0.01
        for step in range(5000):
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            reward = reward[0]
            done = done[0]
            info = info[0]
            # running_reward = running_reward * (1 - alpha) + alpha * reward
            running_reward += reward
            # print(obs, reward, done, info, running_reward)
            if done:
                print("Finished after {} timesteps".format(step + 1))
                break
            else:
                env.envs[0].render()
def test_model_manipulation(model_class):
    """
    Test if the algorithm can be loaded and saved without any issues, the environment switching
    works and that the action prediction works

    :param model_class: (BaseRLModel) A model
    """
    try:
        env = gym.make(ENV_ID)
        env = DummyVecEnv([lambda: env])

        # create and train
        model = model_class(policy=MlpPolicy, env=env)
        model.learn(total_timesteps=NUM_TIMESTEPS)

        # predict and measure the acc reward
        acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        model.save("./test_model")

        del model, env

        # loading
        model = model_class.load("./test_model")

        # changing environment (note: this can be done at loading)
        env = gym.make(ENV_ID)
        env = DummyVecEnv([lambda: env])
        model.set_env(env)

        # predict the same output before saving
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        # assert <5% diff
        assert abs(acc_reward - loaded_acc_reward) / max(acc_reward, loaded_acc_reward) < 0.05, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=int(NUM_TIMESTEPS / 2))

        # validate no reset post learning
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        # assert <5% diff
        assert abs(acc_reward - loaded_acc_reward) / max(acc_reward, loaded_acc_reward) < 0.05, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        # Free memory
        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
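# Hedged variant of the save/load round trip used above: writing to a temporary directory means
# no ./test_model file can be left behind, even without the try/finally cleanup. Purely
# illustrative; the original tests deliberately keep the fixed "./test_model" path.
def _save_load_roundtrip(model, model_class):
    import tempfile
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "test_model")
        model.save(path)
        return model_class.load(path)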
def test_model_manipulation(model_policy):
    """
    Test if the algorithm (with a given policy) can be loaded and saved without any issues,
    the environment switching works and that the action prediction works

    :param model_policy: (BaseRLModel, Object) A model, policy pair
    """
    model_class, policy = model_policy

    try:
        env = DummyVecEnv([lambda: IdentityEnv(10)])

        # check the env is deterministic
        action = [env.action_space.sample()]
        set_global_seeds(0)
        obs = env.step(action)[0]
        for _ in range(N_TRIALS):
            set_global_seeds(0)
            assert obs == env.step(action)[0], "Error: environment tested not deterministic with the same seed"

        # create and train
        model = model_class(policy=policy, env=env)
        model.learn(total_timesteps=50000)

        # predict and measure the acc reward
        acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            acc_reward += reward
        acc_reward = sum(acc_reward) / N_TRIALS

        # saving
        model.save("./test_model")

        del model, env

        # loading
        model = model_class.load("./test_model")

        # changing environment (note: this can be done at loading)
        env = DummyVecEnv([lambda: IdentityEnv(10)])
        model.set_env(env)

        # predict the same output before saving
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between loading and saving"

        # learn post loading
        model.learn(total_timesteps=1000)

        # validate no reset post learning
        loaded_acc_reward = 0
        obs = env.reset()
        set_global_seeds(0)
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, reward, _, _ = env.step(action)
            loaded_acc_reward += reward
        loaded_acc_reward = sum(loaded_acc_reward) / N_TRIALS
        assert abs(acc_reward - loaded_acc_reward) < 0.1, \
            "Error: the prediction seems to have changed between pre learning and post learning"

        # predict new values
        obs = env.reset()
        for _ in range(N_TRIALS):
            action, _ = model.predict(obs)
            obs, _, _, _ = env.step(action)

        del model, env
    finally:
        if os.path.exists("./test_model"):
            os.remove("./test_model")
def main():
    """
    Runs the test
    """
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path',
                        default="/cvgl2/u/surajn/workspace/saved_models/sawyerlift_ppo2/model")
    parser.add_argument('--images', default=False)
    args = parser.parse_args()
    logger.configure()

    if not args.play:
        model, env = train(args.env, num_timesteps=args.num_timesteps, seed=args.seed,
                           model_path=args.model_path, images=args.images)
    if args.play:
        def make_env():
            env_out = GymWrapper(
                suite.make(
                    "SawyerLift",
                    use_camera_obs=False,          # do not use pixel observations
                    has_offscreen_renderer=False,  # not needed since not using pixel obs
                    has_renderer=True,             # make sure we can render to the screen
                    reward_shaping=True,           # use dense rewards
                    control_freq=10,               # control should happen fast enough so that simulation looks smooth
                ))
            env_out.reward_range = None
            env_out.metadata = None
            env_out.spec = None
            env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
            return env_out

        # env = make_env()
        env = DummyVecEnv([make_env])
        env = VecNormalize(env)

        policy = MlpPolicy
        # model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=2048, clip_param=0.2, entcoeff=0.0,
        #              optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99,
        #              lam=0.95, schedule='linear', verbose=1)
        model = TRPO(MlpPolicy, env, timesteps_per_batch=1024, max_kl=0.01, cg_iters=10,
                     cg_damping=0.1, entcoeff=0.0, gamma=0.99, lam=0.98, vf_iters=5,
                     vf_stepsize=1e-3)
        # note: in stable-baselines, load() is normally a classmethod that returns a new model;
        # as written, its return value is discarded and the freshly constructed TRPO weights are kept
        model.load(args.model_path)
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            env.render()
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
def main():
    import sys
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()
    this_run_dir = get_dir_path_for_this_run(args)
    plot_dir_alg = get_plot_dir(args)

    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    if not os.path.exists(plot_dir_alg):
        os.makedirs(plot_dir_alg)

    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    if args.normalize:
        env = VecNormalize(env)

    # policy = MlpPolicy
    # model = PPO2(policy=policy, env=env, n_steps=args.n_steps, nminibatches=args.nminibatches,
    #              lam=0.95, gamma=0.99, noptepochs=10, ent_coef=0.0, learning_rate=3e-4,
    #              cliprange=0.2, optimizer=args.optimizer)
    model = PPO2.load(f"{save_dir}/ppo2")  # this also loads V function
    model.set_pi_from_flat(final_params)
    if args.normalize:
        env.load_running_average(save_dir)

    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    # env = VecVideoRecorder(env, "./", record_video_trigger=lambda x: x == 0,
    #                        video_length=3000, name_prefix="3000000agent-{}".format(args.env))

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    layer_values_list = [[neuron_value.reshape((-1, 1))] for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False
    for _ in range(3000):
        actions = model.step(obs)[0]

        obs, rew, done, infos = env.step(actions)
        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)
        for i, layer in enumerate(neuron_values):
            layer_values_list[i].append(layer.reshape((-1, 1)))

        fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values,
                               neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        env.render()
        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # hstack into big matrices
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    for contact_body_name in contact_values:
        body_contact_dict = contact_values[contact_body_name]
        for name in body_contact_dict:
            body_contact_dict[name] = np.hstack(body_contact_dict[name])

    layer_values_list = [np.hstack(layer_list) for layer_list in layer_values_list][1:-2]  # drop variance

    # plt.scatter(lagrangian_values["M"][15], layer_values_list[1][2])
    # plt.scatter(lagrangian_values["M"][11], layer_values_list[0][63])

    out_dir = f"/home/panda-linux/PycharmProjects/low_dim_update_dart/low_dim_update_stable/neuron_vis/plots_{args.env}_{args.num_timesteps}"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    all_weights = model.get_all_weight_values()
    for ind, weights in enumerate(all_weights):
        fname = f"{out_dir}/weights_layer_{ind}.txt"
        np.savetxt(fname, weights)

    PLOT_CUTOFF = steps_to_first_done
    plot_everything(lagrangian_values, layer_values_list, out_dir, PLOT_CUTOFF)
    scatter_the_linear_significant_ones(lagrangian_values, layer_values_list,
                                        threshold=0.6, out_dir=out_dir)
    scatter_the_nonlinear_significant_but_not_linear_ones(
        lagrangian_values, layer_values_list,
        linear_threshold=0.3, nonlinear_threshold=0.6, out_dir=out_dir)

    # A large block of commented-out analysis code followed here in the original: exporting the
    # contact, neuron and Lagrangian values to tab-separated files, computing (normalized)
    # correlations between Lagrangian quantities (M, COM, Coriolis) and layer activations via
    # get_correlations()/get_normalized_correlations(), and fitting one- and two-permutation
    # linear regressions whose best fits were dumped to *.txt files. It is summarized here
    # because it was entirely disabled.
    pass
def visualize_policy_and_collect_COM(
        augment_num_timesteps, top_num_to_include_slice, augment_seed, augment_run_num,
        network_size, policy_env, policy_num_timesteps, policy_run_num, policy_seed,
        eval_seed, eval_run_num, learning_rate, additional_note, metric_param):
    result_dir = get_result_dir(policy_env, policy_num_timesteps, policy_run_num, policy_seed,
                                eval_seed, eval_run_num, additional_note, metric_param)
    args = AttributeDict()

    args.normalize = True
    args.num_timesteps = augment_num_timesteps
    args.run_num = augment_run_num
    args.alg = "ppo2"
    args.seed = augment_seed

    logger.log(f"#######VISUALIZE: {args}")

    # non_linear_global_dict
    linear_global_dict, non_linear_global_dict, lagrangian_values, input_values, layers_values, all_weights = \
        read_all_data(policy_env, policy_num_timesteps, policy_run_num, policy_seed, eval_seed,
                      eval_run_num, additional_note=additional_note)
    timestamp = get_time_stamp('%Y_%m_%d_%H_%M_%S')
    experiment_label = f"learning_rate_{learning_rate}timestamp_{timestamp}_augment_num_timesteps{augment_num_timesteps}" \
                       f"_top_num_to_include{top_num_to_include_slice.start}_{top_num_to_include_slice.stop}" \
                       f"_augment_seed{augment_seed}_augment_run_num{augment_run_num}_network_size{network_size}" \
                       f"_policy_num_timesteps{policy_num_timesteps}_policy_run_num{policy_run_num}_policy_seed{policy_seed}" \
                       f"_eval_seed{eval_seed}_eval_run_num{eval_run_num}_additional_note_{additional_note}"

    entry_point = 'gym.envs.dart:DartWalker2dEnv_aug_input'

    this_run_dir = get_experiment_path_for_this_run(
        entry_point, args.num_timesteps, args.run_num, args.seed,
        learning_rate=learning_rate, top_num_to_include=top_num_to_include_slice,
        result_dir=result_dir, network_size=network_size, metric_param=metric_param)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)
    aug_plot_dir = get_aug_plot_dir(this_run_dir) + "_vis"

    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    args.env = f'{experiment_label}_{entry_point}-v1'
    register(
        id=args.env,
        entry_point=entry_point,
        max_episode_steps=1000,
        kwargs={
            'linear_global_dict': linear_global_dict,
            'non_linear_global_dict': non_linear_global_dict,
            'top_to_include_slice': top_num_to_include_slice,
            'aug_plot_dir': aug_plot_dir,
            "lagrangian_values": lagrangian_values,
            "layers_values": layers_values
        })

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        return env_out

    env = DummyVecEnv([make_env])
    walker_env = env.envs[0].env.env
    walker_env.disableViewer = False

    if args.normalize:
        env = VecNormalize(env)

    set_global_seeds(args.seed)
    walker_env.seed(args.seed)

    model = PPO2.load(f"{save_dir}/ppo2", seed=augment_seed)
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)
    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()

    env = VecVideoRecorder(env, aug_plot_dir, record_video_trigger=lambda x: x == 0,
                           video_length=3000, name_prefix="vis_this_policy")

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))] for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    # epi_rew = 0
    for _ in range(3000):
        actions = model.step(obs)[0]

        obs, rew, done, infos = env.step(actions)
        # epi_rew += rew[0]
        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)
        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            # print(f'episode_rew={epi_rew}')
            # epi_rew = 0
            obs = env.reset()

    # hstack into big matrices
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])

    input_values = np.hstack(raw_layer_values_list[0])
    layers_values = [np.hstack(layer_list) for layer_list in raw_layer_values_list][1:-2]  # drop variance and inputs

    for i, com in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com)), com)
        plt.xlabel("time")
        plt.ylabel(f"COM{i}")
        plt.savefig(f"{aug_plot_dir}/COM{i}.jpg")
        plt.close()
    plt.setp(lines, linewidth=1.0, alpha=0.8)
    plt.xlabel('update')
    plt.ylabel('rewards')
    plt.show()


if __name__ == '__main__':
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    warnings.simplefilter(action='ignore', category=FutureWarning)
    warnings.simplefilter(action='ignore', category=Warning)

    log_path = os.path.join('/', 'home', 'user', 'Dropbox', 'MATLAB_dropbox', 'DeepMimic', 'log')
    run_id = 'run_' + '10071436'
    run_file = run_id + '_simpleHumanoid.zip'

    plot_reward_portions(os.path.join(log_path, run_id, 'reward_portions.txt'))

    envs = DummyVecEnv([make_env(1)])
    model = PPO2.load(os.path.join(log_path, run_file), envs)

    obs = envs.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = envs.step(action)
        envs.render()
        time.sleep(1 / 30)
def eval_trained_policy_and_collect_data(eval_seed, eval_run_num, policy_env,
                                         policy_num_timesteps, policy_seed, policy_run_num,
                                         additional_note):
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num
    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        env_out.seed(eval_seed)
        return env_out

    env = DummyVecEnv([make_env])
    running_env = env.envs[0].env.env

    set_global_seeds(eval_seed)
    running_env.seed(eval_seed)

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=eval_seed)
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)  # is it necessary?
    running_env = env.venv.envs[0].env.env

    lagrangian_values = {}

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()
    # env = VecVideoRecorder(env, "./", record_video_trigger=lambda x: x == 0,
    #                        video_length=3000, name_prefix="3000000agent-{}".format(args.env))

    # init lagrangian values
    for lagrangian_key in lagrangian_keys:
        flat_array = running_env.get_lagrangian_flat_array(lagrangian_key)
        lagrangian_values[lagrangian_key] = [flat_array]

    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))] for neuron_value in neuron_values]

    # env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    for _ in range(30000):
        actions = model.step(obs)[0]

        obs, rew, done, infos = env.step(actions)
        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)
        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        # fill lagrangian values
        for lagrangian_key in lagrangian_keys:
            flat_array = running_env.get_lagrangian_flat_array(lagrangian_key)
            lagrangian_values[lagrangian_key].append(flat_array)

        # env.render()
        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            obs = env.reset()

    # hstack into big matrices
    for lagrangian_key in lagrangian_keys:
        lagrangian_values[lagrangian_key] = np.hstack(lagrangian_values[lagrangian_key])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])

    input_values = np.hstack(raw_layer_values_list[0])
    layers_values = [np.hstack(layer_list) for layer_list in raw_layer_values_list][1:-2]  # drop variance and inputs

    data_dir = get_data_dir(policy_env=args.env, policy_num_timesteps=policy_num_timesteps,
                            policy_run_num=policy_run_num, policy_seed=policy_seed,
                            eval_seed=eval_seed, eval_run_num=eval_run_num,
                            additional_note=additional_note)
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    lagrangian_values_fn = f"{data_dir}/lagrangian.pickle"
    with open(lagrangian_values_fn, 'wb') as handle:
        pickle.dump(lagrangian_values, handle, protocol=pickle.HIGHEST_PROTOCOL)

    input_values_fn = f"{data_dir}/input_values.npy"
    layers_values_fn = f"{data_dir}/layer_values.npy"
    np.save(input_values_fn, input_values)
    np.save(layers_values_fn, layers_values)

    all_weights = model.get_all_weight_values()
    for ind, weights in enumerate(all_weights):
        fname = f"{data_dir}/weights_layer_{ind}.txt"
        np.savetxt(fname, weights)
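# Illustrative counterpart to the dump above: how the pickled Lagrangian values and the saved
# activation arrays could be read back for analysis. The file names mirror the ones written in
# eval_trained_policy_and_collect_data(); the helper itself is not taken from the original code.
def _load_collected_data(data_dir):
    with open(f"{data_dir}/lagrangian.pickle", 'rb') as handle:
        lagrangian_values = pickle.load(handle)
    input_values = np.load(f"{data_dir}/input_values.npy")
    # layer_values.npy holds a list of arrays, so it is stored as an object array
    layers_values = np.load(f"{data_dir}/layer_values.npy", allow_pickle=True)
    return lagrangian_values, input_values, layers_values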
def visualize_policy_and_collect_COM(seed, run_num, policy_env, policy_num_timesteps,
                                     policy_seed, policy_run_num):
    logger.log(sys.argv)
    common_arg_parser = get_common_parser()
    args, cma_unknown_args = common_arg_parser.parse_known_args()
    args.env = policy_env
    args.seed = policy_seed
    args.num_timesteps = policy_num_timesteps
    args.run_num = policy_run_num
    this_run_dir = get_dir_path_for_this_run(args)
    traj_params_dir_name = get_full_params_dir(this_run_dir)
    save_dir = get_save_dir(this_run_dir)

    final_file = get_full_param_traj_file_path(traj_params_dir_name, "pi_final")
    final_params = pd.read_csv(final_file, header=None).values[0]

    def make_env():
        env_out = gym.make(args.env)
        env_out.env.disableViewer = False
        env_out = bench.Monitor(env_out, logger.get_dir(), allow_early_resets=True)
        env_out.seed(seed)
        return env_out

    env = DummyVecEnv([make_env])

    if args.normalize:
        env = VecNormalize(env)

    model = PPO2.load(f"{save_dir}/ppo2", seed=seed)
    model.set_pi_from_flat(final_params)

    if args.normalize:
        env.load_running_average(save_dir)
    sk = env.venv.envs[0].env.env.robot_skeleton
    lagrangian_values = {}

    obs = np.zeros((env.num_envs,) + env.observation_space.shape)
    obs[:] = env.reset()

    plot_dir = get_plot_dir(policy_env=args.env, policy_num_timesteps=policy_num_timesteps,
                            policy_run_num=policy_run_num, policy_seed=policy_seed,
                            eval_seed=seed, eval_run_num=run_num, additional_note="")
    if os.path.exists(plot_dir):
        shutil.rmtree(plot_dir)
    os.makedirs(plot_dir)

    env = VecVideoRecorder(env, plot_dir, record_video_trigger=lambda x: x == 0,
                           video_length=3000, name_prefix="3000000agent-{}".format(args.env))

    lagrangian_values["M"] = [sk.M.reshape((-1, 1))]
    lagrangian_values["COM"] = [sk.C.reshape((-1, 1))]
    lagrangian_values["Coriolis"] = [sk.c.reshape((-1, 1))]
    lagrangian_values["q"] = [sk.q.reshape((-1, 1))]
    lagrangian_values["dq"] = [sk.dq.reshape((-1, 1))]

    contact_values = {}

    neuron_values = model.give_neuron_values(obs)
    raw_layer_values_list = [[neuron_value.reshape((-1, 1))] for neuron_value in neuron_values]

    env.render()
    ep_infos = []
    steps_to_first_done = 0
    first_done = False

    # epi_rew = 0
    for _ in range(3000):
        actions = model.step(obs)[0]

        obs, rew, done, infos = env.step(actions)
        # epi_rew += rew[0]
        if done and not first_done:
            first_done = True
        if not first_done:
            steps_to_first_done += 1

        neuron_values = model.give_neuron_values(obs)
        for i, layer in enumerate(neuron_values):
            raw_layer_values_list[i].append(layer.reshape((-1, 1)))

        # fill_contacts_jac_dict(infos[0]["contacts"], contact_dict=contact_values, neuron_values=neuron_values)

        lagrangian_values["M"].append(sk.M.reshape((-1, 1)))
        lagrangian_values["q"].append(sk.q.reshape((-1, 1)))
        lagrangian_values["dq"].append(sk.dq.reshape((-1, 1)))
        lagrangian_values["COM"].append(sk.C.reshape((-1, 1)))
        lagrangian_values["Coriolis"].append(sk.c.reshape((-1, 1)))

        # time.sleep(1)
        for info in infos:
            maybe_ep_info = info.get('episode')
            if maybe_ep_info is not None:
                ep_infos.append(maybe_ep_info)

        env.render()
        done = done.any()
        if done:
            episode_rew = safe_mean([ep_info['r'] for ep_info in ep_infos])
            print(f'episode_rew={episode_rew}')
            # print(f'episode_rew={epi_rew}')
            # epi_rew = 0
            obs = env.reset()

    # hstack into big matrices
    lagrangian_values["M"] = np.hstack(lagrangian_values["M"])
    lagrangian_values["COM"] = np.hstack(lagrangian_values["COM"])
    lagrangian_values["Coriolis"] = np.hstack(lagrangian_values["Coriolis"])
    lagrangian_values["q"] = np.hstack(lagrangian_values["q"])
    lagrangian_values["dq"] = np.hstack(lagrangian_values["dq"])

    # for contact_body_name, l in contact_values.items():
    #     body_contact_dict = contact_values[contact_body_name]
    #     for name, l in body_contact_dict.items():
    #         body_contact_dict[name] = np.hstack(body_contact_dict[name])

    input_values = np.hstack(raw_layer_values_list[0])
    layers_values = [np.hstack(layer_list) for layer_list in raw_layer_values_list][1:-2]  # drop variance and inputs

    for i, com in enumerate(lagrangian_values["COM"]):
        plt.figure()
        plt.plot(np.arange(len(com)), com)
        plt.xlabel("time")
        plt.ylabel(f"COM{i}")
        plt.savefig(f"{plot_dir}/COM{i}.jpg")
        plt.close()