def random_sample():
    """Roll out a freshly initialized (untrained) policy to get a baseline.

    Mean trajectory length over 100 trials for a newly initialized policy
    (random_sample_policy) from one such run:
        pos1           6.28
        pos2           6.47
        pos3           6.30
        pos_test       6.94
        resetHandEasy  5.78
    """
    parameters = OriginalParams(Parmap(time_step=1e-2, nb_iter=500))
    env = AllegroHand(parameters)
    mdim_vals = {}
    name_pathl = {}
    tf.reset_default_graph()
    sess = tf.Session()
    num_tra = 100
    with tf.compat.v1.variable_scope("random_sample_policy"):
        policy = MLPGaussianPolicy(sess,
                                   env.action_space.shape[0],
                                   env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
    sess.run(tf.compat.v1.global_variables_initializer())
    all_positions = position_list.copy()
    all_positions.append(pos_test)
    for position_func in all_positions:
        nb_traj_length = run_lower_with_environment(
            position_func, env,
            lambda obs: policy.get_action(obs),
            num_trajectories=num_tra)
        name_pathl[position_func.__name__] = nb_traj_length
    env.close()
    U.get_session().close()
    mdim_vals["random_sample_policy"] = name_pathl
    return pd.DataFrame(mdim_vals)
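# Usage sketch: the returned DataFrame is the random-policy baseline that
# trained policies are compared against (the output file name below is
# hypothetical, chosen for illustration):
#
#   baseline = random_sample()
#   baseline.to_csv("random_baseline.csv")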
def loadParamMap(folder, uuid_s=36):
    """Load a run's Parmap from its folder and attach its trajectory scores."""
    run_folder = os.path.join(TB_DIR, folder)
    params = Parmap.from_config_file(run_folder + "/config")
    num_traj_score = read_num_trajectories_score(run_folder)
    traj_progress = read_num_trajectories_progress(run_folder)
    params.score = num_traj_score
    params.score_p = traj_progress
    params.log_name = folder
    params.pol_saving_name = folder[-uuid_s:]  # last uuid_s chars hold the run UUID
    params.ParamObject = params
    return params
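# Hedged helper sketch: gather every run under TB_DIR into a list for
# comparison (assumption: folders without a readable config raise, so they
# are simply skipped here):
def load_all_param_maps():
    runs = []
    for folder in os.listdir(TB_DIR):
        try:
            runs.append(loadParamMap(folder))
        except Exception:
            # not a run folder, or config missing/corrupt
            continue
    return runs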
def find_upper_policy_run_suffixes():
    """Collect the Parmaps of all runs that trained the full four-finger hand."""
    folder_names = os.listdir(TB_DIR)
    par_list = []
    for folder in folder_names:
        try:
            params = Parmap.from_config_file(TB_DIR + folder + "/config")
            if len(params.get_fingers()) == 4:
                par_list.append(params)
        except KeyError as keyr:
            # TODO: handle runs whose config is missing required keys
            print(folder)
            print(keyr)
    return par_list
def save_position_as_images(res=1600):
    """Render every hand-object configuration and save it as an image."""
    env = AllegroHand(Parmap(), gui=True, show_fingers=True)
    for pos in all_pos_list:
        # Optionally reposition the debug camera first:
        # env.pc.reset_debug_visualizer_camera(CamProps.camera_distance,
        #                                      CamProps.camera_yaw,
        #                                      CamProps.camera_pitch,
        #                                      CamProps.camera_target_position)
        env = pos(env)
        env = adhere_target_to_initial(env)
        env.pc.time_step = 1e-3
        env.pc.step_simulation()
        camera_image = env.pc.call(pybullet.getCameraImage, res, res)
        rgb_image = camera_image[2]
        img = Image.fromarray(rgb_image[:, :, 0:3])  # discard alpha channel
        img.save(pos.__name__ + '.pdf')
        time.sleep(1)
    env.close()
def multi_wrapper(args):
    """Pool worker: args is a (run_index, reward_function) pair; returns the index."""
    pars = OriginalParams(Parmap(time_step=1e-2, nb_iter=500, discount=.99))
    rwd_function = args[1]
    pars.rwd_func = rwd_function
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = rwd_function.__name__ + str(t)
    pars.run_name = run_name_suffix + pars.run_name
    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args[0]
def multi_wrapper(args):
    """Pool worker: args is a single movement_noise value, returned when done."""
    pars = OriginalParams(
        Parmap(time_step=1e-2, nb_iter=600, movement_noise=args))
    pars.rwd_func = weighted_or_dist
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "SimulatedNoise_{}_".format(t)
    pars.run_name = run_name_suffix + pars.run_name
    env = AllegronNoiseFromUpper(pars, gui=False, show_fingers=False)
    tf.reset_default_graph()
    sess = tf.Session()
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
    print("reset")
    time.sleep(0.1)
    U.get_session().close()
    env.close()
    time.sleep(0.1)
    return args
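# Hedged usage sketch for the wrapper above (the noise levels are placeholder
# values for illustration, not ones from any reported experiment). Each worker
# builds its own TF graph, so runs are fanned out over separate processes
# rather than threads:
def run_noise_sweep(noise_levels=(0.0, 0.01, 0.05)):
    from multiprocessing import Pool
    # maxtasksperchild=1 gives every run a fresh interpreter and TF state
    with Pool(processes=2, maxtasksperchild=1) as pool:
        return pool.map(multi_wrapper, noise_levels)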
def get_non_trained_env():
    pars = UpperParmap(Parmap())
    env = AllegroHand(pars)
    return env, pars
import os
import time

import baselines.common.tf_util as U

from setting_utils.param_handler import Parmap, TB_DIR, OriginalParams
from setting_utils.rewards import move_connection_to_object

# set the OpenAI baselines logging targets before the learner is imported
os.environ['OPENAI_LOG_FORMAT'] = 'stdout,log,csv,tensorboard'
os.environ['OPENAI_LOGDIR'] = TB_DIR

import tensorflow as tf
from ausy_base.learn_policy import learn_on_env as learn_ppo

if __name__ == "__main__":
    t = time.asctime().replace(" ", "_").replace(":", "_")
    print('Start run at %s' % t)
    pars = OriginalParams(Parmap(nb_iter=300))
    pars.rwd_func = move_connection_to_object
    env = AllegroCollisionFromTactileHand(pars, gui=False, show_fingers=False)

    # logging
    run_name_suffix = "TestingNewSaveDir_{}_".format(t)
    pars.run_name = run_name_suffix + pars.run_name
    print(dict(**vars(pars)))

    # init session
    tf.reset_default_graph()
    sess = tf.Session()

    # learn
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")

    # clean session close
    time.sleep(0.1)
    U.get_session().close()
def build_action_space(param_map: Parmap):
    """Return the (low, high) action bounds, one entry per controlled joint."""
    low = -np.ones(len(param_map.get_fingers()) * len(OTHER_LOW))
    high = np.ones(len(param_map.get_fingers()) * len(OTHER_HIGH))
    return low, high
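# Sketch of how these bounds would typically be wrapped for gym (assumption:
# the hand environments expose their action_space as a gym.spaces.Box built
# from exactly these arrays):
def build_gym_action_space(param_map: Parmap):
    import gym.spaces  # local import; the surrounding module may not need gym
    low, high = build_action_space(param_map)
    return gym.spaces.Box(low=low, high=high, dtype=np.float64)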
def learn_on_env(*, session, env: gym.Env, parameters: Parmap, seed=0,
                 dir_out='exp/', tf_saver=None):
    assert parameters is not None, "You have to pass the run's parameters as a Parmap!"
    logger.configure(TB_DIR + parameters.run_name)
    parameters.log_run()

    # seeding; comment in if you want deterministic behavior across runs
    # np.random.seed(seed)
    # tf.set_random_seed(seed)

    # MLP for the policy
    with tf.compat.v1.variable_scope(parameters.run_name):
        policy = MLPGaussianPolicy(session,
                                   env.action_space.shape[0],
                                   env.observation_space.shape[0],
                                   mean_mult=parameters.mean_mult,
                                   init_sigma=parameters.init_sigma,
                                   min_sigma=parameters.min_sigma)
        p_update = PolicyUpdateObjective(session, policy,
                                         e_clip=parameters.e_clip,
                                         a_lrate=parameters.a_lrate)

    # MLP for the v-function
    with tf.compat.v1.variable_scope("v_func"):
        vmlp = policies.getMLPdef(1, env.observation_space.shape[0])
        p_eval = PolicyEvalV(session, policy, vmlp, v_lrate=parameters.v_lrate)

    # P3O: shrink the exploration noise geometrically, so that sigma decays
    # from init_sigma down to min_sigma over nb_iter iterations
    delta_logsig = np.float32(
        (np.log(parameters.init_sigma) - np.log(parameters.min_sigma))
        / parameters.nb_iter)
    reduce_exploration_op = tf.compat.v1.assign(
        policy.min_sigma,
        tf.maximum(parameters.min_sigma,
                   policy.min_sigma.value() / tf.exp(delta_logsig)))
    reduce_exploration = lambda: session.run(reduce_exploration_op)

    p3o = P3O(session, policy, p_eval, p_update)
    session.run(tf.compat.v1.global_variables_initializer())

    if not tf_saver:
        tf_saver = tf.compat.v1.train.Saver(
            var_list=tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
                scope=parameters.run_name + "/pi"),
            max_to_keep=1)

    for it in range(parameters.nb_iter):
        logger.record_tabular("iteration", it + 1)

        # generate transition data by interacting with the env
        new_paths = dat.rollouts(env, policy=policy.get_action,
                                 min_trans=parameters.min_trans_per_iter,
                                 render=False)
        new_paths["logprob"] = policy.get_log_proba(new_paths["obs"],
                                                    new_paths["act"])
        logger.record_tabular("num_trajectories", new_paths["nb_paths"])
        # a terminal reward that differs from the extrinsic reward means the
        # target was reached; see the test2 reward function
        rwd_vals_on_paths_end = new_paths["rwd"][new_paths["done"]]
        logger.record_tabular(
            "reached_rel",
            np.sum(rwd_vals_on_paths_end != parameters.extrinsic_reward)
            / new_paths['nb_paths'])
        logger.record_tabular("avg / run",  # average return per trajectory
                              np.sum(new_paths["rwd"]) / new_paths["nb_paths"])
        logger.record_tabular("avg rwd", np.mean(new_paths["rwd"]))
        logger.record_tabular("avg rwd non end",
                              np.mean(new_paths["rwd"][~new_paths["done"]]))

        # keep only the last max_v_data_size transitions for off-policy policy eval
        if it == 0:
            v_paths = new_paths
        else:
            dat.merge_data(v_paths, new_paths)
            dat.delete_data(v_paths, parameters.max_v_data_size)

        # update policy
        gen_adv = p3o.iteration(v_paths, new_paths, parameters.discount,
                                parameters.lam_trace,
                                parameters.epochs_per_iter,
                                parameters.batch_size,
                                reduce_exploration, verbose=True)

        # log update data
        if dir_out is not None:
            entrop_after = policy.get_entropy(None, None)
            log_act_probas_new = policy.get_log_proba(new_paths["obs"],
                                                      new_paths["act"])
            # relative change of the action probabilities under the new policy
            diff = np.exp(log_act_probas_new - new_paths["logprob"]) - 1
            logger.record_tabular("diff_mean", diff.mean())
            logger.record_tabular("gen_adv_sum", gen_adv.sum())
        logger.dump_tabular()

        if it % 30 == 0:
            # regularly save the policy
            print("saving " + parameters.run_name)
            tf_saver.save(session,
                          os.path.join(TB_DIR, parameters.run_name) + '/',
                          meta_graph_suffix="meta",
                          write_meta_graph=True,
                          global_step=it)
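# Minimal restore sketch (assumption: the caller has already rebuilt the same
# MLPGaussianPolicy graph under the identical variable scope, since the saver
# above only stores variables from the "<run_name>/pi" scope):
def restore_policy(session, run_name):
    saver = tf.compat.v1.train.Saver(
        var_list=tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=run_name + "/pi"))
    checkpoint = tf.compat.v1.train.latest_checkpoint(
        os.path.join(TB_DIR, run_name))
    saver.restore(session, checkpoint)
    return saver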
from pybullet_envs.gym_locomotion_envs import HumanoidBulletEnv
from setting_utils.param_handler import Parmap
from ausy_base.learn_policy import learn_on_env as learn_ppo
import time
import tensorflow as tf

if __name__ == '__main__':
    pars = Parmap()
    # env = Walker2DBulletEnv(render=True)
    env = HumanoidBulletEnv(render=False)

    # for logging
    t = time.asctime().replace(" ", "_").replace(":", "_")
    run_name_suffix = "PPO_test_{}_".format(t)
    pars.run_name = run_name_suffix + pars.run_name

    tf.reset_default_graph()
    sess = tf.Session()
    pars.nb_iter = 300000
    learn_ppo(session=sess, env=env, parameters=pars, dir_out="True")
if __name__ == '__main__':
    pars = Parmap(time_step=1e-2)
    pars.rwd_func = lambda x: (0, False)  # dummy reward: zero reward, never done
    env = AllegroCollisionFromTactileHand(pars, gui=True, show_fingers=True)
    all_pos = all_pos_list.copy()
    for pos in all_pos:
        env = pos(env)
        zero_act = get_joint_state(env.hand)
        env.pc.step_simulation()
        for i in range(3):
            # put a debugger breakpoint here and use the console to
            # prototype new hand-object configurations
            env.show_collisions()
            tac_state_dict = tactile_state_dict(env.hand)
            print(env.reward())
            for finger in tac_state_dict:
                print(finger, np.sum(tac_state_dict[finger]))
def __init__(self, allegros_params: Parmap, gui=False, show_fingers=False):
    self.objects_with_collision = allegros_params.get_fingers()
    super(AllegroCollisionFromTactileHand, self).__init__(
        allegros_params, gui, show_fingers)