def _reset_sim(self):
    if self.save_data and self.episode > 0:
        # sim_poses_df = pandas.DataFrame(self.sim_poses, columns=["x", "y", "z", "rx", "ry", "rz"])
        sim_distance_001_df = pandas.DataFrame(self.distances, columns=["Distance"])
        sim_rewards_001_df = pandas.DataFrame(self.rewards, columns=["reward"])
        sim_rewards_001_df.to_feather(
            os.path.join(*[SAVE_PATH, "sim_rewards_001_{}.ftr".format(self.episode)]))
        sim_distance_001_df.to_feather(
            os.path.join(*[SAVE_PATH, "sim_distances_001_{}.ftr".format(self.episode)]))
        # sim_poses_000_df.to_feather(os.path.join(*[SAVE_PATH, "sim_poses_001_{}.ftr".format(self.episode)]))
        # self.sim_poses = []
        self.rewards = []
        self.distances = []
    self.episode += 1
    if self.vary:
        # Perturb the start pose: sample a Cartesian deviation and map it to joint space.
        deviation_x = numpy.random.uniform(-self.init_vary_range, self.init_vary_range)
        deviation_q = self.get_dq(deviation_x)
    else:
        deviation_q = numpy.array([0, 0, 0, 0, 0, 0])
    if self.ft_drift:
        self.ft_drift_val = numpy.random.uniform(-self.ft_drift_range, self.ft_drift_range)
    self.pos_drift_val = numpy.random.uniform(-self.pos_drift_range, self.pos_drift_range)
    # Rebuild the simulation from a freshly randomized model.
    del self.sim
    self.offset = randomize.randomize_ur10_xml(worker_id=self.worker_id,
                                               **self.randomize_kwargs)
    model = mujoco_py.load_model_from_path(self.model_path)
    self.sim = mujoco_py.MjSim(model, nsubsteps=self.n_substeps)
    self.goal = self._sample_goal()
    if self.viewer is not None:
        self._get_viewer('human').update_sim(self.sim)
        # self._get_viewer('human')._ncam = self.sim.model.ncam
    # Reset the controller state (target buffer, low-pass filter, integrator).
    self.ctrl_buffer = numpy.repeat((self.initial_qpos + deviation_q).reshape(1, 6), 4, axis=0)
    self.b, self.a = butter(2, 0.12)
    self.zi = [lfilter_zi(self.b, self.a) * (self.initial_qpos[i] + deviation_q[i])
               for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = self.initial_qpos + deviation_q
    self.set_state(self.initial_qpos + deviation_q)
    self.sim.forward()
    self.sim.step()
    if self.sim.data.ncon == 0:
        # Let the arm settle under pure gravity compensation before the episode starts.
        for i in range(100):
            self.sim.data.ctrl[:] = self.sim.data.qfrc_bias.copy()
            self.sim.step()
        self.init_x = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"),
                                         self.sim.data.get_body_xquat("gripper_dummy_heg")))
        self.set_force_for_q(self.initial_qpos + deviation_q)
    # A reset only counts as valid if the start pose is contact-free.
    return self.sim.data.ncon == 0
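# The reset above re-initialises a second-order Butterworth low-pass, butter(2, 0.12),
# with per-joint state zi scaled so the filter starts settled at the new target.
# Below is a minimal sketch of how such state is typically advanced one control step
# with scipy.signal.lfilter. The step code is not part of this section, so the helper
# name and its usage are illustrative assumptions, not the repository's controller.
from scipy.signal import butter, lfilter, lfilter_zi
import numpy

def smooth_target_sketch(target_q, b, a, zi):
    """Hypothetical helper: low-pass filter a 6-joint target one sample at a time."""
    filtered = numpy.empty(6)
    for i in range(6):
        # lfilter returns the filtered sample and the updated filter state.
        out, zi[i] = lfilter(b, a, [target_q[i]], zi=zi[i])
        filtered[i] = out[0]
    return filtered, zi

# A settled filter passes a constant target through (almost) unchanged:
b, a = butter(2, 0.12)
q0 = numpy.ones(6)
zi = [lfilter_zi(b, a) * q0[i] for i in range(6)]
q_f, zi = smooth_target_sketch(q0, b, a, zi)  # q_f is approximately q0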
def _reset_sim(self):
    if self.vary:
        deviation_x = numpy.random.uniform(-self.init_vary_range, self.init_vary_range)
        deviation_q = self.get_dq(deviation_x)
    else:
        deviation_q = numpy.array([0, 0, 0, 0, 0, 0])
    if self.ft_drift:
        self.ft_drift_val = numpy.random.uniform(-self.ft_drift_range, self.ft_drift_range)
    self.pos_drift_val = numpy.random.uniform(-self.pos_drift_range, self.pos_drift_range)
    del self.sim
    self.offset = randomize.randomize_ur10_xml(worker_id=self.worker_id,
                                               **self.randomize_kwargs)
    model = mujoco_py.load_model_from_path(self.xml_path)
    self.sim = mujoco_py.MjSim(model, nsubsteps=self.n_substeps)
    self.goal = self._sample_goal()
    if self.viewer is not None:
        self._get_viewer('human').update_sim(self.sim)
        # self._get_viewer('human')._ncam = self.sim.model.ncam
    self.ctrl_buffer = numpy.repeat((self.initial_qpos + deviation_q).reshape(1, 6), 4, axis=0)
    self.b, self.a = butter(2, 0.12)
    self.zi = [lfilter_zi(self.b, self.a) * (self.initial_qpos[i] + deviation_q[i])
               for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = self.initial_qpos + deviation_q
    self.set_state(self.initial_qpos + deviation_q)
    self.sim.forward()
    self.sim.step()
    if self.sim.data.ncon == 0:
        for i in range(100):
            self.sim.data.ctrl[:] = self.sim.data.qfrc_bias.copy()
            self.sim.step()
        self.init_x = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"),
                                         self.sim.data.get_body_xquat("gripper_dummy_heg")))
        self.set_force_for_q(self.initial_qpos + deviation_q)
    return self.sim.data.ncon == 0
def __init__(self, env_config, model_xml_path, worker_id):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    # Prefer an explicitly passed XML path; fall back to the config entry.
    if model_xml_path is None:
        self.model_path = env_config["model_xml_file"]
    else:
        self.model_path = model_xml_path
    ########################################## MONITORING
    self.episode = 0
    self.reward_episodes = 0
    self.results = list()
    self.stage = 0
    self.start_flag = True
    self.save_data = env_config["Saving"]
    if self.save_data:
        self.fts = []
        self.fxs = []
        self.fys = []
        self.fzs = []
        self.obs = []
        self.rewards = []
        self.poses = []
    ##########################################
    self.n_substeps = env_config["n_substeps"]
    self.distance_threshold = env_config["Learning"]["distance_threshold"]
    self.fail_threshold = env_config["Learning"]["fail_threshold"]
    self.initial_qpos = numpy.array(env_config["initial_qpos"])
    self.reward_type = env_config["reward_type"]
    self.ctrl_type = env_config["ctrl_type"]
    self.corrective = env_config["corrective"]
    self.n_actions = env_config["n_actions"]
    self.action_rate = env_config["action_rate"]
    self.SEED = env_config["SEED"]
    self.R1_static = env_config["Reward"]["R1_static"]
    self.R2_static = env_config["Reward"]["R2_static"]
    self.success_reward = env_config["Reward"]["success_reward"]
    self.dynamic_reward = env_config["Reward"]["dynamic_reward"]
    self.dynamic_reward_thresh = env_config["Reward"]["dynamic_reward_thresh"]
    self.reward_episodes_thresh = env_config["Reward"]["reward_episodes_thresh"]
    self.R2_max = env_config["Reward"]["R2_max"]
    self.rew_function_params = numpy.array(env_config["Reward"]["rew_function_params"])
    self.dx_max = env_config["dx_max"]
    self.gripper_mass = env_config["gripper_mass"]
    self.force_scaler = env_config["force_scaler"]
    ########################################## NOISE
    self.pos_noise_std = numpy.array(env_config["Noise"]["pos_noise_std"])
    self.ft_noise_std = numpy.array(env_config["Noise"]["ft_noise_std"])
    self.dq_noise_std = numpy.array(env_config["Noise"]["dq_noise_std"])
    ########################################## RANDOMIZATION
    self.vary = env_config["Domain_Randomization"]["vary"]
    self.vary_params = env_config["Domain_Randomization"]["vary_params"]
    # First three entries are Cartesian bounds; the last three are degrees, converted to radians.
    self.init_vary_range = numpy.concatenate([
        numpy.array(self.vary_params[:3]),
        numpy.array(self.vary_params[3:]) * (numpy.pi / 180)
    ])
    self.pos_rand_uncor_bound = numpy.array(
        env_config["Domain_Randomization"]["pos_rand_uncor_bound"])
    self.ft_rand_uncor_bound = numpy.array(
        env_config["Domain_Randomization"]["ft_rand_uncor_bound"])
    self.pos_rand_cor_bound = numpy.array(
        env_config["Domain_Randomization"]["pos_rand_cor_bound"])
    self.ft_rand_cor_bound = numpy.array(
        env_config["Domain_Randomization"]["ft_rand_cor_bound"])
    self.pos_rand_cor = numpy.array(env_config["Domain_Randomization"]["pos_rand_cor"])
    self.ft_rand_cor = numpy.array(env_config["Domain_Randomization"]["ft_rand_cor"])
    self.randomize_kwargs = env_config["randomize_kwargs"]
    self.offset = randomize.randomize_ur10_xml()
    self.worker_id = worker_id
    ######################################## CONTROLLER PARAMETERS
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    self.only_grav_comp = env_config["controller"]["only_grav_comp"]
    self.sim_ctrl_q = self.initial_qpos
    #########################################
    if self.dynamic_reward:
        self.compute_dynamic_reward_value()
        print("Reward: Dynamic")
    else:
        self.R1 = self.R1_static
        self.R2 = self.R2_static
        print("Reward: Static")
    ########################################
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED,
                                  success_reward=self.success_reward,
                                  action_rate=self.action_rate)
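# This constructor expects a nested YAML config. Reconstructed purely from the keys
# read above, the loaded file corresponds to a dict shaped roughly as sketched below;
# every value is a placeholder, not one taken from the repository:
env_config_sketch = {
    "model_xml_file": "ur10.xml",      # placeholder path
    "Saving": False,
    "n_substeps": 5,
    "initial_qpos": [0, 0, 0, 0, 0, 0],
    "reward_type": "sparse", "ctrl_type": "joint", "corrective": False,
    "n_actions": 6, "action_rate": 1, "SEED": 0,
    "dx_max": 0.0002, "gripper_mass": 1.0, "force_scaler": 1.0,
    "Learning": {"distance_threshold": 0.01, "fail_threshold": 0.25},
    "Reward": {"R1_static": 0.0, "R2_static": 0.0, "success_reward": 1.0,
               "dynamic_reward": False, "dynamic_reward_thresh": 0.0,
               "reward_episodes_thresh": 0, "R2_max": 0.0,
               "rew_function_params": [0.0]},
    "Noise": {"pos_noise_std": [0.0] * 6, "ft_noise_std": [0.0] * 6,
              "dq_noise_std": [0.0] * 6},
    "Domain_Randomization": {"vary": False, "vary_params": [0.0] * 6,
                             "pos_rand_uncor_bound": [0.0] * 3,
                             "ft_rand_uncor_bound": [0.0] * 6,
                             "pos_rand_cor_bound": [0.0] * 3,
                             "ft_rand_cor_bound": [0.0] * 6,
                             "pos_rand_cor": [0.0] * 3,
                             "ft_rand_cor": [0.0] * 6},
    "controller": {"K": [0.0] * 6, "kp": [0.0] * 6, "ki": [0.0] * 6,
                   "kd": [0.0] * 6, "kf": [0.0] * 6, "max_fb": [0.0] * 6,
                   "butter_0": 2, "butter_1": 0.12,
                   "qi_diff": 0, "only_grav_comp": False},
    "randomize_kwargs": {},
}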
def _reset_sim(self):
    self.start_flag = True
    if self.episode > 0:
        self.success_rate = float(numpy.sum(self.results) / float(len(self.results)))
        if self.dynamic_reward:
            info = self.reward_change()
        else:
            info = ""
        print(" | Episode: {} | Success Rate: {} | Force Reward: {} |".format(
            self.episode, self.success_rate, info))
        if len(self.results) < 10:
            self.results.append(0)
        else:
            self.results.pop(0)
            self.results.append(0)
    if self.save_data and self.episode > 0:
        if self.success_flag == 1:
            self.rewards.append(self.success_reward)
        self.reward_sum = numpy.sum(self.rewards)
        save_dict = {
            "fx": self.fxs,
            "fy": self.fys,
            "fz": self.fzs,
            "steps": self.step_count,
            "success": self.success_flag,
            "reward": self.reward_sum
        }
        with open(os.path.join(*[SAVE_PATH, "episode_{}.json".format(self.episode)]),
                  "w") as file:
            json.dump(save_dict, file)
            file.write('\n')
        self.fxs = []
        self.fys = []
        self.fzs = []
        self.rewards = []
    self.step_count = 0
    self.success_flag = 0
    self.episode += 1
    if self.vary:
        deviation_x = numpy.random.uniform(-self.init_vary_range, self.init_vary_range)
        deviation_q = self.get_dq(deviation_x)
    else:
        deviation_q = numpy.array([0, 0, 0, 0, 0, 0])
    # Draw the correlated (per-episode) sensor offsets once per reset.
    self.pos_rand_cor = numpy.random.uniform(-self.pos_rand_cor_bound, self.pos_rand_cor_bound)
    self.ft_rand_cor = numpy.random.uniform(-self.ft_rand_cor_bound, self.ft_rand_cor_bound)
    del self.sim
    self.offset = randomize.randomize_ur10_xml(worker_id=self.worker_id,
                                               **self.randomize_kwargs)
    model = mujoco_py.load_model_from_path(self.model_path)
    self.sim = mujoco_py.MjSim(model, nsubsteps=self.n_substeps)
    self.goal = self._sample_goal()
    if self.viewer is not None:
        self._get_viewer('human').update_sim(self.sim)
    self.ctrl_buffer = numpy.repeat((self.initial_qpos + deviation_q).reshape(1, 6), 4, axis=0)
    self.b, self.a = butter(2, 0.12)
    self.zi = [lfilter_zi(self.b, self.a) * (self.initial_qpos[i] + deviation_q[i])
               for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = self.initial_qpos + deviation_q
    self.set_state(self.initial_qpos + deviation_q)
    self.sim.forward()
    self.sim.step()
    if self.sim.data.ncon == 0:
        for i in range(100):
            self.sim.data.ctrl[:] = self.sim.data.qfrc_bias.copy()
            self.sim.step()
        self.init_x = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"),
                                         self.sim.data.get_body_xquat("gripper_dummy_heg")))
        self.set_force_for_q(self.initial_qpos + deviation_q)
    return self.sim.data.ncon == 0
def __init__(self, env_config, model_xml_path, worker_id):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    # Persist the per-worker ADR config next to the training run.
    yaml.dump(env_config,
              open(os.path.join(*[ADR_CONFIG_PATH,
                                  "env_config_ADR_{}.yml".format(worker_id)]), "w"))
    month = datetime.datetime.now().strftime("%m")
    day = datetime.datetime.now().strftime("%d")
    hour = datetime.datetime.now().strftime("%H")
    minute = datetime.datetime.now().strftime("%M")
    date_time = "{}-{}_{}-{}".format(month, day, hour, minute)
    if not os.path.exists(os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)])):
        os.makedirs(os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)]))
    self.LVL_SAVE_PATH = os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)])
    if model_xml_path is None:
        self.model_path = env_config["model_xml_file"]
    else:
        self.model_path = model_xml_path
    ########################################## MONITORING
    self.episode = 0
    self.results = list()
    self.start_flag = True
    self.save_data = env_config["Saving"]
    ##########################################
    self.n_substeps = env_config["n_substeps"]
    self.distance_threshold = env_config["Learning"]["distance_threshold"]
    self.fail_threshold = env_config["Learning"]["fail_threshold"]
    self.initial_qpos = numpy.array(env_config["initial_qpos"])
    self.reward_type = env_config["reward_type"]
    self.ctrl_type = env_config["ctrl_type"]
    self.corrective = env_config["corrective"]
    self.n_actions = env_config["n_actions"]
    self.action_rate = env_config["action_rate"]
    self.SEED = env_config["SEED"]
    self.R1 = env_config["Reward"]["R1"]
    self.R2 = env_config["Reward"]["R2"]
    self.success_reward = env_config["Reward"]["success_reward"]
    self.dx_max = env_config["dx_max"]
    self.gripper_mass = env_config["gripper_mass"]
    self.force_scaler = env_config["force_scaler"]
    ########################################## NOISE
    self.pos_noise_std = numpy.array(env_config["Noise"]["pos_noise_std"])
    self.ft_noise_std = numpy.array(env_config["Noise"]["ft_noise_std"])
    self.dq_noise_std = numpy.array(env_config["Noise"]["dq_noise_std"])
    ########################################## RANDOMIZATION
    self.vary = env_config["vary"]
    self.vary_params = numpy.array(env_config["Domain_Randomization"]["vary_params"])
    # First three entries are Cartesian bounds; the last three are degrees, converted to radians.
    self.init_vary_range = numpy.concatenate([
        numpy.array(self.vary_params[:3]),
        numpy.array(self.vary_params[3:]) * (numpy.pi / 180)
    ])
    self.pos_rand_uncor_bound = numpy.array(
        env_config["Domain_Randomization"]["pos_rand_uncor_bound"])
    self.ft_rand_uncor_bound = numpy.array(
        env_config["Domain_Randomization"]["ft_rand_uncor_bound"])
    self.pos_rand_cor_bound = numpy.array(
        env_config["Domain_Randomization"]["pos_rand_cor_bound"])
    self.ft_rand_cor_bound = numpy.array(
        env_config["Domain_Randomization"]["ft_rand_cor_bound"])
    self.pos_rand_cor = numpy.array(env_config["Domain_Randomization"]["pos_rand_cor"])
    self.ft_rand_cor = numpy.array(env_config["Domain_Randomization"]["ft_rand_cor"])
    self.randomize_kwargs = env_config["randomize_kwargs"]
    self.offset = randomize.randomize_ur10_xml()
    self.worker_id = worker_id
    ######################################## ADR
    self.levels = dict()
    self.level = int(env_config["Learning"]["ADR"]["level"])
    self.level_episodes = 0
    self.lower_level_bound = env_config["Learning"]["ADR"]["lower_level_bound"]
    self.upper_level_bound = env_config["Learning"]["ADR"]["upper_level_bound"]
    self.level_episodes_bound = env_config["Learning"]["ADR"]["level_episodes_bound"]
    self.vary_params_delta = numpy.array(env_config["ADR"]["vary_params_delta"])
    self.pos_rand_uncor_bound_delta = numpy.array(env_config["ADR"]["pos_rand_uncor_bound_delta"])
    self.ft_rand_uncor_bound_delta = numpy.array(env_config["ADR"]["ft_rand_uncor_bound_delta"])
    self.pos_rand_cor_bound_delta = numpy.array(env_config["ADR"]["pos_rand_cor_bound_delta"])
    self.ft_rand_cor_bound_delta = numpy.array(env_config["ADR"]["ft_rand_cor_bound_delta"])
    self.var_mass_delta = env_config["ADR"]["var_mass_delta"]
    self.var_damp_delta = env_config["ADR"]["var_damp_delta"]
    self.var_fr_delta = env_config["ADR"]["var_fr_delta"]
    self.var_grav_x_y_delta = env_config["ADR"]["var_grav_x_y_delta"]
    self.var_grav_z_delta = env_config["ADR"]["var_grav_z_delta"]
    self.var_body_pos_delta = env_config["ADR"]["var_body_pos_delta"]
    self.var_body_rot_delta = env_config["ADR"]["var_body_rot_delta"]
    ######################################## CONTROLLER PARAMETERS
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    self.only_grav_comp = env_config["controller"]["only_grav_comp"]
    self.sim_ctrl_q = self.initial_qpos
    #########################################
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED,
                                  success_reward=self.success_reward,
                                  action_rate=self.action_rate)
def __init__(self, env_config, model_xml_path, worker_id):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    if model_xml_path is None:
        self.model_path = env_config["model_xml_file"]
    else:
        self.model_path = model_xml_path
    self.save_data = env_config["Saving"]
    self.start_flag = True
    self.episode = 0
    if self.save_data:
        self.fts = []
        self.fxs = []
        self.fys = []
        self.fzs = []
        self.obs = []
        self.rewards = []
        self.poses = []
    self.SEED = env_config["SEED"]
    # Environment Parameter
    # self.model_path = os.path.join(*[MODEL_PATH, env_config["model_xml_file"]])  # Path to the environment xml file
    self.run_info = env_config["info"]
    self.results = list()  # list(numpy.zeros(10,).astype(int))
    self.R1 = env_config["Reward"]["R1"]
    self.R2 = env_config["Reward"]["R2"]
    self.success_reward = env_config["Reward"]["success_reward"]
    self.initial_qpos = numpy.array(
        env_config["initial_qpos"])  # An array of values that define the initial configuration
    self.sim_ctrl_q = self.initial_qpos
    self.reward_type = env_config["reward_type"]  # The reward type, i.e. sparse or dense
    self.ctrl_type = env_config["ctrl_type"]
    self.n_substeps = env_config["n_substeps"]  # Number of substeps the simulation runs on every call to step
    self.action_rate = env_config["action_rate"]
    self.distance_threshold = env_config["Learning"]["distance_threshold"]
    self.cur_eps_threshold = env_config["Learning"]["cur_eps_threshold"]
    self.curriculum_learning = env_config["Learning"]["curriculum_learning"]
    self.initial_distance_threshold = env_config["Learning"]["initial_distance_threshold"]
    self.final_distance_threshold = env_config["Learning"][
        "final_distance_threshold"]  # The threshold after which a goal is considered achieved
    self.fail_threshold = env_config["Learning"]["fail_threshold"]
    self.n_actions = env_config["n_actions"]
    self.corrective = env_config["corrective"]
    self.vary = env_config["vary"]
    self.dx_max = env_config["dx_max"]
    self.gripper_mass = env_config["gripper_mass"]
    # Controller Parameter
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    self.only_grav_comp = False
    ########################
    # NOISE
    self.f_mean_si = env_config["Noise"]["f_mean_si"]
    self.t_mean_si = env_config["Noise"]["t_mean_si"]
    self.pos_mean_si = env_config["Noise"]["pos_mean_si"]
    self.rot_mean_si = env_config["Noise"]["rot_mean_si"]
    self.f_std_si = env_config["Noise"]["f_std_si"]
    self.t_std_si = env_config["Noise"]["t_std_si"]
    self.pos_std_si = env_config["Noise"]["pos_std_si"]
    self.rot_std_si = env_config["Noise"]["rot_std_si"]
    self.dq_mean_si = env_config["Noise"]["dq_mean_si"]
    self.dq_std_si = env_config["Noise"]["dq_std_si"]
    # Domain Randomization Parameter
    self.dq_var_dr_cor = env_config["Domain_Randomization"]["dq_var_dr_cor"]
    self.dq_var_dr_uncor = env_config["Domain_Randomization"]["dq_var_dr_uncor"]
    self.pos_var_dr_cor = env_config["Domain_Randomization"]["pos_var_dr_cor"]
    self.pos_var_dr_uncor = env_config["Domain_Randomization"]["pos_var_dr_uncor"]
    self.f_var_dr_cor = env_config["Domain_Randomization"]["f_var_dr_cor"]
    self.f_var_dr_uncor = env_config["Domain_Randomization"]["f_var_dr_uncor"]
    self.t_var_dr_cor = env_config["Domain_Randomization"]["t_var_dr_cor"]
    self.t_var_dr_uncor = env_config["Domain_Randomization"]["t_var_dr_uncor"]
    self.rot_var_dr_cor = env_config["Domain_Randomization"]["rot_var_dr_cor"]
    self.rot_var_dr_uncor = env_config["Domain_Randomization"]["rot_var_dr_uncor"]
    # Correlated offsets, re-drawn once per episode in _reset_sim.
    self.f_corr = numpy.zeros(3)
    self.t_corr = numpy.zeros(3)
    self.pos_corr = numpy.zeros(3)
    self.rot_corr = numpy.zeros(3)
    self.dq_corr = numpy.zeros(6)
    self.randomize_kwargs = env_config["randomize_kwargs"]
    self.offset = randomize.randomize_ur10_xml()
    self.worker_id = worker_id
    ##########################
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED,
                                  success_reward=self.success_reward,
                                  action_rate=self.action_rate)
def _reset_sim(self):
    # Tracking the first step to zero the ft-sensor
    self.start_flag = True
    if self.episode > 0:
        self.success_rate = float(numpy.sum(self.results) / float(len(self.results)))
        print(" | Episode: {} | Success Rate: {} | ".format(self.episode, self.success_rate))
        if len(self.results) < 10:
            self.results.append(0)
        else:
            self.results.pop(0)
            self.results.append(0)
    if self.save_data and self.episode > 0:
        if self.success_flag == 1:
            self.rewards.append(self.success_reward)
        self.reward_sum = numpy.sum(self.rewards)
        save_dict = {
            # "observations": self.obs,
            # "ft_values": self.fts,
            # "rewards": self.rewards,
            # "poses": self.poses,
            "fx": self.fxs,
            "fy": self.fys,
            "fz": self.fzs,
            "steps": self.step_count,
            "success": self.success_flag,
            "reward": self.reward_sum
        }
        with open(os.path.join(*[SAVE_PATH, "episode_{}.json".format(self.episode)]),
                  "w") as file:
            json.dump(save_dict, file)
            file.write('\n')
        # self.obs = []
        # self.fts = []
        # self.poses = []
        self.fxs = []
        self.fys = []
        self.fzs = []
        self.rewards = []
    self.step_count = 0
    self.success_flag = 0
    self.episode += 1
    deviation_q = numpy.array([0, 0, 0, 0, 0, 0])
    # if self.ft_drift:
    #     self.ft_drift_val = numpy.random.uniform(-self.ft_drift_range, self.ft_drift_range)
    # self.pos_drift_val = numpy.random.uniform(-self.pos_drift_range, self.pos_drift_range)
    # Correlated noise: drawn once per episode, held constant within it.
    self.dq_corr = numpy.random.uniform(-self.dq_var_dr_cor, self.dq_var_dr_cor, 6)
    self.pos_corr = numpy.random.uniform(-self.pos_var_dr_cor, self.pos_var_dr_cor, 3)
    self.rot_corr = numpy.random.uniform(-self.rot_var_dr_cor, self.rot_var_dr_cor, 3)
    self.f_corr = numpy.random.uniform(-self.f_var_dr_cor, self.f_var_dr_cor, 3)
    self.t_corr = numpy.random.uniform(-self.t_var_dr_cor, self.t_var_dr_cor, 3)
    del self.sim
    self.offset = randomize.randomize_ur10_xml(worker_id=self.worker_id,
                                               **self.randomize_kwargs)
    model = mujoco_py.load_model_from_path(self.model_path)
    self.sim = mujoco_py.MjSim(model, nsubsteps=self.n_substeps)
    self.goal = self._sample_goal()
    if self.viewer is not None:
        self._get_viewer('human').update_sim(self.sim)
        # self._get_viewer('human')._ncam = self.sim.model.ncam
    self.ctrl_buffer = numpy.repeat((self.initial_qpos + deviation_q).reshape(1, 6), 4, axis=0)
    self.b, self.a = butter(2, 0.12)
    self.zi = [lfilter_zi(self.b, self.a) * (self.initial_qpos[i] + deviation_q[i])
               for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = self.initial_qpos + deviation_q
    self.set_state(self.initial_qpos + deviation_q)
    self.sim.forward()
    self.sim.step()
    if self.sim.data.ncon == 0:
        for i in range(100):
            self.sim.data.ctrl[:] = self.sim.data.qfrc_bias.copy()
            self.sim.step()
        self.init_x = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"),
                                         self.sim.data.get_body_xquat("gripper_dummy_heg")))
        self.set_force_for_q(self.initial_qpos + deviation_q)
    return self.sim.data.ncon == 0
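# The *_corr offsets above are drawn once per episode and then held fixed, while the
# *_si parameters from __init__ describe per-step sensor noise. The observation code
# is not part of this section; the helper below is an assumed illustration of how the
# two are commonly combined, e.g. for a measured TCP position:
import numpy

def noisy_position_sketch(self, pos_true):
    """Hypothetical: per-episode correlated bias plus fresh per-step Gaussian noise."""
    per_step = numpy.random.normal(self.pos_mean_si, self.pos_std_si, 3)
    return pos_true + self.pos_corr + per_step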
def __init__(self, env_config, model_xml_path, worker_id):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    if model_xml_path is None:
        self.model_path = env_config["model_xml_file"]
    else:
        self.model_path = model_xml_path
    ##########################################
    self.n_substeps = env_config["n_substeps"]
    self.distance_threshold = env_config["Learning"]["distance_threshold"]
    self.fail_threshold = env_config["Learning"]["fail_threshold"]
    self.initial_qpos = numpy.array(env_config["initial_qpos"])
    self.reward_type = env_config["reward_type"]
    self.ctrl_type = env_config["ctrl_type"]
    self.vary = env_config["vary"]
    self.vary_params = env_config["Domain_Randomization"]["vary_params"]
    # self.init_vary_range = numpy.concatenate([
    #     self.vary_params[:3],
    #     numpy.array(self.vary_params[3:]) * (2 * numpy.pi / 360)])
    self.corrective = env_config["corrective"]
    self.randomize_kwargs = env_config["randomize_kwargs"]
    ########################## MH Noise
    self.pos_std = numpy.array(env_config["Noise"]["pos_std"])
    self.pos_drift_range = numpy.array(env_config["Noise"]["pos_drift_range"])
    self.pos_drift_val = numpy.array(env_config["Noise"]["pos_drift_val"])
    self.ft_noise = env_config["Noise"]["ft_noise"]
    self.ft_drift = env_config["Noise"]["ft_drift"]
    self.ft_std = env_config["Noise"]["ft_std"]
    self.ft_drift_range = env_config["Noise"]["ft_drift_range"]
    self.ft_drift_val = env_config["Noise"]["ft_drift_val"]
    ############################
    self.pos_mean_si = env_config["Noise"]["pos_mean_si"]
    self.pos_std_si = env_config["Noise"]["pos_std_si"]
    self.rot_mean_si = env_config["Noise"]["rot_mean_si"]
    self.rot_std_si = env_config["Noise"]["rot_std_si"]
    self.f_mean_si = env_config["Noise"]["f_mean_si"]
    self.t_mean_si = env_config["Noise"]["t_mean_si"]
    self.f_std_si = env_config["Noise"]["f_std_si"]
    self.t_std_si = env_config["Noise"]["t_std_si"]
    self.dq_mean_si = env_config["Noise"]["dq_mean_si"]
    self.dq_std_si = env_config["Noise"]["dq_std_si"]
    ######################
    self.n_actions = env_config["n_actions"]
    self.action_rate = env_config["action_rate"]
    self.SEED = env_config["SEED"]
    self.R1 = env_config["Reward"]["R1"]
    self.R2 = env_config["Reward"]["R2"]
    self.success_reward = env_config["Reward"]["success_reward"]
    self.dx_max = env_config["dx_max"]
    self.offset = randomize.randomize_ur10_xml()
    self.worker_id = worker_id
    ######################################## MONITORING
    self.episode = 0
    self.results = list()
    ########################################
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    self.only_grav_comp = env_config["controller"]["only_grav_comp"]
    #########################################
    self.sim_ctrl_q = self.initial_qpos
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED,
                                  success_reward=self.success_reward,
                                  action_rate=self.action_rate)
def __init__(self, env_config):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    # Parameter for Custom Savings
    self.save_data = False
    self.episode = 0
    self.rewards = []
    self.distances = []
    self.sim_poses = []
    self.SEED = env_config["SEED"]
    # Environment Parameter
    self.model_path = os.path.join(*[MODEL_PATH, env_config["model_xml_file"]])  # Path to the environment xml file
    self.initial_qpos = numpy.array(env_config["initial_qpos"])  # An array of values that define the initial configuration
    self.sim_ctrl_q = self.initial_qpos
    self.reward_type = env_config["reward_type"]  # The reward type, i.e. sparse or dense
    self.ctrl_type = env_config["ctrl_type"]
    self.n_substeps = env_config["n_substeps"]  # Number of substeps the simulation runs on every call to step
    self.distance_threshold = env_config["distance_threshold"]  # The threshold after which a goal is considered achieved
    self.fail_threshold = env_config["fail_threshold"]
    self.n_actions = env_config["n_actions"]
    self.corrective = env_config["corrective"]
    self.vary = env_config["vary"]
    self.dx_max = env_config["dx_max"]
    # Controller Parameter
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    # Domain Randomization Parameter
    self.offset = randomize.randomize_ur10_xml()
    self.punish_force = env_config["punish_force"]
    self.punish_force_thresh = env_config["punish_force_thresh"]
    self.punish_force_factor = env_config["punish_force_factor"]
    self.pos_std = numpy.array(env_config["pos_std"])
    self.pos_drift_range = numpy.array(env_config["pos_drift_range"])
    self.pos_drift_val = numpy.array(env_config["pos_drift_val"])  # set in sim reset function
    # First three entries are Cartesian bounds; the last three are degrees, converted to radians.
    self.init_vary_range = numpy.concatenate([
        numpy.array(env_config["init_vary_range"][:3]),
        numpy.array(env_config["init_vary_range"][3:]) * (numpy.pi / 180)
    ])
    self.ft_noise = env_config["ft_noise"]
    self.ft_drift = env_config["ft_drift"]
    self.ft_std = numpy.array(env_config["ft_std"])
    self.ft_drift_range = numpy.array(env_config["ft_drift_range"])
    self.ft_drift_val = numpy.array(env_config["ft_drift_val"])  # set in sim reset function
    self.randomize_kwargs = env_config["randomize_kwargs"]
    self.worker_id = 1
    ############################
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED)
def __init__(self, env_config, model_xml_path, worker_id):
    with open(env_config) as cfg:
        env_config = yaml.load(cfg, Loader=yaml.FullLoader)
    yaml.dump(env_config,
              open(os.path.join(*[ADR_CONFIG_PATH,
                                  "env_config_005_{}.yml".format(worker_id)]), "w"))
    month = datetime.datetime.now().strftime("%m")
    day = datetime.datetime.now().strftime("%d")
    hour = datetime.datetime.now().strftime("%H")
    minute = datetime.datetime.now().strftime("%M")
    date_time = "{}-{}_{}-{}".format(month, day, hour, minute)
    if not os.path.exists(os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)])):
        os.makedirs(os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)]))
    self.LVL_SAVE_PATH = os.path.join(*[ADR_TRAIN_DIR, "ADR_{}".format(date_time)])
    if model_xml_path is None:
        self.model_path = env_config["model_xml_file"]
    else:
        self.model_path = model_xml_path
    self.save_data = env_config["Saving"]
    self.start_flag = True
    self.episode = 0
    self.levels = dict()
    if self.save_data:
        self.fts = list()
        self.fxs = list()
        self.fys = list()
        self.fzs = list()
        self.obs = list()
        self.rewards = list()
        self.poses = list()
    self.SEED = env_config["SEED"]
    # Environment Parameter
    self.run_info = env_config["info"]
    self.results = list()  # list(numpy.zeros(10,).astype(int))
    self.R1 = env_config["Reward"]["R1"]
    self.R2 = env_config["Reward"]["R2"]
    self.success_reward = env_config["Reward"]["success_reward"]
    self.initial_qpos = numpy.array(
        env_config["initial_qpos"])  # An array of values that define the initial configuration
    self.sim_ctrl_q = self.initial_qpos
    self.reward_type = env_config["reward_type"]  # The reward type, i.e. sparse or dense
    self.ctrl_type = env_config["ctrl_type"]
    self.n_substeps = env_config["n_substeps"]  # Number of substeps the simulation runs on every call to step
    self.action_rate = env_config["action_rate"]
    self.distance_threshold = env_config["Learning"]["distance_threshold"]
    self.cur_eps_threshold = env_config["Learning"]["cur_eps_threshold"]
    self.curriculum_learning = env_config["Learning"]["curriculum_learning"]
    self.initial_distance_threshold = env_config["Learning"]["initial_distance_threshold"]
    self.final_distance_threshold = env_config["Learning"][
        "final_distance_threshold"]  # The threshold after which a goal is considered achieved
    self.fail_threshold = env_config["Learning"]["fail_threshold"]
    self.n_actions = env_config["n_actions"]
    self.corrective = env_config["corrective"]
    self.vary = env_config["vary"]
    self.dx_max = env_config["dx_max"]
    # Controller Parameter
    self.K = numpy.array(env_config["controller"]["K"])
    self.kp = numpy.array(env_config["controller"]["kp"])
    self.ki = numpy.array(env_config["controller"]["ki"])
    self.kd = numpy.array(env_config["controller"]["kd"])
    self.kf = numpy.array(env_config["controller"]["kf"])
    self.max_fb = numpy.array(env_config["controller"]["max_fb"])
    self.ctrl_buffer = numpy.repeat(self.initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(env_config["controller"]["butter_0"],
                  env_config["controller"]["butter_1"])
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * self.initial_qpos[i] for i in range(6)]
    self.qi_diff = env_config["controller"]["qi_diff"]
    self.last_target_q = self.initial_qpos.copy()
    self.only_grav_comp = True
    ########################
    # NOISE
    self.f_mean_si = env_config["Noise"]["f_mean_si"]
    self.t_mean_si = env_config["Noise"]["t_mean_si"]
    self.pos_mean_si = env_config["Noise"]["pos_mean_si"]
    self.rot_mean_si = env_config["Noise"]["rot_mean_si"]
    self.f_std_si = env_config["Noise"]["f_std_si"]
    self.t_std_si = env_config["Noise"]["t_std_si"]
    self.pos_std_si = env_config["Noise"]["pos_std_si"]
    self.rot_std_si = env_config["Noise"]["rot_std_si"]
    self.dq_mean_si = env_config["Noise"]["dq_mean_si"]
    self.dq_std_si = env_config["Noise"]["dq_std_si"]
    # Domain Randomization Parameter
    self.level = int(env_config["Learning"]["ADR"]["level"])
    self.level_episodes = 0
    self.lower_level_bound = env_config["Learning"]["ADR"]["lower_level_bound"]
    self.upper_level_bound = env_config["Learning"]["ADR"]["upper_level_bound"]
    self.level_episodes_bound = env_config["Learning"]["ADR"]["level_episodes_bound"]
    self.dq_var_dr = env_config["Domain_Randomization"]["dq_var_dr"]
    self.pos_var_dr = env_config["Domain_Randomization"]["pos_var_dr"]
    self.f_var_dr = env_config["Domain_Randomization"]["f_var_dr"]
    self.t_var_dr = env_config["Domain_Randomization"]["t_var_dr"]
    self.rot_var_dr = env_config["Domain_Randomization"]["rot_var_dr"]
    self.randomize_kwargs = env_config["randomize_kwargs"]
    self.offset = randomize.randomize_ur10_xml()
    self.worker_id = worker_id
    ##########################
    super(Ur10Env, self).__init__(model_path=self.model_path,
                                  n_substeps=self.n_substeps,
                                  n_actions=self.n_actions,
                                  initial_qpos=self.initial_qpos,
                                  seed=self.SEED,
                                  success_reward=self.success_reward,
                                  action_rate=self.action_rate)
def __init__(self, model_path, n_substeps, distance_threshold, initial_qpos, reward_type,
             ctrl_type="joint", fail_threshold=0.25, vary=False,
             init_vary_range=numpy.array([0.03, 0.03, 0.03,
                                          3 / 180 * numpy.pi,
                                          3 / 180 * numpy.pi,
                                          3 / 180 * numpy.pi]),
             corrective=False, worker_id=1, randomize_kwargs={},
             pos_std=numpy.array([0, 0, 0, 0, 0, 0]),
             pos_drift_range=numpy.array([0, 0, 0, 0, 0, 0]),
             ft_noise=False, ft_drift=False, punish_force=False,
             punish_force_thresh=20, punish_force_factor=0.001, dx_max=0.0002):
    """Initializes a new UR10 environment.

    Args:
        model_path (string): path to the environment's XML file
        n_substeps (int): number of substeps the simulation runs on every call to step
        distance_threshold (float): the threshold after which a goal is considered achieved
        initial_qpos (array): an array of values that define the initial configuration
        reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
    """
    self.dx_max = dx_max
    self.xml_path = model_path
    self.offset = randomize.randomize_ur10_xml()
    self.n_substeps = n_substeps
    self.distance_threshold = distance_threshold
    self.reward_type = reward_type
    self.ctrl_type = ctrl_type
    self.fail_threshold = fail_threshold
    self.vary = vary
    self.initial_qpos = initial_qpos
    self.corrective = corrective
    self.sim_ctrl_q = initial_qpos
    self.worker_id = worker_id
    self.punish_force = punish_force
    self.punish_force_thresh = punish_force_thresh
    self.init_vary_range = init_vary_range
    self.punish_force_factor = punish_force_factor
    self.pos_std = pos_std
    self.pos_drift_range = pos_drift_range
    self.pos_drift_val = numpy.zeros(6,)  # set in sim reset function
    self.ft_noise = ft_noise
    self.ft_drift = ft_drift
    self.ft_std = numpy.array([0.1, 0.1, 0.1, 0.003, 0.003, 0.003])  # These values are based on measurements,
    self.ft_drift_range = numpy.array([1, 1, 1, 0.015, 0.015, 0.015])  # and so are these.
    self.ft_drift_val = 0  # set in sim reset function
    self.K = numpy.array([14.87, 13.26, 11.13, 10.49, 11.03, 11.47])
    self.kp = numpy.array([4247, 3342, 3306, 707, 1236, 748])
    self.ki = numpy.array([70.218, 38.65, 86.12, 19.60, 17.07, 19.40])
    self.kd = numpy.array([0, 0, 0, 0, 0, 0])  # numpy.array([81.61, 77.38, 10.61, 10.44, 4.75, 9.14])
    self.kf = numpy.array([28, 16.7, 8.42, 2.42, 4.12, 2.34])
    self.max_fb = numpy.array([7.18, 1.54, 4.82, 3.22, 1.41, 1.891])
    self.ctrl_buffer = numpy.repeat(initial_qpos.copy().reshape(1, 6), 4, axis=0)
    b, a = butter(2, 0.12)
    self.a = a
    self.b = b
    self.zi = [lfilter_zi(b, a) * initial_qpos[i] for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = initial_qpos.copy()
    self.randomize_kwargs = randomize_kwargs
    super(Ur10Env, self).__init__(model_path=model_path, n_substeps=n_substeps,
                                  n_actions=6, initial_qpos=initial_qpos)
    self.p = [900, 8000, 700, 100, 100, 100]
    self.d = [12, 80, 5, 0.5, 0.5, 0.5]
    self.max_T = numpy.array([40, 50, 35, 2, 2, 2])
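# A minimal construction sketch for this keyword-argument variant, following its
# docstring. The XML path, thresholds, and start configuration below are placeholders,
# not values from the repository:
import numpy

env = Ur10Env(
    model_path="ur10_heg.xml",        # placeholder path to the MuJoCo XML
    n_substeps=5,                     # placeholder
    distance_threshold=0.01,          # goal tolerance, placeholder
    initial_qpos=numpy.zeros(6),      # placeholder start configuration
    reward_type="sparse",
    vary=True,                        # randomize the start pose on every reset
)
ok = env._reset_sim()                 # True when the reset ends contact-free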
def _reset_sim(self):
    #########################
    if self.episode > 0:
        self.success_rate = float(numpy.sum(self.results) / float(len(self.results)))
        print("Episode: {} Success Rate: {} ".format(self.episode, self.success_rate))
        if len(self.results) < 10:
            self.results.append(0)
        else:
            self.results.pop(0)
            self.results.append(0)  # keep the window at ten entries, as in the other variants
    #########################
    self.episode += 1
    if self.vary:
        deviation_x = numpy.random.uniform(-self.init_vary_range, self.init_vary_range)
        deviation_q = self.get_dq(deviation_x)
    else:
        deviation_q = numpy.array([0, 0, 0, 0, 0, 0])
    # if self.ft_drift:
    #     self.ft_drift_val = numpy.random.uniform(-self.ft_drift_range, self.ft_drift_range)
    # self.pos_drift_val = numpy.random.uniform(-self.pos_drift_range, self.pos_drift_range)
    self.ft_dr_cor_noise = numpy.concatenate([
        numpy.random.uniform(-self.f_var_dr_cor, self.f_var_dr_cor, 3),
        numpy.random.uniform(-self.t_var_dr_cor, self.t_var_dr_cor, 3)
    ])
    self.pos_dr_cor_noise = numpy.random.uniform(-self.pos_var_dr_cor,
                                                 self.pos_var_dr_cor, 3)
    self.rot_dr_cor_noise = numpy.random.uniform(-self.rot_var_dr_cor,
                                                 self.rot_var_dr_cor, 3) * (numpy.pi / 180)
    del self.sim
    self.offset = randomize.randomize_ur10_xml(worker_id=self.worker_id,
                                               **self.randomize_kwargs)
    model = mujoco_py.load_model_from_path(self.model_path)
    self.sim = mujoco_py.MjSim(model, nsubsteps=self.n_substeps)
    self.goal = self._sample_goal()
    if self.viewer is not None:
        self._get_viewer('human').update_sim(self.sim)
        # self._get_viewer('human')._ncam = self.sim.model.ncam
    self.ctrl_buffer = numpy.repeat((self.initial_qpos + deviation_q).reshape(1, 6), 4, axis=0)
    self.b, self.a = butter(2, 0.12)
    self.zi = [lfilter_zi(self.b, self.a) * (self.initial_qpos[i] + deviation_q[i])
               for i in range(6)]
    self.qi_diff = 0
    self.last_target_q = self.initial_qpos + deviation_q
    self.set_state(self.initial_qpos + deviation_q)
    self.sim.forward()
    self.sim.step()
    if self.sim.data.ncon == 0:
        for i in range(100):
            self.sim.data.ctrl[:] = self.sim.data.qfrc_bias.copy()
            self.sim.step()
        self.init_x = numpy.concatenate((self.sim.data.get_body_xpos("gripper_dummy_heg"),
                                         self.sim.data.get_body_xquat("gripper_dummy_heg")))
        self.set_force_for_q(self.initial_qpos + deviation_q)
    return self.sim.data.ncon == 0
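# Every _reset_sim variant above maps a sampled Cartesian deviation to joint space via
# self.get_dq(deviation_x). That method is not shown in this section; a standard way
# to realise such a mapping, offered purely as an assumption, is a least-squares step
# through the pseudo-inverted geometric Jacobian at the initial configuration:
import numpy

def get_dq_sketch(jacobian, deviation_x):
    """Hypothetical: joint displacement approximately realising a small Cartesian offset.

    jacobian: 6x6 geometric Jacobian of the end effector at the start pose.
    deviation_x: [dx, dy, dz, drx, dry, drz] offset, translations first.
    """
    return numpy.linalg.pinv(jacobian) @ deviation_x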