def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): ProxyEnv.__init__(self, env) Serializable.quick_init(self, locals()) if not isinstance(env.action_space, Box): print( "Environment not using continuous actions; action normalization skipped!" ) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.array( env.observation_space.zeros ) #np.zeros(env.observation_space._dim)#.flat_dim self._obs_var = np.array( env.observation_space.ones ) #np.ones(env.observation_space._dim)#.flat_dim self._reward_alpha = reward_alpha self._reward_mean = np.zeros((self._obs_mean.shape[0], 1), dtype=float) self._reward_var = np.ones((self._obs_mean.shape[0], 1), dtype=float)
def __init__(self, env, start_generator, only_feasible=False, start_bounds=None, *args, **kwargs): """ This environment wraps around a normal environment to facilitate goal based exploration. Initial position based experiments should not use this class. :param env: wrapped env :param start_generator: a StateGenerator object :param obs_transform: a callable that transforms an observation of the wrapped environment into goal space :param terminal_eps: a threshold of distance that determines if a goal is reached :param terminate_env: a boolean that controls if the environment is terminated with the goal is reached :param start_bounds: array marking the UB of the rectangular limit of goals. :param distance_metric: L1 or L2 or a callable func :param goal_weight: coef of the goal based reward :param inner_weight: coef of the inner environment reward :param append_transformed_obs: append the transformation of the current observation to full observation """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) StartEnv.__init__(self, *args, **kwargs) self.update_start_generator(start_generator) self.start_bounds = start_bounds self.only_feasible = only_feasible
def __init__(self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, running_obs=False, running_reward=False, obs_alpha=0.001, reward_alpha=0.001, initial_obs_mean=None, initial_obs_var=None, noise_indices=None): ProxyEnv.__init__(self, env) Serializable.quick_init(self, locals()) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._running_obs = running_obs self._running_reward = running_reward self._obs_alpha = obs_alpha self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1. self._noise_indices = noise_indices if initial_obs_mean is None: self._obs_mean = np.zeros(env.observation_space.flat_dim) else: self._obs_mean = initial_obs_mean if initial_obs_var is None: self._obs_var = np.ones(env.observation_space.flat_dim) else: self._obs_var = initial_obs_var
def __init__( self, env, #env_bob, #policy_bob, max_path_length, alice_bonus, alice_factor, gamma=0.1, stop_threshold=0.9, ring_spacing=1, init_iter=2 ): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) #self.env_bob = env_bob #self.policy_bob = policy_bob self.max_path_length = max_path_length self.time = 0 self.iter = init_iter self.ring_spacing = ring_spacing self.bob_speed = 0.1 #0.02 self.alice_bonus = alice_bonus self.alice_factor = alice_factor self.gamma = gamma self.stop_threshold = stop_threshold
def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): ProxyEnv.__init__(self, env) Serializable.quick_init(self, locals()) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.zeros(env.observation_space.flat_dim) self._obs_var = np.ones(env.observation_space.flat_dim) self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1. #anusha added these self.qpos_dim = env.qpos_dim self.qvel_dim = env.qvel_dim self.ctrl_dim = env.ctrl_dim self.qacc_dim = env.qacc_dim self.model = env.model
def __init__(self, env, max_episode_seconds=None, max_episode_steps=None): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self._max_episode_seconds = max_episode_seconds self._max_episode_steps = max_episode_steps self.env = env self._elapsed_steps = 0 self._episode_started_at = None
def __init__( self, env, reward_freq=5, ): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self._reward_freq = reward_freq self._delayed_reward = 0. self._delayed_step = 0
def __init__( self, env, obs_noise=1e-1, ): # super(NoisyObservationEnv, self).__init__(env) # Serializable.quick_init(self, locals()) Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self.obs_noise = obs_noise
def __init__( self, env, motor_controlled_actions=(0, 9), reset_time_steps=200, position_controlled_actions=None, ): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self.motor_controlled_actions = motor_controlled_actions self.reset_time_steps = reset_time_steps self.position_controlled_actions = position_controlled_actions
def __init__( self, env, time_steps_agg=1, discrete_actions=True, pkl_path=None, json_path=None, npz_path=None, animate=False, keep_rendered_rgb=False, ): """ :param env: Env to wrap, should have same robot characteristics than env where the policy where pre-trained on :param time_steps_agg: Time-steps during which the SNN policy is executed with fixed (discrete) latent :param discrete_actions: whether the policy are applied alone or with a linear combination :param pkl_path: path to pickled pre-training experiment that includes the pre-trained policy :param json_path: path to json of the pre-training experiment. Requires npz_paths of the policy params :param npz_path: only required when using json_path :param keep_rendered_rgb: the returned frac_paths include all rgb images (for plotting video after) """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self.time_steps_agg = time_steps_agg self.discrete_actions = discrete_actions self.animate = animate self.keep_rendered_rgb = keep_rendered_rgb if json_path: self.data = json.load(open(os.path.join(config.PROJECT_PATH, json_path), 'r')) self.low_policy_latent_dim = self.data['json_args']['policy']['latent_dim'] elif pkl_path: pkl_path = os.path.join(config.PROJECT_PATH, pkl_path) self.data = joblib.load(pkl_path) self.low_policy_latent_dim = self.data['policy'].latent_dim else: raise Exception("No path to file given") self.low_policy = GaussianMLPPolicy_snn_hier( env_spec=env.spec, env=env, pkl_path=pkl_path, json_path=json_path, npz_path=npz_path, trainable_snn=False, external_latent=True, ) # for openai baselines compatibility self.reward_range = (-float('inf'), float('inf')) self.metadata = None self.t = 0 self.r = 0
def __init__(self, difficulty=1.0, texturedir='/tmp/mujoco_textures', hfield_dir='/tmp/mujoco_terrains', regen_terrain=True, *args, **kwargs): Serializable.quick_init(self, locals()) self.difficulty = max(difficulty, self.MIN_DIFFICULTY) self.texturedir = texturedir self.hfield_dir = hfield_dir model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise Exception("MODEL_CLASS unspecified!") template_file_name = 'hill_' + model_cls.__module__.split( '.')[-1] + '.xml.mako' template_options = dict(difficulty=self.difficulty, texturedir=self.texturedir, hfield_file=os.path.join( self.hfield_dir, self.HFIELD_FNAME)) file_path = os.path.join(MODEL_DIR, template_file_name) lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR]) with open(file_path) as template_file: template = mako.template.Template(template_file.read(), lookup=lookup) content = template.render(opts=template_options) tmp_f, file_path = tempfile.mkstemp(text=True) with open(file_path, 'w') as f: f.write(content) if self._iam_terrain_generator(regen_terrain): self._gen_terrain(regen_terrain) os.remove(self._get_lock_path()) inner_env = model_cls(*args, file_path=file_path, **kwargs) # file to the robot specifications ProxyEnv.__init__( self, inner_env) # here is where the robot env will be initialized os.close(tmp_f)
def __init__( self, env, time_steps_agg=1, animate=False, keep_rendered_rgb=False, ): """ :param env: Env to wrap, should have same robot characteristics than env where the policy where pre-trained on :param time_steps_agg: Time-steps during which the SNN policy is executed with fixed (discrete) latent :param keep_rendered_rgb: the returned frac_paths include all rgb images (for plotting video after) """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self.time_steps_agg = time_steps_agg self.animate = animate self.keep_rendered_rgb = keep_rendered_rgb
def __init__( self, env, time_steps_agg=1, discrete_actions=True, pkl_path=None, json_path=None, npz_path=None, animate=False, keep_rendered_rgb=False, ): """ :param env: Env to wrap, should have same robot characteristics than env where the policy where pre-trained on :param time_steps_agg: Time-steps during which the SNN policy is executed with fixed (discrete) latent :param discrete_actions: whether the policy are applied alone or with a linear combination :param pkl_path: path to pickled pre-training experiment that includes the pre-trained policy :param json_path: path to json of the pre-training experiment. Requires npz_paths of the policy params :param npz_path: only required when using json_path :param keep_rendered_rgb: the returned frac_paths include all rgb images (for plotting video after) """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) # the inner env can be referred to as wrapped_env self.time_steps_agg = time_steps_agg self.discrete_actions = discrete_actions self.animate = animate self.keep_rendered_rgb = keep_rendered_rgb if json_path: self.data = json.load(open(os.path.join(config.PROJECT_PATH, json_path), 'r')) self.low_policy_latent_dim = self.data['json_args']['policy']['latent_dim'] elif pkl_path: pkl_path = os.path.join(config.PROJECT_PATH, pkl_path) self.data = joblib.load(pkl_path) self.low_policy_latent_dim = self.data['policy'].latent_dim else: raise Exception("No path to file given") self.low_policy = GaussianMLPPolicy_snn_restorable( env_spec=env.spec, env=env, #latent_dim=latent_dim, # restore from file latent_name='categorical', pkl_path=pkl_path, bilinear_integration=True, # concatenate also the outer product hidden_sizes=(64, 64), min_std=1e-6, )
def __init__(self, env, init_selector, goal_selector, append_goal=False, terminal_bonus=0, terminal_eps=0.1, distance_metric='L2', goal_reward='NegativeDistance', goal_weight=1, inner_weight=0, angle_idxs=(None, ), persistence=3): """ :param env: wrapped env NEEDS RESET WITH INIT_STATE arg! :param init_selector: already instantiated: NEEDS GOOD DIM OF GOALS! --> TO DO: give the class a method to update dim? :param terminal_bonus: if not 0, the rollout terminates with this bonus if the goal state is reached :param terminal_eps: eps around which the ter/minal goal is considered reached :param distance_metric: L1 or L2 or a callable func :param goal_reward: NegativeDistance or InverseDistance or callable func :param goal_weight: coef of the goal-dist reward :param inner_weight: coef of the inner reward """ Serializable.quick_init(self, locals()) self.append_goal = append_goal self.terminal_bonus = terminal_bonus self.terminal_eps = terminal_eps self._distance_metric = distance_metric self._goal_reward = goal_reward self.goal_weight = goal_weight self.inner_weight = inner_weight self.persistence = persistence self.persistence_count = 1 self.fig_number = 0 ProxyEnv.__init__(self, env) InitBaseAngle.__init__(self, angle_idxs=angle_idxs, init_selector=init_selector, goal_selector=goal_selector) ub = BIG * np.ones(self.get_current_obs().shape) self._observation_space = spaces.Box(ub * -1, ub) # keep around all sampled goals, one dict per training itr (ie, call of this env.log_diagnostics) self.inits_trained = []
def __init__( self, env, time_steps_agg=1, discrete_actions=True, pkl_paths=None, json_paths=None, npz_paths=None, animate=False, keep_rendered_rgb=False, ): """ :param env: Env to wrap, should have same robot characteristics than env where the policies where pretrained on :param time_steps_agg: Time-steps during which one of the policies is executed without external action possible :param discrete_actions: whether the policy are applied alone or with a linear combination :param pkl_paths: list of paths to pickled pre-training experiment that includes the pre-trained policy :param json_paths: list of paths to json of the pre-training experiment. Requires npz_paths of the policy params :param npz_paths: only required when using json_paths :param keep_rendered_rgb: the returned frac_paths include all rgb images (for plotting video after) """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self.time_steps_agg = time_steps_agg self.discrete_actions = discrete_actions self.animate = animate self.keep_rendered_rgb = keep_rendered_rgb if json_paths: self.low_policy_selector_dim = len(json_paths) elif pkl_paths: self.low_policy_selector_dim = len(pkl_paths) else: raise Exception("No path no file given") self.low_policy = GaussianMLPPolicy_multi_hier( env_spec=env.spec, env=env, pkl_paths=pkl_paths, json_paths=json_paths, npz_paths=npz_paths, trainable_old=False, external_selector=True, )
def __init__( self, env, ): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) # load policy that moves peg peg_policy = "/home/michael/rllab_goal_rl/data/policies/params.pkl" data = joblib.load(peg_policy) if "algo" in data: self.move_peg_agent = data["algo"].policy self.move_peg_env = data["algo"].env self.max_path_length = 200 # for moving peg self.amount_moved = 0.05 # locates base environment (i.e. environment that tries to move disk) self.base_env = self while hasattr(self.base_env, "wrapped_env"): self.base_env = self.base_env.wrapped_env
def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): ProxyEnv.__init__(self, env) Serializable.quick_init(self, locals()) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.zeros(env.observation_space.flat_dim) self._obs_var = np.ones(env.observation_space.flat_dim) self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1.
def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.zeros(env.observation_space.flat_dim) self._obs_var = np.ones(env.observation_space.flat_dim) self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1.
def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): #print "Normalized! I'm here!" Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) assert isinstance(env.action_space, UBSpace) self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.zeros(env.observation_space.flat_dim) self._obs_var = np.ones(env.observation_space.flat_dim) self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1.
def __init__( self, env, scale_reward=1., normalize_obs=False, normalize_reward=False, obs_alpha=0.001, reward_alpha=0.001, ): ProxyEnv.__init__(self, env) Serializable.quick_init(self, locals()) if not isinstance(env.action_space, Box): print("Environment not using continuous actions; action normalization skipped!") self._scale_reward = scale_reward self._normalize_obs = normalize_obs self._normalize_reward = normalize_reward self._obs_alpha = obs_alpha self._obs_mean = np.zeros(env.observation_space.flat_dim) self._obs_var = np.ones(env.observation_space.flat_dim) self._reward_alpha = reward_alpha self._reward_mean = 0. self._reward_var = 1.
def __init__(self, env_alice, env_bob, policy_bob, max_path_length, alice_bonus, alice_factor, gamma=0.1, stop_threshold=0.9, start_generation=True): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env_alice) self.env_bob = env_bob self.policy_bob = policy_bob self.max_path_length = max_path_length self.time = 0 self.alice_bonus = alice_bonus self.alice_factor = alice_factor self.gamma = gamma self.stop_threshold = stop_threshold self.start_generation = start_generation
def __init__(self, env, goal_generator, obs2goal_transform=None, terminal_eps=0.05, only_feasible=False, terminate_env=False, goal_bounds=None, distance_metric='L2', extend_dist_rew=0., goal_weight=1, inner_weight=0, append_transformed_obs=False, append_goal_to_observation=True, append_extra_info=False, relative_goal=False, include_goal_obs=True, **kwargs): """ This environment wraps around a normal environment to facilitate goal based exploration. Initial position based experiments should not use this class. :param env: wrapped env :param goal_generator: a StateGenerator object :param obs2goal_transform: a callable that transforms an observation of the wrapped environment into goal space :param terminal_eps: a threshold of distance that determines if a goal is reached :param terminate_env: a boolean that controls if the environment is terminated with the goal is reached :param goal_bounds: array marking the UB of the rectangular limit of goals. :param distance_metric: L1 or L2 or a callable func :param goal_weight: coef of the goal based reward :param inner_weight: coef of the inner environment reward :param append_transformed_obs: append the transformation of the current observation to full observation """ Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) # print( "ProxyEnv", str(ProxyEnv)) GoalEnv.__init__(self, **kwargs) self.update_goal_generator(goal_generator) if obs2goal_transform is None: self._obs2goal_transform = lambda x: x # needed for replay old policies [:2] else: self._obs2goal_transform = obs2goal_transform self.terminate_env = terminate_env self.goal_bounds = goal_bounds self.terminal_eps = terminal_eps self.only_feasible = only_feasible self.distance_metric = distance_metric self.extend_dist_rew_weight = extend_dist_rew self.goal_weight = goal_weight self.inner_weight = inner_weight self.append_transformed_obs = append_transformed_obs self.append_goal_to_observation = append_goal_to_observation self.append_extra_info = append_extra_info self.relative_goal = relative_goal self.include_goal_obs = include_goal_obs self.obs_dim = self.wrapped_env.observation_space.flat_dim # TODO fix this if self.goal_bounds is None: # print("setting goal bounds to match env") self.goal_bounds = self.wrapped_env.observation_space.bounds[ 1] # we keep only UB self._feasible_goal_space = self.wrapped_env.observation_space else: self._feasible_goal_space = Box(low=-1 * self.goal_bounds, high=self.goal_bounds)
def __init__( self, n_bins=20, sensor_range=10., sensor_span=math.pi, maze_id=0, length=1, maze_height=0.5, maze_size_scaling=4, coef_inner_rew=0., # a coef of 0 gives no reward to the maze from the wrapped env. goal_rew=1000., # reward obtained when reaching the goal death_reward=0., random_start=False, direct_goal=False, velocity_field=True, visualize_goal=False, *args, **kwargs): Serializable.quick_init(self, locals()) self._n_bins = n_bins self._sensor_range = sensor_range self._sensor_span = sensor_span self._maze_id = maze_id self.length = length self.coef_inner_rew = coef_inner_rew self.goal_rew = goal_rew self.death_reward = death_reward self.direct_goal = direct_goal self.velocity_field = velocity_field self.algo = None # will be added in set_algo! self.visualize_goal = visualize_goal model_cls = self.__class__.MODEL_CLASS # print("model_cls", model_cls) if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) # print("xml_path", xml_path) self.tree = tree = ET.parse(xml_path) self.worldbody = worldbody = tree.find(".//worldbody") self.MAZE_HEIGHT = height = maze_height self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling self.init_actual_x = self.init_actual_y = 0 self.random_start = random_start if self.random_start: # in kwargs # define: actual coordinates: origin at center of struct (3, 1) # define: mujoco coordinates: origin at the first ant self.pool, self.p, structure, x_relative, y_relative \ = construct_maze_random(maze_id=self._maze_id, length=self.length) # x, y_relative: x, y index in struct, relative to (3, 1) self.MAZE_STRUCTURE = structure self.init_actual_x = y_relative * self.MAZE_SIZE_SCALING # map x direction is list y direction! self.init_actual_y = x_relative * self.MAZE_SIZE_SCALING # print('self init:', self.init_actual_x, self.init_actual_y) self.x_r_prev, self.y_r_prev = x_relative + 3, y_relative + 1 # maintain the previous map for later update for i, tmp_line in enumerate(structure): for j, tmp_member in enumerate(tmp_line): if tmp_member == 'g': self.x_g_prev, self.y_g_prev = i, j break # self.x_g_prev, self.y_g_prev = 3, 3 # hard code here! - (1, j) in big maze structure in maze_env_utils.py self.x_g, self.y_g = self.x_g_prev, self.y_g_prev else: self.x_r_prev, self.y_r_prev = 3, 1 self.x_g_prev, self.y_g_prev = 1, 1 self.MAZE_STRUCTURE = structure = construct_maze( maze_id=self._maze_id, length=self.length) self.tot = 0 torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x # this is not the actual init x, just convenient for goal pos computing self._init_torso_y = torso_y # torso pos in index coords! self.init_torso_x = self._init_torso_x # make visible from outside self.init_torso_y = self._init_torso_y for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': # offset all coordinates so that robot starts at the origin ET.SubElement( worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - torso_x, i * size_scaling - torso_y, height / 2 * size_scaling), size="%f %f %f" % (0.5 * size_scaling, 0.5 * size_scaling, height / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="0.4 0.4 0.4 1") if self.visualize_goal and str( structure[i][j] ) == 'g': # visualize goal! uncomment this block when testing! # offset all coordinates so that robot starts at the origin self.goal_element =\ ET.SubElement( self.worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - self._init_torso_x, i * size_scaling - self._init_torso_y, self.MAZE_HEIGHT / 2 * size_scaling), size="%f %f %f" % (0.2 * size_scaling, 0.2 * size_scaling, self.MAZE_HEIGHT / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="1.0 0.0 0.0 0.5" ) self.args = args self.kwargs = kwargs torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: # print("geom", geom.attrib) if 'name' not in geom.attrib: raise Exception("Every geom of the torso must have a name " "defined") if self.__class__.MAZE_MAKE_CONTACTS: contact = ET.SubElement(tree.find("."), "contact") for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': for geom in geoms: ET.SubElement(contact, "pair", geom1=geom.attrib["name"], geom2="block_%d_%d" % (i, j)) _, file_path = tempfile.mkstemp(text=True) tree.write( file_path ) # here we write a temporal file with the robot specifications. Why not the original one?? self._goal_range = self._find_goal_range() self.goal = np.array([(self._goal_range[0] + self._goal_range[1]) / 2, (self._goal_range[2] + self._goal_range[3]) / 2]) # print ("goal_range", self._goal_range) # print("x_y", self.wrapped_env.model.data.qpos.flat[0:2]) # print("goal", self.goal) self._cached_segments = None self.gradient_pool = [ (1, 0), (0.707, 0.707), (0, 1), (-0.707, 0.707), (-1, 0), (-0.707, -0.707), (0, -1), (0.707, -0.707) ] # added gradient pool for train_low_with_v_gradient inner_env = model_cls( *args, file_path=file_path, **kwargs) # file to the robot specifications; model_cls is AntEnv ProxyEnv.__init__( self, inner_env) # here is where the robot env will be initialized
def reset(self): # randomize starting pos! if self.random_start: # in kwargs random_i = np.random.choice(len(self.pool), p=self.p) # random i for r x_r, y_r = self.pool[random_i] x_g, y_g = self.x_g_prev, self.y_g_prev random_i = np.random.choice(len(self.pool), p=self.p) # new random i for g if self._maze_id == 11: # big maze, goal should also be randomly sampled! x_g, y_g = self.pool[random_i] while (x_g == x_r) and (y_g == y_r): # shouldn't overlap! random_i = np.random.choice(len(self.pool), p=self.p) x_g, y_g = self.pool[random_i] # x_r, y_r = 3, 1 # uncomment for test # print('robot:', x_r, y_r) # print('goal:', x_g, y_g) self.MAZE_STRUCTURE[self.x_g_prev][self.y_g_prev] = 0 self.MAZE_STRUCTURE[x_g][y_g] = 'g' self.x_g_prev, self.y_g_prev = x_g, y_g self._goal_range = self._find_goal_range() self.goal = np.array([ (self._goal_range[0] + self._goal_range[1]) / 2, (self._goal_range[2] + self._goal_range[3]) / 2 ]) # # (3, 1) was the initial choice! self.MAZE_STRUCTURE[self.x_r_prev][self.y_r_prev] = 0 self.x_r_prev, self.y_r_prev = x_r, y_r self.MAZE_STRUCTURE[x_r][y_r] = 'r' # update maze # # print("x_r, y_r", x_r, y_r) # x_relative = x_r - 3 # the x index relative to (0, 0) # y_relative = y_r - 1 # self.init_actual_x = y_relative * self.MAZE_SIZE_SCALING # map x direction is list y direction! # self.init_actual_y = x_relative * self.MAZE_SIZE_SCALING # # print(self.MAZE_STRUCTURE) # # print('self init:', self.init_actual_x, self.init_actual_y) x_new_actual = (y_r - 1) * self.MAZE_SIZE_SCALING y_new_actual = (x_r - 3) * self.MAZE_SIZE_SCALING x_new_mujoco = x_new_actual - self.init_actual_x y_new_mujoco = y_new_actual - self.init_actual_y qpos = [ x_new_mujoco, y_new_mujoco, 0.75, 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0. ] qvel = [0.] * 14 # make sure these should be set to 0.? qacc = [0.] * 14 ctrl = [0.] * 14 init_state = qpos + qvel + qacc + ctrl self.wrapped_env.reset( init_state=init_state ) # wapped_env is the inner_env in __init__() # in mujoco_env.py, reset(self, init_state=None) # print(x_r, y_r) if self.visualize_goal: # remove the prev goal and add a new goal i, j = x_g, y_g size_scaling = self.MAZE_SIZE_SCALING # remove the original goal try: self.worldbody.remove(self.goal_element) except AttributeError: pass # offset all coordinates so that robot starts at the origin self.goal_element = \ ET.SubElement( self.worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - self._init_torso_x, i * size_scaling - self._init_torso_y, self.MAZE_HEIGHT / 2 * size_scaling), size="%f %f %f" % (0.2 * size_scaling, # smaller than the block to prevent collision 0.2 * size_scaling, self.MAZE_HEIGHT / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="1.0 0.0 0.0 0.5" ) # Note: running the lines below will make the robot position wrong! (because the graph is rebuilt) torso = self.tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: # print("geom", geom.attrib) if 'name' not in geom.attrib: raise Exception( "Every geom of the torso must have a name " "defined") _, file_path = tempfile.mkstemp(text=True) self.tree.write( file_path ) # here we write a temporal file with the robot specifications. Why not the original one?? model_cls = self.__class__.MODEL_CLASS inner_env = model_cls( *self.args, file_path=file_path, **self.kwargs ) # file to the robot specifications; model_cls is AntEnv ProxyEnv.__init__( self, inner_env ) # here is where the robot env will be initialized else: self.wrapped_env.reset() return self.get_current_obs()
def __init__(self, n_apples=8, n_bombs=8, activity_range=12., robot_object_spacing=5., catch_range=1., n_bins=10, sensor_range=12., sensor_span=2 * math.pi, coef_inner_rew=0., dying_cost=0., *args, **kwargs): Serializable.quick_init(self, locals()) self.n_apples = n_apples self.n_bombs = n_bombs self.activity_range = activity_range self.robot_object_spacing = robot_object_spacing self.catch_range = catch_range self.n_bins = n_bins self.sensor_range = sensor_range self.sensor_span = sensor_span self.coef_inner_rew = coef_inner_rew self.dying_cost = dying_cost self.objects = [] self.viewer = None # super(GatherEnv, self).__init__(*args, **kwargs) model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") attrs = dict(type="box", conaffinity="1", rgba="0.8 0.9 0.8 1", condim="3") # import pdb;pdb.set_trace() walldist = self.activity_range + 1 ET.SubElement( worldbody, "geom", dict(attrs, name="wall1", pos="0 -%d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall2", pos="0 %d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall3", pos="-%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall4", pos="%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) # pylint: disable=not-callable inner_env = model_cls(*args, file_path=file_path, **kwargs) # pylint: enable=not-callable ProxyEnv.__init__( self, inner_env) # to access the inner env, do self.wrapped_env
def __init__( self, n_apples=8, n_bombs=8, activity_range=6., robot_object_spacing=2., catch_range=1., n_bins=10, sensor_range=6., sensor_span=math.pi, coef_inner_rew=0., dying_cost=-10, *args, **kwargs ): Serializable.quick_init(self, locals()) self.n_apples = n_apples self.n_bombs = n_bombs self.activity_range = activity_range self.robot_object_spacing = robot_object_spacing self.catch_range = catch_range self.n_bins = n_bins self.sensor_range = sensor_range self.sensor_span = sensor_span self.coef_inner_rew = coef_inner_rew self.dying_cost = dying_cost self.objects = [] self.viewer = None # super(GatherEnv, self).__init__(*args, **kwargs) model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") attrs = dict( type="box", conaffinity="1", rgba="0.8 0.9 0.8 1", condim="3" ) walldist = self.activity_range + 1 ET.SubElement( worldbody, "geom", dict( attrs, name="wall1", pos="0 -%d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict( attrs, name="wall2", pos="0 %d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict( attrs, name="wall3", pos="-%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict( attrs, name="wall4", pos="%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) # pylint: disable=not-callable inner_env = model_cls(*args, file_path=file_path, **kwargs) # pylint: enable=not-callable ProxyEnv.__init__(self, inner_env) # to access the inner env, do self.wrapped_env
def __init__( self, # goal_generator, n_bins=20, sensor_range=10., sensor_span=math.pi, maze_id=0, length=1, maze_height=0.5, maze_size_scaling=2, coef_inner_rew=1., # a coef of 0 gives no reward to the maze from the wrapped env. # goal_rew=1., # reward obtained when reaching the goal include_maze_obs=False, *args, **kwargs): Serializable.quick_init(self, locals()) self._n_bins = n_bins self._sensor_range = sensor_range self._sensor_span = sensor_span self._maze_id = maze_id self.length = length self.coef_inner_rew = coef_inner_rew # self.goal_rew = goal_rew self.include_maze_obs = include_maze_obs model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") self.MAZE_HEIGHT = height = maze_height self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling self.MAZE_STRUCTURE = structure = construct_maze(maze_id=self._maze_id, length=self.length) if self._maze_id == 0: self.LINEARIZED = MazeEnv.MAZE_0 elif self._maze_id == 11: self.LINEARIZED = MazeEnv.MAZE_11 elif self._maze_id == 13: self.LINEARIZED = MazeEnv.MAZE_13 elif self._maze_id == 14: self.LINEARIZED = MazeEnv.MAZE_14 else: self.LINEARIZED = None torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x self._init_torso_y = torso_y for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': # offset all coordinates so that robot starts at the origin ET.SubElement( worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - torso_x, i * size_scaling - torso_y, height / 2 * size_scaling), size="%f %f %f" % (0.5 * size_scaling, 0.5 * size_scaling, height / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="0.4 0.4 0.4 0.5") torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: if 'name' not in geom.attrib: raise Exception("Every geom of the torso must have a name " "defined") segments = [] # Get all line segments of the goal and the obstacles for i in range(len(structure)): for j in range(len(structure[0])): if structure[i][j] == 1 or structure[i][j] == 'g': cx = j * size_scaling - self._init_torso_x cy = i * size_scaling - self._init_torso_y x1 = cx - 0.5 * size_scaling x2 = cx + 0.5 * size_scaling y1 = cy - 0.5 * size_scaling y2 = cy + 0.5 * size_scaling struct_segments = [ ((x1, y1), (x2, y1)), ((x2, y1), (x2, y2)), ((x2, y2), (x1, y2)), ((x1, y2), (x1, y1)), ] for seg in struct_segments: segments.append( dict( segment=seg, type=structure[i][j], )) self.segments = segments if self.__class__.MAZE_MAKE_CONTACTS: contact = ET.SubElement(tree.find("."), "contact") for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': for geom in geoms: ET.SubElement(contact, "pair", geom1=geom.attrib["name"], geom2="block_%d_%d" % (i, j)) _, file_path = tempfile.mkstemp(text=True) tree.write( file_path ) # here we write a temporal file with the robot specifications. Why not the original one?? self._goal_range = self._find_goal_range() self._cached_segments = None inner_env = model_cls(file_path=file_path, *args, **kwargs) # file to the robot specifications ProxyEnv.__init__( self, inner_env) # here is where the robot env will be initialized
def __init__( self, n_bins=20, sensor_range=10., sensor_span=math.pi, *args, **kwargs): self._n_bins = n_bins self._sensor_range = sensor_range self._sensor_span = sensor_span model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") size_scaling = self.__class__.MAZE_SIZE_SCALING height = self.__class__.MAZE_HEIGHT structure = self.__class__.MAZE_STRUCTURE torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x self._init_torso_y = torso_y for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': # offset all coordinates so that robot starts at the origin ET.SubElement( worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - torso_x, i * size_scaling - torso_y, height / 2 * size_scaling), size="%f %f %f" % (0.5 * size_scaling, 0.5 * size_scaling, height / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="0.4 0.4 0.4 1" ) torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: if 'name' not in geom.attrib: raise Exception("Every geom of the torso must have a name " "defined") if self.__class__.MAZE_MAKE_CONTACTS: contact = ET.SubElement( tree.find("."), "contact" ) for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': for geom in geoms: ET.SubElement( contact, "pair", geom1=geom.attrib["name"], geom2="block_%d_%d" % (i, j) ) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) self._goal_range = self._find_goal_range() self._cached_segments = None inner_env = model_cls(*args, file_path=file_path, **kwargs) ProxyEnv.__init__(self, inner_env) Serializable.quick_init(self, locals())
def __init__( self, # added radius=100, radius_decay=0, radius_mode='ball', ball_radius=0.5, action_noise_std=0.1, action_noise_discount=0.98, # effective dynamism time-horizon of (1-discount)^{-1} timesteps action_noise_mode=None, n_apples=8, n_bombs=8, activity_range=6., robot_object_spacing=2., catch_range=1., n_bins=10, sensor_range=6., sensor_span=math.pi, coef_inner_rew=0., dying_cost=-10, *args, **kwargs): Serializable.quick_init(self, locals()) # added self.t = 0 self.radius = radius self.radius_decay = radius_decay self.radius_mode = radius_mode self.ball_radius = ball_radius self.action_noise_std = action_noise_std self.action_noise_discount = action_noise_discount self.action_noise_mode = action_noise_mode self.n_apples = n_apples self.n_bombs = n_bombs self.activity_range = activity_range self.robot_object_spacing = robot_object_spacing self.catch_range = catch_range self.n_bins = n_bins self.sensor_range = sensor_range self.sensor_span = sensor_span self.coef_inner_rew = coef_inner_rew self.dying_cost = dying_cost self.objects = [] self.viewer = None # super(GatherEnv, self).__init__(*args, **kwargs) model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") attrs = dict(type="box", conaffinity="1", rgba="0.8 0.9 0.8 1", condim="3") walldist = self.activity_range + 1 ET.SubElement( worldbody, "geom", dict(attrs, name="wall1", pos="0 -%d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall2", pos="0 %d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall3", pos="-%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall4", pos="%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) # pylint: disable=not-callable inner_env = model_cls(*args, file_path=file_path, **kwargs) # pylint: enable=not-callable ProxyEnv.__init__( self, inner_env) # to access the inner env, do self.wrapped_env
def __init__(self, nonlinear_reward=False, max_path_length_range=None, random_start_range=None, show_limit=True, lim_size=20, lim_height=2, xlim=10, abs_lim=False, circle_mode=False, target_dist=1., max_ep_len=65, *args, **kwargs): self._nonlinear_reward = nonlinear_reward self._max_path_length_range = max_path_length_range self._random_start_range = random_start_range self._step = 0 self._circle_mode = circle_mode self._target_dist = target_dist self._xlim = xlim self._max_ep_len = max_ep_len model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) if show_limit: tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") ET.SubElement(worldbody, "geom", name="finish_line", pos="%f %f %f" % (xlim, 0, lim_height / 2), size="%f %f %f" % (0.1, lim_size, lim_height), type="box", material="", contype="0", conaffinity="0", rgba="0.1 0.1 0.8 0.4") if abs_lim: ET.SubElement(worldbody, "geom", name="finish_line2", pos="%f %f %f" % (-xlim, 0, lim_height / 2), size="%f %f %f" % (0.1, lim_size, lim_height), type="box", material="", contype="0", conaffinity="0", rgba="0.1 0.1 0.8 0.4") _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) else: file_path = xml_path inner_env = model_cls(*args, file_path=file_path, **kwargs) Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, inner_env) # new code for caching obs / act space shp = self.get_current_obs().shape ub = BIG * np.ones(shp) self._cached_observation_space = spaces.Box(ub * -1, ub) bounds = self._wrapped_env.model.actuator_ctrlrange lb = bounds[:, 0] ub = bounds[:, 1] self._cached_action_space = spaces.Box(lb, ub)
def __init__(self, env, delay=0.01): Serializable.quick_init(self, locals()) ProxyEnv.__init__(self, env) self._delay = delay
def __init__(self, n_apples=8, n_bombs=8, activity_range=6., robot_object_spacing=2., catch_range=1., n_bins=10, sensor_range=6., sensor_span=math.pi, coef_inner_rew=0., dying_cost=-10, *args, **kwargs): Serializable.quick_init(self, locals()) self.n_apples = n_apples self.n_bombs = n_bombs self.activity_range = activity_range self.robot_object_spacing = robot_object_spacing self.catch_range = catch_range self.n_bins = n_bins self.sensor_range = sensor_range self.sensor_span = sensor_span self.coef_inner_rew = coef_inner_rew self.dying_cost = dying_cost self.objects = [] self.viewer = None # for openai baseline self.reward_range = (-float('inf'), float('inf')) self.metadata = None self.t = 0 self.r = 0 # super(GatherEnv, self).__init__(*args, **kwargs) model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") attrs = dict(type="box", conaffinity="1", rgba="0.8 0.9 0.8 1", condim="3") walldist = self.activity_range + 1 ET.SubElement( worldbody, "geom", dict(attrs, name="wall1", pos="0 -%d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall2", pos="0 %d 0" % walldist, size="%d.5 0.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall3", pos="-%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) ET.SubElement( worldbody, "geom", dict(attrs, name="wall4", pos="%d 0 0" % walldist, size="0.5 %d.5 1" % walldist)) # _, file_path = tempfile.mkstemp(text=True) #todo: note that this is different from snn4hrl default file_path = osp.join( config.PROJECT_PATH, "sandbox/snn4hrl/envs/mujoco/gather/mujoco_models/" + model_cls.FILE.split(".")[0] + "_gather.xml") if not osp.exists(file_path): # create file if not there with open(file_path, 'w+'): pass tree.write(file_path) # pylint: disable=not-callable inner_env = model_cls( *args, file_path=file_path, **kwargs) # giving problems because of this weird tempfile # pylint: enable=not-callable ProxyEnv.__init__( self, inner_env) # to access the inner env, do self.wrapped_env
def __init__(self, n_bins=20, sensor_range=10., sensor_span=math.pi, *args, **kwargs): self._n_bins = n_bins self._sensor_range = sensor_range self._sensor_span = sensor_span model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") size_scaling = self.__class__.MAZE_SIZE_SCALING height = self.__class__.MAZE_HEIGHT structure = self.__class__.MAZE_STRUCTURE torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x self._init_torso_y = torso_y for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': # offset all coordinates so that robot starts at the origin ET.SubElement( worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - torso_x, i * size_scaling - torso_y, height / 2 * size_scaling), size="%f %f %f" % (0.5 * size_scaling, 0.5 * size_scaling, height / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="0.4 0.4 0.4 1") torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: if 'name' not in geom.attrib: raise Exception("Every geom of the torso must have a name " "defined") if self.__class__.MAZE_MAKE_CONTACTS: contact = ET.SubElement(tree.find("."), "contact") for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': for geom in geoms: ET.SubElement(contact, "pair", geom1=geom.attrib["name"], geom2="block_%d_%d" % (i, j)) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) self._goal_range = self._find_goal_range() self._cached_segments = None inner_env = model_cls(*args, file_path=file_path, **kwargs) ProxyEnv.__init__(self, inner_env) Serializable.quick_init(self, locals())
def __init__( self, n_bins=20, sensor_range=10., sensor_span=math.pi, maze_id=11, length=1, maze_height=0.5, maze_size_scaling=2, coef_inner_rew=0., # a coef of 0 gives no reward to the maze from the wrapped env. goal_rew=1., # reward obtained when reaching the goal *args, **kwargs): print(kwargs) print(locals()) Serializable.quick_init(self, locals()) Serializable.quick_init(self, locals()) self.name = 'maze' self._n_bins = n_bins self._sensor_range = sensor_range self._sensor_span = sensor_span self._maze_id = maze_id self.length = length self.coef_inner_rew = coef_inner_rew self.goal_rew = goal_rew self.ep_length = 200 self.ep_count = 0 self.reward_range = (-np.inf, np.inf) model_cls = self.__class__.MODEL_CLASS if model_cls is None: raise "MODEL_CLASS unspecified!" xml_path = osp.join(MODEL_DIR, model_cls.FILE) tree = ET.parse(xml_path) worldbody = tree.find(".//worldbody") self.MAZE_HEIGHT = height = maze_height self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling structure = [ [1, 1, 1, 1, 1, 1, 1], [1, 0, 0, 0, 0, 0, 1], [1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 1, 'r', 1], [1, 'g', 1, 0, 1, 0, 1], [1, 0, 0, 0, 0, 0, 1], [1, 1, 1, 1, 1, 1, 1], ] self.MAZE_STRUCTURE = structure # = construct_maze(maze_id=self._maze_id, length=self.length) torso_x, torso_y = self._find_robot() self._init_torso_x = torso_x self._init_torso_y = torso_y for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': # offset all coordinates so that robot starts at the origin ET.SubElement( worldbody, "geom", name="block_%d_%d" % (i, j), pos="%f %f %f" % (j * size_scaling - torso_x, i * size_scaling - torso_y, height / 2 * size_scaling), size="%f %f %f" % (0.5 * size_scaling, 0.5 * size_scaling, height / 2 * size_scaling), type="box", material="", contype="1", conaffinity="1", rgba="0.4 0.4 0.4 1" ) torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") for geom in geoms: if 'name' not in geom.attrib: raise Exception("Every geom of the torso must have a name " "defined") if self.__class__.MAZE_MAKE_CONTACTS: contact = ET.SubElement( tree.find("."), "contact" ) for i in range(len(structure)): for j in range(len(structure[0])): if str(structure[i][j]) == '1': for geom in geoms: ET.SubElement( contact, "pair", geom1=geom.attrib["name"], geom2="block_%d_%d" % (i, j) ) _, file_path = tempfile.mkstemp(text=True) tree.write(file_path) # here we write a temporal file with the robot specifications. Why not the original one?? self._goal_range = self._find_goal_range() self._cached_segments = None inner_env = model_cls(*args, file_path=file_path, **kwargs) # file to the robot specifications ProxyEnv.__init__(self, inner_env) # here is where the robot env will be initialized