Example #1
 def __init__(
     self,
     env,
     scale_reward=1.,
     normalize_obs=False,
     normalize_reward=False,
     obs_alpha=0.001,
     reward_alpha=0.001,
 ):
     ProxyEnv.__init__(self, env)
     Serializable.quick_init(self, locals())
     if not isinstance(env.action_space, Box):
         print(
             "Environment not using continuous actions; action normalization skipped!"
         )
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.array(
         env.observation_space.zeros
     )  #np.zeros(env.observation_space._dim)#.flat_dim
     self._obs_var = np.array(
         env.observation_space.ones
     )  #np.ones(env.observation_space._dim)#.flat_dim
     self._reward_alpha = reward_alpha
     self._reward_mean = np.zeros((self._obs_mean.shape[0], 1), dtype=float)
     self._reward_var = np.ones((self._obs_mean.shape[0], 1), dtype=float)
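The running statistics set up above (`_obs_alpha`, `_obs_mean`, `_obs_var`) are normally updated with an exponential moving average and used to whiten observations. A minimal sketch of that pattern, assuming numpy is imported as `np`; the helper names `_update_obs_estimate` / `_apply_normalize_obs` are illustrative, not part of the snippet:

    def _update_obs_estimate(self, obs):
        # exponential moving average of mean and variance, weighted by _obs_alpha
        flat_obs = np.asarray(obs).flatten()
        self._obs_mean = (1 - self._obs_alpha) * self._obs_mean + self._obs_alpha * flat_obs
        self._obs_var = (1 - self._obs_alpha) * self._obs_var + \
            self._obs_alpha * np.square(flat_obs - self._obs_mean)

    def _apply_normalize_obs(self, obs):
        self._update_obs_estimate(obs)
        return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8)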
Example #2
    def __init__(self,
                 env,
                 start_generator,
                 only_feasible=False,
                 start_bounds=None,
                 *args,
                 **kwargs):
        """
        This environment wraps around a normal environment to facilitate goal based exploration.
        Initial position based experiments should not use this class.
        
        :param env: wrapped env
        :param start_generator: a StateGenerator object
        :param obs_transform: a callable that transforms an observation of the wrapped environment into goal space
        :param terminal_eps: a threshold of distance that determines if a goal is reached
        :param terminate_env: a boolean that controls if the environment is terminated when the goal is reached
        :param start_bounds: array marking the upper bound of the rectangular limit of goals
        :param distance_metric: L1 or L2 or a callable func
        :param goal_weight: coef of the goal based reward
        :param inner_weight: coef of the inner environment reward
        :param append_transformed_obs: append the transformation of the current observation to full observation
        """
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        StartEnv.__init__(self, *args, **kwargs)
        self.update_start_generator(start_generator)

        self.start_bounds = start_bounds
        self.only_feasible = only_feasible
Example #3
    def __init__(
            self,
            env,
            #env_bob,
            #policy_bob,
            max_path_length,
            alice_bonus,
            alice_factor,
            gamma=0.1,
            stop_threshold=0.9,
            ring_spacing=1,
            init_iter=2
    ):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        #self.env_bob = env_bob
        #self.policy_bob = policy_bob
        self.max_path_length = max_path_length
        self.time = 0
        self.iter = init_iter
        self.ring_spacing = ring_spacing
        self.bob_speed = 0.1 #0.02

        self.alice_bonus = alice_bonus
        self.alice_factor = alice_factor
        self.gamma = gamma
        self.stop_threshold = stop_threshold
Example #4
 def __init__(
     self,
     env,
     scale_reward=1.,
     normalize_obs=False,
     normalize_reward=False,
     obs_alpha=0.001,
     reward_alpha=0.001,
 ):
     ProxyEnv.__init__(self, env)
     Serializable.quick_init(self, locals())
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
     #anusha added these
     self.qpos_dim = env.qpos_dim
     self.qvel_dim = env.qvel_dim
     self.ctrl_dim = env.ctrl_dim
     self.qacc_dim = env.qacc_dim
     self.model = env.model
Example #5
    def __init__(self,
                 env,
                 scale_reward=1.,
                 normalize_obs=False,
                 normalize_reward=False,
                 running_obs=False,
                 running_reward=False,
                 obs_alpha=0.001,
                 reward_alpha=0.001,
                 initial_obs_mean=None,
                 initial_obs_var=None,
                 noise_indices=None):
        ProxyEnv.__init__(self, env)
        Serializable.quick_init(self, locals())
        self._scale_reward = scale_reward
        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._running_obs = running_obs
        self._running_reward = running_reward
        self._obs_alpha = obs_alpha
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1.

        self._noise_indices = noise_indices

        if initial_obs_mean is None:
            self._obs_mean = np.zeros(env.observation_space.flat_dim)
        else:
            self._obs_mean = initial_obs_mean
        if initial_obs_var is None:
            self._obs_var = np.ones(env.observation_space.flat_dim)
        else:
            self._obs_var = initial_obs_var
Example #6
    def __init__(self, env, max_episode_seconds=None, max_episode_steps=None):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self._max_episode_seconds = max_episode_seconds
        self._max_episode_steps = max_episode_steps

        self.env = env
        self._elapsed_steps = 0
        self._episode_started_at = None
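A sketch of the step logic such a time-limit wrapper typically pairs with, terminating the episode once `_max_episode_steps` or `_max_episode_seconds` is exceeded (the `_past_limit` helper is an assumption, and `time` is assumed to be imported):

    def _past_limit(self):
        if self._max_episode_steps is not None and self._elapsed_steps >= self._max_episode_steps:
            return True
        if (self._max_episode_seconds is not None and self._episode_started_at is not None
                and time.time() - self._episode_started_at >= self._max_episode_seconds):
            return True
        return False

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        self._elapsed_steps += 1
        if self._past_limit():
            done = True
        return observation, reward, done, info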
Example #7
 def __init__(
     self,
     env,
     reward_freq=5,
 ):
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self._reward_freq = reward_freq
     self._delayed_reward = 0.
     self._delayed_step = 0
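A minimal sketch of how the fields above could be used: rewards accumulate in `_delayed_reward` and are only released every `_reward_freq` steps or at episode end (this step method is an assumption, not part of the original snippet):

    def step(self, action):
        observation, reward, done, info = self.wrapped_env.step(action)
        self._delayed_reward += reward
        self._delayed_step += 1
        if self._delayed_step >= self._reward_freq or done:
            reward = self._delayed_reward
            self._delayed_reward = 0.
            self._delayed_step = 0
        else:
            reward = 0.
        return observation, reward, done, info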
Example #8
    def __init__(
        self,
        env,
        obs_noise=1e-1,
    ):
        # super(NoisyObservationEnv, self).__init__(env)
        # Serializable.quick_init(self, locals())

        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self.obs_noise = obs_noise
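The constructor above only stores `obs_noise`; a plausible sketch of how the noise would get injected into observations (treat these method bodies as assumptions):

    def inject_obs_noise(self, obs):
        # zero-mean Gaussian noise scaled by obs_noise
        return obs + self.obs_noise * np.random.normal(size=np.asarray(obs).shape)

    def step(self, action):
        next_obs, reward, done, info = self.wrapped_env.step(action)
        return self.inject_obs_noise(next_obs), reward, done, info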
Example #9
 def __init__(
     self,
     env,
     motor_controlled_actions=(0, 9),
     reset_time_steps=200,
     position_controlled_actions=None,
 ):
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self.motor_controlled_actions = motor_controlled_actions
     self.reset_time_steps = reset_time_steps
     self.position_controlled_actions = position_controlled_actions
Example #10
    def __init__(
            self,
            env,
            time_steps_agg=1,
            discrete_actions=True,
            pkl_path=None,
            json_path=None,
            npz_path=None,
            animate=False,
            keep_rendered_rgb=False,
    ):
        """
        :param env: Env to wrap; it should have the same robot characteristics as the env the policy was pre-trained on
        :param time_steps_agg: number of time-steps during which the SNN policy is executed with a fixed (discrete) latent
        :param discrete_actions: whether the policies are applied alone (discrete) or as a linear combination
        :param pkl_path: path to the pickled pre-training experiment that includes the pre-trained policy
        :param json_path: path to the json of the pre-training experiment; requires npz_path with the policy params
        :param npz_path: only required when using json_path
        :param keep_rendered_rgb: whether the returned frac_paths include all rgb images (for plotting a video afterwards)
        """
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self.time_steps_agg = time_steps_agg
        self.discrete_actions = discrete_actions
        self.animate = animate
        self.keep_rendered_rgb = keep_rendered_rgb
        if json_path:
            self.data = json.load(open(os.path.join(config.PROJECT_PATH, json_path), 'r'))
            self.low_policy_latent_dim = self.data['json_args']['policy']['latent_dim']
        elif pkl_path:
            pkl_path = os.path.join(config.PROJECT_PATH, pkl_path)
            self.data = joblib.load(pkl_path)
            self.low_policy_latent_dim = self.data['policy'].latent_dim
        else:
            raise Exception("No path to file given")

        self.low_policy = GaussianMLPPolicy_snn_hier(
            env_spec=env.spec,
            env=env,
            pkl_path=pkl_path,
            json_path=json_path,
            npz_path=npz_path,
            trainable_snn=False,
            external_latent=True,
        )

        # for openai baselines compatibility
        self.reward_range = (-float('inf'), float('inf'))
        self.metadata = None
        self.t = 0
        self.r = 0
Example #11
    def reset(self,
              reset_goal=True,
              **kwargs):  # allows to pass init_state if needed
        if reset_goal:
            self.update_goal()

        ret = ProxyEnv.reset(self, goal=self.current_goal, **kwargs)

        self.init_goal_obs = self._obs2goal_transform(ret)
        #default behavior
        if self.append_goal_to_observation:
            ret = self.append_goal_observation(
                ret)  # the wrapped env needs to use or ignore it

        if not self.include_goal_obs:
            ret = ret[2:]

        # used by disk environment # todo: make more generalizable!  NOT USABLE FOR OTHER ENVS!!
        if 'init_state' in kwargs and len(kwargs['init_state']) == 9:
            delta = tuple(kwargs['init_state']
                          [-2:])  # joint position is in terms of amount moved
            original_goal = self.wrapped_env.model.data.site_xpos[-1]
            new_goal = delta[0] + original_goal[0], delta[1] + original_goal[
                1], original_goal[2]  # z dim unchanged
            self.update_goal(new_goal)
        return ret
Example #12
    def __init__(
            self,
            env,
            time_steps_agg=1,
            discrete_actions=True,
            pkl_path=None,
            json_path=None,
            npz_path=None,
            animate=False,
            keep_rendered_rgb=False,
    ):
        """
        :param env: Env to wrap; it should have the same robot characteristics as the env the policy was pre-trained on
        :param time_steps_agg: number of time-steps during which the SNN policy is executed with a fixed (discrete) latent
        :param discrete_actions: whether the policies are applied alone (discrete) or as a linear combination
        :param pkl_path: path to the pickled pre-training experiment that includes the pre-trained policy
        :param json_path: path to the json of the pre-training experiment; requires npz_path with the policy params
        :param npz_path: only required when using json_path
        :param keep_rendered_rgb: whether the returned frac_paths include all rgb images (for plotting a video afterwards)
        """
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env) # the inner env can be referred to as wrapped_env
        self.time_steps_agg = time_steps_agg
        self.discrete_actions = discrete_actions
        self.animate = animate
        self.keep_rendered_rgb = keep_rendered_rgb
        if json_path:
            self.data = json.load(open(os.path.join(config.PROJECT_PATH, json_path), 'r'))
            self.low_policy_latent_dim = self.data['json_args']['policy']['latent_dim']
        elif pkl_path:
            pkl_path = os.path.join(config.PROJECT_PATH, pkl_path)
            self.data = joblib.load(pkl_path)
            self.low_policy_latent_dim = self.data['policy'].latent_dim
        else:
            raise Exception("No path to file given")

        self.low_policy = GaussianMLPPolicy_snn_restorable(
            env_spec=env.spec,
            env=env,
            #latent_dim=latent_dim, # restore from file
            latent_name='categorical',
            pkl_path=pkl_path,
            bilinear_integration=True,  # concatenate also the outer product
            hidden_sizes=(64, 64),
            min_std=1e-6,
        )
Example #13
    def __init__(self,
                 difficulty=1.0,
                 texturedir='/tmp/mujoco_textures',
                 hfield_dir='/tmp/mujoco_terrains',
                 regen_terrain=True,
                 *args,
                 **kwargs):
        Serializable.quick_init(self, locals())

        self.difficulty = max(difficulty, self.MIN_DIFFICULTY)
        self.texturedir = texturedir
        self.hfield_dir = hfield_dir

        model_cls = self.__class__.MODEL_CLASS
        if model_cls is None:
            raise Exception("MODEL_CLASS unspecified!")

        template_file_name = 'hill_' + model_cls.__module__.split(
            '.')[-1] + '.xml.mako'

        template_options = dict(difficulty=self.difficulty,
                                texturedir=self.texturedir,
                                hfield_file=os.path.join(
                                    self.hfield_dir, self.HFIELD_FNAME))

        file_path = os.path.join(MODEL_DIR, template_file_name)
        lookup = mako.lookup.TemplateLookup(directories=[MODEL_DIR])
        with open(file_path) as template_file:
            template = mako.template.Template(template_file.read(),
                                              lookup=lookup)
        content = template.render(opts=template_options)

        tmp_f, file_path = tempfile.mkstemp(text=True)
        with open(file_path, 'w') as f:
            f.write(content)

        if self._iam_terrain_generator(regen_terrain):
            self._gen_terrain(regen_terrain)
            os.remove(self._get_lock_path())

        inner_env = model_cls(*args, file_path=file_path,
                              **kwargs)  # file to the robot specifications
        ProxyEnv.__init__(
            self, inner_env)  # here is where the robot env will be initialized

        os.close(tmp_f)
Example #14
 def __init__(
     self,
     env,
     time_steps_agg=1,
     animate=False,
     keep_rendered_rgb=False,
 ):
     """
      :param env: Env to wrap; it should have the same robot characteristics as the env the policy was pre-trained on
      :param time_steps_agg: number of time-steps during which the SNN policy is executed with a fixed (discrete) latent
      :param keep_rendered_rgb: whether the returned frac_paths include all rgb images (for plotting a video afterwards)
     """
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self.time_steps_agg = time_steps_agg
     self.animate = animate
     self.keep_rendered_rgb = keep_rendered_rgb
Example #15
    def __init__(self,
                 env,
                 init_selector,
                 goal_selector,
                 append_goal=False,
                 terminal_bonus=0,
                 terminal_eps=0.1,
                 distance_metric='L2',
                 goal_reward='NegativeDistance',
                 goal_weight=1,
                 inner_weight=0,
                 angle_idxs=(None, ),
                 persistence=3):
        """
        :param env: wrapped env  NEEDS RESET WITH INIT_STATE arg!
        :param init_selector: already instantiated: NEEDS GOOD DIM OF GOALS! --> TO DO: give the class a method to update dim?
        :param terminal_bonus: if not 0, the rollout terminates with this bonus if the goal state is reached
        :param terminal_eps: eps around which the terminal goal is considered reached
        :param distance_metric: L1 or L2 or a callable func
        :param goal_reward: NegativeDistance or InverseDistance or callable func
        :param goal_weight: coef of the goal-dist reward
        :param inner_weight: coef of the inner reward
        """

        Serializable.quick_init(self, locals())
        self.append_goal = append_goal
        self.terminal_bonus = terminal_bonus
        self.terminal_eps = terminal_eps
        self._distance_metric = distance_metric
        self._goal_reward = goal_reward
        self.goal_weight = goal_weight
        self.inner_weight = inner_weight
        self.persistence = persistence
        self.persistence_count = 1
        self.fig_number = 0
        ProxyEnv.__init__(self, env)
        InitBaseAngle.__init__(self,
                               angle_idxs=angle_idxs,
                               init_selector=init_selector,
                               goal_selector=goal_selector)
        ub = BIG * np.ones(self.get_current_obs().shape)
        self._observation_space = spaces.Box(ub * -1, ub)
        # keep around all sampled goals, one dict per training itr (ie, call of this env.log_diagnostics)
        self.inits_trained = []
Example #16
    def __init__(
        self,
        env,
        time_steps_agg=1,
        discrete_actions=True,
        pkl_paths=None,
        json_paths=None,
        npz_paths=None,
        animate=False,
        keep_rendered_rgb=False,
    ):
        """
        :param env: Env to wrap; it should have the same robot characteristics as the env the policies were pre-trained on
        :param time_steps_agg: number of time-steps during which one of the policies is executed, with no external action possible
        :param discrete_actions: whether the policies are applied alone (discrete) or as a linear combination
        :param pkl_paths: list of paths to pickled pre-training experiments that include the pre-trained policies
        :param json_paths: list of paths to jsons of the pre-training experiments; requires npz_paths with the policy params
        :param npz_paths: only required when using json_paths
        :param keep_rendered_rgb: whether the returned frac_paths include all rgb images (for plotting a video afterwards)
        """
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self.time_steps_agg = time_steps_agg
        self.discrete_actions = discrete_actions
        self.animate = animate
        self.keep_rendered_rgb = keep_rendered_rgb
        if json_paths:
            self.low_policy_selector_dim = len(json_paths)
        elif pkl_paths:
            self.low_policy_selector_dim = len(pkl_paths)
        else:
            raise Exception("No path no file given")

        self.low_policy = GaussianMLPPolicy_multi_hier(
            env_spec=env.spec,
            env=env,
            pkl_paths=pkl_paths,
            json_paths=json_paths,
            npz_paths=npz_paths,
            trainable_old=False,
            external_selector=True,
        )
Example #17
    def __init__(
        self,
        env,
    ):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        # load policy that moves peg
        peg_policy = "/home/michael/rllab_goal_rl/data/policies/params.pkl"
        data = joblib.load(peg_policy)
        if "algo" in data:
            self.move_peg_agent = data["algo"].policy
            self.move_peg_env = data["algo"].env
        self.max_path_length = 200  # for moving peg
        self.amount_moved = 0.05

        # locates base environment (i.e. environment that tries to move disk)
        self.base_env = self
        while hasattr(self.base_env, "wrapped_env"):
            self.base_env = self.base_env.wrapped_env
Example #18
 def __init__(
         self,
         env,
         scale_reward=1.,
         normalize_obs=False,
         normalize_reward=False,
         obs_alpha=0.001,
         reward_alpha=0.001,
 ):
     ProxyEnv.__init__(self, env)
     Serializable.quick_init(self, locals())
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
Example #19
 def step(self, action):
     observation, reward, done, info = ProxyEnv.step(self, action)
     info['reward_inner'] = reward_inner = self.inner_weight * reward
     obsIdx = observation[self.idx, ]
     info['distance'] = dist = np.linalg.norm(obsIdx - self.current_goal)
     info['reward_dist'] = reward_dist = self._compute_dist_reward(obsIdx)
     if self.terminal_bonus and dist <= self.terminal_eps:
         # print("*****done!!*******")
         done = True
         reward_dist += self.terminal_bonus
     return (self.get_current_obs(), reward_dist + reward_inner, done, info)
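The step above calls `_compute_dist_reward`; a sketch of what such a helper typically computes, given the `distance_metric`, `goal_reward` and `goal_weight` options documented in Example #15 (the structure and branch behaviour here are assumptions):

    def _compute_dist_reward(self, obs):
        # distance to the current goal under the configured metric
        if callable(self._distance_metric):
            dist = self._distance_metric(obs, self.current_goal)
        elif self._distance_metric == 'L1':
            dist = np.sum(np.abs(obs - self.current_goal))
        else:  # 'L2'
            dist = np.linalg.norm(obs - self.current_goal)
        if callable(self._goal_reward):
            return self.goal_weight * self._goal_reward(dist)
        elif self._goal_reward == 'InverseDistance':
            return self.goal_weight / (dist + 0.1)
        return -self.goal_weight * dist  # 'NegativeDistance'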
Example #20
 def __init__(
     self,
     env,
     scale_reward=1.,
     normalize_obs=False,
     normalize_reward=False,
     obs_alpha=0.001,
     reward_alpha=0.001,
 ):
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
Example #21
 def __init__(
         self,
         env,
         scale_reward=1.,
         normalize_obs=False,
         normalize_reward=False,
         obs_alpha=0.001,
         reward_alpha=0.001,
 ):
     ProxyEnv.__init__(self, env)
     Serializable.quick_init(self, locals())
     if not isinstance(env.action_space, Box):
         print("Environment not using continuous actions; action normalization skipped!")
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
Example #22
 def __init__(
         self,
         env,
         scale_reward=1.,
         normalize_obs=False,
         normalize_reward=False,
         obs_alpha=0.001,
         reward_alpha=0.001,
 ):
     #print "Normalized! I'm here!"
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     assert isinstance(env.action_space, UBSpace)
     self._scale_reward = scale_reward
     self._normalize_obs = normalize_obs
     self._normalize_reward = normalize_reward
     self._obs_alpha = obs_alpha
     self._obs_mean = np.zeros(env.observation_space.flat_dim)
     self._obs_var = np.ones(env.observation_space.flat_dim)
     self._reward_alpha = reward_alpha
     self._reward_mean = 0.
     self._reward_var = 1.
Example #23
def run_task(v):
    record_video = False

    import mujoco_envs.pomdp
    main_env = GymEnv('Peg3d-v0', record_video=record_video)
    # main_env = MultiagentEnv(GymEnv("Swimmer-v1", record_video=record_video))

    # replace raw shadow_envs with wrapped envs
    main_env._shadow_envs = [TfEnv(ProxyEnv(env)) for env in main_env.shadow_envs]
    # main_env._shadow_envs = [TfEnv(normalize(env)) for env in main_env.shadow_envs]

    sub_policies = [AutoMLPPolicy(
    # sub_policies = [BottleneckAutoMLPPolicy(
        name="sub-policy-%s" % i,
        env_spec=env.spec,
        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(32, 32) # 32)
    ) for i,env in enumerate(main_env.shadow_envs)]

    # reduces the initialization, to discourage pre-committing to an action
    # for sp in sub_policies:
    #     import ipdb; ipdb.set_trace()
    #     sp.get_params()[-3].set_value(sp.get_params()[-3].get_value()*0.01)
    policy = MultiMLPPolicy(
        name="policy",
        env_spec=[env.spec for env in main_env.shadow_envs],
        policies=sub_policies
    )

    baselines = [LinearFeatureBaseline(env_spec=env.spec) for env in main_env.shadow_envs]

    # TODO(cathywu) Start with large batch sizes 100-1000 trajectories
    algo = MultiTRPO(
        env=main_env,
        policy=policy,
        baselines=baselines,
        batch_size=25000,
        whole_paths=True,
        max_path_length=250,
        n_itr=700,
        discount=0.995,
        step_size=v["step_size"],
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
        # NPO_cls=ConsensusNPO,
        NPO_cls=NPO,
        sample_processor_cls=MultiSampleProcessor,
        n_vectorized_envs=40,
    )
    algo.train()
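For context, `run_task` functions like the one above are usually handed to rllab's experiment launcher together with a variant dictionary that supplies values such as `v["step_size"]`; a minimal sketch of that call (the swept values below are placeholders):

from rllab.misc.instrument import run_experiment_lite, VariantGenerator

vg = VariantGenerator()
vg.add("step_size", [0.01, 0.02])  # hypothetical sweep over TRPO step sizes

for v in vg.variants():
    run_experiment_lite(
        run_task,
        variant=v,
        n_parallel=1,
        snapshot_mode="last",
        seed=0,
    )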
Example #24
    def __init__(self,
                 env_alice,
                 env_bob,
                 policy_bob,
                 max_path_length,
                 alice_bonus,
                 alice_factor,
                 gamma=0.1,
                 stop_threshold=0.9,
                 start_generation=True):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env_alice)

        self.env_bob = env_bob
        self.policy_bob = policy_bob
        self.max_path_length = max_path_length
        self.time = 0

        self.alice_bonus = alice_bonus
        self.alice_factor = alice_factor
        self.gamma = gamma
        self.stop_threshold = stop_threshold
        self.start_generation = start_generation
Example #25
    def step(self, action):
        xy_pos = self.transform_to_goal_space(
            self.wrapped_env.get_current_obs())
        observation, reward, done, info = ProxyEnv.step(self, action)
        info['reward_inner'] = reward_inner = self.inner_weight * reward
        # print("REWARD INNER", reward)
        if 'distance' not in info:
            info['distance'] = dist = self.dist_to_goal(observation)
            info['reward_dist'] = reward_dist = self.compute_dist_reward(
                observation)
            info['goal_reached'] = 1.0 * self.is_goal_reached(observation)
        else:
            # modified so that inner environment can pass in goal via step
            dist = info['distance']
            # info['goal_reached'] = 1.0 * (dist < self.terminal_eps)
            info['goal_reached'] = 1.0 * self.is_goal_reached(observation)
            info[
                'reward_dist'] = reward_dist = -self.extend_dist_rew_weight * dist

        info['goal'] = self.current_goal
        info['obs2goal'] = self._obs2goal_transform(observation)

        info['xy_pos'] = xy_pos
        info['xy_pos_new'] = self.transform_to_goal_space(observation)

        # print(reward_dist)
        # print(reward_inner)
        # print("step: obs={}, goal={}, dist={}".format(self.append_goal_observation(observation), self.current_goal, dist))
        if self.terminate_env and info['goal_reached']:
            done = True
        if self.append_goal_to_observation:
            observation = self.append_goal_observation(observation)
        if not self.include_goal_obs:
            observation = observation[2:]

        return (observation, reward_dist + reward_inner +
                info['goal_reached'] * self.goal_weight, done, info)
Example #26
    def __init__(
            self,
            n_bins=20,
            sensor_range=10.,
            sensor_span=math.pi,
            maze_id=0,
            length=1,
            maze_height=0.5,
            maze_size_scaling=4,
            coef_inner_rew=0.,  # a coef of 0 gives no reward to the maze from the wrapped env.
            goal_rew=1000.,  # reward obtained when reaching the goal
            death_reward=0.,
            random_start=False,
            direct_goal=False,
            velocity_field=True,
            visualize_goal=False,
            *args,
            **kwargs):
        Serializable.quick_init(self, locals())
        self._n_bins = n_bins
        self._sensor_range = sensor_range
        self._sensor_span = sensor_span
        self._maze_id = maze_id
        self.length = length
        self.coef_inner_rew = coef_inner_rew
        self.goal_rew = goal_rew
        self.death_reward = death_reward
        self.direct_goal = direct_goal
        self.velocity_field = velocity_field
        self.algo = None  # will be added in set_algo!
        self.visualize_goal = visualize_goal

        model_cls = self.__class__.MODEL_CLASS
        # print("model_cls", model_cls)
        if model_cls is None:
            raise "MODEL_CLASS unspecified!"
        xml_path = osp.join(MODEL_DIR, model_cls.FILE)
        # print("xml_path", xml_path)
        self.tree = tree = ET.parse(xml_path)
        self.worldbody = worldbody = tree.find(".//worldbody")

        self.MAZE_HEIGHT = height = maze_height
        self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
        self.init_actual_x = self.init_actual_y = 0
        self.random_start = random_start
        if self.random_start:  # in kwargs
            # define: actual coordinates: origin at center of struct (3, 1)
            # define: mujoco coordinates: origin at the first ant
            self.pool, self.p, structure, x_relative, y_relative \
                = construct_maze_random(maze_id=self._maze_id, length=self.length)
            # x, y_relative: x, y index in struct, relative to (3, 1)
            self.MAZE_STRUCTURE = structure
            self.init_actual_x = y_relative * self.MAZE_SIZE_SCALING  # map x direction is list y direction!
            self.init_actual_y = x_relative * self.MAZE_SIZE_SCALING
            # print('self init:', self.init_actual_x, self.init_actual_y)
            self.x_r_prev, self.y_r_prev = x_relative + 3, y_relative + 1  # maintain the previous map for later update
            for i, tmp_line in enumerate(structure):
                for j, tmp_member in enumerate(tmp_line):
                    if tmp_member == 'g':
                        self.x_g_prev, self.y_g_prev = i, j
                        break
            # self.x_g_prev, self.y_g_prev = 3, 3 # hard code here! - (1, j) in big maze structure in maze_env_utils.py
            self.x_g, self.y_g = self.x_g_prev, self.y_g_prev
        else:
            self.x_r_prev, self.y_r_prev = 3, 1
            self.x_g_prev, self.y_g_prev = 1, 1
            self.MAZE_STRUCTURE = structure = construct_maze(
                maze_id=self._maze_id, length=self.length)
        self.tot = 0

        torso_x, torso_y = self._find_robot()
        self._init_torso_x = torso_x  # this is not the actual init x, just convenient for goal pos computing
        self._init_torso_y = torso_y  # torso pos in index coords!
        self.init_torso_x = self._init_torso_x  # make visible from outside
        self.init_torso_y = self._init_torso_y

        for i in range(len(structure)):
            for j in range(len(structure[0])):
                if str(structure[i][j]) == '1':
                    # offset all coordinates so that robot starts at the origin
                    ET.SubElement(
                        worldbody,
                        "geom",
                        name="block_%d_%d" % (i, j),
                        pos="%f %f %f" %
                        (j * size_scaling - torso_x, i * size_scaling -
                         torso_y, height / 2 * size_scaling),
                        size="%f %f %f" %
                        (0.5 * size_scaling, 0.5 * size_scaling,
                         height / 2 * size_scaling),
                        type="box",
                        material="",
                        contype="1",
                        conaffinity="1",
                        rgba="0.4 0.4 0.4 1")
                if self.visualize_goal and str(
                        structure[i][j]
                ) == 'g':  # visualize goal! uncomment this block when testing!
                    # offset all coordinates so that robot starts at the origin
                    self.goal_element =\
                    ET.SubElement(
                        self.worldbody, "geom",
                        name="block_%d_%d" % (i, j),
                        pos="%f %f %f" % (j * size_scaling - self._init_torso_x,
                                          i * size_scaling - self._init_torso_y,
                                          self.MAZE_HEIGHT / 2 * size_scaling),
                        size="%f %f %f" % (0.2 * size_scaling,
                                           0.2 * size_scaling,
                                           self.MAZE_HEIGHT / 2 * size_scaling),
                        type="box",
                        material="",
                        contype="1",
                        conaffinity="1",
                        rgba="1.0 0.0 0.0 0.5"
                    )

        self.args = args
        self.kwargs = kwargs

        torso = tree.find(".//body[@name='torso']")
        geoms = torso.findall(".//geom")
        for geom in geoms:
            # print("geom", geom.attrib)
            if 'name' not in geom.attrib:
                raise Exception("Every geom of the torso must have a name "
                                "defined")

        if self.__class__.MAZE_MAKE_CONTACTS:
            contact = ET.SubElement(tree.find("."), "contact")
            for i in range(len(structure)):
                for j in range(len(structure[0])):
                    if str(structure[i][j]) == '1':
                        for geom in geoms:
                            ET.SubElement(contact,
                                          "pair",
                                          geom1=geom.attrib["name"],
                                          geom2="block_%d_%d" % (i, j))

        _, file_path = tempfile.mkstemp(text=True)
        tree.write(
            file_path
        )  # here we write a temporary file with the robot specifications. Why not the original one??

        self._goal_range = self._find_goal_range()
        self.goal = np.array([(self._goal_range[0] + self._goal_range[1]) / 2,
                              (self._goal_range[2] + self._goal_range[3]) / 2])
        # print ("goal_range", self._goal_range)
        # print("x_y", self.wrapped_env.model.data.qpos.flat[0:2])
        # print("goal", self.goal)
        self._cached_segments = None

        self.gradient_pool = [
            (1, 0), (0.707, 0.707), (0, 1), (-0.707, 0.707), (-1, 0),
            (-0.707, -0.707), (0, -1), (0.707, -0.707)
        ]  # added gradient pool for train_low_with_v_gradient

        inner_env = model_cls(
            *args, file_path=file_path,
            **kwargs)  # file to the robot specifications; model_cls is AntEnv
        ProxyEnv.__init__(
            self, inner_env)  # here is where the robot env will be initialized
Example #27
    def reset(self):  # randomize starting pos!
        if self.random_start:  # in kwargs
            random_i = np.random.choice(len(self.pool),
                                        p=self.p)  # random i for r
            x_r, y_r = self.pool[random_i]
            x_g, y_g = self.x_g_prev, self.y_g_prev
            random_i = np.random.choice(len(self.pool),
                                        p=self.p)  # new random i for g
            if self._maze_id == 11:  # big maze, goal should also be randomly sampled!
                x_g, y_g = self.pool[random_i]
                while (x_g == x_r) and (y_g == y_r):  # shouldn't overlap!
                    random_i = np.random.choice(len(self.pool), p=self.p)
                    x_g, y_g = self.pool[random_i]
                # x_r, y_r = 3, 1 # uncomment for test
                # print('robot:', x_r, y_r)
                # print('goal:', x_g, y_g)
                self.MAZE_STRUCTURE[self.x_g_prev][self.y_g_prev] = 0
                self.MAZE_STRUCTURE[x_g][y_g] = 'g'
                self.x_g_prev, self.y_g_prev = x_g, y_g
                self._goal_range = self._find_goal_range()
                self.goal = np.array([
                    (self._goal_range[0] + self._goal_range[1]) / 2,
                    (self._goal_range[2] + self._goal_range[3]) / 2
                ])

            # # (3, 1) was the initial choice!
            self.MAZE_STRUCTURE[self.x_r_prev][self.y_r_prev] = 0
            self.x_r_prev, self.y_r_prev = x_r, y_r
            self.MAZE_STRUCTURE[x_r][y_r] = 'r'  # update maze
            # # print("x_r, y_r", x_r, y_r)
            # x_relative = x_r - 3  # the x index relative to (0, 0)
            # y_relative = y_r - 1
            # self.init_actual_x = y_relative * self.MAZE_SIZE_SCALING # map x direction is list y direction!
            # self.init_actual_y = x_relative * self.MAZE_SIZE_SCALING
            # # print(self.MAZE_STRUCTURE)
            # # print('self init:', self.init_actual_x, self.init_actual_y)
            x_new_actual = (y_r - 1) * self.MAZE_SIZE_SCALING
            y_new_actual = (x_r - 3) * self.MAZE_SIZE_SCALING
            x_new_mujoco = x_new_actual - self.init_actual_x
            y_new_mujoco = y_new_actual - self.init_actual_y
            qpos = [
                x_new_mujoco, y_new_mujoco, 0.75, 1., 0., 0., 0., 0., 0., 0.,
                0., 0., 0., 0., 0.
            ]
            qvel = [0.] * 14  # make sure these should be set to 0.?
            qacc = [0.] * 14
            ctrl = [0.] * 14
            init_state = qpos + qvel + qacc + ctrl
            self.wrapped_env.reset(
                init_state=init_state
            )  # wrapped_env is the inner_env in __init__()
            # in mujoco_env.py, reset(self, init_state=None)
            # print(x_r, y_r)
            if self.visualize_goal:  # remove the prev goal and add a new goal
                i, j = x_g, y_g
                size_scaling = self.MAZE_SIZE_SCALING
                # remove the original goal
                try:
                    self.worldbody.remove(self.goal_element)
                except AttributeError:
                    pass
                # offset all coordinates so that robot starts at the origin
                self.goal_element = \
                    ET.SubElement(
                        self.worldbody, "geom",
                        name="block_%d_%d" % (i, j),
                        pos="%f %f %f" % (j * size_scaling - self._init_torso_x,
                                          i * size_scaling - self._init_torso_y,
                                          self.MAZE_HEIGHT / 2 * size_scaling),
                        size="%f %f %f" % (0.2 * size_scaling, # smaller than the block to prevent collision
                                           0.2 * size_scaling,
                                           self.MAZE_HEIGHT / 2 * size_scaling),
                        type="box",
                        material="",
                        contype="1",
                        conaffinity="1",
                        rgba="1.0 0.0 0.0 0.5"
                    )
                # Note: running the lines below will make the robot position wrong! (because the graph is rebuilt)
                torso = self.tree.find(".//body[@name='torso']")
                geoms = torso.findall(".//geom")
                for geom in geoms:
                    # print("geom", geom.attrib)
                    if 'name' not in geom.attrib:
                        raise Exception(
                            "Every geom of the torso must have a name "
                            "defined")
                _, file_path = tempfile.mkstemp(text=True)
                self.tree.write(
                    file_path
                )  # here we write a temporary file with the robot specifications. Why not the original one??
                model_cls = self.__class__.MODEL_CLASS
                inner_env = model_cls(
                    *self.args, file_path=file_path, **self.kwargs
                )  # file to the robot specifications; model_cls is AntEnv
                ProxyEnv.__init__(
                    self, inner_env
                )  # here is where the robot env will be initialized

        else:
            self.wrapped_env.reset()
        return self.get_current_obs()
Example #28
 def __init__(self,
              n_apples=8,
              n_bombs=8,
              activity_range=12.,
              robot_object_spacing=5.,
              catch_range=1.,
              n_bins=10,
              sensor_range=12.,
              sensor_span=2 * math.pi,
              coef_inner_rew=0.,
              dying_cost=0.,
              *args,
              **kwargs):
     Serializable.quick_init(self, locals())
     self.n_apples = n_apples
     self.n_bombs = n_bombs
     self.activity_range = activity_range
     self.robot_object_spacing = robot_object_spacing
     self.catch_range = catch_range
     self.n_bins = n_bins
     self.sensor_range = sensor_range
     self.sensor_span = sensor_span
     self.coef_inner_rew = coef_inner_rew
     self.dying_cost = dying_cost
     self.objects = []
     self.viewer = None
     # super(GatherEnv, self).__init__(*args, **kwargs)
     model_cls = self.__class__.MODEL_CLASS
     if model_cls is None:
         raise "MODEL_CLASS unspecified!"
     xml_path = osp.join(MODEL_DIR, model_cls.FILE)
     tree = ET.parse(xml_path)
     worldbody = tree.find(".//worldbody")
     attrs = dict(type="box",
                  conaffinity="1",
                  rgba="0.8 0.9 0.8 1",
                  condim="3")
     # import pdb;pdb.set_trace()
     walldist = self.activity_range + 1
     ET.SubElement(
         worldbody, "geom",
         dict(attrs,
              name="wall1",
              pos="0 -%d 0" % walldist,
              size="%d.5 0.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom",
         dict(attrs,
              name="wall2",
              pos="0 %d 0" % walldist,
              size="%d.5 0.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom",
         dict(attrs,
              name="wall3",
              pos="-%d 0 0" % walldist,
              size="0.5 %d.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom",
         dict(attrs,
              name="wall4",
              pos="%d 0 0" % walldist,
              size="0.5 %d.5 1" % walldist))
     _, file_path = tempfile.mkstemp(text=True)
     tree.write(file_path)
     # pylint: disable=not-callable
     inner_env = model_cls(*args, file_path=file_path, **kwargs)
     # pylint: enable=not-callable
     ProxyEnv.__init__(
         self, inner_env)  # to access the inner env, do self.wrapped_env
Example #29
 def __init__(
         self,
         n_apples=8,
         n_bombs=8,
         activity_range=6.,
         robot_object_spacing=2.,
         catch_range=1.,
         n_bins=10,
         sensor_range=6.,
         sensor_span=math.pi,
         coef_inner_rew=0.,
         dying_cost=-10,
         *args, **kwargs
 ):
     Serializable.quick_init(self, locals())
     self.n_apples = n_apples
     self.n_bombs = n_bombs
     self.activity_range = activity_range
     self.robot_object_spacing = robot_object_spacing
     self.catch_range = catch_range
     self.n_bins = n_bins
     self.sensor_range = sensor_range
     self.sensor_span = sensor_span
     self.coef_inner_rew = coef_inner_rew
     self.dying_cost = dying_cost
     self.objects = []
     self.viewer = None
     # super(GatherEnv, self).__init__(*args, **kwargs)
     model_cls = self.__class__.MODEL_CLASS
     if model_cls is None:
         raise "MODEL_CLASS unspecified!"
     xml_path = osp.join(MODEL_DIR, model_cls.FILE)
     tree = ET.parse(xml_path)
     worldbody = tree.find(".//worldbody")
     attrs = dict(
         type="box", conaffinity="1", rgba="0.8 0.9 0.8 1", condim="3"
     )
     walldist = self.activity_range + 1
     ET.SubElement(
         worldbody, "geom", dict(
             attrs,
             name="wall1",
             pos="0 -%d 0" % walldist,
             size="%d.5 0.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom", dict(
             attrs,
             name="wall2",
             pos="0 %d 0" % walldist,
             size="%d.5 0.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom", dict(
             attrs,
             name="wall3",
             pos="-%d 0 0" % walldist,
             size="0.5 %d.5 1" % walldist))
     ET.SubElement(
         worldbody, "geom", dict(
             attrs,
             name="wall4",
             pos="%d 0 0" % walldist,
             size="0.5 %d.5 1" % walldist))
     _, file_path = tempfile.mkstemp(text=True)
     tree.write(file_path)
     # pylint: disable=not-callable
     inner_env = model_cls(*args, file_path=file_path, **kwargs)
     # pylint: enable=not-callable
     ProxyEnv.__init__(self, inner_env)  # to access the inner env, do self.wrapped_env
Example #30
        Walker2DEnv,
        SwimmerEnv,
        SimpleHumanoidEnv,
        InvertedDoublePendulumEnv,
        HopperEnv,
        HalfCheetahEnv,
        PointGatherEnv,
        SwimmerGatherEnv,
        AntGatherEnv,
        PointMazeEnv,
        SwimmerMazeEnv,
        AntMazeEnv,
    ])

envs = [cls() for cls in simple_env_classes]
envs.append(ProxyEnv(envs[0]))
envs.append(IdentificationEnv(CartpoleEnv, {}))
envs.append(NoisyObservationEnv(CartpoleEnv()))
envs.append(DelayedActionEnv(CartpoleEnv()))
envs.append(NormalizedEnv(CartpoleEnv()))
envs.append(GymEnv('CartPole-v0'))


@tools.params(*envs)
def test_env(env):
    print("Testing", env.__class__)
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
    assert ob_space.contains(ob)
    a = act_space.sample()
Example #31
    def __init__(
            self,
            n_bins=20,
            sensor_range=10.,
            sensor_span=math.pi,
            *args,
            **kwargs):

        self._n_bins = n_bins
        self._sensor_range = sensor_range
        self._sensor_span = sensor_span

        model_cls = self.__class__.MODEL_CLASS
        if model_cls is None:
            raise "MODEL_CLASS unspecified!"
        xml_path = osp.join(MODEL_DIR, model_cls.FILE)
        tree = ET.parse(xml_path)
        worldbody = tree.find(".//worldbody")

        size_scaling = self.__class__.MAZE_SIZE_SCALING
        height = self.__class__.MAZE_HEIGHT
        structure = self.__class__.MAZE_STRUCTURE

        torso_x, torso_y = self._find_robot()
        self._init_torso_x = torso_x
        self._init_torso_y = torso_y

        for i in range(len(structure)):
            for j in range(len(structure[0])):
                if str(structure[i][j]) == '1':
                    # offset all coordinates so that robot starts at the origin
                    ET.SubElement(
                        worldbody, "geom",
                        name="block_%d_%d" % (i, j),
                        pos="%f %f %f" % (j * size_scaling - torso_x,
                                          i * size_scaling - torso_y,
                                          height / 2 * size_scaling),
                        size="%f %f %f" % (0.5 * size_scaling,
                                           0.5 * size_scaling,
                                           height / 2 * size_scaling),
                        type="box",
                        material="",
                        contype="1",
                        conaffinity="1",
                        rgba="0.4 0.4 0.4 1"
                    )

        torso = tree.find(".//body[@name='torso']")
        geoms = torso.findall(".//geom")
        for geom in geoms:
            if 'name' not in geom.attrib:
                raise Exception("Every geom of the torso must have a name "
                                "defined")

        if self.__class__.MAZE_MAKE_CONTACTS:
            contact = ET.SubElement(
                tree.find("."), "contact"
            )
            for i in range(len(structure)):
                for j in range(len(structure[0])):
                    if str(structure[i][j]) == '1':
                        for geom in geoms:
                            ET.SubElement(
                                contact, "pair",
                                geom1=geom.attrib["name"],
                                geom2="block_%d_%d" % (i, j)
                            )

        _, file_path = tempfile.mkstemp(text=True)
        tree.write(file_path)

        self._goal_range = self._find_goal_range()
        self._cached_segments = None

        inner_env = model_cls(*args, file_path=file_path, **kwargs)
        ProxyEnv.__init__(self, inner_env)
        Serializable.quick_init(self, locals())
Example #32
    def __init__(self,
                 nonlinear_reward=False,
                 max_path_length_range=None,
                 random_start_range=None,
                 show_limit=True,
                 lim_size=20,
                 lim_height=2,
                 xlim=10,
                 abs_lim=False,
                 circle_mode=False,
                 target_dist=1.,
                 max_ep_len=65,
                 *args,
                 **kwargs):
        self._nonlinear_reward = nonlinear_reward
        self._max_path_length_range = max_path_length_range
        self._random_start_range = random_start_range
        self._step = 0
        self._circle_mode = circle_mode
        self._target_dist = target_dist
        self._xlim = xlim
        self._max_ep_len = max_ep_len

        model_cls = self.__class__.MODEL_CLASS
        if model_cls is None:
            raise "MODEL_CLASS unspecified!"
        xml_path = osp.join(MODEL_DIR, model_cls.FILE)
        if show_limit:
            tree = ET.parse(xml_path)
            worldbody = tree.find(".//worldbody")
            ET.SubElement(worldbody,
                          "geom",
                          name="finish_line",
                          pos="%f %f %f" % (xlim, 0, lim_height / 2),
                          size="%f %f %f" % (0.1, lim_size, lim_height),
                          type="box",
                          material="",
                          contype="0",
                          conaffinity="0",
                          rgba="0.1 0.1 0.8 0.4")
            if abs_lim:
                ET.SubElement(worldbody,
                              "geom",
                              name="finish_line2",
                              pos="%f %f %f" % (-xlim, 0, lim_height / 2),
                              size="%f %f %f" % (0.1, lim_size, lim_height),
                              type="box",
                              material="",
                              contype="0",
                              conaffinity="0",
                              rgba="0.1 0.1 0.8 0.4")
            _, file_path = tempfile.mkstemp(text=True)
            tree.write(file_path)
        else:
            file_path = xml_path

        inner_env = model_cls(*args, file_path=file_path, **kwargs)
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, inner_env)

        # new code for caching obs / act space
        shp = self.get_current_obs().shape
        ub = BIG * np.ones(shp)
        self._cached_observation_space = spaces.Box(ub * -1, ub)
        bounds = self._wrapped_env.model.actuator_ctrlrange
        lb = bounds[:, 0]
        ub = bounds[:, 1]
        self._cached_action_space = spaces.Box(lb, ub)
Example #33
    def __init__(self, env, delay=0.01):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        self._delay = delay
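A plausible use of `_delay` is to simply sleep before forwarding each action to the wrapped env (this step method is an assumption; the original is not shown, and `time` is assumed to be imported):

    def step(self, action):
        time.sleep(self._delay)  # simulate actuation/sensing latency
        return self.wrapped_env.step(action)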
Example #34
    def __init__(self,
                 n_apples=8,
                 n_bombs=8,
                 activity_range=6.,
                 robot_object_spacing=2.,
                 catch_range=1.,
                 n_bins=10,
                 sensor_range=6.,
                 sensor_span=math.pi,
                 coef_inner_rew=0.,
                 dying_cost=-10,
                 *args,
                 **kwargs):
        Serializable.quick_init(self, locals())
        self.n_apples = n_apples
        self.n_bombs = n_bombs
        self.activity_range = activity_range
        self.robot_object_spacing = robot_object_spacing
        self.catch_range = catch_range
        self.n_bins = n_bins
        self.sensor_range = sensor_range
        self.sensor_span = sensor_span
        self.coef_inner_rew = coef_inner_rew
        self.dying_cost = dying_cost
        self.objects = []
        self.viewer = None

        # for openai baseline
        self.reward_range = (-float('inf'), float('inf'))
        self.metadata = None
        self.t = 0
        self.r = 0
        # super(GatherEnv, self).__init__(*args, **kwargs)
        model_cls = self.__class__.MODEL_CLASS
        if model_cls is None:
            raise "MODEL_CLASS unspecified!"
        xml_path = osp.join(MODEL_DIR, model_cls.FILE)
        tree = ET.parse(xml_path)
        worldbody = tree.find(".//worldbody")
        attrs = dict(type="box",
                     conaffinity="1",
                     rgba="0.8 0.9 0.8 1",
                     condim="3")
        walldist = self.activity_range + 1
        ET.SubElement(
            worldbody, "geom",
            dict(attrs,
                 name="wall1",
                 pos="0 -%d 0" % walldist,
                 size="%d.5 0.5 1" % walldist))
        ET.SubElement(
            worldbody, "geom",
            dict(attrs,
                 name="wall2",
                 pos="0 %d 0" % walldist,
                 size="%d.5 0.5 1" % walldist))
        ET.SubElement(
            worldbody, "geom",
            dict(attrs,
                 name="wall3",
                 pos="-%d 0 0" % walldist,
                 size="0.5 %d.5 1" % walldist))
        ET.SubElement(
            worldbody, "geom",
            dict(attrs,
                 name="wall4",
                 pos="%d 0 0" % walldist,
                 size="0.5 %d.5 1" % walldist))
        # _, file_path = tempfile.mkstemp(text=True) #todo: note that this is different from snn4hrl default
        file_path = osp.join(
            config.PROJECT_PATH,
            "sandbox/snn4hrl/envs/mujoco/gather/mujoco_models/" +
            model_cls.FILE.split(".")[0] + "_gather.xml")
        if not osp.exists(file_path):  # create file if not there
            with open(file_path, 'w+'):
                pass
        tree.write(file_path)
        # pylint: disable=not-callable
        inner_env = model_cls(
            *args, file_path=file_path,
            **kwargs)  # giving problems because of this weird tempfile
        # pylint: enable=not-callable
        ProxyEnv.__init__(
            self, inner_env)  # to access the inner env, do self.wrapped_env
Example #35
    def __init__(self,
                 n_bins=20,
                 sensor_range=10.,
                 sensor_span=math.pi,
                 *args,
                 **kwargs):

        self._n_bins = n_bins
        self._sensor_range = sensor_range
        self._sensor_span = sensor_span

        model_cls = self.__class__.MODEL_CLASS
        if model_cls is None:
            raise "MODEL_CLASS unspecified!"
        xml_path = osp.join(MODEL_DIR, model_cls.FILE)
        tree = ET.parse(xml_path)
        worldbody = tree.find(".//worldbody")

        size_scaling = self.__class__.MAZE_SIZE_SCALING
        height = self.__class__.MAZE_HEIGHT
        structure = self.__class__.MAZE_STRUCTURE

        torso_x, torso_y = self._find_robot()
        self._init_torso_x = torso_x
        self._init_torso_y = torso_y

        for i in range(len(structure)):
            for j in range(len(structure[0])):
                if str(structure[i][j]) == '1':
                    # offset all coordinates so that robot starts at the origin
                    ET.SubElement(
                        worldbody,
                        "geom",
                        name="block_%d_%d" % (i, j),
                        pos="%f %f %f" %
                        (j * size_scaling - torso_x, i * size_scaling -
                         torso_y, height / 2 * size_scaling),
                        size="%f %f %f" %
                        (0.5 * size_scaling, 0.5 * size_scaling,
                         height / 2 * size_scaling),
                        type="box",
                        material="",
                        contype="1",
                        conaffinity="1",
                        rgba="0.4 0.4 0.4 1")

        torso = tree.find(".//body[@name='torso']")
        geoms = torso.findall(".//geom")
        for geom in geoms:
            if 'name' not in geom.attrib:
                raise Exception("Every geom of the torso must have a name "
                                "defined")

        if self.__class__.MAZE_MAKE_CONTACTS:
            contact = ET.SubElement(tree.find("."), "contact")
            for i in range(len(structure)):
                for j in range(len(structure[0])):
                    if str(structure[i][j]) == '1':
                        for geom in geoms:
                            ET.SubElement(contact,
                                          "pair",
                                          geom1=geom.attrib["name"],
                                          geom2="block_%d_%d" % (i, j))

        _, file_path = tempfile.mkstemp(text=True)
        tree.write(file_path)

        self._goal_range = self._find_goal_range()
        self._cached_segments = None

        inner_env = model_cls(*args, file_path=file_path, **kwargs)
        ProxyEnv.__init__(self, inner_env)
        Serializable.quick_init(self, locals())