def __init__(
        self,
        max_speed=0.05,
        max_distance=1,
        use_low_gear_ratio=True,
        speed_weight=0.9,
        done_threshold=0.005,
        goal_dim_weights=None,
):
    """Set up the environment and install an initial goal.

    :param max_speed: stored as ``self.max_speed``; also fills the last two
        entries of the initial goal.
    :param max_distance: stored as ``self.max_distance``; also fills the
        first two entries of the initial goal.
    :param use_low_gear_ratio: forwarded unchanged to the parent
        constructor.
    :param speed_weight: must end up within ``[0, 1]``. When ``None`` it is
        read from ``self.goal_dim_weights[2]`` once the parents are set up.
    :param done_threshold: stored as ``self.done_threshold``.
    :param goal_dim_weights: forwarded to ``MultitaskEnv.__init__``.
    """
    Serializable.quick_init(self, locals())
    derive_speed_weight = speed_weight is None

    self.max_distance = max_distance
    self.max_speed = max_speed
    self.speed_weight = speed_weight
    self.done_threshold = done_threshold

    # ``initializing`` stays True for the whole parent-construction phase.
    # NOTE(review): presumably read by methods that run during
    # construction; its consumers are not visible from here.
    self.initializing = True
    # TODO: fix this hack
    if derive_speed_weight:
        # Placeholder so the parent constructors see a usable number.
        self.speed_weight = 0.9

    MultitaskEnv.__init__(self, goal_dim_weights=goal_dim_weights)
    super().__init__(use_low_gear_ratio=use_low_gear_ratio)

    # Initial goal layout: [distance, distance, speed, speed].
    initial_goal = np.array(
        [self.max_distance] * 2 + [self.max_speed] * 2
    )
    self.set_goal(initial_goal)
    self.initializing = False

    if derive_speed_weight:
        # A single speed weight only makes sense when the weights are
        # symmetric within each pair of goal dimensions.
        weights = self.goal_dim_weights
        assert weights[0] == weights[1]
        assert weights[2] == weights[3]
        self.speed_weight = self.goal_dim_weights[2]
    assert 0 <= self.speed_weight <= 1
def __init__(
        self,
        env: MultitaskEnv,
        give_goal_difference=False,
):
    """Wrap ``env`` and widen its observation space by ``goal_dim`` entries.

    The new observation bounds are the wrapped bounds followed by
    ``env.goal_dim`` extra entries. The extra lower entries are filled with
    the smallest wrapped lower bound, and the extra upper entries with the
    largest wrapped upper bound.

    :param env: the multitask environment to wrap; must expose
        ``observation_space`` and ``goal_dim``.
    :param give_goal_difference: stored as ``self.give_goal_difference``;
        it is not used inside this constructor.
    """
    # self._wrapped_env must be assigned first: Serializable.quick_init
    # calls getattr on this object, and this class's __getattr__ (defined
    # elsewhere) reads self._wrapped_env. If it were not set yet, that
    # lookup would call __getattr__ again and recurse forever.
    self._wrapped_env = env
    # Otherwise serialization gets delegated to the wrapped env. Serialize
    # this env separately from the wrapped env.
    self._serializable_initialized = False
    self._wrapped_obs_dim = env.observation_space.low.size
    self.give_goal_difference = give_goal_difference
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    goal_dim = self._wrapped_env.goal_dim
    wrapped_low = self.observation_space.low
    # Fix: the upper bound must come from ``.high``. It was previously read
    # from ``.low``, so the resulting Box had its upper limits built from
    # the lower limits.
    wrapped_high = self.observation_space.high
    low = np.hstack((wrapped_low, min(wrapped_low) * np.ones(goal_dim)))
    high = np.hstack((wrapped_high, max(wrapped_high) * np.ones(goal_dim)))
    self.observation_space = Box(low, high)
def __init__(
        self,
        env,
        reward_scale=1.,
        obs_mean=None,
        obs_std=None,
):
    """Wrap ``env`` with a [-1, 1] action box and optional observation stats.

    :param env: environment to wrap; must expose ``observation_space`` and
        ``action_space``.
    :param reward_scale: stored as ``self._reward_scale``.
    :param obs_mean: optional observation mean. If only ``obs_std`` is
        given, this defaults to zeros shaped like the observation lower
        bound.
    :param obs_std: optional observation standard deviation. If only
        ``obs_mean`` is given, this defaults to ones shaped like the
        observation lower bound.
    """
    # The wrapped env has to be in place before quick_init runs:
    # quick_init does a getattr on this object, the class's __getattr__
    # (defined elsewhere) reads self._wrapped_env, and without the
    # attribute that lookup would recurse endlessly.
    self._wrapped_env = env
    # Reset the flag so this wrapper is serialized on its own rather than
    # delegating serialization to the wrapped env.
    self._serializable_initialized = False
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)

    # Normalization is on as soon as either statistic is supplied.
    normalize = obs_mean is not None or obs_std is not None
    self._should_normalize = normalize
    if normalize:
        obs_mean = (
            np.zeros_like(env.observation_space.low)
            if obs_mean is None
            else np.array(obs_mean)
        )
        obs_std = (
            np.ones_like(env.observation_space.low)
            if obs_std is None
            else np.array(obs_std)
        )

    self._reward_scale = reward_scale
    self._obs_mean = obs_mean
    self._obs_std = obs_std

    # Expose a symmetric unit box with the wrapped action space's shape.
    unit_bound = np.ones(self._wrapped_env.action_space.shape)
    self.action_space = Box(-1 * unit_bound, unit_bound)
def __init__(
        self,
        action_space,
        mu=0,
        theta=0.15,
        max_sigma=0.3,
        min_sigma=None,
        decay_period=100000,
):
    """Store the noise parameters and initialise the internal state.

    :param action_space: space whose ``low`` / ``high`` arrays give the
        action bounds and the state dimensionality.
    :param mu: stored as ``self.mu``; the initial state is filled with it.
    :param theta: stored as ``self.theta``.
    :param max_sigma: stored as ``self._max_sigma`` and used as the initial
        ``self.sigma``.
    :param min_sigma: stored as ``self._min_sigma``; defaults to
        ``max_sigma`` when ``None``.
    :param decay_period: stored as ``self._decay_period``.
    """
    Serializable.quick_init(self, locals())
    # The original applied this default twice; the second check could
    # never fire, so a single one is kept.
    if min_sigma is None:
        min_sigma = max_sigma
    self.mu = mu
    self.theta = theta
    self.sigma = max_sigma
    self._max_sigma = max_sigma
    self._min_sigma = min_sigma
    self._decay_period = decay_period
    self.dim = np.prod(action_space.low.shape)
    self.low = action_space.low
    self.high = action_space.high
    self.state = np.ones(self.dim) * self.mu
    # reset() is defined elsewhere in the class.
    self.reset()
def __init__(
        self,
        action_space,
        max_sigma=1.0,
        min_sigma=None,
        decay_period=1000000,
):
    """Record the sigma range and decay period for a 1-D action space.

    :param action_space: must have a one-dimensional ``shape``; stored as
        ``self._action_space``.
    :param max_sigma: stored as ``self._max_sigma``.
    :param min_sigma: stored as ``self._min_sigma``; falls back to
        ``max_sigma`` when ``None``.
    :param decay_period: stored as ``self._decay_period``.
    """
    assert len(action_space.shape) == 1
    Serializable.quick_init(self, locals())
    self._max_sigma = max_sigma
    self._min_sigma = max_sigma if min_sigma is None else min_sigma
    self._decay_period = decay_period
    self._action_space = action_space
def __init__(
        self,
        min_distance=0,
        max_distance=2,
        use_low_gear_ratio=True,
):
    """Set up the environment with an initial two-entry goal.

    :param min_distance: stored as ``self.min_distance``.
    :param max_distance: stored as ``self.max_distance``; both entries of
        the initial goal are set to it.
    :param use_low_gear_ratio: forwarded unchanged to the parent
        constructor.
    """
    Serializable.quick_init(self, locals())
    self.max_distance = max_distance
    self.min_distance = min_distance
    MultitaskEnv.__init__(self)
    super().__init__(use_low_gear_ratio=use_low_gear_ratio)
    # Initial goal: both entries at the maximum distance.
    initial_goal = np.array([self.max_distance] * 2)
    self.set_goal(initial_goal)
def __init__(self, distance_metric_order=None, goal_dim_weights=None):
    """Initialise the reacher_7dof environment and its observation bounds.

    :param distance_metric_order: forwarded to ``MultitaskEnv.__init__``.
    :param goal_dim_weights: forwarded to ``MultitaskEnv.__init__``.
    """
    self._desired_xyz = np.zeros(3)
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(
        self,
        distance_metric_order=distance_metric_order,
        goal_dim_weights=goal_dim_weights,
    )
    # NOTE(review): the trailing 5 is presumably the frame skip; confirm
    # against MujocoEnv's signature.
    mujoco_env.MujocoEnv.__init__(
        self,
        get_asset_xml('reacher_7dof.xml'),
        5,
    )

    # The observation is laid out as 7 joint values, 7 velocities, and the
    # end-effector xyz, in that order.
    joint_low = [-2.28, -0.52, -1.4, -2.32, -1.5, -1.094, -1.5]
    joint_high = [1.71, 1.39, 1.7, 0, 1.5, 0, 1.5]
    velocity_low = [-3] * 7
    velocity_high = [3] * 7
    ee_xyz_low = [-0.75, -1.25, -0.2]
    ee_xyz_high = [0.75, 0.25, 0.6]

    obs_low = np.array(joint_low + velocity_low + ee_xyz_low)
    obs_high = np.array(joint_high + velocity_high + ee_xyz_high)
    self.observation_space = Box(obs_low, obs_high)
def __getstate__(self):
    """Return the serializable state plus the current normalization values.

    Starts from ``Serializable.__getstate__`` and adds ``_obs_mean``,
    ``_obs_std`` and ``_reward_scale`` explicitly, in case they were
    modified.
    """
    state = Serializable.__getstate__(self)
    # Store the live values explicitly in case they were modified.
    for attr_name in ("_obs_mean", "_obs_std", "_reward_scale"):
        state[attr_name] = getattr(self, attr_name)
    return state
def __setstate__(self, d):
    """Restore the object from ``d``, including the normalization values.

    :param d: state mapping; must contain the keys ``_obs_mean``,
        ``_obs_std`` and ``_reward_scale`` in addition to whatever
        ``Serializable.__setstate__`` needs.
    """
    Serializable.__setstate__(self, d)
    # Re-apply the explicitly stored values on top of the rebuilt object.
    for attr_name in ("_obs_mean", "_obs_std", "_reward_scale"):
        setattr(self, attr_name, d[attr_name])
def __init__(self, wrapped_env):
    """Wrap ``wrapped_env`` and mirror its action and observation spaces.

    :param wrapped_env: environment to delegate to; must expose
        ``action_space`` and ``observation_space``.
    """
    Serializable.quick_init(self, locals())
    self._wrapped_env = wrapped_env
    inner_env = self._wrapped_env
    self.action_space = inner_env.action_space
    self.observation_space = inner_env.observation_space
def __init__(self):
    """Set up the environment with a random target velocity and a goal.

    ``target_x_vel`` is drawn uniformly from ``[-MAX_SPEED, MAX_SPEED)``
    before the parent constructors run. The goal is then set to
    ``np.array([5])``.
    """
    Serializable.quick_init(self, locals())
    self.target_x_vel = np.random.uniform(low=-MAX_SPEED, high=MAX_SPEED)
    super().__init__()
    MultitaskEnv.__init__(self)
    initial_goal = np.array([5])
    self.set_goal(initial_goal)
def __setstate__(self, state):
    """Restore this object from ``state`` via ``Serializable.__setstate__``.

    :param state: state object as produced by the matching ``__getstate__``.
    :return: whatever ``Serializable.__setstate__`` returns.
    """
    restored = Serializable.__setstate__(self, state)
    return restored
def __getstate__(self):
    """Return this object's state as built by ``Serializable.__getstate__``."""
    state = Serializable.__getstate__(self)
    return state
def init_serialization(self, locals):
    """Pass ``locals`` to ``Serializable.quick_init`` for this object.

    :param locals: mapping of the caller's local variables. The name
        shadows the ``locals`` builtin inside this method; it is kept
        because it is part of the method's signature.
    """
    Serializable.quick_init(self, locals)
def __init__(self, action_space, prob_random_action=0.1):
    """Store the discrete action space and the random-action probability.

    :param action_space: must be a ``Discrete`` space; stored as
        ``self.action_space``.
    :param prob_random_action: stored as ``self.prob_random_action``.
    :raises AssertionError: if ``action_space`` is not ``Discrete``.
    """
    # The original called quick_init a second time after the assert with
    # the same locals; one call is enough.
    # NOTE(review): this assumes quick_init does nothing once the object
    # is marked initialised. Confirm against Serializable.
    Serializable.quick_init(self, locals())
    assert isinstance(action_space, Discrete)
    self.prob_random_action = prob_random_action
    self.action_space = action_space