def reset(self):
    """Check that the observation space has the layout a GoalEnv requires.

    Raises:
        error.Error: if ``self.observation_space`` is not a
            ``gym.spaces.Dict``, or if it lacks one of the keys
            ``observation``, ``achieved_goal`` or ``desired_goal``.
    """
    obs_space = self.observation_space
    if not isinstance(obs_space, gym.spaces.Dict):
        raise error.Error(
            'GoalEnv requires an observation space of type gym.spaces.Dict')

    # Report the first required entry that is absent, in a fixed order.
    for required_key in ('observation', 'achieved_goal', 'desired_goal'):
        if required_key in obs_space.spaces:
            continue
        raise error.Error(
            'GoalEnv requires the "{}" key to be part of the observation dictionary.'
            .format(required_key))
def __init__(self, id, entry_point=None, reward_threshold=None, kwargs=None,
             nondeterministic=False, tags=None, max_episode_steps=None):
    """Describe one registered environment.

    Args:
        id: Environment ID; must match ``env_id_re``.
        entry_point: Stored as-is on the spec (may be None).
        reward_threshold: Stored as-is as an evaluation parameter.
        kwargs: Optional dict of default constructor keyword arguments.
        nondeterministic: Stored as-is as an environment property.
        tags: Optional dict of tags. It is copied, never modified in place.
        max_episode_steps: Stored on the spec and also recorded in the tags
            under ``'wrapper_config.TimeLimit.max_episode_steps'``.

    Raises:
        error.Error: if ``id`` does not match ``env_id_re``.
    """
    self.id = id
    # Evaluation parameters
    self.reward_threshold = reward_threshold
    # Environment properties
    self.nondeterministic = nondeterministic
    self.entry_point = entry_point

    # Work on a private copy: the step limit is written into the tags below,
    # and doing that on the caller's dict would leak the value into every
    # other spec created from the same dict.
    tags = {} if tags is None else dict(tags)
    tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps
    self.tags = tags

    self.max_episode_steps = max_episode_steps

    # We may make some of these other parameters public if they're
    # useful.
    match = env_id_re.search(id)
    if not match:
        raise error.Error(
            'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
            .format(id, env_id_re.pattern))
    self._env_name = match.group(1)
    self._kwargs = {} if kwargs is None else kwargs
def _reset_sim(self):
    """Reset the simulator and optionally randomize the object's start pose.

    Restores ``self.initial_state``, perturbs the pose of ``'object:joint'``
    according to the randomization flags, writes it back to the simulator and
    then steps the simulation a few times.

    Returns:
        False if stepping the simulator raises ``mujoco_py.MujocoException``;
        otherwise the result of the nested ``is_on_palm()`` check.

    Raises:
        error.Error: if rotation randomization is enabled and
            ``self.target_rotation`` is not a recognized option.
    """
    self.sim.set_state(self.initial_state)
    self.sim.forward()

    # Work on a copy of the joint state; the asserts below pin its layout to
    # 3 position values followed by a 4-element quaternion.
    initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy()
    initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:]
    assert initial_qpos.shape == (7,)
    assert initial_pos.shape == (3,)
    assert initial_quat.shape == (4,)
    # Cleared so the combined vector cannot be used by accident before it is
    # rebuilt from the (possibly modified) parts further down.
    initial_qpos = None

    # Randomize initial rotation.
    if self.randomize_initial_rotation:
        if self.target_rotation == 'z':
            # Random rotation about the fixed axis (0, 0, 1).
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            offset_quat = quat_from_angle_and_axis(angle, axis)
            initial_quat = rotations.quat_mul(initial_quat, offset_quat)
        elif self.target_rotation == 'parallel':
            # Random rotation about (0, 0, 1) combined with one randomly
            # chosen entry of self.parallel_quats.
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = np.array([0., 0., 1.])
            z_quat = quat_from_angle_and_axis(angle, axis)
            parallel_quat = self.parallel_quats[self.np_random.randint(len(self.parallel_quats))]
            offset_quat = rotations.quat_mul(z_quat, parallel_quat)
            initial_quat = rotations.quat_mul(initial_quat, offset_quat)
        elif self.target_rotation in ['xyz', 'ignore']:
            # Random angle about a randomly drawn axis.
            angle = self.np_random.uniform(-np.pi, np.pi)
            axis = self.np_random.uniform(-1., 1., size=3)
            offset_quat = quat_from_angle_and_axis(angle, axis)
            initial_quat = rotations.quat_mul(initial_quat, offset_quat)
        elif self.target_rotation == 'fixed':
            pass
        else:
            raise error.Error('Unknown target_rotation option "{}".'.format(self.target_rotation))

    # Randomize initial position.
    if self.randomize_initial_position:
        if self.target_position != 'fixed':
            # Gaussian jitter with scale 0.005 on each coordinate.
            initial_pos += self.np_random.normal(size=3, scale=0.005)

    # Re-normalize the quaternion before writing the pose back.
    initial_quat /= np.linalg.norm(initial_quat)
    initial_qpos = np.concatenate([initial_pos, initial_quat])
    self.sim.data.set_joint_qpos('object:joint', initial_qpos)

    def is_on_palm():
        # True when the third coordinate of the 'object:center' site is above
        # 0.04 (presumably the height in simulator units -- TODO confirm).
        self.sim.forward()
        cube_middle_idx = self.sim.model.site_name2id('object:center')
        cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx]
        is_on_palm = (cube_middle_pos[2] > 0.04)
        return is_on_palm

    # Run the simulation for a bunch of timesteps to let everything settle in.
    for _ in range(10):
        # An all-zero action of length 20 is applied on every settle step.
        self._set_action(np.zeros(20))
        try:
            self.sim.step()
        except mujoco_py.MujocoException:
            # A simulator failure while settling is reported as a failed reset.
            return False
    return is_on_palm()
def _sample_goal(self):
    """Sample a goal pose for the object.

    The position part depends on ``self.target_position`` (``'random'``,
    ``'ignore'`` or ``'fixed'``) and the rotation part on
    ``self.target_rotation`` (``'z'``, ``'parallel'``, ``'xyz'``,
    ``'ignore'`` or ``'fixed'``).

    Returns:
        np.ndarray of shape (7,): the target position (3 values) followed by
        the normalized target quaternion (4 values).

    Raises:
        error.Error: if ``target_position`` or ``target_rotation`` is not a
            recognized option.
    """
    # Select a goal for the object position.
    target_pos = None
    if self.target_position == 'random':
        assert self.target_position_range.shape == (3, 2)
        offset = self.np_random.uniform(
            self.target_position_range[:, 0],
            self.target_position_range[:, 1])
        assert offset.shape == (3,)
        target_pos = self.sim.data.get_joint_qpos('object:joint')[:3] + offset
    elif self.target_position in ['ignore', 'fixed']:
        target_pos = self.sim.data.get_joint_qpos('object:joint')[:3]
    else:
        raise error.Error('Unknown target_position option "{}".'.format(
            self.target_position))
    assert target_pos is not None
    assert target_pos.shape == (3,)

    # Select a goal for the object rotation.
    target_quat = None
    if self.target_rotation == 'z':
        angle = self.np_random.uniform(-np.pi, np.pi)
        axis = np.array([0., 0., 1.])
        target_quat = quat_from_angle_and_axis(angle, axis)
    elif self.target_rotation == 'parallel':
        angle = self.np_random.uniform(-np.pi, np.pi)
        axis = np.array([0., 0., 1.])
        target_quat = quat_from_angle_and_axis(angle, axis)
        parallel_quat = self.parallel_quats[self.np_random.randint(
            len(self.parallel_quats))]
        target_quat = rotations.quat_mul(target_quat, parallel_quat)
    elif self.target_rotation == 'xyz':
        angle = self.np_random.uniform(-np.pi, np.pi)
        axis = self.np_random.uniform(-1., 1., size=3)
        target_quat = quat_from_angle_and_axis(angle, axis)
    elif self.target_rotation in ['ignore', 'fixed']:
        # The joint qpos holds the position in its first three entries (see
        # the position branch above); the remaining four are the quaternion.
        # Taking the whole vector here made the shape assertion below fail.
        target_quat = self.sim.data.get_joint_qpos('object:joint')[3:]
    else:
        raise error.Error('Unknown target_rotation option "{}".'.format(
            self.target_rotation))
    assert target_quat is not None
    assert target_quat.shape == (4,)

    # Normalize into a new array rather than in place, so a quaternion that
    # is a view into simulator state is never modified.
    target_quat = target_quat / np.linalg.norm(target_quat)
    goal = np.concatenate([target_pos, target_quat])
    return goal
def np_random(seed=None):
    """Create a seeded ``np.random.RandomState``.

    Args:
        seed: A non-negative integer, or None to have ``create_seed`` choose
            one.

    Returns:
        A ``(rng, seed)`` tuple: the seeded generator and the seed value
        returned by ``create_seed``.

    Raises:
        error.Error: if ``seed`` is given but is not a non-negative integer.
    """
    seed_is_acceptable = seed is None or (
        isinstance(seed, integer_types) and seed >= 0)
    if not seed_is_acceptable:
        raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))

    seed = create_seed(seed)

    # The generator is seeded with the hashed seed, split into a list of ints.
    seed_words = _int_list_from_bigint(hash_seed(seed))
    generator = np.random.RandomState()
    generator.seed(seed_words)
    return generator, seed
def __init__(
        self,
        game='pong',
        mode=None,
        difficulty=None,
        obs_type='ram',
        frameskip=(2, 5),
        repeat_action_probability=0.,
        full_action_space=False):
    """Set up an ALE-backed Atari environment.

    Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value excluded), or an int.

    Args:
        game: Game name; resolved to a path with ``atari_py.get_game_path``.
        mode: Stored as ``self.game_mode``.
        difficulty: Stored as ``self.game_difficulty``.
        obs_type: ``'ram'`` (observations of shape ``(128,)``) or ``'image'``
            (observations of shape ``(screen_height, screen_width, 3)``).
        frameskip: See above.
        repeat_action_probability: Float or int forwarded to ALE's
            ``repeat_action_probability`` setting.
        full_action_space: If true, use ALE's legal action set instead of
            the minimal action set.

    Raises:
        IOError: if the resolved game path does not exist.
        error.Error: if ``obs_type`` is not recognized (only reachable when
            assertions are disabled).
    """
    # Hand every constructor argument to EzPickle. full_action_space used to
    # be left out, so an environment rebuilt from the recorded arguments
    # silently fell back to the default (minimal) action set.
    utils.EzPickle.__init__(
        self,
        game,
        mode,
        difficulty,
        obs_type,
        frameskip,
        repeat_action_probability,
        full_action_space)
    assert obs_type in ('ram', 'image')

    self.game = game
    self.game_path = atari_py.get_game_path(game)
    self.game_mode = mode
    self.game_difficulty = difficulty

    if not os.path.exists(self.game_path):
        msg = 'You asked for game %s but path %s does not exist'
        raise IOError(msg % (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), \
        "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat(
        'repeat_action_probability'.encode('utf-8'),
        repeat_action_probability)

    self.seed()

    self._action_set = (self.ale.getLegalActionSet() if full_action_space
                        else self.ale.getMinimalActionSet())
    self.action_space = spaces.Discrete(len(self._action_set))

    (screen_width, screen_height) = self.ale.getScreenDims()
    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
    else:
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def before_reset(self):
    """Validate and record the start of a new episode.

    Marks the episode as not done and stores the time of the very first
    reset in ``self.initial_reset_timestamp``.

    Raises:
        error.Error: if an episode has taken at least one step and is not
            yet done.
    """
    assert not self.closed

    episode_unfinished = self.done is not None and not self.done
    if episode_unfinished and self.steps > 0:
        raise error.Error(
            "Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over."
            .format(self.env_id))

    self.done = False
    # Only the first reset is timestamped; later resets keep that value.
    if self.initial_reset_timestamp is not None:
        return
    self.initial_reset_timestamp = time.time()
def _int_list_from_bigint(bigint):
    """Split a non-negative integer into base-2**32 digits.

    The digits are returned least significant first; zero is returned as
    ``[0]``.

    Raises:
        error.Error: if ``bigint`` is negative.
    """
    if bigint < 0:
        raise error.Error('Seed must be non-negative, not {}'.format(bigint))
    if bigint == 0:
        # Special case 0: the loop below would otherwise produce an empty list.
        return [0]

    base = 2 ** 32
    words = []
    remaining = bigint
    while remaining > 0:
        remaining, low_word = divmod(remaining, base)
        words.append(low_word)
    return words
def get_display(spec):
    """Turn a display specification into a pyglet Display.

    Pyglet only supports multiple Displays on Linux.

    Args:
        spec: None, or a string such as ``:0``.

    Returns:
        None when ``spec`` is None, otherwise ``pyglet.canvas.Display(spec)``.

    Raises:
        error.Error: if ``spec`` is neither None nor a string.
    """
    if spec is None:
        return None
    if not isinstance(spec, six.string_types):
        raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
    return pyglet.canvas.Display(spec)
def spec(self, path):
    """Return the registered spec for ``path``.

    Args:
        path: Either an environment ID, or ``module:ID``; in the second form
            the module is imported before the lookup.

    Returns:
        The entry of ``self.env_specs`` stored under the ID.

    Raises:
        error.Error: if the module cannot be imported or the ID is malformed.
        error.DeprecatedEnv: if the ID is unknown but other registered IDs
            share its environment name.
        error.UnregisteredEnv: if nothing matching is registered.
    """
    module_name, separator, env_id = path.partition(':')
    if separator:
        try:
            importlib.import_module(module_name)
        # catch ImportError for python2.7 compatibility
        except ImportError:
            raise error.Error(
                'A module ({}) was specified for the environment but was not found, make sure the package is installed with `pip install` before calling `gym.make()`'
                .format(module_name))
    else:
        env_id = path

    match = env_id_re.search(env_id)
    if not match:
        raise error.Error(
            'Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
            .format(env_id.encode('utf-8'), env_id_re.pattern))

    try:
        return self.env_specs[env_id]
    except KeyError:
        # Compare only the non-version part of the ID against every
        # registered spec (could also check the exact number here).
        requested_name = match.group(1)
        other_versions = []
        for registered_id, registered_spec in self.env_specs.items():
            if registered_spec._env_name == requested_name:
                other_versions.append(registered_id)

        if not other_versions:
            raise error.UnregisteredEnv(
                'No registered env with id: {}'.format(env_id))
        raise error.DeprecatedEnv(
            'Env {} not found (valid versions include {})'.format(
                env_id, other_versions))
def make(self, **kwargs):
    """Build an environment instance from this spec.

    Args:
        **kwargs: Overrides merged on top of a copy of the spec's stored
            keyword arguments.

    Returns:
        The new environment, with ``env.unwrapped.spec`` set to this spec.

    Raises:
        error.Error: if the spec has no entry point.
    """
    if self.entry_point is None:
        raise error.Error(
            'Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'
            .format(self.id))

    # Merge into a copy so the defaults stored on the spec stay untouched.
    merged_kwargs = self._kwargs.copy()
    merged_kwargs.update(kwargs)

    # A callable entry point is used directly; anything else goes through load().
    if callable(self.entry_point):
        factory = self.entry_point
    else:
        factory = load(self.entry_point)
    env = factory(**merged_kwargs)

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = self
    return env
def create_seed(a=None, max_bytes=8):
    """Create a strong random seed.

    Otherwise, Python 2 would seed using the system time, which might be
    non-robust especially in the presence of concurrency.

    Args:
        a (Optional[int, str]): None seeds from an operating system specific
            randomness source. A string is UTF-8 encoded, extended with its
            SHA-512 digest and truncated to ``max_bytes``. An integer is
            reduced modulo ``2**(8 * max_bytes)``.
        max_bytes: Maximum number of bytes to use in the seed.

    Returns:
        The seed as an integer.

    Raises:
        error.Error: if ``a`` is not None, a string or an integer.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))

    if isinstance(a, str):
        encoded = a.encode('utf8')
        encoded += hashlib.sha512(encoded).digest()
        return _bigint_from_bytes(encoded[:max_bytes])

    if isinstance(a, integer_types):
        return a % 2**(8 * max_bytes)

    raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
def register(self, id, **kwargs):
    """Add a new environment spec under ``id``.

    Args:
        id: Environment ID to register.
        **kwargs: Forwarded to ``EnvSpec`` together with ``id``.

    Raises:
        error.Error: if ``id`` is already registered.
    """
    already_registered = id in self.env_specs
    if already_registered:
        raise error.Error('Cannot re-register id: {}'.format(id))

    new_spec = EnvSpec(id, **kwargs)
    self.env_specs[id] = new_spec
def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
    """Prepare to record renderings of ``env`` to a file.

    Args:
        env: Environment to record. Its ``metadata`` dict is read for
            ``'render.modes'``, ``'semantics.async'`` and
            ``'video.frames_per_second'``.
        path: Output file path. Its extension must be ``.mp4``, or ``.json``
            when recording in ansi mode.
        metadata: Optional dict of extra metadata. It is copied, never
            modified in place.
        enabled: If false, only ``self.enabled`` and ``self._async`` are set
            and nothing else is initialized.
        base_path: Alternative to ``path``: the output path without its
            extension.

    Raises:
        error.Error: if both ``path`` and ``base_path`` are given, or if the
            path does not end in the required extension.
    """
    modes = env.metadata.get('render.modes', [])
    self._async = env.metadata.get('semantics.async')
    self.enabled = enabled

    # Don't bother setting anything else if not enabled
    if not self.enabled:
        return

    self.ansi_mode = False
    if 'rgb_array' not in modes:
        if 'ansi' in modes:
            self.ansi_mode = True
        else:
            logger.info(
                'Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'
                .format(env))
            # Whoops, turns out we shouldn't be enabled after all
            self.enabled = False
            return

    if path is not None and base_path is not None:
        raise error.Error(
            "You can pass at most one of `path` or `base_path`.")

    self.last_frame = None
    self.env = env

    required_ext = '.json' if self.ansi_mode else '.mp4'
    if path is None:
        if base_path is not None:
            # Base path given, append ext
            path = base_path + required_ext
        else:
            # Otherwise, just generate a unique filename
            with tempfile.NamedTemporaryFile(suffix=required_ext, delete=False) as f:
                path = f.name
    self.path = path

    path_base, actual_ext = os.path.splitext(self.path)

    if actual_ext != required_ext:
        hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format." if self.ansi_mode else ''
        raise error.Error(
            "Invalid path given: {} -- must have file extension {}.{}".
            format(self.path, required_ext, hint))

    # Touch the file in any case, so we know it's present. (This
    # corrects for platform differences. Using ffmpeg on OS X, the
    # file is precreated, but not on Linux.)
    touch(path)

    self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
    self.encoder = None  # lazily start the process
    self.broken = False

    # Dump metadata. Work on a copy: 'content_type' is added below, and
    # writing it into the caller's dict would alter an object they still own.
    self.metadata = dict(metadata) if metadata else {}
    self.metadata[
        'content_type'] = 'video/vnd.openai.ansivid' if self.ansi_mode else 'video/mp4'
    self.metadata_path = '{}.meta.json'.format(path_base)
    self.write_metadata()

    logger.info('Starting new video recorder writing to %s', self.path)
    self.empty = True
def type(self, type):
    """Set the episode type.

    Args:
        type: ``'t'`` for training or ``'e'`` for evaluation.

    Raises:
        error.Error: if ``type`` is any other value.
    """
    if type not in ['t', 'e']:
        # Interpolate the offending value into the message. It used to be
        # passed as a second exception argument, which left the '{}'
        # placeholder unfilled.
        raise error.Error(
            'Invalid episode type {}: must be t for training or e for evaluation'
            .format(type))
    self._type = type