def reset(self):
    """Reset the environment and validate the goal-style observation.

    Returns:
        Whatever the parent class's ``reset()`` returns.

    Raises:
        error.Error: If ``self.observation_space`` is not a
            ``scigym.spaces.Dict``, or if the parent's reset result lacks one
            of the keys ``observation``, ``achieved_goal``, ``desired_goal``.
    """
    # A GoalEnv is only usable with a Dict observation space.
    space_is_dict = isinstance(self.observation_space, scigym.spaces.Dict)
    if not space_is_dict:
        raise error.Error(
            'GoalEnv requires an observation space of type scigym.spaces.Dict')

    observation = super(GoalEnv, self).reset()

    required_keys = ('observation', 'achieved_goal', 'desired_goal')
    for name in required_keys:
        if name in observation:
            continue
        raise error.Error(
            'GoalEnv requires the "{}" key to be part of the observation dictionary.'
            .format(name))
    return observation
def spec(self, id):
    """Return the registered spec stored under ``id``.

    Args:
        id: Environment ID; it must match ``env_id_re``.

    Returns:
        The entry of ``self.env_specs`` stored under ``id``.

    Raises:
        error.Error: If ``id`` does not match ``env_id_re``.
        error.DeprecatedEnv: If ``id`` is not registered but other registered
            specs share its environment name (first regex group).
        error.UnregisteredEnv: If nothing registered shares that name.
    """
    parsed = env_id_re.search(id)
    if parsed is None:
        template = 'Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
        raise error.Error(
            template.format(id.encode('utf-8'), env_id_re.pattern))

    try:
        return self.env_specs[id]
    except KeyError:
        # The exact ID is unknown. Compare only the non-version part of the
        # name so we can tell the caller which versions do exist.
        base_name = parsed.group(1)
        same_name_ids = []
        for registered_id, registered_spec in self.env_specs.items():
            if base_name == registered_spec._env_name:
                same_name_ids.append(registered_id)

        if not same_name_ids:
            raise error.UnregisteredEnv(
                'No registered env with id: {}'.format(id))
        raise error.DeprecatedEnv(
            'Env {} not found (valid versions include {})'.format(
                id, same_name_ids))
def collapse_env_infos(env_infos, training_dir):
    """Reduce a list of identical env_info records to a single record.

    Args:
        env_infos: Non-empty sequence of env_info mappings.
        training_dir: Directory the records came from; used only in error
            messages.

    Returns:
        The first element of ``env_infos``.

    Raises:
        AssertionError: If ``env_infos`` is empty.
        error.Error: If any record differs from the first one, or if the
            first record lacks ``env_id`` or ``scigym_version``.
    """
    assert len(env_infos) > 0

    reference = env_infos[0]
    remaining = env_infos[1:]

    # Every record must be equal to the first one.
    for candidate in remaining:
        if first_equals(reference, candidate):
            continue
        raise error.Error(
            'Found two unequal env_infos: {} and {}. This usually indicates that your training directory {} has commingled results from multiple runs.'
            .format(reference, candidate, training_dir))

    # The surviving record must carry the expected keys.
    absent = [
        name for name in ('env_id', 'scigym_version') if name not in reference
    ]
    if absent:
        raise error.Error(
            "env_info {} from training directory {} is missing expected key {}. This is unexpected and likely indicates a bug in gym."
            .format(reference, training_dir, absent[0]))
    return reference


def first_equals(reference, candidate):
    """Return True unless ``reference != candidate``."""
    return not (reference != candidate)
def _set_mode(self, mode): if mode == 'evaluation': type = 'e' elif mode == 'training': type = 't' else: raise error.Error( 'Invalid mode {}: must be "training" or "evaluation"', mode) self.stats_recorder.type = type
def before_reset(self):
    """Bookkeeping to run just before the environment is reset.

    Marks the episode as not done and stamps the time of the first reset
    (only if ``self.initial_reset_timestamp`` is still ``None``).

    Raises:
        AssertionError: If ``self.closed`` is truthy.
        error.Error: If an episode is in progress, i.e. ``self.done`` is set
            but falsy and ``self.steps`` is positive.
    """
    assert not self.closed

    episode_in_progress = (
        self.done is not None and not self.done and self.steps > 0)
    if episode_in_progress:
        raise error.Error(
            "Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over."
            .format(self.env_id))

    self.done = False

    first_reset_pending = self.initial_reset_timestamp is None
    if first_reset_pending:
        self.initial_reset_timestamp = time.time()
def __init__(self,
             id,
             entry_point=None,
             trials=100,
             reward_threshold=None,
             local_only=False,
             kwargs=None,
             nondeterministic=False,
             tags=None,
             max_episode_steps=None,
             max_episode_seconds=None,
             timestep_limit=None):
    """Store the registration data for one environment ID.

    Args:
        id: Environment ID; it must match ``env_id_re``.
        entry_point: Stored as ``self._entry_point``.
        trials: Evaluation parameter, stored as ``self.trials``.
        reward_threshold: Evaluation parameter, stored as
            ``self.reward_threshold``.
        local_only: Stored as ``self._local_only``.
        kwargs: Stored as ``self._kwargs``; ``None`` becomes a new dict.
        nondeterministic: Stored as ``self.nondeterministic``.
        tags: Tag dict; ``None`` becomes a new dict. A dict passed by the
            caller is mutated in place (see below).
        max_episode_steps: Episode step limit.
        max_episode_seconds: Stored as ``self.max_episode_seconds``.
        timestep_limit: Legacy alias; when not ``None`` it overrides
            ``max_episode_steps``.

    Raises:
        error.Error: If ``id`` does not match ``env_id_re``.
    """
    time_limit_tag = 'wrapper_config.TimeLimit.max_episode_steps'

    self.id = id

    # Evaluation parameters
    self.trials = trials
    self.reward_threshold = reward_threshold

    # Environment properties
    self.nondeterministic = nondeterministic
    if tags is None:
        tags = {}
    self.tags = tags

    # BACKWARDS COMPAT 2017/1/18: a truthy legacy tag wins over the
    # max_episode_steps argument.
    if tags.get(time_limit_tag):
        max_episode_steps = tags.get(time_limit_tag)
        # TODO: Add the following deprecation warning after 2017/02/18
        # warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3")

    # The tag is written back before the timestep_limit override below is
    # applied, so it reflects the value chosen at this point.
    tags[time_limit_tag] = max_episode_steps

    # BACKWARDS COMPAT 2017/1/31: the legacy keyword overrides everything.
    if timestep_limit is not None:
        max_episode_steps = timestep_limit
        # TODO: Add the following deprecation warning after 2017/03/01
        # warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit))

    self.max_episode_steps = max_episode_steps
    self.max_episode_seconds = max_episode_seconds

    # We may make some of these other parameters public if they're useful.
    parsed = env_id_re.search(id)
    if parsed is None:
        template = 'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)'
        raise error.Error(template.format(id, env_id_re.pattern))

    self._env_name = parsed.group(1)
    self._entry_point = entry_point
    self._local_only = local_only
    if kwargs is None:
        self._kwargs = {}
    else:
        self._kwargs = kwargs
def _int_list_from_bigint(bigint): # Special case 0 if bigint < 0: raise error.Error('Seed must be non-negative, not {}'.format(bigint)) elif bigint == 0: return [0] ints = [] while bigint > 0: bigint, mod = divmod(bigint, 2**32) ints.append(mod) return ints
def np_random(seed=None):
    """Build a seeded ``np.random.RandomState``.

    Args:
        seed: ``None`` or a non-negative integer.

    Returns:
        Tuple ``(rng, seed)`` where ``seed`` is the value returned by
        ``create_seed`` and ``rng`` was seeded from ``hash_seed(seed)``.

    Raises:
        error.Error: If ``seed`` is neither ``None`` nor a non-negative
            integer.
    """
    if seed is not None:
        is_valid = isinstance(seed, integer_types) and 0 <= seed
        if not is_valid:
            raise error.Error(
                'Seed must be a non-negative integer or omitted, not {}'.format(
                    seed))

    seed = create_seed(seed)

    seed_words = _int_list_from_bigint(hash_seed(seed))
    rng = np.random.RandomState()
    rng.seed(seed_words)
    return rng, seed
def get_display(spec):
    """Turn a display specification into a pyglet Display object.

    Pyglet only supports multiple Displays on Linux.

    Args:
        spec: ``None`` or a string such as ``:0``.

    Returns:
        ``None`` when ``spec`` is ``None``; otherwise
        ``pyglet.canvas.Display(spec)``.

    Raises:
        error.Error: If ``spec`` is neither ``None`` nor a string.
    """
    if spec is None:
        return None
    if not isinstance(spec, six.string_types):
        raise error.Error(
            'Invalid display specification: {}. (Must be a string like :0 or None.)'
            .format(spec))
    return pyglet.canvas.Display(spec)
def make(self):
    """Instantiate the environment described by this spec.

    Returns:
        The new environment, built with ``self._kwargs``; its
        ``unwrapped.spec`` attribute is set to this spec.

    Raises:
        error.Error: If ``self._entry_point`` is ``None``.
    """
    if self._entry_point is None:
        raise error.Error(
            'Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'
            .format(self.id))

    # A callable entry point is used directly; anything else is resolved
    # through ``load`` first.
    if callable(self._entry_point):
        factory = self._entry_point
    else:
        factory = load(self._entry_point)
    env = factory(**self._kwargs)

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = self
    return env
def create_seed(a=None, max_bytes=8):
    """Create a strong random seed.

    Otherwise, Python 2 would seed using the system time, which might be
    non-robust especially in the presence of concurrency.

    Args:
        a (Optional[int, str]): None seeds from an operating system specific
            randomness source. A string is extended with its SHA-512 digest
            and truncated to ``max_bytes`` bytes. An integer is reduced
            modulo ``2**(8 * max_bytes)``.
        max_bytes: Maximum number of bytes to use in the seed.

    Returns:
        The integer seed.

    Raises:
        error.Error: If ``a`` is not None, a string or an integer.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))

    if isinstance(a, str):
        encoded = a.encode('utf8')
        encoded += hashlib.sha512(encoded).digest()
        return _bigint_from_bytes(encoded[:max_bytes])

    if isinstance(a, integer_types):
        return a % 2**(8 * max_bytes)

    raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
    """Set up a recorder that writes an environment's rendering to a file.

    Args:
        env: Environment to record; its ``metadata`` dict is consulted for
            ``render.modes``, ``semantics.async`` and
            ``video.frames_per_second``.
        path: Full output path, including the required extension.
        metadata: Extra metadata; a ``content_type`` entry is added to it.
        enabled: When falsy, nothing beyond ``_async`` and ``enabled`` is
            initialised.
        base_path: Output path without extension. Mutually exclusive with
            ``path``.

    Raises:
        error.Error: If both ``path`` and ``base_path`` are given, or if the
            resulting path does not end in the required extension.
    """
    supported_modes = env.metadata.get('render.modes', [])
    self._async = env.metadata.get('semantics.async')
    self.enabled = enabled

    # Don't bother setting anything else if not enabled
    if not self.enabled:
        return

    self.ansi_mode = False
    if 'rgb_array' not in supported_modes:
        if 'ansi' not in supported_modes:
            logger.info(
                'Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'
                .format(env))
            # Whoops, turns out we shouldn't be enabled after all
            self.enabled = False
            return
        self.ansi_mode = True

    if path is not None and base_path is not None:
        raise error.Error(
            "You can pass at most one of `path` or `base_path`.")

    self.last_frame = None
    self.env = env

    if self.ansi_mode:
        required_ext = '.json'
    else:
        required_ext = '.mp4'

    if path is None:
        if base_path is None:
            # Neither given: generate a unique filename.
            with tempfile.NamedTemporaryFile(
                    suffix=required_ext, delete=False) as tmp_file:
                path = tmp_file.name
        else:
            # Base path given, append ext
            path = base_path + required_ext
    self.path = path

    path_base, actual_ext = os.path.splitext(self.path)
    if actual_ext != required_ext:
        if self.ansi_mode:
            hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format."
        else:
            hint = ''
        raise error.Error(
            "Invalid path given: {} -- must have file extension {}.{}".format(
                self.path, required_ext, hint))

    # Touch the file in any case, so we know it's present. (This corrects
    # for platform differences: using ffmpeg on OS X, the file is
    # precreated, but not on Linux.)
    touch(path)

    self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
    self.encoder = None  # lazily start the process
    self.broken = False

    # Dump metadata
    self.metadata = metadata or {}
    if self.ansi_mode:
        content_type = 'video/vnd.scigym.ansivid'
    else:
        content_type = 'video/mp4'
    self.metadata['content_type'] = content_type
    self.metadata_path = '{}.meta.json'.format(path_base)
    self.write_metadata()

    logger.info('Starting new video recorder writing to %s', self.path)
    self.empty = True
def _start(self,
           directory,
           video_callable=None,
           force=False,
           resume=False,
           write_upon_reset=False,
           uid=None,
           mode=None):
    """Start monitoring.

    Args:
        directory (str): A per-training run directory where to record stats.
            It is created if it does not exist.
        video_callable (Optional[function, False]): function that takes in
            the index of the episode and outputs a boolean, indicating
            whether we should record a video on this episode. ``None``
            selects ``capped_cubic_video_schedule``; ``False`` selects
            ``disable_videos``.
        force (bool): Clear out existing monitor files from this directory
            (via ``clear_monitor_files``).
        resume (bool): Retain the training data already in this directory;
            the check for existing training manifests is skipped.
        write_upon_reset (bool): Write the manifest file on each reset.
            (This is currently a JSON file, so writing it is somewhat
            expensive.)
        uid (Optional[str]): A unique id used as part of the suffix for the
            file. By default, uses os.getpid().
        mode (['evaluation', 'training']): Whether this is an evaluation or
            training episode.

    Raises:
        error.Error: If ``video_callable`` is not a function, None or False,
            or if the directory already holds training manifests while
            neither ``force`` nor ``resume`` is set.
    """
    if self.env.spec is None:
        logger.warn(
            "Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'scigym.make', and is recommended only for advanced users."
        )
        env_id = '(unknown)'
    else:
        env_id = self.env.spec.id

    if not os.path.exists(directory):
        logger.info('Creating monitor directory %s', directory)
        # exist_ok is only passed on Python 3.
        makedirs_options = {'exist_ok': True} if six.PY3 else {}
        os.makedirs(directory, **makedirs_options)

    if video_callable is None:
        video_callable = capped_cubic_video_schedule
    elif video_callable == False:  # noqa: E712 -- equality kept on purpose
        video_callable = disable_videos
    elif not callable(video_callable):
        raise error.Error(
            'You must provide a function, None, or False for video_callable, not {}: {}'
            .format(type(video_callable), video_callable))
    self.video_callable = video_callable

    # Check on whether we need to clear anything
    if force:
        clear_monitor_files(directory)
    elif not resume:
        existing_manifests = detect_training_manifests(directory)
        if len(existing_manifests) > 0:
            shown_manifests = ', '.join(existing_manifests[:5])
            raise error.Error(
                '''Trying to write to monitor directory {} with existing monitor files: {}. You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''
                .format(directory, shown_manifests))

    self._monitor_id = monitor_closer.register(self)

    self.enabled = True
    self.directory = os.path.abspath(directory)

    # The file prefix is how we determine whether a file is ours.
    self.file_prefix = FILE_PREFIX
    suffix_id = uid if uid else os.getpid()
    self.file_infix = '{}.{}'.format(self._monitor_id, suffix_id)

    batch_prefix = '{}.episode_batch.{}'.format(self.file_prefix,
                                                self.file_infix)
    self.stats_recorder = stats_recorder.StatsRecorder(
        directory,
        batch_prefix,
        autoreset=self.env_semantics_autoreset,
        env_id=env_id)

    if not os.path.exists(directory):
        os.mkdir(directory)
    self.write_upon_reset = write_upon_reset

    if mode is not None:
        self._set_mode(mode)
def type(self, type):
    """Set the episode type after validating it.

    Args:
        type: ``'t'`` for training or ``'e'`` for evaluation. The parameter
            name shadows the builtin but is kept for interface compatibility.

    Raises:
        error.Error: If ``type`` is neither ``'t'`` nor ``'e'``.
    """
    if type not in ['t', 'e']:
        # The message must actually be formatted; previously the value was
        # passed as a second exception argument and the '{}' stayed literal.
        raise error.Error(
            'Invalid episode type {}: must be t for training or e for evaluation'
            .format(type))
    self._type = type
def register(self, id, **kwargs):
    """Create an ``EnvSpec`` for ``id`` and store it in ``self.env_specs``.

    Args:
        id: Environment ID to register.
        **kwargs: Passed through to ``EnvSpec``.

    Raises:
        error.Error: If ``id`` is already present in ``self.env_specs``.
    """
    already_registered = id in self.env_specs
    if already_registered:
        raise error.Error('Cannot re-register id: {}'.format(id))

    new_spec = EnvSpec(id, **kwargs)
    self.env_specs[id] = new_spec