def _start(self, directory, video_callable=None, force=False, resume=False,
           write_upon_reset=False, uid=None, mode=None):
    """Start monitoring.

    Args:
        directory (str): A per-training run directory where to record stats.
        video_callable (Optional[function, False]): function that takes in the
            index of the episode and outputs a boolean, indicating whether we
            should record a video on this episode. The default (for
            video_callable is None) is to take perfect cubes, capped at 1000.
            False disables video recording.
        force (bool): Clear out existing training data from this directory (by
            deleting every file prefixed with "openaigym.").
        resume (bool): Retain the training data already in this directory,
            which will be merged with our new data
        write_upon_reset (bool): Write the manifest file on each reset. (This
            is currently a JSON file, so writing it is somewhat expensive.)
        uid (Optional[str]): A unique id used as part of the suffix for the
            file. By default, uses os.getpid().
        mode (['evaluation', 'training']): Whether this is an evaluation or
            training episode.

    Raises:
        error.Error: if video_callable is neither a callable, None nor False,
            or if the directory already holds monitor files and neither
            force nor resume was requested.
    """
    if self.env.spec is None:
        logger.warning("Trying to monitor an environment which has no 'spec' set. This usually means you did not create it via 'gym.make', and is recommended only for advanced users.")
        env_id = '(unknown)'
    else:
        env_id = self.env.spec.id

    if not os.path.exists(directory):
        logger.info('Creating monitor directory %s', directory)
        try:
            os.makedirs(directory)
        except OSError:
            # Another process may have created the directory between the
            # existence check and makedirs; only a genuine failure is fatal.
            if not os.path.isdir(directory):
                raise

    if video_callable is None:
        video_callable = capped_cubic_video_schedule
    elif video_callable == False:
        video_callable = disable_videos
    elif not callable(video_callable):
        raise error.Error('You must provide a function, None, or False for video_callable, not {}: {}'.format(type(video_callable), video_callable))
    self.video_callable = video_callable

    # Check on whether we need to clear anything
    if force:
        clear_monitor_files(directory)
    elif not resume:
        training_manifests = detect_training_manifests(directory)
        if len(training_manifests) > 0:
            raise error.Error('''Trying to write to monitor directory {} with existing monitor files: {}. You should use a unique directory for each training run, or use 'force=True' to automatically clear previous monitor files.'''.format(directory, ', '.join(training_manifests[:5])))

    self._monitor_id = monitor_closer.register(self)

    self.enabled = True
    self.directory = os.path.abspath(directory)
    # We use the 'openai-gym' prefix to determine if a file is
    # ours
    self.file_prefix = FILE_PREFIX
    self.file_infix = '{}.{}'.format(self._monitor_id, uid if uid else os.getpid())

    self.stats_recorder = stats_recorder.StatsRecorder(directory, '{}.episode_batch.{}'.format(self.file_prefix, self.file_infix), autoreset=self.env_semantics_autoreset, env_id=env_id)

    # NOTE(review): the directory was already ensured above; this guard is
    # kept as-is in case clearing monitor files can remove it -- confirm.
    if not os.path.exists(directory):
        os.mkdir(directory)
    self.write_upon_reset = write_upon_reset

    if mode is not None:
        self._set_mode(mode)
def _upload(training_dir, algorithm_id=None, writeup=None, benchmark_run_id=None, api_key=None, ignore_open_monitors=False):
    """Upload one training directory and create its evaluation object.

    Args:
        training_dir: Directory holding the recorded training data.
        algorithm_id: Passed through as the evaluation's algorithm id.
        writeup: Passed through as the evaluation's writeup.
        benchmark_run_id: Passed through as the evaluation's benchmark run id.
        api_key: API key forwarded to the upload and creation calls.
        ignore_open_monitors: When false, refuse to upload while any monitor
            is still open.

    Returns:
        The object returned by resource.Evaluation.create.

    Raises:
        error.Error: if a monitor is still open, or if neither an episode
            batch nor a video was recorded in training_dir.
    """
    if not ignore_open_monitors:
        open_monitors = monitoring._open_monitors()
        if len(open_monitors) > 0:
            envs = [m.env.spec.id if m.env.spec else '(unknown)' for m in open_monitors]
            raise error.Error("Still have an open monitor on {}. You must run 'env.close()' before uploading.".format(', '.join(envs)))

    env_info, training_episode_batch, training_video = upload_training_data(training_dir, api_key=api_key)
    env_id = env_info['env_id']
    training_episode_batch_id = training_video_id = None
    if training_episode_batch:
        training_episode_batch_id = training_episode_batch.id
    if training_video:
        training_video_id = training_video.id

    # The emptiness check must not depend on the logger's verbosity:
    # previously it was only reached when the log level was INFO or lower.
    if training_episode_batch_id is None and training_video_id is None:
        raise error.Error("[%s] You didn't have any recorded training data in %s. Once you've used 'env.monitor.start(training_dir)' to start recording, you need to actually run some rollouts. Please join the community chat on https://gym.openai.com if you have any issues." % (env_id, training_dir))

    # logger.info already filters on the effective level.
    if training_episode_batch_id is not None and training_video_id is not None:
        logger.info('[%s] Creating evaluation object from %s with learning curve and training video', env_id, training_dir)
    elif training_episode_batch_id is not None:
        logger.info('[%s] Creating evaluation object from %s with learning curve', env_id, training_dir)
    else:
        logger.info('[%s] Creating evaluation object from %s with training video', env_id, training_dir)

    evaluation = resource.Evaluation.create(
        training_episode_batch=training_episode_batch_id,
        training_video=training_video_id,
        env=env_info['env_id'],
        algorithm={
            'id': algorithm_id,
        },
        benchmark_run_id=benchmark_run_id,
        writeup=writeup,
        gym_version=env_info['gym_version'],
        api_key=api_key,
    )
    return evaluation
def __init__(self, env, path=None, metadata=None, enabled=True, base_path=None):
    """Set up a recorder that writes a video (or ANSI JSON) for env.

    Args:
        env: Environment to record; its metadata is consulted for the
            supported render modes and the frame rate.
        path: Explicit output path; must carry the required extension.
        metadata: Optional dict of extra metadata to write alongside.
        enabled: When false, nothing beyond the flag itself is set up.
        base_path: Output path without extension (exclusive with path).

    Raises:
        error.Error: if both path and base_path are given, or if the path
            does not end in the required extension.
    """
    supported_modes = env.metadata.get('render.modes', [])
    self._async = env.metadata.get('semantics.async')
    self.enabled = enabled

    # A disabled recorder needs no further state.
    if not self.enabled:
        return

    self.ansi_mode = False
    if 'rgb_array' not in supported_modes:
        if 'ansi' not in supported_modes:
            logger.info('Disabling video recorder because {} neither supports video mode "rgb_array" nor "ansi".'.format(env))
            # Neither mode is available, so recording is impossible.
            self.enabled = False
            return
        self.ansi_mode = True

    if path is not None and base_path is not None:
        raise error.Error("You can pass at most one of `path` or `base_path`.")

    self.last_frame = None
    self.env = env

    required_ext = '.json' if self.ansi_mode else '.mp4'
    if path is None:
        if base_path is None:
            # No hint at all: fall back to a unique temporary file name.
            with tempfile.NamedTemporaryFile(suffix=required_ext, delete=False) as f:
                path = f.name
        else:
            path = base_path + required_ext
    self.path = path

    path_base, actual_ext = os.path.splitext(self.path)
    if actual_ext != required_ext:
        if self.ansi_mode:
            hint = " HINT: The environment is text-only, therefore we're recording its text output in a structured JSON format."
        else:
            hint = ''
        raise error.Error("Invalid path given: {} -- must have file extension {}.{}".format(self.path, required_ext, hint))

    # Touch the file in any case, so we know it's present. (This
    # corrects for platform differences. Using ffmpeg on OS X, the
    # file is precreated, but not on Linux.)
    touch(path)

    self.frames_per_sec = env.metadata.get('video.frames_per_second', 30)
    self.encoder = None  # started lazily
    self.broken = False

    # Dump metadata
    self.metadata = metadata or {}
    if self.ansi_mode:
        self.metadata['content_type'] = 'video/vnd.openai.ansivid'
    else:
        self.metadata['content_type'] = 'video/mp4'
    self.metadata_path = '{}.meta.json'.format(path_base)
    self.write_metadata()

    logger.info('Starting new video recorder writing to %s', self.path)
    self.empty = True
def collapse_env_infos(env_infos, training_dir):
    """Return the single env_info shared by every entry of env_infos.

    Args:
        env_infos: Non-empty sequence of env_info mappings.
        training_dir: Directory name, used only in error messages.

    Returns:
        The first element of env_infos.

    Raises:
        error.Error: if two entries differ, or if the shared entry lacks the
            'env_id' or 'gym_version' key.
    """
    assert len(env_infos) > 0

    first = env_infos[0]
    remaining = env_infos[1:]
    for candidate in remaining:
        if first != candidate:
            raise error.Error('Found two unequal env_infos: {} and {}. This usually indicates that your training directory {} has commingled results from multiple runs.'.format(first, candidate, training_dir))

    absent = [key for key in ('env_id', 'gym_version') if key not in first]
    if absent:
        # Report the first missing key, in declaration order.
        raise error.Error("env_info {} from training directory {} is missing expected key {}. This is unexpected and likely indicates a bug in gym.".format(first, training_dir, absent[0]))
    return first
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
        board_size: Side length of the board; must be an int >= 1
    """
    assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    self._seed()

    color_by_name = {'black': pachi_py.BLACK, 'white': pachi_py.WHITE}
    try:
        self.player_color = color_by_name[player_color]
    except KeyError:
        raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))

    self.opponent_policy = None
    self.opponent = opponent

    assert observation_type in ['image3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    if self.observation_type != 'image3c':
        raise error.Error('Unsupported observation type: {}'.format(self.observation_type))

    # The observation bounds follow the shape of an encoded empty board.
    encoded_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    lower = np.zeros(encoded_shape)
    upper = np.ones(encoded_shape)
    self.observation_space = spaces.Box(lower, upper)
    # One action for each board position, pass, and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 2)

    # Filled in by _reset()
    self.state = None
    self.done = True
def spec(self, id):
    """Look up the registered spec for an environment id.

    Raises:
        error.Error: if id does not match the environment id pattern.
        error.DeprecatedEnv: if id is unknown but other registered specs
            share its environment name.
        error.UnregisteredEnv: if nothing registered matches at all.
    """
    match = env_id_re.search(id)
    if not match:
        raise error.Error('Attempted to look up malformed environment ID: {}. (Currently all IDs must be of the form {}.)'.format(id.encode('utf-8'), env_id_re.pattern))

    try:
        return self.env_specs[id]
    except KeyError:
        # Compare only the non-version part of the id against every
        # registered spec (could also check the exact number here).
        env_name = match.group(1)
        matching_envs = []
        for registered_id, registered_spec in self.env_specs.items():
            if env_name == registered_spec._env_name:
                matching_envs.append(registered_id)

        if not matching_envs:
            raise error.UnregisteredEnv('No registered env with id: {}'.format(id))
        raise error.DeprecatedEnv('Env {} not found (valid versions include {})'.format(id, matching_envs))
def __init__(self, game='pong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
    """Frameskip should be either a tuple (indicating a random range to
    choose from, with the top value exclude), or an int.

    Args:
        game: Name of the game, resolved to a path via atari_py.
        obs_type: Either 'ram' or 'image'.
        frameskip: Tuple range or int, stored on the instance as given.
        repeat_action_probability: Float or int forwarded to ALE's
            'repeat_action_probability' setting.

    Raises:
        IOError: if the resolved game path does not exist.
    """
    # Record every constructor argument, not only the first two; otherwise
    # a pickled/copied env is rebuilt with the default frameskip and
    # repeat_action_probability (assuming EzPickle replays these args).
    utils.EzPickle.__init__(self, game, obs_type, frameskip, repeat_action_probability)
    assert obs_type in ('ram', 'image')

    self.game_path = atari_py.get_game_path(game)
    if not os.path.exists(self.game_path):
        raise IOError('You asked for game %s but path %s does not exist'%(game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'), repeat_action_probability)

    self._seed()

    # Queried once; the original fetched the screen dimensions twice.
    (screen_width, screen_height) = self.ale.getScreenDims()
    self._buffer = np.empty((screen_height, screen_width, 4), dtype=np.uint8)

    self._action_set = self.ale.getMinimalActionSet()
    self.action_space = spaces.Discrete(len(self._action_set))

    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(low=np.zeros(128), high=np.zeros(128)+255)
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3))
    else:
        raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
def _set_mode(self, mode):
    """Translate a mode name into the stats recorder's episode type.

    Args:
        mode: 'evaluation' (stored as 'e') or 'training' (stored as 't').

    Raises:
        error.Error: if mode is any other value.
    """
    if mode == 'evaluation':
        episode_type = 'e'
    elif mode == 'training':
        episode_type = 't'
    else:
        # The message must be formatted explicitly; passing mode as a second
        # exception argument left the '{}' placeholder unfilled.
        raise error.Error('Invalid mode {}: must be "training" or "evaluation"'.format(mode))
    self.stats_recorder.type = episode_type
def __init__(self, player_color, opponent, observation_type, illegal_move_mode, board_size):
    """
    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding
        illegal_move_mode: What to do when the agent makes an illegal move. Choices: 'raise' or 'lose'
        board_size: size of the Hex board
    """
    assert isinstance(board_size, int) and board_size >= 1, 'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    color_by_name = {'black': HexEnv.BLACK, 'white': HexEnv.WHITE}
    try:
        self.player_color = color_by_name[player_color]
    except KeyError:
        raise error.Error("player_color must be 'black' or 'white', not {}".format(player_color))

    self.opponent = opponent

    assert observation_type in ['numpy3c']
    self.observation_type = observation_type

    assert illegal_move_mode in ['lose', 'raise']
    self.illegal_move_mode = illegal_move_mode

    if self.observation_type != 'numpy3c':
        raise error.Error('Unsupported observation type: {}'.format(self.observation_type))

    # One action for each board position and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 1)

    # The observation bounds take their shape from a first reset.
    observation = self.reset()
    lower = np.zeros(observation.shape)
    upper = np.ones(observation.shape)
    self.observation_space = spaces.Box(lower, upper)

    self._seed()
def put(self, contents, encode='json'):
    """POST contents as a file to this object's upload URL.

    Args:
        contents: Payload to upload.
        encode: 'json' to serialize contents with json.dumps first, or None
            to send contents unchanged.

    Raises:
        error.Error: if encode is neither 'json' nor None, or if the
            response code is not 204.
    """
    if encode == 'json':
        contents = json.dumps(contents)
    elif encode is not None:
        raise error.Error('Encode request for put must be "json" or None, not {}'.format(encode))

    files = {'file': contents}
    # The request is sent with empty headers, exactly as before; the unused
    # local that held a Content-Type header has been dropped.
    body, code, _ = api_requestor.http_client.request(
        'post', self.post_url, post_data=self.post_fields, files=files, headers={})
    if code != 204:
        raise error.Error("Upload to S3 failed. If error persists, please contact us at [email protected] this message. S3 returned '{} -- {}'. Tried 'POST {}' with fields {}.".format(code, body, self.post_url, self.post_fields))
def _reset_opponent(self, board):
    """Rebuild self.opponent_policy from the configured opponent name.

    Raises:
        error.Error: if self.opponent is not a recognized policy name.
    """
    if self.opponent == 'random':
        policy = make_random_policy(self.np_random)
    elif self.opponent == 'pachi:uct:_2400':
        # TODO: strength as argument
        policy = make_pachi_policy(board=board, engine_type=six.b('uct'), pachi_timestr=six.b('_2400'))
    else:
        raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))
    self.opponent_policy = policy
def _step(self, action): assert self.state.color == self.player_color # If already terminal, then don't do anything if self.done: return self.state.board.encode(), 0., True, {'state': self.state} # If resigned, then we're done if action == _resign_action(self.board_size): self.done = True return self.state.board.encode(), -1., True, {'state': self.state} # Play prev_state = self.state try: self.state = self.state.act(action) except pachi_py.IllegalMove: if self.illegal_move_mode == 'raise': six.reraise(*sys.exc_info()) elif self.illegal_move_mode == 'lose': # Automatic loss on illegal move self.done = True return self.state.board.encode(), -1., True, { 'state': self.state } else: raise error.Error('Unsupported illegal move action: {}'.format( self.illegal_move_mode)) # Opponent play if not self.state.board.is_terminal: self.state, opponent_resigned = self._exec_opponent_play( self.state, prev_state, action) # After opponent play, we should be back to the original color assert self.state.color == self.player_color # If the opponent resigns, then the agent wins if opponent_resigned: self.done = True return self.state.board.encode(), 1., True, { 'state': self.state } # Reward: if nonterminal, then the reward is 0 if not self.state.board.is_terminal: self.done = False return self.state.board.encode(), 0., False, {'state': self.state} # We're in a terminal state. Reward is 1 if won, -1 if lost assert self.state.board.is_terminal self.done = True white_wins = self.state.board.official_score > 0 black_wins = self.state.board.official_score < 0 player_wins = (white_wins and self.player_color == pachi_py.WHITE) or ( black_wins and self.player_color == pachi_py.BLACK) reward = 1. if player_wins else -1. if ( white_wins or black_wins) else 0. return self.state.board.encode(), reward, True, {'state': self.state}
def __init__(self, env_id, trials, max_timesteps, max_seconds, reward_floor, reward_ceiling):
    """Store the task parameters on the instance.

    Raises:
        error.Error: if both max_timesteps and max_seconds are None.
    """
    self.env_id, self.trials = env_id, trials
    self.max_timesteps, self.max_seconds = max_timesteps, max_seconds
    self.reward_floor, self.reward_ceiling = reward_floor, reward_ceiling

    # At least one of the two limits has to be provided.
    has_no_limit = max_timesteps is None and max_seconds is None
    if has_no_limit:
        raise error.Error('Must provide at least one of max_timesteps and max_seconds for {}'.format(self))
def __init__(self, id, entry_point=None, trials=100, reward_threshold=None, local_only=False, kwargs=None, nondeterministic=False, tags=None, max_episode_steps=None, max_episode_seconds=None, timestep_limit=None): self.id = id # Evaluation parameters self.trials = trials self.reward_threshold = reward_threshold # Environment properties self.nondeterministic = nondeterministic if tags is None: tags = {} self.tags = tags # BACKWARDS COMPAT 2017/1/18 if tags.get('wrapper_config.TimeLimit.max_episode_steps'): max_episode_steps = tags.get( 'wrapper_config.TimeLimit.max_episode_steps') # TODO: Add the following deprecation warning after 2017/02/18 # warnings.warn("DEPRECATION WARNING wrapper_config.TimeLimit has been deprecated. Replace any calls to `register(tags={'wrapper_config.TimeLimit.max_episode_steps': 200)}` with `register(max_episode_steps=200)`. This change was made 2017/1/31 and is included in gym version 0.8.0. If you are getting many of these warnings, you may need to update universe past version 0.21.3") tags['wrapper_config.TimeLimit.max_episode_steps'] = max_episode_steps ###### # BACKWARDS COMPAT 2017/1/31 if timestep_limit is not None: max_episode_steps = timestep_limit # TODO: Add the following deprecation warning after 2017/03/01 # warnings.warn("register(timestep_limit={}) is deprecated. Use register(max_episode_steps={}) instead.".format(timestep_limit, timestep_limit)) ###### self.max_episode_steps = max_episode_steps self.max_episode_seconds = max_episode_seconds # We may make some of these other parameters public if they're # useful. match = env_id_re.search(id) if not match: raise error.Error( 'Attempted to register malformed environment ID: {}. (Currently all IDs must be of the form {}.)' .format(id, env_id_re.pattern)) self._env_name = match.group(1) self._entry_point = entry_point self._local_only = local_only self._kwargs = {} if kwargs is None else kwargs
def before_reset(self):
    """Validate and record the start of a new episode.

    Marks the recorder as not done and stamps the time of the first reset.

    Raises:
        error.Error: if the previous episode has taken steps but is not done.
    """
    assert not self.closed

    episode_unfinished = self.done is not None and not self.done and self.steps > 0
    if episode_unfinished:
        raise error.Error("Tried to reset environment which is not done. While the monitor is active for {}, you cannot call reset() unless the episode is over.".format(self.env_id))

    self.done = False
    # Only the very first reset sets the timestamp.
    if self.initial_reset_timestamp is None:
        self.initial_reset_timestamp = time.time()
def np_random(seed=None):
    """Build a seeded numpy RandomState.

    Args:
        seed: A non-negative integer, or None to let _seed choose one.

    Returns:
        A (rng, seed) pair, where seed is the value returned by _seed.

    Raises:
        error.Error: if seed is given but is not a non-negative integer.
    """
    if seed is not None:
        is_valid = isinstance(seed, integer_types) and 0 <= seed
        if not is_valid:
            raise error.Error('Seed must be a non-negative integer or omitted, not {}'.format(seed))

    seed = _seed(seed)

    # The hashed seed is split into a list of ints before seeding.
    seed_words = _int_list_from_bigint(hash_seed(seed))
    rng = np.random.RandomState()
    rng.seed(seed_words)
    return rng, seed
def _int_list_from_bigint(bigint):
    """Split a non-negative integer into base-2**32 digits, lowest first.

    Raises:
        error.Error: if bigint is negative.
    """
    if bigint < 0:
        raise error.Error('Seed must be non-negative, not {}'.format(bigint))
    if bigint == 0:
        # Special case 0: the loop below would yield an empty list.
        return [0]

    base = 2**32
    digits = []
    remaining = bigint
    while remaining > 0:
        remaining, low = divmod(remaining, base)
        digits.append(low)
    return digits
def upload_training_data(training_dir, api_key=None):
    """Upload the episode batch and videos recorded in training_dir.

    Returns:
        A (env_info, training_episode_batch, training_video) tuple; either
        of the last two is None when there was nothing to upload.

    Raises:
        error.Error: if no results could be loaded from training_dir.
    """
    # Could have multiple manifests
    results = monitoring.load_results(training_dir)
    if not results:
        raise error.Error('''Could not find any manifest files in {}. (HINT: this usually means you did not yet close() your env.monitor and have not yet exited the process. You should call 'env.monitor.start(training_dir)' at the start of training and 'env.close()' at the end, or exit the process.)'''.format(training_dir))

    manifests = results['manifests']
    env_info = results['env_info']
    data_sources = results['data_sources']
    timestamps = results['timestamps']
    episode_lengths = results['episode_lengths']
    episode_rewards = results['episode_rewards']
    episode_types = results['episode_types']
    initial_reset_timestamps = results['initial_reset_timestamps']
    videos = results['videos']

    env_id = env_info['env_id']
    logger.debug('[%s] Uploading data from manifest %s', env_id, ', '.join(manifests))

    # Do the relevant uploads
    training_episode_batch = None
    if len(episode_lengths) > 0:
        training_episode_batch = upload_training_episode_batch(
            data_sources, episode_lengths, episode_rewards, episode_types,
            initial_reset_timestamps, timestamps, api_key, env_id=env_id)

    video_count = len(videos)
    if video_count > MAX_VIDEOS:
        logger.warning('[%s] You recorded videos for %s episodes, but the scoreboard only supports up to %s. We will automatically subsample for you, but you also might wish to adjust your video recording rate.', env_id, video_count, MAX_VIDEOS)
        # Evenly spaced indices across the whole run, first and last included.
        subsample_inds = np.linspace(0, video_count - 1, MAX_VIDEOS).astype('int')  #pylint: disable=E1101
        videos = [videos[i] for i in subsample_inds]

    training_video = None
    if len(videos) > 0:
        training_video = upload_training_video(videos, api_key, env_id=env_id)

    return env_info, training_episode_batch, training_video
def get_display(spec):
    """Convert a display specification (such as :0) into an actual Display
    object.

    Pyglet only supports multiple Displays on Linux.

    Returns:
        None when spec is None, otherwise a pyglet Display built from spec.

    Raises:
        error.Error: if spec is neither None nor a string.
    """
    if spec is None:
        return None
    if not isinstance(spec, six.string_types):
        raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
    return pyglet.canvas.Display(spec)
def make(self):
    """Instantiates an instance of the environment with appropriate kwargs"""
    entry_point = self._entry_point
    if entry_point is None:
        raise error.Error('Attempting to make deprecated env {}. (HINT: is there a newer registered version of this env?)'.format(self.id))

    env_class = load(entry_point)
    env = env_class(**self._kwargs)

    # Let the environment know which spec produced it.
    env.spec = self
    return env
def _seed(self, seed=None):
    """Reseed the env's random generator and refresh the opponent policy.

    Returns:
        A one-element list holding the seed that was actually used.

    Raises:
        error.Error: if the opponent is a string other than 'random'.
    """
    self.np_random, seed = seeding.np_random(seed)

    # Update the random policy if needed
    if not isinstance(self.opponent, str):
        # Anything that is not a string is used as the policy directly.
        self.opponent_policy = self.opponent
    elif self.opponent == 'random':
        self.opponent_policy = make_random_policy(self.np_random)
    else:
        raise error.Error('Unrecognized opponent policy {}'.format(self.opponent))

    return [seed]
def _step(self, action):
    """Play the agent's action, then let the opponent reply.

    Returns:
        A 4-tuple of (state, reward, done flag, info dict holding the state
        under the 'state' key).

    Raises:
        error.Error: on an illegal move when illegal_move_mode is 'raise',
            or when illegal_move_mode is not a supported value.
    """
    assert self.to_play == self.player_color

    # If already terminal, then don't do anything
    if self.done:
        return self.state, 0., True, {'state': self.state}

    if HexEnv.resign_move(self.board_size, action):
        # Resignation ends the episode, so remember it for later calls.
        self.done = True
        return self.state, -1, True, {'state': self.state}
    elif not HexEnv.valid_move(self.state, action):
        if self.illegal_move_mode == 'raise':
            # A bare `raise` has no active exception to re-raise here, so
            # raise an explicit, descriptive error instead.
            raise error.Error('Illegal move action: {}'.format(action))
        elif self.illegal_move_mode == 'lose':
            # Automatic loss on illegal move
            self.done = True
            return self.state, -1., True, {'state': self.state}
        else:
            raise error.Error('Unsupported illegal move action: {}'.format(
                self.illegal_move_mode))
    else:
        HexEnv.make_move(self.state, action, self.player_color)

    # Opponent play
    a = self.opponent_policy(self.state)

    # Making move if there are moves left
    if a is not None:
        if HexEnv.resign_move(self.board_size, a):
            # The opponent resigned: the agent wins and the episode is over.
            self.done = True
            return self.state, 1, True, {'state': self.state}
        else:
            HexEnv.make_move(self.state, a, 1 - self.player_color)

    reward = HexEnv.game_finished(self.state)
    # The sign of the result is flipped when the agent plays white.
    if self.player_color == HexEnv.WHITE:
        reward = -reward
    self.done = reward != 0
    return self.state, reward, self.done, {'state': self.state}
def _seed(a=None, max_bytes=8):
    """Create a strong random seed. Otherwise, Python 2 would seed using
    the system time, which might be non-robust especially in the
    presence of concurrency.

    Args:
        a (Optional[int, str]): None seeds from an operating system specific randomness source.
        max_bytes: Maximum number of bytes to use in the seed.

    Raises:
        error.Error: if a is neither None, a str nor an integer.
    """
    # Adapted from https://svn.python.org/projects/python/tags/r32/Lib/random.py
    if a is None:
        return _bigint_from_bytes(os.urandom(max_bytes))

    if isinstance(a, str):
        # Append a digest of the text, then keep only the leading bytes.
        encoded = a.encode('utf8')
        encoded += hashlib.sha512(encoded).digest()
        return _bigint_from_bytes(encoded[:max_bytes])

    if isinstance(a, integer_types):
        return a % 2**(8 * max_bytes)

    raise error.Error('Invalid type for seed: {} ({})'.format(type(a), a))
def write_archive(videos, archive_file, env_id=None):
    """Write videos, their metadata files and a manifest into a tar.gz.

    Args:
        videos: Sequence of (video_path, metadata_path) pairs.
        archive_file: Open file object that receives the gzipped tar.
        env_id: Environment id, used only in log and error messages.

    Raises:
        error.Error: if there are more than MAX_VIDEOS videos, if a video or
            metadata file is missing, if a base name is duplicated, or if a
            base name does not match the expected pattern.
    """
    if len(videos) > MAX_VIDEOS:
        # Argument order matters: number of videos first, then the limit.
        raise error.Error('[{}] Trying to upload {} videos, but there is a limit of {} currently. If you actually want to upload this many videos, please email [email protected] with your use-case.'.format(env_id, len(videos), MAX_VIDEOS))

    logger.debug('[%s] Preparing an archive of %d videos: %s', env_id, len(videos), videos)

    # Double check that there are no collisions
    basenames = set()
    manifest = {'version': 0, 'videos': []}

    with tarfile.open(fileobj=archive_file, mode='w:gz') as tar:
        for video_path, metadata_path in videos:
            video_name = os.path.basename(video_path)
            metadata_name = os.path.basename(metadata_path)

            if not os.path.exists(video_path):
                raise error.Error('[{}] No such video file {}. (HINT: Your video recorder may have broken midway through the run. You can check this with `video_recorder.functional`.)'.format(env_id, video_path))
            elif not os.path.exists(metadata_path):
                # Report the metadata path, which is the file that is missing.
                raise error.Error('[{}] No such metadata file {}. (HINT: this should be automatically created when using a VideoRecorder instance.)'.format(env_id, metadata_path))

            # Do some sanity checking
            if video_name in basenames:
                raise error.Error('[{}] Duplicated video name {} in video list: {}'.format(env_id, video_name, videos))
            elif metadata_name in basenames:
                raise error.Error('[{}] Duplicated metadata file name {} in video list: {}'.format(env_id, metadata_name, videos))
            elif not video_name_re.search(video_name):
                raise error.Error('[{}] Invalid video name {} (must match {})'.format(env_id, video_name, video_name_re.pattern))
            elif not metadata_name_re.search(metadata_name):
                raise error.Error('[{}] Invalid metadata file name {} (must match {})'.format(env_id, metadata_name, metadata_name_re.pattern))

            # Record that we've seen these names; add to manifest
            basenames.add(video_name)
            basenames.add(metadata_name)
            manifest['videos'].append((video_name, metadata_name))

            # Import the files into the archive
            tar.add(video_path, arcname=video_name, recursive=False)
            tar.add(metadata_path, arcname=metadata_name, recursive=False)

        # The manifest goes through a named temporary file because tar.add
        # takes a path; the file is closed before adding and always removed.
        f = tempfile.NamedTemporaryFile(mode='w+', delete=False)
        try:
            json.dump(manifest, f)
            f.close()
            tar.add(f.name, arcname='manifest.json')
        finally:
            f.close()
            os.remove(f.name)
def register(self, id, **kwargs):
    """Create an EnvSpec for id and store it in the registry.

    Raises:
        error.Error: if id has already been registered.
    """
    already_registered = id in self.env_specs
    if already_registered:
        raise error.Error('Cannot re-register id: {}'.format(id))

    new_spec = EnvSpec(id, **kwargs)
    self.env_specs[id] = new_spec
def upload(training_dir, algorithm_id=None, writeup=None, tags=None, benchmark_id=None, api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (Optional[str]): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular version of the algorithm (including choices of parameters) you are running (visit https://gym.openai.com/algorithms to create an id). If the id doesn't match an existing server id it will create a new algorithm using algorithm_id as the name
        benchmark_id (Optional[str]): The benchmark that these evaluations belong to. Will recursively search through training_dir for any Gym manifests. This feature is currently pre-release.
        writeup (Optional[str]): A Gist URL (of the form https://gist.github.com/<user>/<id>) containing your writeup for this evaluation.
        tags (Optional[dict]): A dictionary of key/values to store with the benchmark run (ignored for nonbenchmark evaluations). Must be jsonable.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as an environment variable (OPENAI_GYM_API_KEY).
        ignore_open_monitors (bool): Forwarded to each per-directory upload.

    Returns:
        The benchmark run id for a benchmark upload, otherwise None.
    """
    if not benchmark_id:
        if tags is not None:
            logger.warning("Tags will NOT be uploaded for this submission.")
        # Single evalution upload
        benchmark_run_id = None
        evaluation = _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info(""" **************************************************** You successfully uploaded your evaluation on %s to OpenAI Gym! You can find it at: %s **************************************************** """.rstrip(), evaluation.env, evaluation.web_url())
        return None

    # We're uploading a benchmark run: collect every directory that holds
    # training manifests, together with the env id found there.
    directories = []
    env_ids = []
    for name, _, files in os.walk(training_dir):
        manifests = monitoring.detect_training_manifests(name, files=files)
        if not manifests:
            continue
        env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
        env_ids.append(env_info['env_id'])
        directories.append(name)

    # Validate against benchmark spec
    try:
        spec = benchmark_spec(benchmark_id)
    except error.UnregisteredBenchmark:
        raise error.Error("Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

    # TODO: verify that the number of trials matches
    spec_env_ids = []
    for task in spec.tasks:
        for _ in range(task.trials):
            spec_env_ids.append(task.env_id)

    if not env_ids:
        raise error.Error("Could not find any evaluations in {}".format(training_dir))

    # This could be more stringent about mixing evaluations
    found_sorted = sorted(env_ids)
    expected_sorted = sorted(spec_env_ids)
    if found_sorted != expected_sorted:
        logger.info("WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s", benchmark_id, training_dir, found_sorted, expected_sorted)

    benchmark_run = resource.BenchmarkRun.create(benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(tags))
    benchmark_run_id = benchmark_run.id

    # Actually do the uploads.
    for run_dir in directories:
        # N.B. we don't propagate algorithm_id to Evaluation if we're running as part of a benchmark
        _upload(run_dir, None, writeup, benchmark_run_id, api_key, ignore_open_monitors)

    logger.info(""" **************************************************** You successfully uploaded your benchmark on %s to OpenAI Gym! You can find it at: %s **************************************************** """.rstrip(), benchmark_id, benchmark_run.web_url())

    return benchmark_run_id
def type(self, type):
    """Set the episode type.

    Args:
        type: 't' for training or 'e' for evaluation.

    Raises:
        error.Error: if type is any other value.
    """
    if type not in ['t', 'e']:
        # Format the message explicitly; passing the value as a second
        # exception argument left the '{}' placeholder unfilled.
        raise error.Error('Invalid episode type {}: must be t for training or e for evaluation'.format(type))
    self._type = type
def monitor(self):
    """Deprecated accessor: always raises, pointing at the Monitor wrapper.

    Raises:
        error.Error: unconditionally.
    """
    message = "env.monitor has been deprecated as of 12/23/2016. Remove your call to `env.monitor.start(directory)` and instead wrap your env with `env = gym.wrappers.Monitor(env, directory)` to record data."
    raise error.Error(message)