def benchmark_score_from_local(benchmark_id, training_dir):
    """Compute the aggregate benchmark score for evaluations recorded under training_dir."""
    spec = gym.benchmark_spec(benchmark_id)

    # Recursively collect every directory that contains a training manifest.
    directories = []
    for name, _, files in os.walk(training_dir):
        manifests = gym.monitoring.detect_training_manifests(name, files=files)
        if manifests:
            directories.append(name)

    # Score each recorded evaluation against the benchmark spec, grouped by env_id.
    benchmark_results = defaultdict(list)
    for directory in directories:
        results = gym.monitoring.load_results(directory)
        env_id = results['env_info']['env_id']
        benchmark_result = spec.score_evaluation(
            env_id,
            results['data_sources'],
            results['initial_reset_timestamps'],
            results['episode_lengths'],
            results['episode_rewards'],
            results['episode_types'],
            results['timestamps'])
        benchmark_results[env_id].append(benchmark_result)

    # Combine the per-evaluation scores into a single aggregate score.
    return gym.benchmarks.scoring.benchmark_aggregate_score(spec, benchmark_results)
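# Usage sketch (illustrative only): score locally-recorded results against a
# registered benchmark. The benchmark id 'Atari40M' and the results path below
# are assumed placeholders, not part of this module.
#
#     aggregate = benchmark_score_from_local('Atari40M', '/tmp/atari-results')
#     print(aggregate)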
def upload(training_dir, algorithm_id=None, writeup=None, tags=None,
           benchmark_id=None, api_key=None, ignore_open_monitors=False):
    """Upload the results of training (as automatically recorded by your
    env's monitor) to OpenAI Gym.

    Args:
        training_dir (str): A directory containing the results of a training run.
        algorithm_id (Optional[str]): An algorithm id indicating the particular
            version of the algorithm (including choices of parameters) you are
            running (visit https://gym.openai.com/algorithms to create an id).
            If the id doesn't match an existing server id, a new algorithm will
            be created using algorithm_id as the name.
        writeup (Optional[str]): A Gist URL (of the form
            https://gist.github.com/<user>/<id>) containing your writeup for
            this evaluation.
        tags (Optional[dict]): A dictionary of key/values to store with the
            benchmark run (ignored for non-benchmark evaluations). Must be
            JSON-serializable.
        benchmark_id (Optional[str]): The benchmark that these evaluations
            belong to. Will recursively search through training_dir for any
            Gym manifests. This feature is currently pre-release.
        api_key (Optional[str]): Your OpenAI API key. Can also be provided as
            an environment variable (OPENAI_GYM_API_KEY).
        ignore_open_monitors (Optional[bool]): If True, upload even if a
            monitor is still open on the training directory.
    """
    if benchmark_id:
        # We're uploading a benchmark run.
        directories = []
        env_ids = []
        for name, _, files in os.walk(training_dir):
            manifests = monitoring.detect_training_manifests(name, files=files)
            if manifests:
                env_info = monitoring.load_env_info_from_manifests(manifests, training_dir)
                env_ids.append(env_info['env_id'])
                directories.append(name)

        # Validate against the benchmark spec.
        try:
            spec = benchmark_spec(benchmark_id)
        except error.UnregisteredBenchmark:
            raise error.Error(
                "Invalid benchmark id: {}. Are you using a benchmark registered in gym/benchmarks/__init__.py?".format(benchmark_id))

        # TODO: verify that the number of trials matches
        spec_env_ids = [task.env_id for task in spec.tasks for _ in range(task.trials)]

        if not env_ids:
            raise error.Error("Could not find any evaluations in {}".format(training_dir))

        # This could be more stringent about mixing evaluations.
        if sorted(env_ids) != sorted(spec_env_ids):
            logger.info(
                "WARNING: Evaluations do not match spec for benchmark %s. In %s, we found evaluations for %s, expected %s",
                benchmark_id, training_dir, sorted(env_ids), sorted(spec_env_ids))

        benchmark_run = resource.BenchmarkRun.create(
            benchmark_id=benchmark_id, algorithm_id=algorithm_id, tags=json.dumps(tags))
        benchmark_run_id = benchmark_run.id

        # Actually do the uploads.
        for directory in directories:
            # N.B. we don't propagate algorithm_id to Evaluation if we're
            # running as part of a benchmark.
            _upload(directory, None, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your benchmark on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), benchmark_id, benchmark_run.web_url())

        return benchmark_run_id
    else:
        if tags is not None:
            logger.warning("Tags will NOT be uploaded for this submission.")
        # Single evaluation upload.
        benchmark_run_id = None
        evaluation = _upload(training_dir, algorithm_id, writeup, benchmark_run_id, api_key, ignore_open_monitors)

        logger.info("""
****************************************************
You successfully uploaded your evaluation on %s to
OpenAI Gym! You can find it at:

    %s

****************************************************
        """.rstrip(), evaluation.env, evaluation.web_url())

        return None
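# Usage sketch (illustrative only): upload a single evaluation recorded by the
# env's monitor. The directory, algorithm id, and gist URL below are
# hypothetical placeholders; the API key falls back to the OPENAI_GYM_API_KEY
# environment variable when api_key is omitted.
#
#     upload('/tmp/cartpole-results',
#            algorithm_id='alg_xxxxxxxx',  # hypothetical id
#            writeup='https://gist.github.com/<user>/<id>')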