def get_rewards(
    action: int,
    action_name: str,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials reward deltas."""
    rewards, runtimes = [], []
    benchmark = benchmark_from_flags()
    # Cap the total number of attempts so that trials which keep returning
    # None (failed runs) cannot loop forever.
    for _ in range(max_attempts_multiplier * num_trials):
        if len(rewards) >= num_trials:
            break
        with env_session_from_flags(benchmark=benchmark) as env:
            # Disable observation/reward computation during reset; the trial
            # requests the reward space it needs explicitly.
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            with Timer() as timer:
                reward = run_one_trial(env, reward_space, action, max_warmup_steps)
            # A None reward means the trial failed; record nothing for it.
            if reward is not None:
                rewards.append(reward)
                runtimes.append(timer.time)
    return SensitivityAnalysisResult(
        name=action_name,
        runtimes=np.array(runtimes),
        rewards=np.array(rewards),
    )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") with env_session_from_flags() as env: action_names = env.action_space.names if FLAGS.action: actions = [action_names.index(a) for a in FLAGS.action] else: actions = list(range(len(action_names))) logs_dir = Path(FLAGS.output_dir or create_logging_dir("benchmark_sensitivity_analysis")) rewards_path = logs_dir / f"actions_{FLAGS.reward}.rewards.csv" runtimes_path = logs_dir / f"actions_{FLAGS.reward}.runtimes.csv" run_action_sensitivity_analysis( rewards_path=rewards_path, runtimes_path=runtimes_path, actions=actions, reward=FLAGS.reward, num_trials=FLAGS.num_trials, max_warmup_steps=FLAGS.max_warmup_steps, nproc=FLAGS.nproc, max_attempts_multiplier=FLAGS.max_attempts_multiplier, )
def main(argv): """Main entry point.""" argv = FLAGS(argv) if len(argv) != 1: raise app.UsageError(f"Unknown command line arguments: {argv[1:]}") # Determine the benchmark that is being analyzed, or use all of them. benchmark = benchmark_from_flags() if benchmark: benchmarks = [benchmark] else: with env_session_from_flags() as env: benchmarks = env.benchmarks logs_dir = Path( FLAGS.output_dir or create_logging_dir("benchmark_sensitivity_analysis") ) rewards_path = logs_dir / f"benchmarks_{FLAGS.reward}.csv" runtimes_path = logs_dir / f"benchmarks_{FLAGS.reward}_runtimes.csv" run_benchmark_sensitivity_analysis( rewards_path=rewards_path, runtimes_path=runtimes_path, benchmarks=benchmarks, reward=FLAGS.reward, num_trials=FLAGS.num_trials, min_steps=FLAGS.min_steps, max_steps=FLAGS.max_steps, nproc=FLAGS.nproc, max_attempts_multiplier=FLAGS.max_attempts_multiplier, )
def run_action_sensitivity_analysis(
    actions: List[int],
    rewards_path: Path,
    runtimes_path: Path,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    nproc: int = cpu_count(),
    max_attempts_multiplier: int = 5,
):
    """Estimate the reward delta of a given list of actions."""
    # Look up the human-readable action names from a throwaway session.
    with env_session_from_flags() as env:
        action_names = env.action_space.names

    with ThreadPoolExecutor(max_workers=nproc) as executor:
        # Fan out one future per action; each future runs its own trials.
        analysis_futures = set()
        for action in actions:
            future = executor.submit(
                get_rewards,
                action,
                action_names[action],
                reward_space,
                num_trials,
                max_warmup_steps,
                max_attempts_multiplier,
            )
            analysis_futures.add(future)
        return run_sensitivity_analysis(
            analysis_futures=analysis_futures,
            runtimes_path=runtimes_path,
            rewards_path=rewards_path,
        )
def get_reward_deltas(
    benchmark: Union[Benchmark, str],
    reward: str,
    num_trials: int,
    min_steps: int,
    max_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials reward deltas.

    :param benchmark: The benchmark to analyze, or a name that the
        environment can resolve to one.
    :param reward: The name of the reward space to measure.
    :param num_trials: The number of successful trials to collect.
    :param min_steps: Minimum number of steps per trial.
    :param max_steps: Maximum number of steps per trial.
    :param max_attempts_multiplier: Abort after this many times num_trials
        attempts, even if fewer than num_trials trials succeeded.
    :return: A SensitivityAnalysisResult of the collected reward deltas and
        per-trial runtimes.
    """
    reward_deltas, runtimes = [], []
    num_attempts = 0
    while (
        num_attempts < max_attempts_multiplier * num_trials
        and len(reward_deltas) < num_trials
    ):
        num_attempts += 1
        with env_session_from_flags(benchmark=benchmark) as env:
            # NOTE(review): `eager_*` attribute names differ from the
            # `observation_space`/`reward_space` used elsewhere in this
            # analysis — confirm which API version this targets.
            env.eager_observation_space = None
            env.eager_reward_space = None
            env.reset(benchmark=benchmark)
            # Pin the resolved benchmark so later iterations (and the
            # result below) reuse the exact same one.
            benchmark = env.benchmark
            with Timer() as t:
                reward_delta = run_one_trial(env, reward, min_steps, max_steps)
            # A None delta means the trial failed; record nothing for it.
            if reward_delta is not None:
                reward_deltas.append(reward_delta)
                runtimes.append(t.time)
    # Use the local `benchmark` rather than `env.benchmark`: `env` is
    # unbound if the loop never ran (num_trials == 0), and otherwise refers
    # to an already-closed session. The value is identical when the loop
    # ran, since `benchmark` was synced from `env.benchmark` above.
    return SensitivityAnalysisResult(
        name=benchmark,
        runtimes=np.array(runtimes),
        rewards=np.array(reward_deltas),
    )