示例#1
0
def get_rewards(
    action: int,
    action_name: str,
    reward_space: str,
    num_trials: int,
    max_warmup_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Collect up to num_trials reward samples for a single action.

    Runs random trials until num_trials rewards have been gathered or the
    attempt budget (max_attempts_multiplier * num_trials) is exhausted.
    Trials that produce no reward (None) are discarded and retried.
    """
    collected_rewards = []
    trial_runtimes = []
    benchmark = benchmark_from_flags()
    attempts_remaining = max_attempts_multiplier * num_trials

    while attempts_remaining > 0 and len(collected_rewards) < num_trials:
        attempts_remaining -= 1
        # A fresh environment session per trial keeps trials independent.
        with env_session_from_flags(benchmark=benchmark) as env:
            # Clear the default spaces; run_one_trial supplies the reward
            # space name itself.
            env.observation_space = None
            env.reward_space = None
            env.reset(benchmark=benchmark)
            with Timer() as timer:
                trial_reward = run_one_trial(env, reward_space, action,
                                             max_warmup_steps)
            if trial_reward is not None:
                collected_rewards.append(trial_reward)
                trial_runtimes.append(timer.time)

    return SensitivityAnalysisResult(
        name=action_name,
        runtimes=np.array(trial_runtimes),
        rewards=np.array(collected_rewards),
    )
示例#2
0
def main(argv):
    """Main entry point.

    Parses flags, resolves the set of actions to analyze (either those
    named via --action, or all of them), then runs the per-action reward
    sensitivity analysis and writes CSV results to the logging directory.
    """
    argv = FLAGS(argv)
    if len(argv) != 1:
        raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

    # A throwaway session is used only to read the action names.
    with env_session_from_flags() as env:
        action_names = env.action_space.names

    if FLAGS.action:
        actions = [action_names.index(a) for a in FLAGS.action]
    else:
        actions = list(range(len(action_names)))

    logs_dir = Path(FLAGS.output_dir
                    or create_logging_dir("benchmark_sensitivity_analysis"))
    rewards_path = logs_dir / f"actions_{FLAGS.reward}.rewards.csv"
    runtimes_path = logs_dir / f"actions_{FLAGS.reward}.runtimes.csv"

    run_action_sensitivity_analysis(
        rewards_path=rewards_path,
        runtimes_path=runtimes_path,
        actions=actions,
        # BUG FIX: run_action_sensitivity_analysis() declares this keyword
        # parameter as `reward_space`, not `reward`; the previous call
        # raised TypeError (unexpected keyword argument).
        reward_space=FLAGS.reward,
        num_trials=FLAGS.num_trials,
        max_warmup_steps=FLAGS.max_warmup_steps,
        nproc=FLAGS.nproc,
        max_attempts_multiplier=FLAGS.max_attempts_multiplier,
    )
示例#3
0
def main(argv):
    """Main entry point.

    Parses flags, resolves the benchmark(s) to analyze, then runs the
    per-benchmark reward sensitivity analysis and writes CSV results to
    the logging directory.
    """
    argv = FLAGS(argv)
    if len(argv) != 1:
        raise app.UsageError(f"Unknown command line arguments: {argv[1:]}")

    # Analyze the benchmark selected by flags, or every available one.
    benchmark = benchmark_from_flags()
    if not benchmark:
        with env_session_from_flags() as env:
            benchmarks = env.benchmarks
    else:
        benchmarks = [benchmark]

    logs_dir = Path(
        FLAGS.output_dir or create_logging_dir("benchmark_sensitivity_analysis")
    )
    rewards_path = logs_dir / f"benchmarks_{FLAGS.reward}.csv"
    runtimes_path = logs_dir / f"benchmarks_{FLAGS.reward}_runtimes.csv"

    run_benchmark_sensitivity_analysis(
        rewards_path=rewards_path,
        runtimes_path=runtimes_path,
        benchmarks=benchmarks,
        reward=FLAGS.reward,
        num_trials=FLAGS.num_trials,
        min_steps=FLAGS.min_steps,
        max_steps=FLAGS.max_steps,
        nproc=FLAGS.nproc,
        max_attempts_multiplier=FLAGS.max_attempts_multiplier,
    )
示例#4
0
def run_action_sensitivity_analysis(
        actions: List[int],
        rewards_path: Path,
        runtimes_path: Path,
        reward_space: str,
        num_trials: int,
        max_warmup_steps: int,
        nproc: int = cpu_count(),
        max_attempts_multiplier: int = 5,
):
    """Estimate the reward delta of a given list of actions.

    Fans out one get_rewards() job per action across a thread pool of
    nproc workers, then aggregates the results and writes them to the
    given CSV paths.
    """
    # A throwaway session is used only to read the action names.
    with env_session_from_flags() as env:
        action_names = env.action_space.names

    with ThreadPoolExecutor(max_workers=nproc) as executor:
        futures = set()
        for action in actions:
            futures.add(
                executor.submit(
                    get_rewards,
                    action,
                    action_names[action],
                    reward_space,
                    num_trials,
                    max_warmup_steps,
                    max_attempts_multiplier,
                )
            )
        return run_sensitivity_analysis(
            analysis_futures=futures,
            runtimes_path=runtimes_path,
            rewards_path=rewards_path,
        )
示例#5
0
def get_reward_deltas(
    benchmark: Union[Benchmark, str],
    reward: str,
    num_trials: int,
    min_steps: int,
    max_steps: int,
    max_attempts_multiplier: int = 5,
) -> SensitivityAnalysisResult:
    """Run random trials to get a list of num_trials reward deltas.

    Runs random trials until num_trials deltas have been gathered or the
    attempt budget (max_attempts_multiplier * num_trials) is exhausted.
    Trials that produce no delta (None) are discarded and retried.
    """
    reward_deltas, runtimes = [], []
    num_attempts = 0
    while (num_attempts < max_attempts_multiplier * num_trials
           and len(reward_deltas) < num_trials):
        num_attempts += 1
        with env_session_from_flags(benchmark=benchmark) as env:
            env.eager_observation_space = None
            env.eager_reward_space = None
            env.reset(benchmark=benchmark)
            # Pin the resolved benchmark so subsequent sessions reuse it
            # and so the result can be named after the session closes.
            benchmark = env.benchmark
            with Timer() as t:
                reward_delta = run_one_trial(env, reward, min_steps, max_steps)
            if reward_delta is not None:
                reward_deltas.append(reward_delta)
                runtimes.append(t.time)

    # BUG FIX: previously read `env.benchmark` here, which raised NameError
    # when the loop never ran (e.g. num_trials <= 0) and otherwise touched
    # an environment whose session was already closed. `benchmark` holds
    # the same resolved value.
    return SensitivityAnalysisResult(name=benchmark,
                                     runtimes=np.array(runtimes),
                                     rewards=np.array(reward_deltas))