Пример #1
0
def score(df: pd.DataFrame) -> float:
  """Return the MDP Playground score, clipped to the range [0, 1].

  The frame is passed through `mdpp_preprocess`, scored with
  `plotting.ave_regret_score` against `BASE_REGRET` at `NUM_EPISODES`, and
  the (rescaled) result is clipped with `np.clip`.

  Args:
    df: Results dataframe for the experiment.

  Returns:
    The regret-based score after clipping to [0, 1].
  """
  processed = mdpp_preprocess(df_in=df)
  raw_score = plotting.ave_regret_score(
      processed, baseline_regret=BASE_REGRET, episode=NUM_EPISODES)

  # The regret score is rescaled by a constant factor, which is currently 1.0.
  # NOTE(review): an earlier note at this point described a heuristically
  # chosen factor of 2.5 (picked so that Sonnet DQN would score approx. 0.75,
  # leaving room for stronger agents such as Rainbow to approach 1, while
  # a2c_rnn reached 2.0 unclipped). The code does not apply that factor;
  # confirm which value is intended.
  scale = 1.0
  return np.clip(scale * raw_score, 0, 1)
Пример #2
0
def score(df: pd.DataFrame) -> float:
  """Return the cartpole score: the mean of a regret term and a solve term.

  Args:
    df: Results dataframe; it is run through `cartpole_preprocess` first.

  Returns:
    0.5 * (regret score + fraction of bsuite_ids with a good run).
  """
  processed = cartpole_preprocess(df_in=df)
  regret_part = plotting.ave_regret_score(
      processed, baseline_regret=BASE_REGRET, episode=NUM_EPISODES)

  # A bsuite_id counts as "solved" when the maximum of its 'best_episode'
  # column exceeds GOOD_EPISODE; the solve term is the fraction solved.
  best_per_id = processed.groupby('bsuite_id')['best_episode'].max()
  solved_fraction = np.mean(best_per_id > GOOD_EPISODE)

  return 0.5 * (regret_part + solved_fraction)
Пример #3
0
def score(df: pd.DataFrame) -> float:
  """Return a single score: the mean of a regret term and a final-accuracy term.

  Args:
    df: Results dataframe with `total_regret` and `episode` columns.

  Returns:
    0.5 * (regret score + accuracy score over the late episodes).
  """
  regret_part = plotting.ave_regret_score(
      df, baseline_regret=BASE_REGRET, episode=sweep.NUM_EPISODES)

  # Work on a copy so the caller's frame does not gain an extra column.
  tail = df.copy()
  regret_step = tail.total_regret.diff()
  episode_step = tail.episode.diff()
  tail['ave_return'] = 1.0 - (regret_step / episode_step)

  # Keep only rows past 90% of NUM_EPISODES.
  # NOTE(review): this uses the bare NUM_EPISODES while the regret term uses
  # sweep.NUM_EPISODES -- presumably the same value; confirm.
  late_rows = tail.episode > 0.9 * NUM_EPISODES
  tail = tail[late_rows]

  # Map an average return in (+1, -1) onto an accuracy score in (+1, 0).
  accuracy_part = np.mean(tail.ave_return + 1) * 0.5
  return 0.5 * (regret_part + accuracy_part)
Пример #4
0
def score(df: pd.DataFrame) -> float:
    """Return the swingup score, averaged over every 'height_threshold' group.

    Each group's score is the mean of a regret term (computed on the
    'perfection_regret' column) and the fraction of bsuite_ids whose maximum
    'best_episode' exceeds GOOD_EPISODE.

    Args:
        df: Results dataframe; it is run through `cp_swingup_preprocess` first.

    Returns:
        The mean of the per-threshold scores.
    """
    processed = cp_swingup_preprocess(df_in=df)

    def _threshold_score(group):
        # Score one height-threshold slice: 50% regret, 50% swingup rate.
        regret_part = plotting.ave_regret_score(
            group,
            baseline_regret=BASE_REGRET,
            episode=NUM_EPISODES,
            regret_column='perfection_regret')
        best_per_id = group.groupby('bsuite_id')['best_episode'].max()
        swingup_part = np.mean(best_per_id > GOOD_EPISODE)
        return 0.5 * (regret_part + swingup_part)

    per_threshold = [
        _threshold_score(group)
        for _, group in processed.groupby('height_threshold')
    ]
    return np.mean(per_threshold)
Пример #5
0
def score(df: pd.DataFrame) -> float:
    """Return the catch score, which is the average-regret score alone.

    Args:
        df: Results dataframe, passed unchanged to `plotting.ave_regret_score`.

    Returns:
        The value of `plotting.ave_regret_score` evaluated against
        BASE_REGRET at `sweep.NUM_EPISODES`.
    """
    regret_score = plotting.ave_regret_score(
        df, baseline_regret=BASE_REGRET, episode=sweep.NUM_EPISODES)
    return regret_score
Пример #6
0
def score(df: pd.DataFrame) -> float:
    """Return the mountain car score, which is the average-regret score alone.

    Args:
        df: Results dataframe; it is run through `mountain_car_preprocess`
            before scoring.

    Returns:
        The value of `plotting.ave_regret_score` on the preprocessed frame,
        evaluated against BASE_REGRET at `sweep.NUM_EPISODES`.
    """
    processed = mountain_car_preprocess(df_in=df)
    regret_score = plotting.ave_regret_score(
        processed, baseline_regret=BASE_REGRET, episode=sweep.NUM_EPISODES)
    return regret_score