def info(
    log_dirs: List[Path] = ["~/logs/compiler_gym/llvm_autotuning"],
    all_runs: bool = False,
    group_by_working_directory: bool = False,
    only_nonzero_reward: bool = False,
):
    experiments = experiments_from_paths(log_dirs)

    results = []
    for experiment in experiments:
        df = experiment.dataframe

        # Exclude runs where the reward was zero. This is used to prune false
        # results when the environment is flaky or can fail.
        if only_nonzero_reward:
            df = df[df.reward != 0]

        if not len(df):
            continue

        df.to_csv(experiment.working_directory / "results.csv", index=False)

        walltimes = df[["benchmark", "walltime"]].groupby("benchmark").mean()
        rewards = df[["benchmark", "reward"]].groupby("benchmark").agg(geometric_mean)
        num_results = len(df)
        num_benchmarks = len(set(df["benchmark"]))

        df = pd.concat((walltimes, rewards), axis=1)
        avg_walltime = df["walltime"].mean()
        avg_reward = geometric_mean(df["reward"])
        df = pd.concat(
            (
                df,
                pd.DataFrame(
                    [{"walltime": avg_walltime, "reward": avg_reward}],
                    index=["Average"],
                ),
            )
        )
        df = df.reset_index()
        df.insert(0, "config", experiment.configuration_number)
        df.insert(0, "timestamp", experiment.timestamp)
        df.insert(0, "experiment", experiment.experiment)

        if all_runs:
            print(experiment.working_directory)
            print(tabulate(df, showindex=False, headers="keys", tablefmt="grid"))
            print()

        results.append(
            {
                "working_directory": experiment.working_directory,
                "experiment": experiment.experiment,
                "timestamp": experiment.timestamp,
                "config": experiment.configuration_number,
                "num_benchmarks": num_benchmarks,
                "num_results": num_results,
                "walltime": avg_walltime,
                "reward": avg_reward,
            }
        )

    df = pd.DataFrame(results)
    if not len(df):
        print("No results")
        return

    print("---------------------------------------")
    print("Aggregate over experiments:")
    if group_by_working_directory:
        df = df.groupby(["working_directory"]).mean()
    else:
        df = df.groupby(["experiment", "timestamp", "config"]).mean()

    # Cast float back to int.
    df["num_benchmarks"] = [int(x) for x in df["num_benchmarks"]]
    df["num_results"] = [int(x) for x in df["num_results"]]

    # Better column names.
    df = df.rename(columns={"reward": "geomean_reward", "walltime": "walltime (s)"})

    pd.set_option("display.max_rows", None)
    print(df)
def train(log_dirs: List[Path] = ["~/logs/compiler_gym/llvm_rl"]):
    init_logging()
    models = models_from_paths(log_dirs)

    dfs = []
    for model in models:
        df = model.dataframe
        if not len(df):
            continue

        # Select only the rows with a checkpoint.
        df = df[df["checkpoint"].values]
        df = df[
            [
                "trial_name",
                "experiment_timestamp",
                "episodes_total",
                "episode_reward_geomean",
                "episode_reward_mean",
                "evaluation/episode_reward_mean",
                "evaluation/episode_reward_geomean",
                "time_total_s",
                "complete",
                "cpus",
                "gpus",
            ]
        ]
        sdf = df.groupby(
            ["experiment", "config", "replica", "experiment_timestamp"]
        ).max()

        test_results = model.test_dataframes
        sdf["test_results"] = [
            test_results.get(d, pd.DataFrame()) for d in sdf["trial_name"]
        ]
        sdf["test_ic_mean"] = [
            sum(d["instruction_count_reduction"]) / len(d)
            if not d.empty
            else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_ic_geomean"] = [
            geometric_mean(d["instruction_count_reduction"])
            if not d.empty
            else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_os_mean"] = [
            sum(d["object_size_reduction"]) / len(d)
            if not d.empty
            else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_os_geomean"] = [
            geometric_mean(d["object_size_reduction"])
            if not d.empty
            else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_checkpoint"] = [
            int(d["test_checkpoint"].values[0].split("-")[-1]) if not d.empty else ""
            for d in sdf["test_results"]
        ]

        dfs.append(sdf.reset_index())

    df = pd.concat(dfs)

    # Print everything.
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", None)

    df = df.rename(
        columns={
            "experiment_timestamp": "timestamp",
            "episodes_total": "episodes",
            "evaluation/episode_reward_geomean": "val_geomean",
            "evaluation/episode_reward_mean": "val_mean",
            "episode_reward_mean": "train_mean",
            "episode_reward_geomean": "train_geomean",
            "time_total_s": "training_time",
            "test_reward_mean": "test_mean",
            "test_reward_geomean": "test_geomean",
        }
    )

    # Format for printing.
    df["complete"] = [f"{x:.1%}" for x in df["complete"]]
    df["episodes"] = [f"{int(x):,d}" for x in df["episodes"]]
    df["training_time"] = [humanize.naturaldelta(x) for x in df["training_time"]]
    for reward in [
        "train_mean",
        "train_geomean",
        "val_mean",
        "val_geomean",
        "test_ic_geomean",
        "test_os_geomean",
        "test_ic_mean",
        "test_os_mean",
    ]:
        df[reward] = [f"{x:.4f}" for x in df[reward].values]

    df = df[
        [
            "trial_name",
            "timestamp",
            "complete",
            "episodes",
            "training_time",
            "test_checkpoint",
            "train_geomean",
            "val_geomean",
        ]
    ]

    print(tabulate(df, headers="keys", showindex=False, tablefmt="psql"))
def reward_aggregation(a):
    """Aggregate rewards as the geometric mean of the values, clipped to be
    non-negative."""
    return geometric_mean(np.clip(a, 0, None))
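# Illustrative usage (not from the source): because reward_aggregation() clips
# negative rewards to zero before taking the geometric mean, a single negative
# or zero reward collapses the aggregate to 0, consistent with the
# geometric_mean tests below.
#
#     reward_aggregation([1.0, 2.0, 4.0])   # == 2.0 (plain geometric mean)
#     reward_aggregation([-0.5, 1.1, 1.2])  # == 0.0 (-0.5 is clipped to 0)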
def main(argv):
    assert len(argv) == 1, f"Unknown args: {argv[1:]}"
    assert FLAGS.n > 0, "n must be > 0"

    with gym.make("llvm-ic-v0") as env:
        # Stream verbose CompilerGym logs to file.
        logger = logging.getLogger("compiler_gym")
        logger.setLevel(logging.DEBUG)
        log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
        logger.addHandler(log_handler)
        logger.propagate = False

        print(f"Writing results to {FLAGS.leaderboard_results}")
        print(f"Writing logs to {FLAGS.leaderboard_logfile}")

        # Build the list of benchmarks to evaluate.
        benchmarks = env.datasets[FLAGS.test_dataset].benchmark_uris()
        if FLAGS.max_benchmarks:
            benchmarks = islice(benchmarks, FLAGS.max_benchmarks)
        benchmarks = list(benchmarks)

        # Repeat the searches for the requested number of iterations.
        benchmarks *= FLAGS.n
        total_count = len(benchmarks)

        # If we are resuming from a previous job, read the states that have
        # already been processed and remove those benchmarks from the list
        # of benchmarks to evaluate.
        init_states = []
        if FLAGS.resume and Path(FLAGS.leaderboard_results).is_file():
            with CompilerEnvStateReader(open(FLAGS.leaderboard_results)) as reader:
                for state in reader:
                    init_states.append(state)
                    if state.benchmark in benchmarks:
                        benchmarks.remove(state.benchmark)

        # Run the benchmark loop in the background so that we can
        # asynchronously log progress.
        worker = _EvalPolicyWorker(env, benchmarks, policy, init_states)
        worker.start()

        timer = Timer().reset()
        try:
            print(
                f"=== Evaluating policy on "
                f"{humanize.intcomma(total_count)} "
                f"{FLAGS.test_dataset} benchmarks ==="
                "\n\n"  # Blank lines will be filled below.
            )
            while worker.is_alive():
                done_count = len(worker.states)
                remaining_count = total_count - done_count
                time = timer.time
                gmean_reward = geometric_mean([s.reward for s in worker.states])
                mean_walltime = (
                    arithmetic_mean([s.walltime for s in worker.states]) or time
                )
                print(
                    "\r\033[2A"
                    "\033[K"
                    f"Runtime: {humanize_duration_hms(time)}. "
                    f"Estimated completion: {humanize_duration_hms(mean_walltime * remaining_count)}. "
                    f"Completed: {humanize.intcomma(done_count)} / {humanize.intcomma(total_count)} "
                    f"({done_count / total_count:.1%})."
                    "\n\033[K"
                    f"Current mean walltime: {mean_walltime:.3f}s / benchmark."
                    "\n\033[K"
                    f"Current geomean reward: {gmean_reward:.4f}.",
                    flush=True,
                    end="",
                )
                sleep(1)
        except KeyboardInterrupt:
            print("\nkeyboard interrupt", flush=True)
            worker.alive = False
            # User interrupt, don't validate.
            FLAGS.validate = False

    if FLAGS.validate:
        FLAGS.env = "llvm-ic-v0"
        validate(["argv0", FLAGS.leaderboard_results])
def test_geometric_mean_123():
    assert geometric_mean([1, 2, 3]) == approx(1.8171205928321)


def test_geometric_mean_negative():
    assert geometric_mean([-1, 1, 2]) == 0


def test_geometric_mean_zero_value():
    assert geometric_mean([0, 1, 2]) == 0


def test_geometric_mean_empty_list():
    assert geometric_mean([]) == 0
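# A minimal sketch of a geometric_mean() consistent with the four tests above.
# The real implementation lives elsewhere in the codebase; this illustrative
# version (the name _geometric_mean_sketch is hypothetical) returns 0 for an
# empty input or whenever any value is <= 0, and otherwise computes the n-th
# root of the product in log space for numerical stability.
import numpy as np


def _geometric_mean_sketch(values) -> float:
    a = np.asarray(values, dtype=float)
    if a.size == 0 or np.any(a <= 0):
        return 0.0
    # exp(mean(log(a))) == (a_1 * a_2 * ... * a_n) ** (1 / n)
    return float(np.exp(np.log(a).mean()))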
def _trial_to_dataframe(self, directory: Path) -> Optional[pd.DataFrame]:
    components = directory.name.split("-")
    if len(components) < 3:
        logger.warning(
            "Directory name does not match expected "
            "{experiment}-{config}-{replica} format: %s",
            directory,
        )
        return None
    replica = components[-1]
    config = components[-2]
    experiment = "-".join(components[:-2])

    if not (directory / "progress.csv").is_file():
        logger.warning("File not found: %s", directory / "progress.csv")
        return None

    try:
        df = pd.read_csv(directory / "progress.csv")
    except pd.errors.EmptyDataError:
        return None

    df.insert(0, "logsdir", str(directory))
    df.insert(
        0,
        "experiment_timestamp",
        " ".join(
            [
                self.working_directory.parent.parent.name,
                self.working_directory.parent.name,
            ]
        ),
    )
    df.insert(0, "trial_name", directory.name)
    df.insert(0, "replica", replica)
    df.insert(0, "config", config)
    df.insert(0, "experiment", experiment)

    # An iteration is checkpointed if a checkpoint_{i:06d} directory exists.
    df["checkpoint"] = [
        (directory / f"checkpoint_{i:06d}").is_dir()
        for i in df["training_iteration"]
    ]
    df["checkpoint_path"] = [
        str(directory / f"checkpoint_{i:06d}" / f"checkpoint-{i}")
        if (directory / f"checkpoint_{i:06d}").is_dir()
        else None
        for i in df["training_iteration"]
    ]

    # The hist_stats columns are serialized python lists of per-episode
    # rewards; parse them and take the geometric mean.
    df["evaluation/episode_reward_geomean"] = [
        geometric_mean(eval(x))
        for x in df["evaluation/hist_stats/episode_reward"]
    ]
    df["episode_reward_geomean"] = [
        geometric_mean(eval(x)) for x in df["hist_stats/episode_reward"]
    ]

    df["complete"] = [
        min(d / self.training.episodes, 1) for d in df["episodes_total"]
    ]
    df["cpus"] = self.executor.cpus
    df["gpus"] = self.executor.gpus

    df = df.set_index(["experiment", "config", "replica", "training_iteration"])
    return df
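# Trial directory layout assumed by _trial_to_dataframe() above, inferred from
# its parsing logic (illustrative paths only, not real data):
#
#   <working_directory>/<experiment>-<config>-<replica>/
#       progress.csv                        # per-iteration training metrics
#       checkpoint_000042/checkpoint-42     # only for checkpointed iterations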