Example No. 1
def info(
    log_dirs: List[Path] = ["~/logs/compiler_gym/llvm_autotuning"],
    all_runs: bool = False,
    group_by_working_directory: bool = False,
    only_nonzero_reward: bool = False,
):
    experiments = experiments_from_paths(log_dirs)

    results = []
    for experiment in experiments:
        df = experiment.dataframe

        # Exclude runs where the reward was zero; useful for pruning spurious
        # results when the environment is flaky or can fail.
        if only_nonzero_reward:
            df = df[df.reward != 0]

        if not len(df):
            continue

        df.to_csv(experiment.working_directory / "results.csv", index=False)

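        # Per-benchmark aggregates: arithmetic mean of walltime, geometric
        # mean of reward.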
        walltimes = df[["benchmark", "walltime"]].groupby("benchmark").mean()
        rewards = df[["benchmark", "reward"]].groupby("benchmark").agg(geometric_mean)
        num_results = len(df)
        num_benchmarks = len(set(df["benchmark"]))

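        # Combine per-benchmark walltimes and rewards into one table and
        # append an "Average" summary row.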
        df = pd.concat((walltimes, rewards), axis=1)
        avg_walltime = df["walltime"].mean()
        avg_reward = geometric_mean(df["reward"])
        df = pd.concat(
            (
                df,
                pd.DataFrame(
                    [{"walltime": avg_walltime, "reward": avg_reward}],
                    index=["Average"],
                ),
            )
        )

        df = df.reset_index()
        df.insert(0, "config", experiment.configuration_number)
        df.insert(0, "timestamp", experiment.timestamp)
        df.insert(0, "experiment", experiment.experiment)

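        # Optionally print the full per-benchmark table for this experiment.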
        if all_runs:
            print(experiment.working_directory)
            print(tabulate(df, showindex=False, headers="keys", tablefmt="grid"))
            print()

        results.append(
            {
                "working_directory": experiment.working_directory,
                "experiment": experiment.experiment,
                "timestamp": experiment.timestamp,
                "config": experiment.configuration_number,
                "num_benchmarks": num_benchmarks,
                "num_results": num_results,
                "walltime": avg_walltime,
                "reward": avg_reward,
            }
        )

    df = pd.DataFrame(results)
    if not len(df):
        print("No results")
        return
    print("---------------------------------------")
    print("Aggregate over experiments:")
    if group_by_working_directory:
        df = df.groupby(["working_directory"]).mean()
    else:
        df = df.groupby(["experiment", "timestamp", "config"]).mean()

    # Cast float back to int.
    df["num_benchmarks"] = [int(x) for x in df["num_benchmarks"]]
    df["num_results"] = [int(x) for x in df["num_results"]]

    # Better column names.
    df = df.rename(columns={"reward": "geomean_reward", "walltime": "walltime (s)"})

    pd.set_option("display.max_rows", None)
    print(df)
Example No. 2
def train(log_dirs: List[Path] = ["~/logs/compiler_gym/llvm_rl"]):
    init_logging()

    models = models_from_paths(log_dirs)

    dfs = []
    for model in models:
        df = model.dataframe
        if not len(df):
            continue

        # Select only the rows with a checkpoint.
        df = df[df["checkpoint"].values]

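        # Keep only the columns needed for reporting.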
        df = df[[
            "trial_name",
            "experiment_timestamp",
            "episodes_total",
            "episode_reward_geomean",
            "episode_reward_mean",
            "evaluation/episode_reward_mean",
            "evaluation/episode_reward_geomean",
            "time_total_s",
            "complete",
            "cpus",
            "gpus",
        ]]

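        # Collapse each trial to its column-wise maxima across training
        # iterations.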
        sdf = df.groupby(
            ["experiment", "config", "replica", "experiment_timestamp"]).max()

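        # Attach each trial's held-out test results, where available.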
        test_results = model.test_dataframes
        sdf["test_results"] = [
            test_results.get(d, pd.DataFrame()) for d in sdf["trial_name"]
        ]

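        # Summarize the test results: arithmetic and geometric means of the
        # instruction count and object size reductions, plus the checkpoint
        # each result came from.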
        sdf["test_ic_mean"] = [
            sum(d["instruction_count_reduction"]) /
            len(d) if not d.empty else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_ic_geomean"] = [
            geometric_mean(d["instruction_count_reduction"])
            if not d.empty else float("nan") for d in sdf["test_results"]
        ]
        sdf["test_os_mean"] = [
            sum(d["object_size_reduction"]) /
            len(d) if not d.empty else float("nan")
            for d in sdf["test_results"]
        ]
        sdf["test_os_geomean"] = [
            geometric_mean(d["object_size_reduction"])
            if not d.empty else float("nan") for d in sdf["test_results"]
        ]
        sdf["test_checkpoint"] = [
            int(d["test_checkpoint"].values[0].split("-")[-1])
            if not d.empty else "" for d in sdf["test_results"]
        ]

        dfs.append(sdf.reset_index())

    df = pd.concat(dfs)

    # Print everything.
    pd.set_option("display.max_columns", None)
    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", None)

    df = df.rename(
        columns={
            "experiment_timestamp": "timestamp",
            "episodes_total": "episodes",
            "evaluation/episode_reward_geomean": "val_geomean",
            "evaluation/episode_reward_mean": "val_mean",
            "episode_reward_mean": "train_mean",
            "episode_reward_geomean": "train_geomean",
            "time_total_s": "training_time",
            "test_reward_mean": "test_mean",
            "test_reward_geomean": "test_geomean",
        })

    # Format for printing.
    df["complete"] = [f"{x:.1%}" for x in df["complete"]]
    df["episodes"] = [f"{int(x):,d}" for x in df["episodes"]]
    df["training_time"] = [
        humanize.naturaldelta(x) for x in df["training_time"]
    ]

    for reward in [
            "train_mean",
            "train_geomean",
            "val_mean",
            "val_geomean",
            "test_ic_geomean",
            "test_os_geomean",
            "test_ic_mean",
            "test_os_mean",
    ]:
        df[reward] = [f"{x:.4f}" for x in df[reward].values]

    df = df[[
        "trial_name",
        "timestamp",
        "complete",
        "episodes",
        "training_time",
        "test_checkpoint",
        "train_geomean",
        "val_geomean",
    ]]

    print(tabulate(df, headers="keys", showindex=False, tablefmt="psql"))
Example No. 3
def reward_aggregation(a):
    return geometric_mean(np.clip(a, 0, None))
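Clipping at zero keeps negative rewards out of the geometric mean, whose logarithm is undefined for negative inputs; under the zero-in, zero-out convention exercised by the tests below, a clipped episode still drives the aggregate to zero.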
Example No. 4
    def main(argv):
        assert len(argv) == 1, f"Unknown args: {argv[1:]}"
        assert FLAGS.n > 0, "n must be > 0"

        with gym.make("llvm-ic-v0") as env:

            # Stream verbose CompilerGym logs to file.
            logger = logging.getLogger("compiler_gym")
            logger.setLevel(logging.DEBUG)
            log_handler = logging.FileHandler(FLAGS.leaderboard_logfile)
            logger.addHandler(log_handler)
            logger.propagate = False

            print(f"Writing results to {FLAGS.leaderboard_results}")
            print(f"Writing logs to {FLAGS.leaderboard_logfile}")

            # Build the list of benchmarks to evaluate.
            benchmarks = env.datasets[FLAGS.test_dataset].benchmark_uris()
            if FLAGS.max_benchmarks:
                benchmarks = islice(benchmarks, FLAGS.max_benchmarks)
            benchmarks = list(benchmarks)

            # Repeat the searches for the requested number of iterations.
            benchmarks *= FLAGS.n
            total_count = len(benchmarks)

            # If we are resuming from a previous job, read the states that have
            # already been processed and remove those benchmarks from the list
            # of benchmarks to evaluate.
            init_states = []
            if FLAGS.resume and Path(FLAGS.leaderboard_results).is_file():
                with CompilerEnvStateReader(open(
                        FLAGS.leaderboard_results)) as reader:
                    for state in reader:
                        init_states.append(state)
                        if state.benchmark in benchmarks:
                            benchmarks.remove(state.benchmark)

            # Run the benchmark loop in background so that we can asynchronously
            # log progress.
            worker = _EvalPolicyWorker(env, benchmarks, policy, init_states)
            worker.start()
            timer = Timer().reset()
            try:
                print(f"=== Evaluating policy on "
                      f"{humanize.intcomma(total_count)} "
                      f"{FLAGS.test_dataset} benchmarks ==="
                      "\n\n"  # Blank lines will be filled below
                      )
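                # The ANSI escapes below move the cursor up two lines and
                # clear them so the three status lines update in place.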
                while worker.is_alive():
                    done_count = len(worker.states)
                    remaining_count = total_count - done_count
                    time = timer.time
                    gmean_reward = geometric_mean(
                        [s.reward for s in worker.states])
                    mean_walltime = (arithmetic_mean(
                        [s.walltime for s in worker.states]) or time)
                    print(
                        "\r\033[2A"
                        "\033[K"
                        f"Runtime: {humanize_duration_hms(time)}. "
                        f"Estimated completion: {humanize_duration_hms(mean_walltime * remaining_count)}. "
                        f"Completed: {humanize.intcomma(done_count)} / {humanize.intcomma(total_count)} "
                        f"({done_count / total_count:.1%})."
                        "\n\033[K"
                        f"Current mean walltime: {mean_walltime:.3f}s / benchmark."
                        "\n\033[K"
                        f"Current geomean reward: {gmean_reward:.4f}.",
                        flush=True,
                        end="",
                    )
                    sleep(1)
            except KeyboardInterrupt:
                print("\nkeyboard interrupt", flush=True)
                worker.alive = False
                # User interrupt, don't validate.
                FLAGS.validate = False

        if FLAGS.validate:
            FLAGS.env = "llvm-ic-v0"
            validate(["argv0", FLAGS.leaderboard_results])
Example No. 5
def test_geometric_mean_123():
    assert geometric_mean([1, 2, 3]) == approx(1.8171205928321)
Example No. 6
def test_geometric_mean_negative():
    assert geometric_mean([-1, 1, 2]) == 0
Example No. 7
def test_geometric_mean_zero_value():
    assert geometric_mean([0, 1, 2]) == 0
Example No. 8
def test_geometric_mean_empty_list():
    assert geometric_mean([]) == 0
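Taken together, the four tests above pin down geometric_mean's contract: the geometric mean of [1, 2, 3] is the cube root of 6 (approx. 1.8171), and an empty list, a zero, or a negative value all yield 0. A minimal sketch consistent with those tests, assuming a NumPy-based log-space implementation (an illustration, not necessarily the library's actual code):

import numpy as np

def geometric_mean(values) -> float:
    # NOTE: illustrative sketch, not the library's implementation.
    a = np.asarray(list(values), dtype=float)
    # Convention from the tests: empty input, zeros, and negative
    # values all produce 0.
    if a.size == 0 or np.any(a <= 0):
        return 0.0
    # exp(mean(log(a))) computes the n-th root of the product in log
    # space, avoiding overflow from multiplying many terms.
    return float(np.exp(np.log(a).mean()))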
Example No. 9
    def _trial_to_dataframe(self, directory: Path) -> Optional[pd.DataFrame]:
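        """Parse a single trial directory's progress.csv into a DataFrame.

        Returns None if the directory name does not match the expected
        {experiment}-{config}-{replica} format, or if progress.csv is
        missing or empty.
        """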
        components = directory.name.split("-")
        if len(components) < 3:
            logger.warning(
                "Directory name does not match expected "
                "{experiment}-{config}-{replica} format: %s",
                directory,
            )
            return

        replica = components[-1]
        config = components[-2]
        experiment = "-".join(components[:-2])

        if not (directory / "progress.csv").is_file():
            logger.warning("File not found: %s", directory / "progress.csv")
            return

        try:
            df = pd.read_csv(directory / "progress.csv")
        except pd.errors.EmptyDataError:
            return None

        df.insert(0, "logsdir", str(directory))
        df.insert(
            0,
            "experiment_timestamp",
            " ".join([
                self.working_directory.parent.parent.name,
                self.working_directory.parent.name,
            ]),
        )
        df.insert(0, "trial_name", directory.name)
        df.insert(0, "replica", replica)
        df.insert(0, "config", config)
        df.insert(0, "experiment", experiment)

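        # Mark which training iterations have a checkpoint saved on disk.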
        df["checkpoint"] = [(directory / f"checkpoint_{i:06d}").is_dir()
                            for i in df["training_iteration"]]
        df["checkpoint_path"] = [
            str(directory / f"checkpoint_{i:06d}" / f"checkpoint-{i}") if
            (directory / f"checkpoint_{i:06d}").is_dir() else None
            for i in df["training_iteration"]
        ]

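        # The hist_stats columns hold stringified Python lists of episode
        # rewards; eval() parses them back into lists.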
        df["evaluation/episode_reward_geomean"] = [
            geometric_mean(eval(x))
            for x in df["evaluation/hist_stats/episode_reward"]
        ]

        df["episode_reward_geomean"] = [
            geometric_mean(eval(x)) for x in df["hist_stats/episode_reward"]
        ]

        df["complete"] = [
            min(d / self.training.episodes, 1) for d in df["episodes_total"]
        ]

        df["cpus"] = self.executor.cpus
        df["gpus"] = self.executor.gpus

        df = df.set_index(
            ["experiment", "config", "replica", "training_iteration"])

        return df