Example #1
def list_experiments(project_path,
                     sort=None,
                     output=None,
                     filter_op=None,
                     info_keys=None,
                     limit=None,
                     desc=False):
    """Lists experiments in the directory subtree.

    Args:
        project_path (str): Directory where experiments are located.
            Corresponds to Experiment.local_dir.
        sort (str): Column key to sort by.
        output (str): Name of file where output is saved.
        filter_op (str): Filter operation in the format
            "<column> <operator> <value>".
        info_keys (list): Keys that are displayed.
        limit (int): Number of rows to display.
        desc (bool): Sort ascending vs. descending.
    """
    _check_tabulate()
    base, experiment_folders, _ = next(os.walk(project_path))

    experiment_data_collection = []

    for experiment_dir in experiment_folders:
        analysis_obj, checkpoints_df = None, None
        try:
            analysis_obj = ExperimentAnalysis(
                os.path.join(project_path, experiment_dir))
            checkpoints_df = analysis_obj.dataframe()
        except TuneError:
            logger.debug("No experiment state found in %s", experiment_dir)
            continue

        # Format time-based values.
        stats = analysis_obj.stats()
        time_values = {
            "start_time": stats.get("_start_time"),
            "last_updated": stats.get("timestamp"),
        }

        formatted_time_values = {
            key: datetime.fromtimestamp(val).strftime(TIMESTAMP_FORMAT)
            if val else None
            for key, val in time_values.items()
        }

        experiment_data = {
            "name": experiment_dir,
            "total_trials": checkpoints_df.shape[0],
            "running_trials": (
                checkpoints_df["status"] == Trial.RUNNING).sum(),
            "terminated_trials": (
                checkpoints_df["status"] == Trial.TERMINATED).sum(),
            "error_trials": (checkpoints_df["status"] == Trial.ERROR).sum(),
        }
        experiment_data.update(formatted_time_values)
        experiment_data_collection.append(experiment_data)

    if not experiment_data_collection:
        print("No experiments found!")
        sys.exit(0)

    info_df = pd.DataFrame(experiment_data_collection)
    if not info_keys:
        info_keys = DEFAULT_PROJECT_INFO_KEYS
    col_keys = [k for k in list(info_keys) if k in info_df]
    if not col_keys:
        print("None of keys {} in experiment data!".format(info_keys))
        sys.exit(0)
    info_df = info_df[col_keys]

    if filter_op:
        col, op, val = filter_op.split(" ")
        col_type = info_df[col].dtype
        if is_numeric_dtype(col_type):
            val = float(val)
        elif is_string_dtype(col_type):
            val = str(val)
        # TODO(Andrew): add support for datetime and boolean
        else:
            raise ValueError("Unsupported dtype for {}: {}".format(
                val, col_type))
        op = OPERATORS[op]
        filtered_index = op(info_df[col], val)
        info_df = info_df[filtered_index]

    if sort:
        if sort not in info_df:
            raise KeyError("{} not in: {}".format(sort, list(info_df)))
        ascending = not desc
        info_df = info_df.sort_values(by=sort, ascending=ascending)

    if limit:
        info_df = info_df[:limit]

    print_format_output(info_df)

    if output:
        file_extension = os.path.splitext(output)[1].lower()
        if file_extension in (".p", ".pkl", ".pickle"):
            info_df.to_pickle(output)
        elif file_extension == ".csv":
            info_df.to_csv(output, index=False)
        else:
            raise ValueError("Unsupported filetype: {}".format(output))
        print("Output saved at:", output)
Example #2
class ExperimentAnalysisSuite(unittest.TestCase):
    def setUp(self):
        ray.init(local_mode=True)

        self.test_dir = tempfile.mkdtemp()
        self.test_name = "analysis_exp"
        self.num_samples = 10
        self.metric = "episode_reward_mean"
        self.test_path = os.path.join(self.test_dir, self.test_name)
        self.run_test_exp()

        self.ea = ExperimentAnalysis(self.test_path)

    def tearDown(self):
        shutil.rmtree(self.test_dir, ignore_errors=True)
        ray.shutdown()

    def run_test_exp(self):
        ahb = AsyncHyperBandScheduler(time_attr="training_iteration",
                                      reward_attr=self.metric,
                                      grace_period=5,
                                      max_t=100)

        run(MyTrainableClass,
            name=self.test_name,
            scheduler=ahb,
            local_dir=self.test_dir,
            **{
                "stop": {
                    "training_iteration": 1
                },
                "num_samples": 10,
                "config": {
                    "width":
                    sample_from(lambda spec: 10 + int(90 * random.random())),
                    "height":
                    sample_from(lambda spec: int(100 * random.random())),
                },
            })

    def testDataframe(self):
        df = self.ea.dataframe()

        self.assertTrue(isinstance(df, pd.DataFrame))
        self.assertEqual(df.shape[0], self.num_samples)

    def testTrialDataframe(self):
        cs = self.ea._checkpoints
        idx = random.randint(0, len(cs) - 1)
        trial_df = self.ea.trial_dataframe(
            cs[idx]["trial_id"])  # random trial df

        self.assertTrue(isinstance(trial_df, pd.DataFrame))
        self.assertEqual(trial_df.shape[0], 1)

    def testBestTrainable(self):
        best_trainable = self.ea.get_best_trainable(self.metric,
                                                    MyTrainableClass)

        self.assertTrue(isinstance(best_trainable, MyTrainableClass))

    def testBestConfig(self):
        best_config = self.ea.get_best_config(self.metric)

        self.assertTrue(isinstance(best_config, dict))
        self.assertTrue("width" in best_config)
        self.assertTrue("height" in best_config)

    def testBestTrial(self):
        best_trial = self.ea._get_best_trial(self.metric)

        self.assertTrue(isinstance(best_trial, dict))
        self.assertTrue("local_dir" in best_trial)
        self.assertEqual(best_trial["local_dir"],
                         os.path.expanduser(self.test_path))
        self.assertTrue("config" in best_trial)
        self.assertTrue("width" in best_trial["config"])
        self.assertTrue("height" in best_trial["config"])
        self.assertTrue("last_result" in best_trial)
        self.assertTrue(self.metric in best_trial["last_result"])

    def testCheckpoints(self):
        checkpoints = self.ea._checkpoints

        self.assertTrue(isinstance(checkpoints, list))
        self.assertTrue(isinstance(checkpoints[0], dict))
        self.assertEqual(len(checkpoints), self.num_samples)

    def testStats(self):
        stats = self.ea.stats()

        self.assertTrue(isinstance(stats, dict))
        self.assertTrue("start_time" in stats)
        self.assertTrue("timestamp" in stats)

    def testRunnerData(self):
        runner_data = self.ea.runner_data()

        self.assertTrue(isinstance(runner_data, dict))
        self.assertTrue("_metadata_checkpoint_dir" in runner_data)
        self.assertEqual(runner_data["_metadata_checkpoint_dir"],
                         os.path.expanduser(self.test_path))
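
The test suite above doubles as a tour of the ExperimentAnalysis surface in this Ray version: dataframe(), stats(), runner_data(), get_best_config(), and get_best_trainable(). A minimal standalone sketch follows; the import path and the results directory are assumptions, and it presumes an experiment directory already produced by run() as in run_test_exp above.

# Standalone sketch of the ExperimentAnalysis calls exercised by the tests.
# Import path and results directory are assumptions for this Ray version.
import os
from ray.tune.analysis import ExperimentAnalysis  # assumed module path

analysis = ExperimentAnalysis(os.path.expanduser("~/ray_results/analysis_exp"))

df = analysis.dataframe()              # one row per trial
print(df.shape)                        # (num_trials, num_columns)

stats = analysis.stats()               # experiment-level metadata
print(stats["start_time"], stats["timestamp"])

best_config = analysis.get_best_config("episode_reward_mean")
print("Best config:", best_config)     # e.g. {"width": ..., "height": ...}
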