Example #1
    def run(self) -> pd.DataFrame:
        """
        Runs the evaluation of the surrogate by applying leave-one-out cross-validation (LOOCV)
        across the datasets it was trained on. Metrics are then provided per test dataset.

        Returns:
            A data frame with the results for each fold, with the metrics as columns. The rows
                are indexed by the dataset that was left out.
        """
        if isinstance(self.surrogate, AutoGluonSurrogate):
            # AutoGluon surrogates are evaluated on the folds sequentially
            metrics = [
                self._run_on_dataset(x)
                for x in tqdm(list(loocv_split(self.tracker)))
            ]
        else:
            # Other surrogates are evaluated in parallel, with the number of processes
            # bounded by the surrogate's resource requirements and the number of folds
            data = list(loocv_split(self.tracker))
            metrics = run_parallel(
                self._run_on_dataset,
                data=data,
                num_processes=min(
                    num_fitting_processes(
                        cpus_per_process=self.surrogate.required_cpus,
                        memory_per_process=self.surrogate.required_memory,
                    ),
                    len(data),
                ),
            )
        return pd.concat(metrics).set_index("test_dataset")
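For reference, the grouped LOOCV pattern behind `loocv_split` can be sketched as follows. This is a minimal illustration under assumed semantics, not the actual implementation: the real function takes the tracker and presumably yields richer fold objects, while here folds are plain dataset names.

from typing import Iterator, List, Tuple

def loocv_split_sketch(datasets: List[str]) -> Iterator[Tuple[List[str], str]]:
    # Hypothetical stand-in for loocv_split: for every dataset, yield a fold
    # consisting of all other datasets (for training) and the single left-out
    # dataset (for testing).
    for test_dataset in datasets:
        train = [d for d in datasets if d != test_dataset]
        yield train, test_dataset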
Example #2
def download(dataset: Optional[str], path: str):
    """
    Downloads and preprocesses either a single dataset or all datasets in the
    registry.
    """
    base = Path(path)

    if dataset is not None:
        dataset_cls = DATASET_REGISTRY[dataset](base)
        dataset_cls.generate()
        dataset_cls.prepare()
        return

    # Start off by downloading an M3 dataset
    dataset_cls = DATASET_REGISTRY["m3_monthly"](base)
    dataset_cls.generate()
    dataset_cls.prepare()

    # Then, we can download the rest in parallel (preloading one M3 dataset ensures that the
    # shared M3 source data is not downloaded concurrently by multiple processes)
    run_parallel(
        partial(_download_dataset, base=base),
        list(DATASET_REGISTRY.keys()),
        num_processes=min(
            num_fitting_processes(cpus_per_process=1, memory_per_process=8),
            len(DATASET_REGISTRY),
        ),
    )
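The parallel branch delegates to a `_download_dataset` helper which is not shown here. A plausible sketch, assuming it mirrors the sequential branch above (using the same `DATASET_REGISTRY` and `Path`) and that `generate()`/`prepare()` are idempotent, so re-processing `m3_monthly` in the parallel pass is harmless:

from pathlib import Path

def _download_dataset(name: str, base: Path) -> None:
    # Hypothetical helper mirroring the sequential branch: look up the dataset
    # in the registry, then download and preprocess it.
    dataset_cls = DATASET_REGISTRY[name](base)
    dataset_cls.generate()
    dataset_cls.prepare()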
Example #3
    def run(self) -> List[Dict[str, ModelConfig]]:
        """
        Runs the evaluation on all datasets and returns the selected models for
        each dataset. The config evaluator can be used to construct a data
        frame of performances from the configurations.

        Returns:
            The recommended models. The outer list is indexed by recommendation rank, i.e. the
                first item contains the top recommendation for every dataset, the second item
                the second recommendation, etc.
        """
        data = list(loocv_split(self.tracker))
        results = run_parallel(
            self._run_on_dataset,
            data=data,
            num_processes=min(
                len(data),
                num_fitting_processes(
                    cpus_per_process=self.recommender.required_cpus,
                    memory_per_process=self.recommender.required_memory,
                ),
            ),
        )
        recommendations = {k: v for r in results for k, v in r.items()}
        return [{k: v[i]
                 for k, v in recommendations.items()}
                for i in range(self.num_recommendations)]
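A small worked example of the final reshaping, with strings standing in for `ModelConfig` objects: per-dataset ranked recommendation lists are transposed into one dict per rank.

# Illustrative input: two ranked recommendations per dataset (strings instead
# of ModelConfig objects for brevity).
recommendations = {
    "m3_monthly": ["deepar", "tft"],
    "electricity": ["tft", "arima"],
}
ranked = [
    {k: v[i] for k, v in recommendations.items()}
    for i in range(2)  # num_recommendations
]
assert ranked[0] == {"m3_monthly": "deepar", "electricity": "tft"}
assert ranked[1] == {"m3_monthly": "tft", "electricity": "arima"}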
Example #4
    def run(self) -> Tuple[pd.DataFrame, Dict[str, List[ModelConfig]]]:
        """
        Runs the evaluation on the data provided via the tracker. The data obtained from the
        tracker is partitioned by dataset, and we run "grouped LOOCV" to compute performance
        metrics on each dataset. The per-dataset metrics are then returned as a data frame.

        Returns:
            The metrics on the individual datasets.
            The model choices for each dataset.
        """
        results = run_parallel(
            self._run_on_dataset,
            data=list(loocv_split(self.tracker)),
            num_processes=num_fitting_processes(),
        )
        performances = [r[0] for r in results]
        member_mapping = {k: v for r in results for k, v in r[1].items()}

        df = pd.concat(performances).set_index("test_dataset")
        return df, member_mapping
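The unpacking above implies a contract for `_run_on_dataset`: each call returns a tuple of a one-row metrics frame (with a `test_dataset` column) and a single-entry mapping from the left-out dataset to the chosen models. A sketch with illustrative values; the metric and model names are made up:

import pandas as pd

# Illustrative worker outputs following the assumed contract
results = [
    (pd.DataFrame([{"test_dataset": "m3_monthly", "mase": 0.91}]),
     {"m3_monthly": ["deepar", "tft"]}),
    (pd.DataFrame([{"test_dataset": "electricity", "mase": 1.05}]),
     {"electricity": ["tft"]}),
]
performances = [r[0] for r in results]
member_mapping = {k: v for r in results for k, v in r[1].items()}
df = pd.concat(performances).set_index("test_dataset")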
Example #5
def simulate(
    data_path: str,
    evaluations_path: str,
    output_path: str,
    max_ensemble_size: int,
    default_samples: int,
    hyperensemble_samples: int,
    random_samples: int,
    sample_datasets: bool,
    seed: int,
):
    """
    Simulates the performance of various ensembles. The ensembles are built from configurations
    (i.e. model types and hyperparameters) for which offline evaluations are available.
    """
    assert any([
        default_samples != 0, hyperensemble_samples != 0, random_samples != 0
    ]), "No samples are specified."

    random.seed(seed)

    # Load the experiments
    print("Loading experiments...")
    tracker = ModelTracker.from_directory(Path(evaluations_path),
                                          data_path=Path(data_path))

    # Sample configurations
    print("Sampling configurations...")
    unique_configurations = tracker.unique_model_configs()
    # Default configurations either do not require training or match the defaults of their
    # config class
    default_configurations = [
        cast(ModelConfig, c) for c in unique_configurations
        if not isinstance(c, TrainConfig) or c == c.__class__()
    ]
    choices: List[Tuple[ModelConfig, ...]] = []

    # If desired, we combine all base configurations into ensembles of sizes between 2 and the
    # provided maximum. Then, we potentially sample from this collection. For 13 default
    # configurations and a maximum ensemble size of 10, this results in 8,086 ensembles.
    if default_samples != 0:
        available_ensembles = [
            combination for i in range(2, max_ensemble_size + 1)
            for combination in combinations(default_configurations, i)
        ]
        if default_samples == -1:
            choices.extend(available_ensembles)
        else:
            choices.extend(random.sample(available_ensembles, default_samples))

    # If desired, we build hyper-ensembles: combinations of differently configured instances of
    # a single model type. Only fully trained configurations are considered, and model types
    # with a single configuration are skipped.
    if hyperensemble_samples != 0:
        available_ensembles = []
        for config in default_configurations:
            all_configs = [
                c for c in unique_configurations
                if isinstance(c, config.__class__) and (
                    not isinstance(c, TrainConfig) or c.training_fraction == 1)
            ]
            if len(all_configs) == 1:
                continue
            hyper_ensembles = [
                combination for i in range(2, max_ensemble_size + 1)
                for combination in combinations(all_configs, i)
            ]
            available_ensembles.extend(hyper_ensembles)

        if hyperensemble_samples == -1:
            choices.extend(available_ensembles)
        else:
            choices.extend(
                random.sample(available_ensembles, hyperensemble_samples))

    # Then, we add some randomly sampled ensembles of model configurations
    for _ in range(random_samples):
        ensemble_size = random.randrange(2, max_ensemble_size + 1)
        configs = random.sample(unique_configurations, ensemble_size)
        choices.append(tuple(configs))

    # Then, we either evaluate each chosen configuration on all datasets or on a randomly sampled
    # one.
    datasets = list(
        {c.dataset
         for c in tracker.get_evaluations().configurations})
    if sample_datasets:
        evaluations = [(ensemble, random.choice(datasets))
                       for ensemble in choices]
    else:
        evaluations = list(product(choices, datasets))

    # Finally, we can evaluate the ensembles that we have sampled
    print("Evaluating ensembles...")
    evaluator = EnsembleAnalyzer(tracker)
    results = run_parallel(
        partial(_evaluate_ensemble, evaluator=evaluator),
        data=evaluations,
        num_processes=min(
            num_fitting_processes(cpus_per_process=1, memory_per_process=8),
            len(evaluations),
        ),
    )

    # Afterwards, we can store all configurations along with their results. For now, we are just
    # storing them as pickled objects.
    with Path(output_path).open("wb+") as f:
        pickle.dump(
            [{
                "configurations": list(evaluation[0]),
                "dataset": evaluation[1],
                "performance": result,
            } for evaluation, result in zip(evaluations, results)],
            f,
        )
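Reading the simulation output back is straightforward since the schema is fixed by the dump above; the file name below is merely a placeholder for whatever was passed as `output_path`:

import pickle
from pathlib import Path

# Placeholder path; use the actual output_path passed to simulate()
with Path("ensemble_results.pickle").open("rb") as f:
    records = pickle.load(f)

# Each record carries "configurations", "dataset", and "performance" keys
for record in records[:3]:
    print(record["dataset"], len(record["configurations"]), record["performance"])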