Example #1
    def _get_ensemble_process(self,
                              time_left_for_ensembles,
                              task=None,
                              metric=None,
                              precision=None,
                              dataset_name=None,
                              max_iterations=-1,
                              ensemble_nbest=None,
                              ensemble_size=None):

        if task is None:
            task = self._task
        else:
            self._task = task

        if metric is None:
            metric = self._metric
        else:
            self._metric = metric

        if precision is None:
            precision = self.precision
        else:
            self.precision = precision

        if dataset_name is None:
            dataset_name = self._dataset_name
        else:
            self._dataset_name = dataset_name

        if ensemble_nbest is None:
            ensemble_nbest = self._ensemble_nbest
        else:
            self._ensemble_nbest = ensemble_nbest

        if ensemble_size is None:
            ensemble_size = self._ensemble_size
        else:
            self._ensemble_size = ensemble_size

        return EnsembleBuilder(backend=self._backend,
                               dataset_name=dataset_name,
                               task_type=task,
                               metric=metric,
                               limit=time_left_for_ensembles,
                               ensemble_size=ensemble_size,
                               ensemble_nbest=ensemble_nbest,
                               seed=self._seed,
                               shared_mode=self._shared_mode,
                               precision=precision,
                               max_iterations=max_iterations)
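Both this example and Example #2 below repeat the same pattern for every parameter: use the keyword argument if it was passed, otherwise fall back to the stored attribute, and keep the attribute in sync either way. A minimal sketch of that pattern factored into a helper; the helper name `_sync_attr` is illustrative and not part of auto-sklearn:

def _sync_attr(obj, attr_name, value):
    """Return `value` if given, else the stored attribute; keep them in sync."""
    if value is None:
        return getattr(obj, attr_name)
    setattr(obj, attr_name, value)
    return value

# Hypothetical usage inside _get_ensemble_process:
#     task = _sync_attr(self, "_task", task)
#     metric = _sync_attr(self, "_metric", metric)
#     precision = _sync_attr(self, "precision", precision)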
Example #2
    def _get_ensemble_process(self,
                              time_left_for_ensembles,
                              task=None,
                              precision=None,
                              dataset_name=None,
                              max_iterations=None,
                              ensemble_nbest=None,
                              ensemble_size=None):

        if task is None:
            task = self._task
        else:
            self._task = task

        if precision is None:
            precision = self.precision
        else:
            self.precision = precision

        if dataset_name is None:
            dataset_name = self._dataset_name
        else:
            self._dataset_name = dataset_name

        if ensemble_nbest is None:
            ensemble_nbest = self._ensemble_nbest
        else:
            self._ensemble_nbest = ensemble_nbest

        if ensemble_size is None:
            ensemble_size = self._ensemble_size
        else:
            self._ensemble_size = ensemble_size

        return EnsembleBuilder(
            backend=self._backend,
            dataset_name=dataset_name,
            task_type=task,
            metric=self._metric,
            limit=time_left_for_ensembles,
            ensemble_size=ensemble_size,
            ensemble_nbest=ensemble_nbest,
            max_models_on_disc=self._max_models_on_disc,
            seed=self._seed,
            shared_mode=self._shared_mode,
            precision=precision,
            max_iterations=max_iterations,
            read_at_most=np.inf,
            memory_limit=self._ensemble_memory_limit,
            random_state=self._seed,
        )
Example #3
def testPerformanceRangeThreshold(ensemble_backend,
                                  performance_range_threshold, exp):
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=100,
        performance_range_threshold=performance_range_threshold)
    ensbuilder.read_scores = {
        'A': {
            'ens_score': 1,
            'num_run': 1,
            'loaded': -1,
            "seed": 1
        },
        'B': {
            'ens_score': 2,
            'num_run': 2,
            'loaded': -1,
            "seed": 1
        },
        'C': {
            'ens_score': 3,
            'num_run': 3,
            'loaded': -1,
            "seed": 1
        },
        'D': {
            'ens_score': 4,
            'num_run': 4,
            'loaded': -1,
            "seed": 1
        },
        'E': {
            'ens_score': 5,
            'num_run': 5,
            'loaded': -1,
            "seed": 1
        },
    }
    ensbuilder.read_preds = {
        key: {key_2: True
              for key_2 in (Y_ENSEMBLE, Y_VALID, Y_TEST)}
        for key in ensbuilder.read_scores
    }
    sel_keys = ensbuilder.get_n_best_preds()

    assert len(sel_keys) == exp
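The expected counts for this fixture (listed explicitly in Example #15 below) are consistent with a cutoff of the form dummy_score + threshold * (best_score - dummy_score), where only models strictly above the cutoff are kept and at least one model always survives. A small sketch of that arithmetic, as an assumption about how `performance_range_threshold` is applied rather than a copy of the library code:

# Scores from the fixture above; 'A' (num_run=1) acts as the dummy baseline.
dummy_score, model_scores = 1, [2, 3, 4, 5]

for threshold, expected in [(0.0, 4), (0.3, 3), (0.5, 2), (0.8, 1), (1.0, 1)]:
    cutoff = dummy_score + threshold * (max(model_scores) - dummy_score)
    kept = [s for s in model_scores if s > cutoff] or [max(model_scores)]
    assert len(kept) == expected, (threshold, kept)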
Example #4
def test_main(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
        max_models_on_disc=None,
    )
    ensbuilder.SAVE2DISC = False

    run_history, ensemble_nbest, _, _, _ = ensbuilder.main(
        time_left=np.inf,
        iteration=1,
        return_predictions=False,
    )

    assert len(ensbuilder.read_preds) == 3
    assert ensbuilder.last_hash is not None
    assert ensbuilder.y_true_ensemble is not None

    # Make sure the run history is ok

    # We expect at least 1 element to be in the ensemble
    assert len(run_history) > 0

    # As the data loader loads the same val/train/test
    # we expect 1.0 as score and all keys available
    expected_performance = {
        'ensemble_val_score': 1.0,
        'ensemble_test_score': 1.0,
        'ensemble_optimization_score': 1.0,
    }

    # Make sure that expected performance is a subset of the run history
    assert all(item in run_history[0].items()
               for item in expected_performance.items())
    assert 'Timestamp' in run_history[0]
    assert isinstance(run_history[0]['Timestamp'], pd.Timestamp)

    assert os.path.exists(
        os.path.join(ensemble_backend.internals_directory,
                     'ensemble_read_preds.pkl')), os.listdir(
                         ensemble_backend.internals_directory)
    assert os.path.exists(
        os.path.join(ensemble_backend.internals_directory,
                     'ensemble_read_scores.pkl')), os.listdir(
                         ensemble_backend.internals_directory)
Example #5
    def testGetValidTestPreds(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=1)

        ensbuilder.score_ensemble_preds()

        d1 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1_0.0.npy"
        )
        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2_0.0.npy"
        )
        d3 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_3_100.0.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertEqual(len(sel_keys), 1)
        ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

        # Number of read files should be three and
        # predictions_ensemble_0_4_0.0.npy must not be in there
        self.assertEqual(len(ensbuilder.read_preds), 3)
        self.assertNotIn(
            os.path.join(
                self.backend.temporary_directory,
                ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_4_0.0.npy"
            ), ensbuilder.read_preds)

        # not selected --> should still be None
        self.assertIsNone(ensbuilder.read_preds[d1][Y_VALID])
        self.assertIsNone(ensbuilder.read_preds[d1][Y_TEST])
        self.assertIsNone(ensbuilder.read_preds[d3][Y_VALID])
        self.assertIsNone(ensbuilder.read_preds[d3][Y_TEST])

        # selected --> read valid and test predictions
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_VALID])
        self.assertIsNotNone(ensbuilder.read_preds[d2][Y_TEST])
Example #6
def testMaxModelsOnDisc(ensemble_backend, test_case, exp):
    ensemble_nbest = 4
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=test_case,
    )

    with unittest.mock.patch('os.path.getsize') as mock:
        mock.return_value = 100 * 1024 * 1024
        ensbuilder.score_ensemble_preds()
        sel_keys = ensbuilder.get_n_best_preds()
        assert len(sel_keys) == exp, test_case
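With `os.path.getsize` mocked to 100 MB per file and four files per run (the model plus ensemble/valid/test predictions, as the comments in Example #17 below spell out), a float `max_models_on_disc` of 700.0 leaves room for one model and 800.0 for two. A small sketch of that budget arithmetic, under the assumption that the float is interpreted as a megabyte budget divided by the per-model disk cost:

# Assumed disk-budget arithmetic behind the float test cases in Example #17.
per_model_mb = 4 * 100.0  # 4 mocked files of 100 MB each per run

for budget_mb, expected in [(700.0, 1), (800.0, 2)]:
    n_keep = max(1, int(budget_mb // per_model_mb))  # always keep at least one
    assert n_keep == expected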
Example #7
def testGetValidTestPreds(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.compute_loss_per_model()

    # d1 is a dummy prediction. d2 and d3 have the same prediction under
    # different names. num_run=2 is selected when doing sorted()
    d1 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    d3 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) == 1
    ensbuilder.get_valid_test_preds(selected_keys=sel_keys)

    # Number of read files should be three and
    # predictions_ensemble_0_4_0.0.npy must not be in there
    assert len(ensbuilder.read_preds) == 3
    assert os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy"
    ) not in ensbuilder.read_preds

    # not selected --> should still be None
    assert ensbuilder.read_preds[d1][Y_VALID] is None
    assert ensbuilder.read_preds[d1][Y_TEST] is None
    assert ensbuilder.read_preds[d3][Y_VALID] is None
    assert ensbuilder.read_preds[d3][Y_TEST] is None

    # selected --> read valid and test predictions
    assert ensbuilder.read_preds[d2][Y_VALID] is not None
    assert ensbuilder.read_preds[d2][Y_TEST] is not None
Example #8
    def testMain(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
            max_iterations=1,  # prevents infinite loop
        )
        ensbuilder.SAVE2DISC = False

        ensbuilder.main()

        self.assertEqual(len(ensbuilder.read_preds), 2)
        self.assertIsNotNone(ensbuilder.last_hash)
        self.assertIsNotNone(ensbuilder.y_true_ensemble)
Example #9
def test_run_end_at(ensemble_backend):
    with unittest.mock.patch('pynisher.enforce_limits') as pynisher_mock:
        ensbuilder = EnsembleBuilder(
            backend=ensemble_backend,
            dataset_name="TEST",
            task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
            metric=roc_auc,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
            max_models_on_disc=None,
        )
        ensbuilder.SAVE2DISC = False

        current_time = time.time()

        ensbuilder.run(end_at=current_time + 10, iteration=1)
        # ~4 seconds left: 10 seconds minus the 5-second overhead reserved by
        # the builder and a little slack, rounded down to an integer
        assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"] == 4
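The asserted value follows from simple arithmetic on `end_at`: the remaining wall-clock time minus roughly five seconds of overhead, truncated to an integer. A sketch of that calculation; the 5-second overhead is taken from the comment above, not from inspecting the builder:

import time

overhead = 5               # seconds reserved by the builder (per the comment)
end_at = time.time() + 10  # same budget as in the test above

wall_time_in_s = int(end_at - time.time() - overhead)  # ~4 right after end_at is set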
Example #10
def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp):
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=ensemble_nbest,
        max_models_on_disc=max_models_on_disc,
    )

    ensbuilder.score_ensemble_preds()
    sel_keys = ensbuilder.get_n_best_preds()

    assert len(sel_keys) == exp

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    assert sel_keys[0] == fixture
Example #11
    def testNBest(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=1,
        )

        ensbuilder.read_ensemble_preds()
        sel_keys = ensbuilder.get_n_best_preds()

        self.assertEqual(len(sel_keys), 1)

        fixture = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy")
        self.assertEqual(sel_keys[0], fixture)
Example #12
def testFallBackNBest(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=1)

    ensbuilder.score_ensemble_preds()
    print()
    print(ensbuilder.read_preds.keys())
    print(ensbuilder.read_scores.keys())
    print(ensemble_backend.temporary_directory)

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    ensbuilder.read_scores[filename]["ens_score"] = -1

    sel_keys = ensbuilder.get_n_best_preds()

    fixture = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    assert len(sel_keys) == 1
    assert sel_keys[0] == fixture
Example #13
    def testMain(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=3,  # Multilabel Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
            max_iterations=1,  # prevents infinite loop
            max_models_on_disc=None,
        )
        ensbuilder.SAVE2DISC = False

        ensbuilder.main()

        self.assertEqual(len(ensbuilder.read_preds), 3)
        self.assertIsNotNone(ensbuilder.last_hash)
        self.assertIsNotNone(ensbuilder.y_true_ensemble)

        # Make sure the run history is ok
        run_history = ensbuilder.get_ensemble_history()

        # We expect 1 element to be the ensemble
        self.assertEqual(len(run_history), 1)

        # As the data loader loads the same val/train/test
        # we expect 1.0 as score and all keys available
        expected_performance = {
            'ensemble_val_score': 1.0,
            'ensemble_test_score': 1.0,
            'ensemble_optimization_score': 1.0,
        }
        self.assertDictContainsSubset(expected_performance, run_history[0])
        self.assertIn('Timestamp', run_history[0])
        self.assertIsInstance(run_history[0]['Timestamp'], pd.Timestamp)
Example #14
def testRead(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
    )

    success = ensbuilder.score_ensemble_preds()
    assert success, str(ensbuilder.read_preds)
    assert len(ensbuilder.read_preds) == 3, ensbuilder.read_preds.keys()
    assert len(ensbuilder.read_scores) == 3, ensbuilder.read_scores.keys()

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy")
    assert ensbuilder.read_scores[filename]["ens_score"] == 0.5

    filename = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")
    assert ensbuilder.read_scores[filename]["ens_score"] == 1.0
Example #15
    def testPerformanceRangeThreshold(self):
        to_test = ((0.0, 4), (0.1, 4), (0.3, 3), (0.5, 2), (0.6, 2), (0.8, 1),
                   (1.0, 1), (1, 1))
        for performance_range_threshold, exp in to_test:
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=100,
                performance_range_threshold=performance_range_threshold
            )
            ensbuilder.read_preds = {
                'A': {'ens_score': 1, 'num_run': 1, 0: True, 'loaded': -1, "seed": 1},
                'B': {'ens_score': 2, 'num_run': 2, 0: True, 'loaded': -1, "seed": 1},
                'C': {'ens_score': 3, 'num_run': 3, 0: True, 'loaded': -1, "seed": 1},
                'D': {'ens_score': 4, 'num_run': 4, 0: True, 'loaded': -1, "seed": 1},
                'E': {'ens_score': 5, 'num_run': 5, 0: True, 'loaded': -1, "seed": 1},
            }
            sel_keys = ensbuilder.get_n_best_preds()

            self.assertEqual(len(sel_keys), exp)
Example #16
    def testRead(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
        )

        success = ensbuilder.read_ensemble_preds()
        self.assertTrue(success, str(ensbuilder.read_preds))
        self.assertEqual(len(ensbuilder.read_preds), 2)

        filename = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_1.npy")
        self.assertEqual(ensbuilder.read_preds[filename]["ens_score"], 0.5)

        filename = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy")
        self.assertEqual(ensbuilder.read_preds[filename]["ens_score"], 1.0)
Example #17
    def testMaxModelsOnDisc(self):

        ensemble_nbest = 4
        for (test_case, exp) in [
            # If None, no reduction
            (None, 2),
            # If Int, limit only on exceed
            (4, 2),
            (1, 1),
            # If Float, translate float to # models.
            # below, mock of each file is 100 Mb and
            # 4 files .model and .npy (test/val/pred) exist
            (700.0, 1),
            (800.0, 2),
            (9999.0, 2),
        ]:
            ensbuilder = EnsembleBuilder(
                backend=self.backend,
                dataset_name="TEST",
                task_type=1,  # Binary Classification
                metric=roc_auc,
                limit=-1,  # not used,
                seed=0,  # important to find the test files
                ensemble_nbest=ensemble_nbest,
                max_models_on_disc=test_case,
            )

            with unittest.mock.patch('os.path.getsize') as mock:
                mock.return_value = 100 * 1024 * 1024
                ensbuilder.score_ensemble_preds()
                sel_keys = ensbuilder.get_n_best_preds()
                self.assertEqual(len(sel_keys), exp)

        # Test for Extreme scenarios
        # Make sure that the best predictions are kept
        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=50,
            max_models_on_disc=10000.0,
        )
        ensbuilder.read_preds = {}
        for i in range(50):
            ensbuilder.read_preds['pred' + str(i)] = {
                'ens_score': i * 10,
                'num_run': i,
                0: True,
                'loaded': 1,
                "seed": 1,
                "disc_space_cost_mb": 50 * i,
            }
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49', 'pred48', 'pred47', 'pred46'],
                             sel_keys)

        # Make sure at least one model is kept alive
        ensbuilder.max_models_on_disc = 0.0
        sel_keys = ensbuilder.get_n_best_preds()
        self.assertListEqual(['pred49'], sel_keys)
Example #18
def main(task_id, ensemble_dir, performance_range_threshold, ensemble_size,
         max_keep_best, seed, only_portfolio_runs, call_from_cmd):

    if max_keep_best > 1:
        assert max_keep_best == int(max_keep_best)
        max_keep_best = int(max_keep_best)

    memory_limit = 4000
    precision = 32
    metric = make_scorer('balanced_accuracy_fast', BalancedAccuracy())

    if not os.path.exists(ensemble_dir):
        raise NotADirectoryError("%s does not exist" % ensemble_dir)
    if call_from_cmd:
        assert str(task_id) in ensemble_dir

    fl_name = "ensemble_results_%fthresh_%dsize_%fbest" % \
              (performance_range_threshold, ensemble_size, max_keep_best)
    if only_portfolio_runs:
        fl_name += "_only_portfolio"
    fl_name = os.path.join(ensemble_dir, fl_name)
    if os.path.isfile(fl_name):
        raise ValueError("Nothing left to do, %s already exists" % fl_name)

    # figure out how many prediction files are in dir
    if call_from_cmd:
        pred_dir = os.path.join(ensemble_dir, "auto-sklearn-output",
                                ".auto-sklearn", "predictions_ensemble")
        n_models = glob.glob(pred_dir +
                             "/predictions_ensemble_%d_*.npy.gz" % seed)
    else:
        pred_dir = os.path.join(ensemble_dir, ".auto-sklearn",
                                "predictions_ensemble")
        n_models = glob.glob(pred_dir +
                             "/predictions_ensemble_%d_*.npy" % seed)
    n_models.sort(key=lambda x: int(float(x.split("_")[-2])))
    print("\n".join(n_models))
    print("Found %d ensemble predictions" % len(n_models))
    if len(n_models) == 0:
        raise ValueError("%s has no ensemble predictions" % pred_dir)

    # Get start time of ensemble building: 1) load json 2) find key 3) get creation times
    if call_from_cmd:
        timestamps_fl = os.path.join(ensemble_dir, "auto-sklearn-output",
                                     "timestamps.json")
    else:
        timestamps_fl = os.path.join(ensemble_dir, "timestamps.json")
    with open(timestamps_fl, "r") as fh:
        timestamps = json.load(fh)
    model_timestamps = None
    overall_start_time = None
    for k in timestamps:
        if "predictions_ensemble" in k:
            model_timestamps = timestamps[k]
        if "start_time_%d" % seed in timestamps[k]:
            overall_start_time = timestamps[k]["start_time_%d" % seed]
    assert model_timestamps is not None and overall_start_time is not None
    timestamp_keys = list(model_timestamps.keys())
    for timestamp_key in timestamp_keys:
        if timestamp_key.endswith(
                'lock') or 'predictions_ensemble' not in timestamp_key:
            del model_timestamps[timestamp_key]
    assert len(model_timestamps) == len(n_models), (len(model_timestamps),
                                                    len(n_models))
    # Get overall timelimit
    vanilla_results_fl = os.path.join(ensemble_dir, "result.json")
    with open(vanilla_results_fl, "r") as fh:
        vanilla_results = json.load(fh)

    # If only portfolio configurations, read runhistory
    if only_portfolio_runs:
        if call_from_cmd:
            runhistory_fl = os.path.join(ensemble_dir, "auto-sklearn-output",
                                         "smac3-output", "run*",
                                         "runhistory.json")
        else:
            runhistory_fl = os.path.join(ensemble_dir, "smac3-output", "run*",
                                         "runhistory.json")
        runhistory_fl = glob.glob(runhistory_fl)
        assert len(runhistory_fl) == 1
        with open(runhistory_fl[0], "r") as fh:
            runhistory = json.load(fh)

        init_design_num_runs = []
        for i in runhistory["data"]:
            if i[1][3]["configuration_origin"] == "Initial design":
                if "error" in i[1][3]:
                    continue
                init_design_num_runs.append(i[1][3]["num_run"])
        print("Portfolio stopped after %s runs" % str(init_design_num_runs))
        last_run = max(init_design_num_runs)
        print("Cut down to only portfolio runs fom %d" % len(n_models))
        for i, n in enumerate(n_models):
            if int(float(n.split("_")[-2])) > last_run:
                n_models = n_models[:i]
                break
        print("... to %d" % len(n_models))

    # load data
    X_train, y_train, X_test, y_test, cat = load_task(task_id)

    if len(np.unique(y_test)) == 2:
        task_type = BINARY_CLASSIFICATION
    elif len(np.unique(y_test)) > 2:
        task_type = MULTICLASS_CLASSIFICATION
    else:
        raise ValueError("Unknown task type for task %d" % task_id)

    tmp_dir = tempfile.TemporaryDirectory()
    loss_trajectory = []

    # Construct ensemble builder
    context = BackendContextMock(
        temporary_directory=(ensemble_dir + "/auto-sklearn-output/"
                             if call_from_cmd else ensemble_dir),
        output_directory=tmp_dir.name,
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        shared_mode=False)
    backend = Backend(context)

    ens_builder = EnsembleBuilder(
        backend=backend,
        dataset_name=str(task_id),
        task_type=task_type,
        metric=metric,
        limit=np.inf,
        ensemble_size=ensemble_size,
        ensemble_nbest=max_keep_best,
        performance_range_threshold=performance_range_threshold,
        max_models_on_disc=None,
        seed=seed,
        shared_mode=False,
        precision=precision,
        max_iterations=1,
        read_at_most=1,
        memory_limit=memory_limit,
        random_state=1,
        sleep_duration=0)

    try:
        # iterate over all models, take construction time into account when creating new trajectory
        current_ensemble_timestamp = 0
        skipped = 1
        for midx, model_path in enumerate(n_models):
            tstamp = model_timestamps[model_path.split("/")[-1].replace(
                '.gz', '')] - overall_start_time
            if current_ensemble_timestamp > tstamp:
                # while this model was built, the ensemble script was not yet done
                skipped += 1
                continue

            # Do one ensemble building step
            start = time.time()
            ens_builder.random_state = check_random_state(1)
            print("############## %d: Working on %s (skipped %d)" %
                  (midx + 1, model_path, skipped - 1))
            logging.basicConfig(level=logging.DEBUG)
            ens_builder.read_at_most = skipped
            valid_pred, test_pred = ens_builder.main(return_pred=True)
            last_dur = time.time() - start
            current_ensemble_timestamp = tstamp + last_dur

            if current_ensemble_timestamp >= vanilla_results["0"]["time_limit"]:
                print("############## Went over time %f > %f; Stop here" %
                      (current_ensemble_timestamp,
                       vanilla_results["0"]["time_limit"]))
                break

            # Reset, since we have just read model files
            skipped = 1
            if test_pred is None:
                # Adding this model did not change the ensemble, no new prediction
                continue
            if task_type == BINARY_CLASSIFICATION:
                # Recreate nx2 array
                test_pred = np.concatenate(
                    [1 - test_pred.reshape([-1, 1]),
                     test_pred.reshape([-1, 1])],
                    axis=1)

            # Build trajectory entry
            loss = 1 - balanced_accuracy(y_true=y_test, y_pred=test_pred)
            loss_trajectory.append((current_ensemble_timestamp, loss))
            print("############## Round %d took %g sec" %
                  (midx, time.time() - start))
    except:
        raise
    finally:
        tmp_dir.cleanup()

    # Store results
    result = dict()
    result[ensemble_size] = {
        'task_id': task_id,
        'time_limit': vanilla_results["0"]["time_limit"],
        'loss': loss_trajectory[-1][1],
        'configuration': {
            "n_models": n_models,
            "performance_range_threshold": performance_range_threshold,
            "ensemble_size": ensemble_size,
            "max_keep_best": max_keep_best,
            "seed": seed,
            "memory_limit": memory_limit,
            "precision": precision,
        },
        'n_models': len(n_models),
        'trajectory': loss_trajectory,
    }

    with open(fl_name, 'wt') as fh:
        json.dump(result, fh, indent=4)
    print("Dumped to %s" % fl_name)