def main(basename, input_dir, params):
    """Run one holdout evaluation for ``basename`` with the given parameters.

    The dataset is loaded through ``store_and_or_load_data`` using the
    current working directory as the output directory, a configuration is
    built from ``params`` on the search space of the class selected by the
    dataset's ``info``, and a ``HoldoutEvaluator`` is fitted and finished.

    The evaluator is published in the module-level global ``evaluator``.

    Args:
        basename: Dataset name, forwarded as ``dataset``.
        input_dir: Directory holding the dataset, forwarded as ``data_dir``.
        params: Mapping expanded as keyword arguments into
            ``configuration_space.Configuration``.
    """
    global evaluator

    working_dir = os.getcwd()
    datamanager = store_and_or_load_data(
        data_dir=input_dir, dataset=basename, outputdir=working_dir)

    model_class = get_class(datamanager.info)
    search_space = model_class.get_hyperparameter_search_space()
    config = configuration_space.Configuration(search_space, **params)

    evaluator_kwargs = dict(
        datamanager=datamanager,
        configuration=config,
        with_predictions=True,
        all_scoring_functions=True,
        output_dir=working_dir,
    )
    evaluator = HoldoutEvaluator(**evaluator_kwargs)
    evaluator.fit()
    evaluator.finish_up()
# Example #2
0
def make_mode_holdout(data, seed, configuration, num_run):
    """Fit a holdout evaluator and pickle its model under ``models_<seed>``.

    A ``HoldoutEvaluator`` is built from ``data`` and ``configuration``
    (plus the keyword arguments returned by ``_get_base_dict()``), fitted
    and finished. The fitted model is then pickled to
    ``<cwd>/models_<seed>/<num_run>.model``.

    If the model directory does not exist, the problem is logged through
    ``debug_log`` and nothing is written. Any ``AssertionError`` raised
    inside the body is likewise logged and not propagated.

    Args:
        data: Data object handed to ``HoldoutEvaluator``.
        seed: Seed passed to the evaluator; also formatted with ``%d`` into
            the model directory name.
        configuration: Configuration handed to ``HoldoutEvaluator``.
        num_run: Run identifier passed to the evaluator; also used as the
            model file's base name.
    """
    try:
        debug_log("Run: %s" % make_mode_holdout.__name__)
        evaluator = HoldoutEvaluator(data, configuration,
                                     seed=seed,
                                     num_run=num_run,
                                     **_get_base_dict())
        debug_log("Fit evaluator")
        evaluator.fit()
        # Once fitting is done, route SIGTERM to empty_signal_handler for
        # the remainder of the run (finishing up and saving the model).
        signal.signal(signal.SIGTERM, empty_signal_handler)
        debug_log("Fit finish up")
        evaluator.finish_up()
        model_directory = os.path.join(os.getcwd(), 'models_%d' % seed)
        debug_log("Check model directory: %s" % model_directory)
        # Explicit check instead of ``assert`` so it still runs under -O.
        if not os.path.exists(model_directory):
            debug_log("Not found model directory: %s" % model_directory)
            return
        debug_log("Save models in files")
        model_filename = os.path.join(model_directory,
                                      '%s.model' % num_run)
        # Protocol -1 is a binary pickle protocol, so the file must be
        # opened in binary mode.
        with open(model_filename, 'wb') as fh:
            pickle.dump(evaluator.model, fh, -1)
    except AssertionError as e:
        debug_log(str(e))
    def test_5000_classes(self):
        """Fit a HoldoutEvaluator on a 5000-class problem with rare classes.

        The generated labels are asserted to contain exactly 250 classes
        that occur a single time; the evaluator is then fitted on a deep
        copy of the data manager using a sampled extra-trees configuration
        without preprocessing.
        """
        class_weights = [0.0002] * 4750 + [0.0001] * 250
        generator_options = dict(
            n_samples=10000,
            n_features=20,
            n_classes=5000,
            n_clusters_per_class=1,
            n_informative=15,
            n_redundant=5,
            n_repeated=0,
            weights=class_weights,
            flip_y=0,
            class_sep=1.0,
            hypercube=True,
            shift=None,
            scale=1.0,
            shuffle=True,
            random_state=1,
        )
        X, Y = sklearn.datasets.make_classification(**generator_options)

        # Sanity check: 250 classes must appear exactly once in Y.
        singleton_classes = np.sum(np.bincount(Y) == 1)
        self.assertEqual(250, singleton_classes)

        datamanager = Dummy()
        datamanager.info = {
            'metric': 'r2_metric',
            'task': MULTICLASS_CLASSIFICATION,
            'is_sparse': False,
            'target_num': 1,
        }
        # The training features double as validation and test features.
        datamanager.data = {
            'X_train': X,
            'Y_train': Y,
            'X_valid': X,
            'X_test': X,
        }
        # NOTE(review): 5000 entries here, but X is generated with 20
        # features -- confirm whether this length is intentional.
        datamanager.feat_type = ['numerical'] * 5000

        search_space = get_configuration_space(
            datamanager.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'])
        sampled_configuration = search_space.sample_configuration()

        # The evaluator gets its own copy of the data manager.
        datamanager_copy = copy.deepcopy(datamanager)
        holdout = HoldoutEvaluator(datamanager_copy, sampled_configuration)
        holdout.fit()