Example #1
import shutil
import warnings
from random import seed

import numpy as np
import pandas as pd
import pytest

from autogluon.core.constants import BINARY, MULTICLASS, REGRESSION
from autogluon.tabular import TabularPredictor

# NOTE: get_benchmark_sets() and load_data() are assumed to be local test-suite helpers
# that return the benchmark dataset definitions and load each train/test split.


def run_tabular_benchmarks(fast_benchmark, subsample_size, perf_threshold, seed_val, fit_args, dataset_indices=None, run_distill=False, crash_in_oof=False):
    print("Running fit with args:")
    print(fit_args)
    # Each train/test dataset must be located in a single directory with the given file names.
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    EPS = 1e-10

    # List containing dicts for each dataset to include in benchmark (try to order based on runtimes)
    datasets = get_benchmark_sets()
    if dataset_indices is not None: # only run some datasets
        datasets = [datasets[i] for i in dataset_indices]

    # Aggregate performance summaries obtained in previous benchmark run:
    prev_perf_vals = [dataset['performance_val'] for dataset in datasets]
    previous_avg_performance = np.mean(prev_perf_vals)
    previous_median_performance = np.median(prev_perf_vals)
    previous_worst_performance = np.max(prev_perf_vals)

    # Run benchmark:
    performance_vals = [0.0] * len(datasets) # performance obtained in this run
    directory_prefix = './datasets/'
    with warnings.catch_warnings(record=True) as caught_warnings:
        for idx in range(len(datasets)):
            dataset = datasets[idx]
            train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
            if seed_val is not None:
                seed(seed_val)
                np.random.seed(seed_val)
            print("Evaluating Benchmark Dataset %s (%d of %d)" % (dataset['name'], idx+1, len(datasets)))
            directory = directory_prefix + dataset['name'] + "/"
            savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label = dataset['label']
            y_test = test_data[label]
            test_data = test_data.drop(labels=[label], axis=1)
            if fast_benchmark:
                if subsample_size is None:
                    raise ValueError("fast_benchmark specified without subsample_size")
                if subsample_size < len(train_data):
                    # .sample instead of .head to increase diversity and test cases where data index is not monotonically increasing.
                    train_data = train_data.sample(n=subsample_size, random_state=seed_val)  # subsample for fast_benchmark
            predictor = TabularPredictor(label=label, path=savedir).fit(train_data, **fit_args)
            results = predictor.fit_summary(verbosity=4)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = TabularPredictor.load(savedir)  # Test loading previously-trained predictor from file
            y_pred_empty = predictor.predict(test_data[0:0])
            assert len(y_pred_empty) == 0
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy']  # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2']  # unexplained variance score.
            performance_vals[idx] = perf
            print("Performance on dataset %s: %s   (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
            if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
                warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                              (dataset['name'], performance_vals[idx]/(EPS+dataset['performance_val'])))
            if predictor._trainer.bagged_mode and not crash_in_oof:
                # TODO: Test index alignment with original training data (first handle duplicated rows / dropped rows edge cases)
                y_pred_oof = predictor.get_oof_pred()
                y_pred_proba_oof = predictor.get_oof_pred_proba(as_multiclass=False)
                y_pred_oof_transformed = predictor.get_oof_pred(transformed=True)
                y_pred_proba_oof_transformed = predictor.get_oof_pred_proba(as_multiclass=False, transformed=True)

                # Assert expected type output
                assert isinstance(y_pred_oof, pd.Series)
                assert isinstance(y_pred_oof_transformed, pd.Series)
                if predictor.problem_type == MULTICLASS:
                    assert isinstance(y_pred_proba_oof, pd.DataFrame)
                    assert isinstance(y_pred_proba_oof_transformed, pd.DataFrame)
                else:
                    if predictor.problem_type == BINARY:
                        assert isinstance(predictor.get_oof_pred_proba(), pd.DataFrame)
                    assert isinstance(y_pred_proba_oof, pd.Series)
                    assert isinstance(y_pred_proba_oof_transformed, pd.Series)

                assert y_pred_oof_transformed.equals(predictor.transform_labels(y_pred_oof, proba=False))

                # Test that transform_labels round-trips (converting back and forth reproduces the original), and that the OOF 'transformed' parameter works properly.
                y_pred_proba_oof_inverse = predictor.transform_labels(y_pred_proba_oof, proba=True)
                y_pred_proba_oof_inverse_inverse = predictor.transform_labels(y_pred_proba_oof_inverse, proba=True, inverse=True)
                y_pred_oof_inverse = predictor.transform_labels(y_pred_oof)
                y_pred_oof_inverse_inverse = predictor.transform_labels(y_pred_oof_inverse, inverse=True)

                if isinstance(y_pred_proba_oof_transformed, pd.DataFrame):
                    pd.testing.assert_frame_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_frame_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                else:
                    pd.testing.assert_series_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_series_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                pd.testing.assert_series_equal(y_pred_oof_transformed, y_pred_oof_inverse)
                pd.testing.assert_series_equal(y_pred_oof, y_pred_oof_inverse_inverse)

                # Test that index of both the internal training data and the oof outputs are consistent in their index values.
                X_internal, y_internal = predictor.load_data_internal()
                y_internal_index = list(y_internal.index)
                assert list(X_internal.index) == y_internal_index
                assert list(y_pred_oof.index) == y_internal_index
                assert list(y_pred_proba_oof.index) == y_internal_index
                assert list(y_pred_oof_transformed.index) == y_internal_index
                assert list(y_pred_proba_oof_transformed.index) == y_internal_index
            else:
                # OOF predictions are only available in bagged mode; these calls should raise:
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred()
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred_proba()
            if run_distill:
                predictor.distill(time_limit=60, augment_args={'size_factor':0.5})

    # Summarize:
    avg_perf = np.mean(performance_vals)
    median_perf = np.median(performance_vals)
    worst_perf = np.max(performance_vals)
    for idx in range(len(datasets)):
        print("Performance on dataset %s: %s   (previous perf=%s)" % (datasets[idx]['name'], performance_vals[idx], datasets[idx]['performance_val']))

    print("Average performance: %s" % avg_perf)
    print("Median performance: %s" % median_perf)
    print("Worst performance: %s" % worst_perf)

    if not fast_benchmark:
        if avg_perf > previous_avg_performance * perf_threshold:
            warnings.warn("Average Performance is %s times worse than previously." % (avg_perf/(EPS+previous_avg_performance)))
        if median_perf > previous_median_performance * perf_threshold:
            warnings.warn("Median Performance is %s times worse than previously." % (median_perf/(EPS+previous_median_performance)))
        if worst_perf > previous_worst_performance * perf_threshold:
            warnings.warn("Worst Performance is %s times worse than previously." % (worst_perf/(EPS+previous_worst_performance)))

    print("Ran fit with args:")
    print(fit_args)
    # List all warnings again to make sure they are seen:
    print("\n\n WARNINGS:")
    for w in caught_warnings:
        warnings.warn(w.message)
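
A hedged usage sketch (the fit_args, threshold, and dataset index below are illustrative placeholders, not the test suite's defaults):

if __name__ == '__main__':
    illustrative_fit_args = {'time_limit': 120, 'verbosity': 2}  # hypothetical fit() arguments
    run_tabular_benchmarks(
        fast_benchmark=True,      # subsample each dataset for a quick smoke test
        subsample_size=1000,      # required whenever fast_benchmark=True
        perf_threshold=1.1,       # regression-warning ratio (checked only when fast_benchmark=False)
        seed_val=0,
        fit_args=illustrative_fit_args,
        dataset_indices=[0],      # run only the first benchmark dataset
    )
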
Example #2
import copy
import shutil

import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

from autogluon.tabular import TabularPredictor

# NOTE: return_X_y is assumed to be a project-local helper that splits a DataFrame
# into features (X), target (y), and additional metadata.


def autogluon(df, task, timelife):
    pd.options.mode.chained_assignment = None
    df_new = copy.copy(df)
    X, y, _ = return_X_y(df_new)

    if isinstance(y, pd.Series):
        y = y.to_frame()

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=1)

    if isinstance(y_train, pd.Series):
        y_train = y_train.to_frame()
    target = y_train.columns[0]

    if isinstance(y_test, pd.Series):
        y_test = y_test.to_frame()

    X_train[target] = y_train
    train = X_train
    test = X_test

    if task == 'classification':
        if len(y[y.columns[0]].unique()) > 2:
            pt = 'multiclass'
            f1 = lambda y_test, y_pred: f1_score(
                y_test, y_pred, average='weighted')
        else:
            pt = 'binary'
            f1 = lambda y_test, y_pred: f1_score(y_test, y_pred)
    else:
        pt = 'regression'

    # TEMPORARY: models are written to AutoGluon's default './AutogluonModels' directory
    # and removed below (a custom location, e.g. '/home/riccardo/.local/share/Trash',
    # could instead be passed via TabularPredictor(..., path=...)).
    predictor = TabularPredictor(label=target, problem_type=pt).fit(
        train_data=train,
        time_limit=timelife * 60,
        presets=['optimize_for_deployment'])
    results = predictor.fit_summary()

    y_pred = predictor.predict(test)

    pipelines = predictor.leaderboard(df, silent=True)  # per-model leaderboard returned to the caller

    res = predictor.evaluate_predictions(y_true=y_test.squeeze(),
                                         y_pred=y_pred,
                                         auxiliary_metrics=True)

    shutil.rmtree('./AutogluonModels')

    if task == 'classification':
        return (res['accuracy'], f1(y_test, y_pred), pipelines)
    else:
        return (res['root_mean_squared_error'], res['r2'], pipelines)
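
A possible call, assuming df is a DataFrame whose target column return_X_y knows how to extract (the file name and time budget below are illustrative):

df = pd.read_csv('my_dataset.csv')  # hypothetical input file
score, f1_val, leaderboard = autogluon(df, task='classification', timelife=5)  # 5-minute budget
print(score, f1_val)
print(leaderboard[['model', 'score_val']])
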
""" Example script for predicting columns of tables, demonstrating simple use-case """

from autogluon.tabular import TabularDataset, TabularPredictor


# Training time:
train_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')  # can be a local CSV file as well; returns a pandas DataFrame
train_data = train_data.head(500)  # subsample for faster demo
print(train_data.head())
label = 'class'  # specifies which column we want to predict
save_path = 'ag_models/'  # where to save trained models

predictor = TabularPredictor(label=label, path=save_path).fit(train_data)
# NOTE: The default settings above are intended to ensure reasonable runtime at the cost of accuracy. To maximize predictive accuracy, do this instead:
# predictor = TabularPredictor(label=label, eval_metric=YOUR_METRIC_NAME, path=save_path).fit(train_data, presets='best_quality')
results = predictor.fit_summary()

# Inference time:
test_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')  # another pandas DataFrame
y_test = test_data[label]
test_data = test_data.drop(labels=[label], axis=1)  # delete labels from test data since we wouldn't have them in practice
print(test_data.head())

predictor = TabularPredictor.load(save_path)  # unnecessary here; we reload the predictor just to demonstrate loading a previously trained predictor from disk
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
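
Beyond evaluate_predictions, the trained predictor exposes a per-model leaderboard and class probabilities; a brief sketch reusing the predictor and test_data from above:

leaderboard = predictor.leaderboard(silent=True)  # validation scores for every model fit() trained
print(leaderboard[['model', 'score_val', 'fit_time']])

y_pred_proba = predictor.predict_proba(test_data)  # per-class probabilities instead of hard labels
print(y_pred_proba.head())
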
Example #4
import autogluon.core as ag
from autogluon.tabular import TabularDataset, TabularPredictor

# The original snippet begins mid-dictionary; the data loading, label, save_path, and
# the enclosing hyperparameters dict below are an assumed reconstruction.
train_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')
train_data = train_data.head(500)  # subsample for a faster demo
label = 'class'
save_path = 'ag_hpo_models/'

hyperparameters = {
    'XGB': {
        'n_estimators': 1000,
        'learning_rate': ag.Real(0.01, 0.1, log=True)
    }
}

predictor = TabularPredictor(label=label, path=save_path).fit(
    train_data,
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs='auto',
    time_limit=60)

results = predictor.fit_summary()  # display detailed summary of fit() process
print(results)

# Inference time:
test_data = TabularDataset(
    'https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv'
)  # another Pandas DataFrame
print(test_data.head())

perf = predictor.evaluate(
    test_data
)  # shorthand way to evaluate our predictor if test-labels are available

# Otherwise we obtain predicted class probabilities and can evaluate them later:
y_pred = predictor.predict_proba(test_data)
perf = predictor.evaluate_predictions(y_true=test_data[label],
                                      y_pred=y_pred,
                                      auxiliary_metrics=True)
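
The same search-space API extends to other model families; a hypothetical expansion of the hyperparameters dictionary above:

hyperparameters = {
    'GBM': {
        'num_boost_round': 100,
        'num_leaves': ag.Int(lower=26, upper=66),        # integer search space
    },
    'XGB': {
        'n_estimators': 1000,
        'learning_rate': ag.Real(0.01, 0.1, log=True),   # log-scaled continuous space
    },
}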