Example #1
import shutil

import pytest
from networkx import NetworkXError

from autogluon.core.constants import BINARY
from autogluon.tabular import TabularPredictor

# `load_data` is a helper from the surrounding test module (see the sketch after this example).


def test_advanced_functionality():
    fast_benchmark = True
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
                      'name': 'AdultIncomeBinaryClassification',
                      'problem_type': BINARY}
    label = 'class'
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    if fast_benchmark:  # subsample for fast_benchmark
        subsample_size = 100
        train_data = train_data.head(subsample_size)
        test_data = test_data.head(subsample_size)
    print(f"Evaluating Advanced Functionality on Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + 'advanced/' + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    predictor = TabularPredictor(label=label, path=savedir).fit(train_data)
    leaderboard = predictor.leaderboard(data=test_data)
    extra_metrics = ['accuracy', 'roc_auc', 'log_loss']
    leaderboard_extra = predictor.leaderboard(data=test_data, extra_info=True, extra_metrics=extra_metrics)
    assert set(predictor.get_model_names()) == set(leaderboard['model'])
    assert set(predictor.get_model_names()) == set(leaderboard_extra['model'])
    assert set(leaderboard_extra.columns).issuperset(set(leaderboard.columns))
    assert len(leaderboard) == len(leaderboard_extra)
    assert set(leaderboard_extra.columns).issuperset(set(extra_metrics))  # Assert that extra_metrics are present in output
    num_models = len(predictor.get_model_names())
    feature_importances = predictor.feature_importance(data=test_data)
    original_features = set(train_data.columns)
    original_features.remove(label)
    assert set(feature_importances.index) == original_features
    assert set(feature_importances.columns) == {'importance', 'stddev', 'p_value', 'n', 'p99_high', 'p99_low'}
    predictor.transform_features()
    predictor.transform_features(data=test_data)
    predictor.info()

    assert predictor.get_model_names_persisted() == []  # Assert that no models were persisted during training
    assert predictor.unpersist_models() == []  # Assert that no models were unpersisted

    persisted_models = predictor.persist_models(models='all', max_memory=None)
    assert set(predictor.get_model_names_persisted()) == set(persisted_models)  # Ensure all models are persisted
    assert predictor.persist_models(models='all', max_memory=None) == []  # Ensure that no additional models are persisted on repeated calls
    unpersisted_models = predictor.unpersist_models()
    assert set(unpersisted_models) == set(persisted_models)
    assert predictor.get_model_names_persisted() == []  # Assert that all models were unpersisted

    # Raise exception
    with pytest.raises(NetworkXError):
        predictor.persist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2'])

    assert predictor.get_model_names_persisted() == []

    assert predictor.unpersist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2']) == []

    predictor.persist_models(models='all', max_memory=None)
    predictor.save()  # Save predictor while models are persisted: Intended functionality is that they won't be persisted when loaded.
    predictor_loaded = TabularPredictor.load(predictor.path)  # Assert that predictor loading works
    leaderboard_loaded = predictor_loaded.leaderboard(data=test_data)
    assert len(leaderboard) == len(leaderboard_loaded)
    assert predictor_loaded.get_model_names_persisted() == []  # Assert that models are no longer persisted after loading the predictor

    assert predictor.get_model_full_dict() == dict()
    predictor.refit_full()
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2
    for model in predictor.get_model_names():
        predictor.predict(data=test_data, model=model)
    predictor.refit_full()  # Confirm that refit_full models aren't refit again.
    assert len(predictor.get_model_full_dict()) == num_models
    assert len(predictor.get_model_names()) == num_models * 2
    predictor.delete_models(models_to_keep=[])  # Test that the default dry-run doesn't delete models
    assert len(predictor.get_model_names()) == num_models * 2
    predictor.predict(data=test_data)
    predictor.delete_models(models_to_keep=[], dry_run=False)  # Test that dry_run=False actually deletes models
    assert len(predictor.get_model_names()) == 0
    assert len(predictor.leaderboard()) == 0
    assert len(predictor.leaderboard(extra_info=True)) == 0
    # predictor.predict should raise an exception after all models are deleted
    with pytest.raises(Exception):
        predictor.predict(data=test_data)
    print('Tabular Advanced Functionality Test Succeeded.')
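
The call to load_data above relies on a helper defined elsewhere in the test module. A minimal, hypothetical sketch of such a helper, assuming the zip archive unpacks into a folder named after the dataset and that plain pandas CSV loading is acceptable (the real helper may differ):

import os
import urllib.request
import zipfile

import pandas as pd


def load_data(directory_prefix, train_file, test_file, name, url):
    # Download and extract the benchmark archive once, then load the train/test CSVs.
    directory = os.path.join(directory_prefix, name)
    train_path = os.path.join(directory, train_file)
    test_path = os.path.join(directory, test_file)
    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        os.makedirs(directory_prefix, exist_ok=True)
        zip_path = os.path.join(directory_prefix, name + '.zip')
        urllib.request.urlretrieve(url, zip_path)  # fetch the archive from S3
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(directory_prefix)  # assumes the archive contains a `name`/ folder
        os.remove(zip_path)
    return pd.read_csv(train_path), pd.read_csv(test_path)
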
Example #2
import hashlib

import numpy as np

from autogluon.tabular import TabularPredictor

# `tests`, `make_dataset`, and `myfloor` are defined in the surrounding test module (see the sketch after this example).


def inner_test_tabular(testname):

    # Find the named test
    test = None
    for t in tests:
        if t['name'] == testname:
            test = t
    assert test is not None, f"Could not find test {testname}"

    # Build the dataset
    (dftrain, dftest) = make_dataset(request=test, seed=0)

    # Check the synthetic dataset itself hasn't changed.  We round it to 3 dp; otherwise tiny floating-point
    # differences between platforms can give a different hash even though the prediction scores stay the same.
    # Ultimately it doesn't matter how we do this, as long as the same dataset gives the same hash on
    # different Python versions and architectures.
    current_hash = hashlib.sha256(
        dftrain.round(decimals=3).values.tobytes()).hexdigest()[0:10]
    proposedconfig = "Proposed new config:\n"
    proposedconfig += f"'dataset_hash' : '{current_hash}',"
    assert current_hash == test[
        'dataset_hash'], f"Test '{testname}' input dataset has changed.  All scores will change.\n" + proposedconfig

    # Now run the Predictor 1 or more times with various parameters, and make sure we get
    # back the expected results.

    # Params can either be omitted, be a single run, or be a list of runs.
    if 'params' not in test:
        test['params'] = {'predict': {}, 'fit': {}}
    if not isinstance(test['params'], list):
        test['params'] = [test['params']]
    for params in test['params']:

        # Run this model and set of params
        predictor = TabularPredictor(label='label', **params['predict'])
        predictor.fit(dftrain, **params['fit'])
        leaderboard = predictor.leaderboard(dftest, silent=True)
        leaderboard = leaderboard.sort_values(
            by='model'
        )  # So we can pre-generate sample config in alphabetical order

        # Store the proposed new config based on the current run, in case the developer wants to keep these results (just cut and paste).
        proposedconfig = "Proposed new config:\n"
        proposedconfig += "'expected_score_range' : {\n"
        for model in leaderboard['model']:
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model']
                                                           == model][0]
            if np.isnan(leaderboard['score_test'][midx_in_leaderboard]):
                values = "np.nan, np.nan"
            else:
                if model in test['expected_score_range'] and not np.isnan(
                        test['expected_score_range'][model][1]):
                    currentprecision = test['expected_score_range'][model][1]
                else:
                    currentprecision = 0.01
                values = "{}, {}".format(
                    myfloor(leaderboard['score_test'][midx_in_leaderboard],
                            currentprecision), currentprecision)
            proposedconfig += f"    '{model}': ({values}),\n"
        proposedconfig += "},\n"

        # First validate the model list was as expected.
        assert set(leaderboard['model']) == set(
            test['expected_score_range'].keys()
        ), (f"Test '{testname}' params {params} got unexpected model list.\n" +
            proposedconfig)

        # Now validate the scores for each model were as expected.
        all_assertions_met = True
        currentconfig = "Existing config:\n"
        currentconfig += "'expected_score_range' : {\n"
        for model in sorted(test['expected_score_range']):
            midx_in_leaderboard = leaderboard.index.values[leaderboard['model']
                                                           == model][0]
            assert leaderboard['model'][midx_in_leaderboard] == model
            expectedrange = test['expected_score_range'][model][1]
            expectedmin = test['expected_score_range'][model][0]
            expectedmax = expectedmin + expectedrange

            if np.isnan(expectedmin):
                values = "np.nan, np.nan"
            else:
                values = "{}, {}".format(expectedmin, expectedrange)

            if ((
                (leaderboard['score_test'][midx_in_leaderboard] >= expectedmin)
                    and (leaderboard['score_test'][midx_in_leaderboard] <=
                         expectedmax)) or
                (np.isnan(leaderboard['score_test'][midx_in_leaderboard])
                 and np.isnan(expectedmin))):
                currentconfig += f"    '{model}': ({values}),\n"
            else:
                currentconfig += f"    '{model}': ({values}), # <--- not met, got {leaderboard['score_test'][midx_in_leaderboard]} \n"
                all_assertions_met = False
        currentconfig += "},\n"

        assert all_assertions_met, f"Test '{testname}', params {params} had unexpected scores:\n" + currentconfig + proposedconfig

        # Clean up this model created with specific params.
        predictor.delete_models(models_to_keep=[], dry_run=False)
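
Example #2 iterates over a module-level tests list and uses the make_dataset and myfloor helpers defined elsewhere. A hypothetical sketch of myfloor, assuming it floors a score down to a multiple of the requested precision, together with an illustrative (made-up) tests entry covering the fields the code reads:

import math

import numpy as np


def myfloor(x, base=0.01):
    # Floor x down to the nearest multiple of `base`, e.g. myfloor(0.8374, 0.01) -> 0.83 (up to float rounding).
    return math.floor(x / base) * base


# Illustrative shape of one entry in `tests` (names, hash, and scores are placeholders):
example_test = {
    'name': 'small_binary',
    'dataset_hash': '0123456789',
    'params': {'predict': {}, 'fit': {'hyperparameters': {'GBM': {}}}},
    'expected_score_range': {
        'LightGBM': (0.80, 0.01),                 # (expected minimum, allowed range above it)
        'WeightedEnsemble_L2': (np.nan, np.nan),  # np.nan means the model is expected to have a NaN test score
    },
}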