Example #1
    def serialize(self, path: Path) -> None:
        # call Predictor.serialize() in order to serialize the class name

        super().serialize(path)

        # serialize self.ag_model
        # move autogluon model to where we want to do the serialization
        ag_path = self.ag_model.path
        shutil.move(ag_path, path)
        ag_path = Path(ag_path)
        print(f"Autogluon files moved from {ag_path} to {path}.")
        # reset the path stored in the tabular model.
        AutogluonTabularPredictor.load(path / Path(ag_path.name))
        # serialize all remaining constructor parameters
        with (path / "parameters.json").open("w") as fp:
            parameters = dict(
                batch_size=self.batch_size,
                prediction_length=self.prediction_length,
                freq=self.freq,
                dtype=self.dtype,
                time_features=self.time_features,
                lag_indices=self.lag_indices,
                ag_path=path / Path(ag_path.name),
            )
            print(dump_json(parameters), file=fp)
Example #2
def estimate_importance(dataset, model_name):
    if os.path.exists(
            os.path.join('feature_importance', dataset, model_name,
                         'importance.csv')):
        print(f'Found {dataset}, {model_name}')
        return
    model_remote_path = stat_df.loc[model_name, dataset]
    postfix = '/test_score.json'

    remote_dir_name = model_remote_path[:-len(postfix)]

    def downloadDirectoryFroms3(bucketName, remoteDirectoryName,
                                local_dir_path):
        s3_resource = boto3.resource('s3')
        bucket = s3_resource.Bucket(bucketName)
        for obj in bucket.objects.filter(Prefix=remoteDirectoryName):
            print(obj.key)
            download_path = os.path.join(local_dir_path, obj.key)
            if not os.path.exists(os.path.dirname(download_path)):
                os.makedirs(os.path.dirname(download_path), exist_ok=True)
            bucket.download_file(obj.key, download_path)

    # NOTE: `download_path` is assumed to be a module-level download root defined
    # outside this snippet (distinct from the local variable of the same name
    # inside downloadDirectoryFroms3).
    local_dir_name = os.path.join(download_path, remote_dir_name)
    if not os.path.exists(local_dir_name):
        downloadDirectoryFroms3('automl-mm-bench', remote_dir_name,
                                download_path)
    test_dataset = dataset_registry.create(dataset, 'test')
    if model_name == MULTIMODAL_TEXT_MODEL_NAME:
        predictor = MultiModalTextModel.load(
            os.path.join(local_dir_name, 'saved_model'))
    elif model_name == TABULAR_MODEL_NAME:
        predictor = TabularPredictor.load(os.path.join(local_dir_name))
    elif model_name == STACK_ENSEMBLE_MODEL_NAME:
        predictor = TabularPredictor.load(os.path.join(local_dir_name))
    else:
        raise NotImplementedError
    sample_size = min(len(test_dataset.data), 1000)
    if model_name == TABULAR_MODEL_NAME:
        importance_df = predictor.feature_importance(
            test_dataset.data[test_dataset.feature_columns +
                              test_dataset.label_columns],
            subsample_size=sample_size)
    else:
        importance_df = compute_permutation_feature_importance(
            test_dataset.data[test_dataset.feature_columns],
            test_dataset.data[test_dataset.label_columns[0]],
            predict_func=predictor.predict,
            eval_metric=get_metric(test_dataset.metric),
            subsample_size=sample_size,
            num_shuffle_sets=3)
    os.makedirs(os.path.join('feature_importance', dataset, model_name),
                exist_ok=True)
    importance_df.to_csv(
        os.path.join('feature_importance', dataset, model_name,
                     'importance.csv'))
    print(importance_df)
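A hedged usage sketch: the dataset keys below are placeholders, and TABULAR_MODEL_NAME plus the stat_df lookup table come from the surrounding benchmark module, which is not shown here.

# Illustrative only: cache feature-importance results for a few registry datasets
# under feature_importance/<dataset>/<model_name>/importance.csv.
for dataset in ['dataset_a', 'dataset_b']:
    estimate_importance(dataset, TABULAR_MODEL_NAME)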
Example #3
def train_model(df_train: pd.DataFrame,
                df_test: pd.DataFrame,
                label: str,
                verbosity: int = 0,
                random_state: int = 0) -> TabularPredictor:
    """
    Train an autogluon model for df_train, df_test. Specify the label column.
    Optionally, you can set verbosity to control how much output AutoGluon
    produces during training.

    The function caches models that have been trained on the same data by
    computing the hash of df_train and comparing that to existing models.

    Returns the predictor object.

    TODO: Optimize this for experiments: use k-fold cross-validation instead
    of a train-test split and an AG preset that opts for the highest-quality
    model, with no (or a very high) time_limit.
    """
    logger = logging.getLogger('pfd')
    d = 'agModels'  # folder to store trained models
    checksum = calculate_model_hash(df_train, label, random_state)
    model_path = f'{d}/{checksum}'
    logger.info(f'Calculated a checksum of {checksum}.')
    try:
        predictor = TabularPredictor.load(model_path)
    except FileNotFoundError:
        logger.info("Didn't find a model to load from the cache.")
        p = TabularPredictor(label=label, path=model_path)
        predictor = p.fit(train_data=df_train,
                          tuning_data=df_test,
                          time_limit=20,
                          verbosity=verbosity,
                          presets='medium_quality_faster_train')
    return predictor
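A hedged usage sketch; the CSV file names and label column are placeholders, and calculate_model_hash comes from the surrounding project.

import pandas as pd

# Placeholder files; any train/test split that shares a label column works.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
predictor = train_model(df_train, df_test, label='class', verbosity=2)
print(predictor.leaderboard(df_test, silent=True))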
Example #4
def model_fn(model_dir):
    """
    Load the AutoGluon model. Called once when the hosting service starts.
    :param model_dir: The directory where model files are stored.
    :return: a model (in this case an AutoGluon TabularPredictor)
    """
    net = TabularPredictor.load(model_dir)
    return net
Example #5
def predict(args):
    if args.use_tabular:
        predictor = TabularPredictor.load(args.model_dir)
    else:
        predictor = TextPredictor.load(args.model_dir)
    test_prediction = predictor.predict(args.test_file, as_pandas=True)
    if args.exp_dir is None:
        args.exp_dir = '.'
    test_prediction.to_csv(os.path.join(args.exp_dir, 'test_prediction.csv'))
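A hedged sketch of the argument parser this predict() helper expects; the original parser is not part of the snippet, so the flag names below are inferred from the attributes used above.

import argparse

parser = argparse.ArgumentParser(description='Run inference with a saved AutoGluon predictor.')
parser.add_argument('--model_dir', required=True, help='Directory containing the saved predictor')
parser.add_argument('--test_file', required=True, help='CSV file with the rows to score')
parser.add_argument('--exp_dir', default=None, help='Where to write test_prediction.csv')
parser.add_argument('--use_tabular', action='store_true', help='Load a TabularPredictor instead of a TextPredictor')
predict(parser.parse_args())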
Example #6
def model_fn(model_dir):
    """Load the AutoGluon model. Called when the hosting service starts.

    :param model_dir: The directory where model files are stored.

    :return: AutoGluon model.
    """
    model = TabularPredictor.load(model_dir)
    globals()["column_names"] = model.feature_metadata_in.get_features()
    return model
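A hedged sketch of a companion transform_fn, the hosting-side handler that typically consumes the column_names global set above; the CSV request format is an assumption, not part of the original snippet.

import json
from io import StringIO

import pandas as pd


def transform_fn(model, request_body, input_content_type, output_content_type="application/json"):
    """Parse a CSV request body, restore the training-time column order, and predict."""
    data = pd.read_csv(StringIO(request_body), header=None)
    data.columns = column_names  # set by model_fn above
    prediction = model.predict(data)
    return json.dumps(prediction.tolist()), output_content_type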
Example #7
def model_fn(model_dir):
    """
    Load the gluon model. Called once when hosting service starts.
    :param: model_dir The directory where model files are stored.
    :return: a model (in this case a Gluon network) and the column info.
    """
    print(f'Loading model from {model_dir} with contents {os.listdir(model_dir)}')

    net = TabularPredictor.load(model_dir, verbosity=True)
    with open(f'{model_dir}/code/columns.pkl', 'rb') as f:
        column_dict = pickle.load(f)
    return net, column_dict
Example #8
    @classmethod
    def deserialize(
        cls,
        path: Path,
        # TODO this is temporary, we should make the callable object serializable in the first place
        scaling: Callable[[pd.Series], Tuple[pd.Series,
                                             float]] = mean_abs_scaling,
        **kwargs,
    ) -> "Predictor":
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())
        loaded_ag_path = parameters["ag_path"]
        del parameters["ag_path"]
        # load tabular model
        ag_model = AutogluonTabularPredictor.load(loaded_ag_path)

        return TabularPredictor(ag_model=ag_model,
                                scaling=scaling,
                                **parameters)
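A hedged round-trip sketch pairing this deserialize() with the serialize() method from Example #1; the directory path is a placeholder and predictor stands for a trained instance of the GluonTS TabularPredictor wrapper (not autogluon.tabular.TabularPredictor).

from pathlib import Path

model_dir = Path("serialized_predictor")  # placeholder location
model_dir.mkdir(parents=True, exist_ok=True)
predictor.serialize(model_dir)            # Example #1: writes parameters.json and moves the AG files
restored = TabularPredictor.deserialize(model_dir)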
Example #9
def run_tabular_benchmarks(fast_benchmark, subsample_size, perf_threshold, seed_val, fit_args, dataset_indices=None, run_distill=False, crash_in_oof=False):
    print("Running fit with args:")
    print(fit_args)
    # Each train/test dataset must be located in a single directory with the given names.
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    EPS = 1e-10

    # List containing dicts for each dataset to include in benchmark (try to order based on runtimes)
    datasets = get_benchmark_sets()
    if dataset_indices is not None: # only run some datasets
        datasets = [datasets[i] for i in dataset_indices]

    # Aggregate performance summaries obtained in previous benchmark run:
    prev_perf_vals = [dataset['performance_val'] for dataset in datasets]
    previous_avg_performance = np.mean(prev_perf_vals)
    previous_median_performance = np.median(prev_perf_vals)
    previous_worst_performance = np.max(prev_perf_vals)

    # Run benchmark:
    performance_vals = [0.0] * len(datasets) # performance obtained in this run
    directory_prefix = './datasets/'
    with warnings.catch_warnings(record=True) as caught_warnings:
        for idx in range(len(datasets)):
            dataset = datasets[idx]
            train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
            if seed_val is not None:
                seed(seed_val)
                np.random.seed(seed_val)
            print("Evaluating Benchmark Dataset %s (%d of %d)" % (dataset['name'], idx+1, len(datasets)))
            directory = directory_prefix + dataset['name'] + "/"
            savedir = directory + 'AutogluonOutput/'
            shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
            label = dataset['label']
            y_test = test_data[label]
            test_data = test_data.drop(labels=[label], axis=1)
            if fast_benchmark:
                if subsample_size is None:
                    raise ValueError("fast_benchmark specified without subsample_size")
                if subsample_size < len(train_data):
                    # .sample instead of .head to increase diversity and test cases where data index is not monotonically increasing.
                    train_data = train_data.sample(n=subsample_size, random_state=seed_val)  # subsample for fast_benchmark
            predictor = TabularPredictor(label=label, path=savedir).fit(train_data, **fit_args)
            results = predictor.fit_summary(verbosity=4)
            if predictor.problem_type != dataset['problem_type']:
                warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
            predictor = TabularPredictor.load(savedir)  # Test loading previously-trained predictor from file
            y_pred_empty = predictor.predict(test_data[0:0])
            assert len(y_pred_empty) == 0
            y_pred = predictor.predict(test_data)
            perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
            if dataset['problem_type'] != REGRESSION:
                perf = 1.0 - perf_dict['accuracy']  # convert accuracy to error-rate
            else:
                perf = 1.0 - perf_dict['r2']  # unexplained variance score.
            performance_vals[idx] = perf
            print("Performance on dataset %s: %s   (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
            if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
                warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                              (dataset['name'], performance_vals[idx]/(EPS+dataset['performance_val'])))
            if predictor._trainer.bagged_mode and not crash_in_oof:
                # TODO: Test index alignment with original training data (first handle duplicated rows / dropped rows edge cases)
                y_pred_oof = predictor.get_oof_pred()
                y_pred_proba_oof = predictor.get_oof_pred_proba(as_multiclass=False)
                y_pred_oof_transformed = predictor.get_oof_pred(transformed=True)
                y_pred_proba_oof_transformed = predictor.get_oof_pred_proba(as_multiclass=False, transformed=True)

                # Assert expected type output
                assert isinstance(y_pred_oof, pd.Series)
                assert isinstance(y_pred_oof_transformed, pd.Series)
                if predictor.problem_type == MULTICLASS:
                    assert isinstance(y_pred_proba_oof, pd.DataFrame)
                    assert isinstance(y_pred_proba_oof_transformed, pd.DataFrame)
                else:
                    if predictor.problem_type == BINARY:
                        assert isinstance(predictor.get_oof_pred_proba(), pd.DataFrame)
                    assert isinstance(y_pred_proba_oof, pd.Series)
                    assert isinstance(y_pred_proba_oof_transformed, pd.Series)

                assert y_pred_oof_transformed.equals(predictor.transform_labels(y_pred_oof, proba=False))

                # Test that the transform_labels method is capable of reproducing the same output when converting back and forth, and test that oof 'transform' parameter works properly.
                y_pred_proba_oof_inverse = predictor.transform_labels(y_pred_proba_oof, proba=True)
                y_pred_proba_oof_inverse_inverse = predictor.transform_labels(y_pred_proba_oof_inverse, proba=True, inverse=True)
                y_pred_oof_inverse = predictor.transform_labels(y_pred_oof)
                y_pred_oof_inverse_inverse = predictor.transform_labels(y_pred_oof_inverse, inverse=True)

                if isinstance(y_pred_proba_oof_transformed, pd.DataFrame):
                    pd.testing.assert_frame_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_frame_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                else:
                    pd.testing.assert_series_equal(y_pred_proba_oof_transformed, y_pred_proba_oof_inverse)
                    pd.testing.assert_series_equal(y_pred_proba_oof, y_pred_proba_oof_inverse_inverse)
                pd.testing.assert_series_equal(y_pred_oof_transformed, y_pred_oof_inverse)
                pd.testing.assert_series_equal(y_pred_oof, y_pred_oof_inverse_inverse)

                # Test that index of both the internal training data and the oof outputs are consistent in their index values.
                X_internal, y_internal = predictor.load_data_internal()
                y_internal_index = list(y_internal.index)
                assert list(X_internal.index) == y_internal_index
                assert list(y_pred_oof.index) == y_internal_index
                assert list(y_pred_proba_oof.index) == y_internal_index
                assert list(y_pred_oof_transformed.index) == y_internal_index
                assert list(y_pred_proba_oof_transformed.index) == y_internal_index
            else:
                # Raise exception
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred()
                with pytest.raises(AssertionError):
                    predictor.get_oof_pred_proba()
            if run_distill:
                predictor.distill(time_limit=60, augment_args={'size_factor':0.5})

    # Summarize:
    avg_perf = np.mean(performance_vals)
    median_perf = np.median(performance_vals)
    worst_perf = np.max(performance_vals)
    for idx in range(len(datasets)):
        print("Performance on dataset %s: %s   (previous perf=%s)" % (datasets[idx]['name'], performance_vals[idx], datasets[idx]['performance_val']))

    print("Average performance: %s" % avg_perf)
    print("Median performance: %s" % median_perf)
    print("Worst performance: %s" % worst_perf)

    if not fast_benchmark:
        if avg_perf > previous_avg_performance * perf_threshold:
            warnings.warn("Average Performance is %s times worse than previously." % (avg_perf/(EPS+previous_avg_performance)))
        if median_perf > previous_median_performance * perf_threshold:
            warnings.warn("Median Performance is %s times worse than previously." % (median_perf/(EPS+previous_median_performance)))
        if worst_perf > previous_worst_performance * perf_threshold:
            warnings.warn("Worst Performance is %s times worse than previously." % (worst_perf/(EPS+previous_worst_performance)))

    print("Ran fit with args:")
    print(fit_args)
    # List all warnings again to make sure they are seen:
    print("\n\n WARNINGS:")
    for w in caught_warnings:
        warnings.warn(w.message)
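A hedged invocation sketch; the fit arguments and dataset indices below are illustrative and not the settings used by the original test suite.

# Quick smoke run over the first two benchmark datasets with subsampled training data.
fit_args = {'time_limit': 60, 'verbosity': 2}
run_tabular_benchmarks(
    fast_benchmark=True,
    subsample_size=1000,
    perf_threshold=1.1,
    seed_val=0,
    fit_args=fit_args,
    dataset_indices=[0, 1],
)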
Example #10
def model_fn(model_dir):
    """loads model from previously saved artifact"""
    model = TabularPredictor.load(model_dir)
    globals()["column_names"] = model.feature_metadata_in.get_features()

    return model
""" Example script for predicting columns of tables, demonstrating simple use-case """

from autogluon.tabular import TabularDataset, TabularPredictor


# Training time:
train_data = TabularDataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')  # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(500)  # subsample for faster demo
print(train_data.head())
label = 'class'  # specifies which column we want to predict
save_path = 'ag_models/'  # where to save trained models

predictor = TabularPredictor(label=label, path=save_path).fit(train_data)
# NOTE: Default settings above are intended to ensure reasonable runtime at the cost of accuracy. To maximize predictive accuracy, do this instead:
# predictor = TabularPredictor(label=label, eval_metric=YOUR_METRIC_NAME, path=save_path).fit(train_data, presets='best_quality')
results = predictor.fit_summary()

# Inference time:
test_data = TabularDataset(file_path='https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')  # another Pandas DataFrame
y_test = test_data[label]
test_data = test_data.drop(labels=[label], axis=1)  # delete labels from test data since we wouldn't have them in practice
print(test_data.head())

predictor = TabularPredictor.load(save_path)  # Unnecessary, we reload predictor just to demonstrate how to load previously-trained predictor from file
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
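A hedged extension of the script above: comparing the individual trained models on the held-out data. leaderboard() is part of the TabularPredictor API; the join simply restores the label column that was dropped earlier.

leaderboard = predictor.leaderboard(test_data.join(y_test), silent=True)
print(leaderboard[['model', 'score_test', 'score_val', 'fit_time']])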
Example #12
import os
import autogluon as ag
from autogluon.tabular import TabularDataset, TabularPredictor
import argparse

parser = argparse.ArgumentParser(
    'Pick model that has top-1 performance in the '
    'validation data.')
parser.add_argument('--dir',
                    help='Directory path of the base folder',
                    default='model_results/ag_op_new_split1',
                    required=True)
args = parser.parse_args()

model_candidate_list = ['NeuralNetMXNet', 'LightGBMLarge', 'LightGBM']

for device_type in [
        'gcv_graviton2_csv', 'gcv_rasp4b_csv', 'gcv_skylake_csv', 'gcv_t4_csv',
        'gcv_v100_csv'
]:
    for network_type in os.listdir(os.path.join(args.dir, device_type)):
        ag_model_path = os.path.join(args.dir, device_type, network_type,
                                     'ag_model')
        predictor = TabularPredictor.load(ag_model_path)
        leaderboard = predictor.leaderboard(silent=True)
        leaderboard = leaderboard.set_index('model')
        leaderboard = leaderboard.loc[model_candidate_list]
        max_idx = leaderboard['score_val'].argmax()
        max_model_name = leaderboard.index[max_idx]
        print(device_type, max_model_name)
Example #13
df = pd.DataFrame(mfcc_vad.numpy())
cols_mfcc = df.columns
df["t"] = t_mfcc
df["vad"] = vad_mask.astype(bool)
COL_LABEL = "label"
df[COL_LABEL] = eval_labels(t_mfcc).astype(int)

# %%
# Train AutoGLUON
train = False
if train:
    df_train = df[df["vad"] & (df[COL_LABEL] != LABEL_NONE_ID)]
    predictor = TabularPredictor(label=COL_LABEL).fit(train_data=df_train)
    print("[green]Finished training[/green]")
else:
    predictor = TabularPredictor.load("AutogluonModels/ag-20211002_203405/")
    print("[green]Loaded pre-trained model[/green]")

# predictions = predictor.predict(TEST_DATA.csv)

# %%
predictions = predictor.predict(df)

# %%
# TODO
# Set label = none where VAD=0
# Create training set, removing "none"
# Predict the whole audio and check results

# %%
# Plot waveform, features and labels
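A hedged sketch for the first TODO item above; LABEL_NONE_ID and the vad column come from the surrounding notebook, and the masking logic is an assumption about the intended post-processing.

# %%
# Override the model output with the "none" label wherever VAD reports silence.
predictions_clean = predictions.copy()
predictions_clean[~df["vad"]] = LABEL_NONE_ID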
Example #14
 if args.use_op_split:
     if exp_name in ['dense_pack.x86']:
         continue
 if not os.path.isdir(os.path.join(args.dir_path, dir_name, exp_name)):
     continue
 if args.model_type == 'nn':
     model = NNRanker.load(
         os.path.join(args.dir_path, dir_name, exp_name))
 elif args.model_type == 'cat_regression':
     model = CatRegressor.load(
         os.path.join(args.dir_path, dir_name, exp_name))
 elif args.model_type == 'cat_ranking':
     model = CatRanker.load(
         os.path.join(args.dir_path, dir_name, exp_name))
 elif 'ag' in args.model_type:
     model = TabularPredictor.load(
         os.path.join(args.dir_path, dir_name, exp_name, 'ag_model'))
 else:
     raise NotImplementedError
 if args.use_op_split:
     data_prefix = os.path.join('split_tuning_dataset_op', dir_name,
                                exp_name)
 else:
     data_prefix = os.path.join('split_tuning_dataset', dir_name,
                                exp_name)
 test_df = read_pd(data_prefix + '.test.pq')
 with open(data_prefix + '.used_key.json', 'r') as in_f:
     used_key = json.load(in_f)
 test_df = test_df[used_key]
 group_indices = get_group_indices(test_df)
 if args.eval_correlation:
     if 'ag' in args.model_type:
Example #15
***
'''
st.write("※実行中の計算内容は右下の[Manage app]ボタンをクリックすることで確認できます")
st.write("(計算時間:サンプルデータを用いた場合で約5分です)")
run_pred = st.checkbox("AutoML/AutoGluonの実行")

if run_pred == True:
    save_path = 'agModels-predictClass'  # specifies folder to store trained models
    predictor = TabularPredictor(label=label,
                                 path=save_path).fit(df_train,
                                                     presets='best_quality')
    y_test = df_test[label]  # values to predict
    test_data_nolab = df_test.drop(
        columns=[label])  # delete label column to prove we're not cheating
    predictor = TabularPredictor.load(
        save_path
    )  # unnecessary, just demonstrates how to load previously-trained predictor from file
    y_pred = predictor.predict(test_data_nolab)
    perf = predictor.evaluate_predictions(y_true=y_test,
                                          y_pred=y_pred,
                                          auxiliary_metrics=True)
    leaderboard = predictor.leaderboard(df_test, silent=True)
    st.dataframe(leaderboard)
    y_predproba = predictor.predict_proba(df_pred)

    # Enter text for testing
    s = 'pd.DataFrame'
    sample_dtypes = {
        'list': [1, 'a', [2, 'c'], {
            'b': 2
        }],
Example #16
def test_advanced_functionality():
    fast_benchmark = True
    dataset = {'url': 'https://autogluon.s3.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
                      'name': 'AdultIncomeBinaryClassification',
                      'problem_type': BINARY}
    label = 'class'
    directory_prefix = './datasets/'
    train_file = 'train_data.csv'
    test_file = 'test_data.csv'
    train_data, test_data = load_data(directory_prefix=directory_prefix, train_file=train_file, test_file=test_file, name=dataset['name'], url=dataset['url'])
    if fast_benchmark:  # subsample for fast_benchmark
        subsample_size = 100
        train_data = train_data.head(subsample_size)
        test_data = test_data.head(subsample_size)
    print(f"Evaluating Advanced Functionality on Benchmark Dataset {dataset['name']}")
    directory = directory_prefix + 'advanced/' + dataset['name'] + "/"
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
    predictor = TabularPredictor(label=label, path=savedir).fit(train_data)
    leaderboard = predictor.leaderboard(data=test_data)
    extra_metrics = ['accuracy', 'roc_auc', 'log_loss']
    leaderboard_extra = predictor.leaderboard(data=test_data, extra_info=True, extra_metrics=extra_metrics)
    assert set(predictor.get_model_names()) == set(leaderboard['model'])
    assert set(predictor.get_model_names()) == set(leaderboard_extra['model'])
    assert set(leaderboard_extra.columns).issuperset(set(leaderboard.columns))
    assert len(leaderboard) == len(leaderboard_extra)
    assert set(leaderboard_extra.columns).issuperset(set(extra_metrics))  # Assert that extra_metrics are present in output
    num_models = len(predictor.get_model_names())
    feature_importances = predictor.feature_importance(data=test_data)
    original_features = set(train_data.columns)
    original_features.remove(label)
    assert set(feature_importances.index) == original_features
    assert set(feature_importances.columns) == {'importance', 'stddev', 'p_value', 'n', 'p99_high', 'p99_low'}
    predictor.transform_features()
    predictor.transform_features(data=test_data)
    predictor.info()

    assert predictor.get_model_names_persisted() == []  # Assert that no models were persisted during training
    assert predictor.unpersist_models() == []  # Assert that no models were unpersisted

    persisted_models = predictor.persist_models(models='all', max_memory=None)
    assert set(predictor.get_model_names_persisted()) == set(persisted_models)  # Ensure all models are persisted
    assert predictor.persist_models(models='all', max_memory=None) == []  # Ensure that no additional models are persisted on repeated calls
    unpersisted_models = predictor.unpersist_models()
    assert set(unpersisted_models) == set(persisted_models)
    assert predictor.get_model_names_persisted() == []  # Assert that all models were unpersisted

    # Raise exception
    with pytest.raises(NetworkXError):
        predictor.persist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2'])

    assert predictor.get_model_names_persisted() == []

    assert predictor.unpersist_models(models=['UNKNOWN_MODEL_1', 'UNKNOWN_MODEL_2']) == []

    predictor.persist_models(models='all', max_memory=None)
    predictor.save()  # Save predictor while models are persisted: Intended functionality is that they won't be persisted when loaded.
    predictor_loaded = TabularPredictor.load(predictor.path)  # Assert that predictor loading works
    leaderboard_loaded = predictor_loaded.leaderboard(data=test_data)
    assert len(leaderboard) == len(leaderboard_loaded)
    assert predictor_loaded.get_model_names_persisted() == []  # Assert that models were not still persisted after loading predictor

    assert(predictor.get_model_full_dict() == dict())
    predictor.refit_full()
    assert(len(predictor.get_model_full_dict()) == num_models)
    assert(len(predictor.get_model_names()) == num_models * 2)
    for model in predictor.get_model_names():
        predictor.predict(data=test_data, model=model)
    predictor.refit_full()  # Confirm that already-refit models aren't refit again.
    assert(len(predictor.get_model_full_dict()) == num_models)
    assert(len(predictor.get_model_names()) == num_models * 2)
    predictor.delete_models(models_to_keep=[])  # Test that dry-run doesn't delete models
    assert(len(predictor.get_model_names()) == num_models * 2)
    predictor.predict(data=test_data)
    predictor.delete_models(models_to_keep=[], dry_run=False)  # Test that dry_run=False actually deletes models
    assert len(predictor.get_model_names()) == 0
    assert len(predictor.leaderboard()) == 0
    assert len(predictor.leaderboard(extra_info=True)) == 0
    try:
        predictor.predict(data=test_data)
    except:
        pass
    else:
        raise AssertionError('predictor.predict should raise exception after all models are deleted')
    print('Tabular Advanced Functionality Test Succeeded.')
Example #17
 @classmethod
 def load(cls, path):
     learner = AGLearner(path=path)
     learner._model = TabularPredictor.load(path)
     return learner
Example #18
 def load_models(self, model_path):
     self.model_path = model_path
     self.model = TabularPredictor.load(os.path.join(model_path, 'ag_model'))