Пример #1
0
 def fit(self, data):
     """Train a time-series forest on *data* and return it with eval metrics.

     When ``data.kfold > 1``, one model is trained per CV fold and only the
     best one is kept as ``self.tsf`` (lowest MAE for regression, highest
     adjusted balanced accuracy for classification). Otherwise a single
     model is fit on the train split and evaluated on the validation split.

     Returns a dict ``{'model': fitted_estimator, 'metrics': eval_metrics}``.
     """
     def _new_model():
         # Single place that selects the estimator class by task type
         # (previously duplicated in both branches below).
         if data.tasktype == 'regression':
             return ComposableTimeSeriesForestRegressor(n_jobs=-1)
         return ComposableTimeSeriesForestClassifier(n_jobs=-1)

     if data.kfold > 1:
         cv_eval = {}
         # Iterate fold keys directly; the enumerate index was unused.
         for cv_fold in data.Xy_train:
             [(X_train, y_train), (X_val, y_val)] = data.Xy_train[cv_fold]
             X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
             tsf = _new_model()
             tsf.fit(X_train, y_train)
             eval_metrics = weareval.eval_output(tsf.predict(X_val), y_val, tasktype=data.tasktype)
             cv_eval[cv_fold] = {'model': tsf,
                                 # 'data': [(X_train, y_train), (X_val, y_val)], # store just IDs?
                                 'metric': eval_metrics['mae'] if data.tasktype=='regression' else eval_metrics['balanced_acc_adj'],
                                 'metrics': eval_metrics}
         # Retain only the best model: minimize error for regression,
         # maximize accuracy for classification.
         scores = {fold: entry['metric'] for fold, entry in cv_eval.items()}
         pick = min if data.tasktype == 'regression' else max
         bst_fold = pick(scores, key=scores.get)
         self.tsf = cv_eval[bst_fold]['model']
         return {'model': self.tsf, 'metrics': cv_eval[bst_fold]['metrics']}
     else:
         X_train, y_train = data.Xy_train
         X_val, y_val = data.Xy_val
         X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
         self.tsf = _new_model()
         self.tsf.fit(X_train, y_train)
         eval_metrics = weareval.eval_output(self.tsf.predict(X_val), y_val, tasktype=data.tasktype)
         return {'model': self.tsf, 'metrics': eval_metrics}
Пример #2
0
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check the explicitly composed TSF matches the default implementation.

    Builds a ComposableTimeSeriesForestClassifier from an explicit
    transform+tree pipeline and asserts its predicted probabilities are
    identical to the default, internally-constructed implementation.
    """
    random_state = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    # Only the test features are needed; the test labels were unused.
    X_test, _ = load_gunpoint(split="test", return_X_y=True)

    features = [np.mean, np.std, _slope]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                random_state=random_state, features=features
            ),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    estimator = Pipeline(steps)

    # Fully modular variant with an explicit base estimator pipeline.
    clf1 = ComposableTimeSeriesForestClassifier(
        estimator=estimator, random_state=random_state, n_estimators=n_estimators
    )
    clf1.fit(X_train, y_train)
    a = clf1.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    clf2 = ComposableTimeSeriesForestClassifier(
        random_state=random_state, n_estimators=n_estimators
    )
    clf2.fit(X_train, y_train)
    b = clf2.predict_proba(X_test)

    np.testing.assert_array_equal(a, b)
Пример #3
0
def tsf_benchmarking():
    """Run TSF benchmarks over all benchmark datasets.

    For each dataset, two experiments are run: the interval-based
    TimeSeriesForest ("PythonTSF") and a composable forest built from an
    explicit segment/transform/tree pipeline ("PythonTSFComposite").
    Existing results are not overwritten.
    """
    # enumerate replaces the index-based range(len(...)) loop.
    for i, dataset in enumerate(benchmark_datasets):
        print(f"{i} problem = {dataset}")
        # Plain interval-based implementation.
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
        # Composite variant: random interval segmentation, summary-statistic
        # features (mean/std/slope) per interval, then a decision tree.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion([
                    (
                        "mean",
                        make_row_transformer(
                            FunctionTransformer(func=np.mean, validate=False)),
                    ),
                    (
                        "std",
                        make_row_transformer(
                            FunctionTransformer(func=np.std, validate=False)),
                    ),
                    (
                        "slope",
                        make_row_transformer(
                            FunctionTransformer(func=_slope, validate=False)),
                    ),
                ]),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = ComposableTimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
Пример #4
0
def rise_benchmarking():
    """Run RISE benchmarks over all benchmark datasets.

    For each dataset, two experiments are run: the spectral-forest
    implementation ("PythonRISE") and a composable equivalent built from an
    acf/power-spectrum feature pipeline ("PythonRISEComposite"). Existing
    results are overwritten.
    """
    # enumerate replaces the index-based range(len(...)) loop.
    for i, dataset in enumerate(benchmark_datasets):
        print(f"{i} problem = {dataset}")
        # Plain spectral-forest implementation.
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
        # Composite variant: one random interval per series, autocorrelation
        # and power-spectrum features, tabularised, then a decision tree.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        rise = ComposableTimeSeriesForestClassifier(estimator=base_estimator,
                                                    n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
Пример #5
0
def test_predict_proba():
    """Sanity-check predict_proba output shape and row-wise normalisation."""
    model = ComposableTimeSeriesForestClassifier(n_estimators=2)
    model.fit(X, y)
    probabilities = model.predict_proba(X)

    # One row per instance, one column per class; every row sums to 1.
    n_rows = X.shape[0]
    assert probabilities.shape == (n_rows, n_classes)
    np.testing.assert_array_equal(np.ones(n_rows), np.sum(probabilities, axis=1))

    # A single-row input keeps the expected 2-D / 1-D output shapes.
    single_row = X.iloc[[0], :]
    assert model.predict_proba(single_row).shape == (1, n_classes)
    assert model.predict(single_row).shape == (1,)
Пример #6
0
def test_stat():
    """Test sign ranks."""
    gunpoint = load_gunpoint(split="train", return_X_y=False)
    ram_dataset = RAMDataset(dataset=gunpoint, name="gunpoint")
    tsc_task = TSCTask(target="class_val")

    # Two competing strategies: a one-tree forest and a k-NN classifier.
    forest = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    strategy_fc = TSCStrategy(forest, name="tsf")
    knn = KNeighborsTimeSeriesClassifier()
    strategy_pf = TSCStrategy(knn, name="pf")

    # In-memory results backend; fit and predict both strategies.
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[ram_dataset],
        tasks=[tsc_task],
        strategies=[strategy_pf, strategy_fc],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    metric = PairwiseMetric(func=accuracy_score, name="accuracy")
    _ = analyse.evaluate(metric=metric)

    # Mean accuracy ranks: pf is expected first, tsf second.
    ranks = analyse.rank(ascending=True)
    pf_rank = ranks.loc[ranks.strategy == "pf",
                        "accuracy_mean_rank"].item()  # 1
    fc_rank = ranks.loc[ranks.strategy == "tsf",
                        "accuracy_mean_rank"].item()  # 2
    rank_array = [pf_rank, fc_rank]
    rank_array_test = [1, 2]

    # Pairwise sign test; all entries should be 1.
    _, sign_test_df = analyse.sign_test()
    sign_array = [
        [sign_test_df["pf"][0], sign_test_df["pf"][1]],
        [sign_test_df["tsf"][0], sign_test_df["tsf"][1]],
    ]
    sign_array_test = [[1, 1], [1, 1]]

    np.testing.assert_equal([rank_array, sign_array],
                            [rank_array_test, sign_array_test])
Пример #7
0
# -*- coding: utf-8 -*-
import pytest
from sktime.benchmarking.strategies import TSCStrategy
from sktime.benchmarking.tasks import TSCTask
from sktime.datasets import load_gunpoint
from sktime.datasets import load_italy_power_demand
from sktime.classification.compose import ComposableTimeSeriesForestClassifier

# Shared classifier instance reused by the parametrized strategy tests below.
classifier = ComposableTimeSeriesForestClassifier(n_estimators=2)

# Dataset loader callables the strategy tests are parametrized over.
DATASET_LOADERS = (load_gunpoint, load_italy_power_demand)


# Test output of time-series classification strategies
@pytest.mark.parametrize("dataset", DATASET_LOADERS)
def test_TSCStrategy(dataset):
    """Fit/predict via TSCStrategy yields one prediction per test case."""
    train_split = dataset(split="train")
    test_split = dataset(split="test")
    strategy = TSCStrategy(classifier)
    task = TSCTask(target="class_val")
    strategy.fit(task, train_split)
    predictions = strategy.predict(test_split)
    assert predictions.shape == test_split[task.target].shape
Пример #8
0
# Row-wise min-max normalisation: rescale every series into [0, 1].
norm_data = norm_data.apply(lambda x: (x - x.min()) / (x.max() - x.min()),
                            axis=1)
X_norm = norm_data.values

# Binarise the label and flatten to a 1-D vector (the [:] makes a copy).
lb = LabelBinarizer()
y = lb.fit_transform(label)
y = y.reshape(-1)[:]

# Convert tabular data to the nested format required by sktime algorithms.
X_nested = from_2d_array_to_nested(X_norm)[:]

# Model and hyper-parameter grid definitions for the search below.
model_params = {
    'ComposableTSF': {
        'model': ComposableTimeSeriesForestClassifier(),
        'params': {
            'n_estimators': [200, 300, 350, 400, 500]
        }
    }
}

# Scoring metric definitions; precision/recall-style metrics are bound to
# the positive label via make_scorer.
scoring = {
    'acc': 'accuracy',
    'prec': make_scorer(precision_score, pos_label=pos_label),
    'avg_prec': make_scorer(average_precision_score, pos_label=pos_label),
    'recall': make_scorer(recall_score, pos_label=pos_label),
    'f1': make_scorer(f1_score, pos_label=pos_label),
    'bal_acc': 'balanced_accuracy'
}