Пример #1
0
    def _fit(self, X, y=None):
        """
        Fit transformer, generating random interval indices.

        Parameters
        ----------
        X : pandas DataFrame of shape [n_samples, n_features]
            Input data
        y : pandas Series, shape (n_samples, ...), optional
            Targets for supervised learning.

        Returns
        -------
        self : RandomIntervalSegmenter
            This estimator
        """
        # We use composition rather than inheritance here, because this transformer
        # has a different transform type (returns tabular) compared to the
        # RandomIntervalSegmenter (returns panel).
        self._interval_segmenter = RandomIntervalSegmenter(
            self.n_intervals, self.min_length, self.max_length, self.random_state
        )
        self._interval_segmenter.fit(X, y)
        self.intervals_ = self._interval_segmenter.intervals_
        self.input_shape_ = self._interval_segmenter.input_shape_
        self._time_index = self._interval_segmenter._time_index
        return self
Пример #2
0
def tsf_benchmarking():
    for i in range(0, len(benchmark_datasets)):
        dataset = benchmark_datasets[i]
        print(str(i) + " problem = " + dataset)
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "mean",
                            make_row_transformer(
                                FunctionTransformer(func=np.mean, validate=False)
                            ),
                        ),
                        (
                            "std",
                            make_row_transformer(
                                FunctionTransformer(func=np.std, validate=False)
                            ),
                        ),
                        (
                            "slope",
                            make_row_transformer(
                                FunctionTransformer(func=_slope, validate=False)
                            ),
                        ),
                    ]
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
Пример #3
0
def rise_benchmarking():
    for i in range(0, len(benchmark_datasets)):
        dataset = benchmark_datasets[i]
        print(str(i) + " problem = " + dataset)
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "acf",
                            make_row_transformer(
                                FunctionTransformer(func=acf_coefs, validate=False)
                            ),
                        ),
                        (
                            "ps",
                            make_row_transformer(
                                FunctionTransformer(func=powerspectrum, validate=False)
                            ),
                        ),
                    ]
                ),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        rise = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
def test_different_implementations():
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals=1, random_state=random_state)
    tran2 = SeriesToPrimitivesRowTransformer(FunctionTransformer(
        func=np.mean, validate=False),
                                             check_transformer=False)
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals=1,
                                          features=[np.mean],
                                          random_state=random_state)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_almost_equal(A, B)
def test_different_pipelines():
    """Compare with transformer pipeline using TSFeatureUnion."""
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=1, random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([
                (
                    "mean",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.mean, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "std",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.std, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "slope",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=_slope, validate=False),
                        check_transformer=False,
                    ),
                ),
            ]),
        ),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, _slope],
        random_state=random_state,
    )
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
Пример #6
0
def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction

    steps = [
        ("segment", RandomIntervalSegmenter(n_intervals=1)),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer),
                          ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    clf = Pipeline(steps)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
Пример #7
0
def test_equivalent_model_specifications(n_intervals, n_estimators):
    """Test composable TSF vs an equivalent model."""
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # Due to tie-breaking/floating point rounding in the final decision tree
    # classifier, the results depend on the
    # exact column order of the input data

    # Compare pipeline predictions outside of ensemble.
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=n_intervals,
                                    random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer),
                          ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf1 = Pipeline(steps)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std],
                random_state=random_state,
            ),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)
    np.array_equal(a, b)
Пример #8
0
def test_output_format_dim(n_timepoints, n_instances, n_intervals):
    X = _make_nested_from_array(
        np.ones(n_timepoints), n_instances=n_instances, n_columns=1
    )

    trans = RandomIntervalSegmenter(n_intervals=n_intervals)
    Xt = trans.fit_transform(X)

    # Check number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Check number of generated intervals/columns.
    if n_intervals != "random":
        if np.issubdtype(type(n_intervals), np.floating):
            assert Xt.shape[1] == np.maximum(1, int(n_timepoints * n_intervals))
        elif np.issubdtype(type(n_intervals), np.integer):
            assert Xt.shape[1] == n_intervals
        elif n_intervals == "sqrt":
            assert Xt.shape[1] == np.maximum(1, int(np.sqrt(n_timepoints)))
        elif n_intervals == "log":
            assert Xt.shape[1] == np.maximum(1, int(np.log(n_timepoints)))
Пример #9
0
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate settings just and another elif. So, for example, if
    you wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superceded, it is just how e have always done it
    :param cls: String indicating which classifier you want
    :return: A classifier.

    """
    if cls.lower() == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif cls.lower() == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif cls.lower() == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif cls.lower() == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif cls.lower() == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif cls.lower() == "boss":
        return db.BOSSEnsemble()
    elif cls.lower() == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif cls.lower() == "dtw":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtw")
    elif cls.lower() == "ee" or cls.lower() == "elasticensemble":
        return dist.ElasticEnsemble()
    elif cls.lower() == "shapedtw_raw":
        return ShapeDTW(subsequence_length=30,
                        shape_descriptor_function="raw",
                        metric_params=None)
    elif cls.lower() == "shapedtw_dwt":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="dwt",
            metric_params={"num_levels_dwt": 3},
        )
    elif cls.lower() == "shapedtw_paa":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="paa",
            metric_params={"num_intervals_paa": 5},
        )
    elif cls.lower() == "shapedtw_slope":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="slope",
            metric_params={"num_intervals_slope": 5},
        )
    elif cls.lower() == "shapedtw_hog1d":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="hog1d",
            metric_params={
                "num_bins_hog1d": 8,
                "num_intervals_hog1d": 2,
                "scaling_factor_hog1d": 0.1,
            },
        )
    elif cls.lower() == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif cls.lower() == "risecomposite":
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Пример #10
0
def test_bad_input_args(bad_interval):
    X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=2)
    with pytest.raises(ValueError):
        RandomIntervalSegmenter(n_intervals=bad_interval).fit(X)
Пример #11
0
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate settings just and another elif. So, for example, if
    you wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superceded, it is just how e have always done it
    :param cls: String indicating which classifier you want
    :return: A classifier.

    """
    if cls.lower() == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif cls.lower() == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif cls.lower() == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif cls.lower() == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif cls.lower() == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif cls.lower() == "cif":
        return CanonicalIntervalForest(random_state=resampleId)
    elif cls.lower() == "boss":
        return BOSSEnsemble(random_state=resampleId)
    elif cls.lower() == "cboss":
        return ContractableBOSS(random_state=resampleId)
    elif cls.lower() == "tde":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif cls.lower() == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif cls.lower() == "dtwcv":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif cls.lower() == "ee" or cls.lower() == "elasticensemble":
        return dist.ElasticEnsemble()
    elif cls.lower() == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif cls.lower() == "risecomposite":
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    elif cls.lower() == "rocket":
        rocket_pipeline = make_pipeline(
            Rocket(random_state=resampleId),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
        return rocket_pipeline
    else:
        raise Exception("UNKNOWN CLASSIFIER")