示例#1
0
def _make_fit_args(estimator, **kwargs):
    """Construct the arguments passed to ``estimator.fit``.

    Dispatches on the estimator's base class and builds a matching toy
    problem; raises ValueError for unsupported estimator types.
    """
    if isinstance(estimator, BaseForecaster):
        # we need to handle the TransformedTargetForecaster separately
        if isinstance(estimator, _SeriesToSeriesTransformer):
            y = _make_series(**kwargs)
        else:
            y = make_forecasting_problem(**kwargs)
        # forecasters receive (y, X, fh); no exogenous data, one-step horizon
        return y, None, 1
    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    series_trafos = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_trafos):
        return (_make_series(**kwargs),)
    panel_trafos = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_trafos):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)
    raise ValueError(_get_err_msg(estimator))
示例#2
0
def _make_fit_args(estimator, **kwargs):
    """Build the fit arguments for ``estimator`` from a generated toy problem."""
    if isinstance(estimator, BaseForecaster):
        y = make_forecasting_problem(**kwargs)
        # forecasters take (y, X, fh); no exogenous data, one-step horizon
        return y, None, 1
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    series_trafos = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_trafos):
        return (_make_series(**kwargs),)
    panel_trafos = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_trafos):
        return make_classification_problem(**kwargs)
    raise ValueError(_get_err_msg(estimator))
示例#3
0
def _make_fit_args(estimator, **kwargs):
    """Generate the arguments passed to ``fit`` for the given estimator type.

    Branch order matters: specific base classes are checked before the
    generic ``BaseTransformer`` fallbacks.
    """
    if isinstance(estimator, BaseForecaster):
        # we need to handle the TransformedTargetForecaster separately
        if isinstance(estimator, _SeriesToSeriesTransformer):
            y = _make_series(**kwargs)
        elif "n_columns" in kwargs:
            y = make_forecasting_problem(**kwargs)
        else:
            # create matching n_columns input, if n_columns not passed
            # e.g., to give bivariate y to strictly multivariate forecaster
            scitype_y = estimator.get_tag(tag_name="scitype:y", raise_error=False)
            n_columns = _get_n_columns(scitype_y)[0]
            y = make_forecasting_problem(n_columns=n_columns, **kwargs)
        # forecasters receive (y, X, fh); no exogenous data, one-step horizon
        return y, None, 1
    if isinstance(estimator, BaseSeriesAnnotator):
        return (make_annotation_problem(**kwargs),)
    if isinstance(estimator, BaseClassifier):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseRegressor):
        return make_regression_problem(**kwargs)
    series_trafos = (_SeriesToPrimitivesTransformer, _SeriesToSeriesTransformer)
    if isinstance(estimator, series_trafos):
        return (_make_series(**kwargs),)
    panel_trafos = (_PanelToTabularTransformer, _PanelToPanelTransformer)
    if isinstance(estimator, panel_trafos):
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseTransformer) and estimator.get_tag("requires_y"):
        # transformers requiring y are fitted on a classification problem
        return make_classification_problem(**kwargs)
    if isinstance(estimator, BaseTransformer):
        return (_make_series(**kwargs),)
    if isinstance(estimator, BaseClusterer):
        return (make_clustering_problem(**kwargs),)
    if isinstance(estimator, (BasePairwiseTransformer, BasePairwiseTransformerPanel)):
        # pairwise transformers are fitted without data
        return None, None
    if isinstance(estimator, BaseAligner):
        # aligners need a list of (here: two bivariate) series
        X = [_make_series(n_columns=2, **kwargs) for _ in range(2)]
        return (X,)
    raise ValueError(_get_err_msg(estimator))
示例#4
0
def test_from_nested_to_2d_array(n_instances, n_columns, n_timepoints):
    """Nested-to-2d conversion keeps the index and flattens columns."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)

    flat = from_nested_to_2d_array(nested)

    assert flat.shape == (n_instances, n_columns * n_timepoints)
    assert flat.index.equals(nested.index)
示例#5
0
def test_check_X_enforce_min_columns():
    """check_X and check_X_y must raise when X has fewer columns than required."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"columns"

    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_min_columns=3)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_min_columns=3)
示例#6
0
def test_check_X_enforce_univariate():
    """check_X and check_X_y must reject multivariate X when univariate enforced."""
    X, y = make_classification_problem(n_columns=2)
    expected_msg = r"univariate"

    with pytest.raises(ValueError, match=expected_msg):
        check_X(X, enforce_univariate=True)
    with pytest.raises(ValueError, match=expected_msg):
        check_X_y(X, y, enforce_univariate=True)
示例#7
0
def test_from_nested_to_3d_numpy(n_instances, n_columns, n_timepoints):
    """Nested frame converts to a 3d numpy array of the expected shape."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    arr = from_nested_to_3d_numpy(nested)

    # check types and shapes
    assert isinstance(arr, np.ndarray)
    assert arr.shape == (n_instances, n_columns, n_timepoints)

    # check values of random series
    np.testing.assert_array_equal(nested.iloc[1, 0], arr[1, 0, :])
示例#8
0
def test_check_enforce_min_instances():
    """All three checkers must raise when fewer instances than required."""
    X, y = make_classification_problem(n_instances=3)
    pattern = r"instance"

    # every checker gets the same 3-instance data and a minimum of 4
    raising_calls = (
        lambda: check_X(X, enforce_min_instances=4),
        lambda: check_X_y(X, y, enforce_min_instances=4),
        lambda: check_y(y, enforce_min_instances=4),
    )
    for call in raising_calls:
        with pytest.raises(ValueError, match=pattern):
            call()
示例#9
0
def test_from_nested_to_multi_index(n_instances, n_columns, n_timepoints):
    """Nested frame converts to a MultiIndex frame with expected shape and names."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    converted = from_nested_to_multi_index(
        nested, instance_index="case_id", time_index="reading_id"
    )

    assert isinstance(converted, pd.DataFrame)
    assert converted.shape == (n_instances * n_timepoints, n_columns)
    assert converted.index.names == ["case_id", "reading_id"]
示例#10
0
def test_from_nested_to_long(n_instances, n_columns, n_timepoints):
    """Nested frame melts to long format with one row per observation."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    long_df = from_nested_to_long(
        nested,
        instance_column_name="case_id",
        time_column_name="reading_id",
        dimension_column_name="dim_id",
    )

    assert isinstance(long_df, pd.DataFrame)
    assert long_df.shape == (n_instances * n_timepoints * n_columns, 4)
    assert (long_df.columns == ["case_id", "reading_id", "dim_id", "value"]).all()
示例#11
0
def test_are_columns_nested(n_instances, n_columns, n_timepoints):
    """are_columns_nested flags nested vs. primitive columns correctly."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    flat_zeros = pd.DataFrame(np.zeros_like(nested))
    # primitive columns in front of nested ones
    mixed_front = pd.concat([flat_zeros, nested], axis=1)
    # one primitive column appended after the nested ones
    mixed_back = nested.copy()
    mixed_back["primitive_col"] = 1.0

    assert list(are_columns_nested(nested)) == [True] * n_columns
    assert (
        list(are_columns_nested(mixed_front))
        == [False] * n_columns + [True] * n_columns
    )
    assert list(are_columns_nested(mixed_back)) == [True] * n_columns + [False]
示例#12
0
def test_tsfresh_extractor(default_fc_parameters):
    """Test that mean feature of TSFreshFeatureExtract is identical with sample mean."""
    X, _ = make_classification_problem()

    extractor = TSFreshFeatureExtractor(
        default_fc_parameters=default_fc_parameters,
        disable_progressbar=True,
    )
    Xt = extractor.fit_transform(X)

    actual = Xt.filter(like="__mean", axis=1).values.ravel()
    wide = convert(X, from_type="nested_univ", to_type="pd-wide")
    expected = wide.mean(axis=1).values

    assert expected[0] == X.iloc[0, 0].mean()
    np.testing.assert_allclose(actual, expected)
示例#13
0
def test_tsfresh_extractor(default_fc_parameters):
    """Check the tsfresh __mean feature against the per-series sample mean."""
    X, y = make_classification_problem()
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    extractor = TSFreshFeatureExtractor(
        default_fc_parameters=default_fc_parameters,
        disable_progressbar=True,
    )
    Xt = extractor.fit_transform(X_train, y_train)

    actual = Xt.filter(like="__mean", axis=1).values.ravel()
    expected = from_nested_to_2d_array(X_train).mean(axis=1).values

    assert expected[0] == X_train.iloc[0, 0].mean()
    np.testing.assert_allclose(actual, expected)
示例#14
0
def test_is_nested_dataframe(n_instances, n_columns, n_timepoints):
    """is_nested_dataframe accepts nested frames and rejects other containers."""
    plain_array = np.random.normal(size=(n_instances, n_columns, n_timepoints))
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    zeros = pd.DataFrame(np.zeros_like(nested))
    heterogenous = pd.concat([zeros, nested], axis=1)
    mi_df = make_multi_index_dataframe(
        n_instances=n_instances, n_timepoints=n_timepoints, n_columns=n_columns
    )

    assert not is_nested_dataframe(plain_array)
    assert not is_nested_dataframe(mi_df)
    assert is_nested_dataframe(nested)
    assert is_nested_dataframe(heterogenous)
示例#15
0
def test_make_classification_problem(
    n_instances, n_columns, n_timepoints, n_classes, return_numpy
):
    """Generated classification problem has requested shape and class count."""
    X, y = make_classification_problem(
        n_instances=n_instances,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        n_classes=n_classes,
        return_numpy=return_numpy,
    )

    # dimensions of generated data
    _check_X_y(X, y, n_instances, n_columns, n_timepoints, check_numpy=return_numpy)
    # number of distinct class labels
    assert np.unique(y).size == n_classes
def test_different_pipelines():
    """Compare with transformer pipeline using TSFeatureUnion."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # build the mean/std/slope row transformers for the FeatureUnion
    row_transformers = [
        (
            name,
            SeriesToPrimitivesRowTransformer(
                FunctionTransformer(func=func, validate=False),
                check_transformer=False,
            ),
        )
        for name, func in (("mean", np.mean), ("std", np.std), ("slope", _slope))
    ]
    pipe = Pipeline(
        [
            (
                "segment",
                RandomIntervalSegmenter(n_intervals=1, random_state=random_state),
            ),
            ("transform", FeatureUnion(row_transformers)),
        ]
    )
    a = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, _slope],
        random_state=random_state,
    )
    b = extractor.fit_transform(X_train)

    # both implementations must agree on values and chosen intervals
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, extractor.intervals_)
def test_different_implementations():
    """Chained segment + row-transform equals RandomIntervalFeatureExtractor."""
    random_state = 1233
    X_train, y_train = make_classification_problem()

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals=1, random_state=random_state)
    row_mean = SeriesToPrimitivesRowTransformer(
        FunctionTransformer(func=np.mean, validate=False),
        check_transformer=False,
    )
    chained = row_mean.fit_transform(segmenter.fit_transform(X_train))

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1, features=[np.mean], random_state=random_state
    )
    direct = extractor.fit_transform(X_train)

    np.testing.assert_array_almost_equal(chained, direct)
def test_results(n_instances, n_timepoints, n_intervals):
    """Extracted interval means/stds match numpy values on the raw array."""
    X, _ = make_classification_problem(
        n_instances=n_instances, n_timepoints=n_timepoints, return_numpy=True
    )
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals, features=[np.mean, np.std]
    )
    Xt = extractor.fit_transform(X)
    # drop duplicated columns from overlapping intervals
    Xt = Xt.loc[:, ~Xt.columns.duplicated()]

    # Check results
    for start, end in extractor.intervals_:
        segment = X[:, 0, start:end]
        expected_mean = np.mean(segment, axis=-1)
        expected_std = np.std(segment, axis=-1)

        actual_means = Xt.loc[:, f"{start}_{end}_mean"].to_numpy().ravel()
        actual_stds = Xt.loc[:, f"{start}_{end}_std"].to_numpy().ravel()

        np.testing.assert_array_equal(actual_means, expected_mean)
        np.testing.assert_array_equal(actual_stds, expected_std)
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

from sktime.classification.compose._ensemble import TimeSeriesForestClassifier
from sktime.transformations.panel.compose import (
    SeriesToPrimitivesRowTransformer,
)
from sktime.transformations.panel.segment import IntervalSegmenter
from sktime.transformations.panel.summarize._extract import (
    RandomIntervalFeatureExtractor,
)
from sktime.utils._testing.panel import make_classification_problem

# Module-level fixture data shared by the tests below.
X_train, y_train = make_classification_problem()


# Check results of a simple case of single estimator, single feature and
# single interval from different but equivalent implementations
def test_feature_importances_single_feature_interval_and_estimator():
    random_state = 1234

    # Compute using default method
    features = [np.mean]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=1, features=features, random_state=random_state
            ),
def test_bad_n_intervals(bad_n_intervals):
    """Check that exception is raised for bad input args."""
    X, y = make_classification_problem()
    extractor = RandomIntervalFeatureExtractor(n_intervals=bad_n_intervals)
    with pytest.raises(ValueError):
        extractor.fit(X)
示例#21
0
from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

from sktime.classification.compose import ComposableTimeSeriesForestClassifier
from sktime.datasets import load_gunpoint
from sktime.transformations.panel.compose import (
    SeriesToPrimitivesRowTransformer,
)
from sktime.transformations.panel.segment import RandomIntervalSegmenter
from sktime.transformations.panel.summarize import (
    RandomIntervalFeatureExtractor,
)
from sktime.utils._testing.panel import make_classification_problem
from sktime.utils.slope_and_trend import _slope

# Module-level fixture data shared by the tests below.
X, y = make_classification_problem()
n_classes = len(np.unique(y))

# Row transformer applying np.mean over axis 0 of each series cell.
mean_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.mean, validate=False, kw_args={"axis": 0}),
    check_transformer=False,
)
# Row transformer applying np.std over axis 0 of each series cell.
std_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.std, validate=False, kw_args={"axis": 0}),
    check_transformer=False,
)


# Check simple cases.
def test_predict_proba():
    clf = ComposableTimeSeriesForestClassifier(n_estimators=2)
def test_bad_features(bad_features):
    """Check that exception is raised for bad ``features`` args.

    Bug fix: the bad value was previously passed as ``n_intervals``
    instead of ``features``, so the features-validation path was never
    exercised (``n_intervals`` is already covered by test_bad_n_intervals).
    """
    X, y = make_classification_problem()
    with pytest.raises(ValueError):
        RandomIntervalFeatureExtractor(features=bad_features).fit(X)