Пример #1
0
    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        )
        self.stc.fit(X, y)
        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                random_state=self.random_state,
                time_contract_in_mins=60,
            ),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        self.tsf = TimeSeriesForest(random_state=self.random_state)
        self.tsf.fit(X, y)
        train_preds = cross_val_predict(
            TimeSeriesForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        self.rise = RandomIntervalSpectralForest(
            random_state=self.random_state)
        self.fit(X, y)
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        self.cboss = ContractableBOSS(random_state=self.random_state)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        return self
Пример #2
0
def test_y_proba_on_gunpoint():
    X, y = load_gunpoint(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=42)
    estimator = RandomIntervalSpectralForest(random_state=42, n_estimators=20)
    estimator.fit(X_train, y_train)
    actual = estimator.predict_proba(X_test)
    np.testing.assert_array_equal(actual, expected)
Пример #3
0
def test_risf_on_unit_test_data():
    """Test of RandomIntervalSpectralEnsemble on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train RISE
    rise = RandomIntervalSpectralForest(n_estimators=10, random_state=0)
    rise.fit(X_train, y_train)

    # assert probabilities are the same
    probas = rise.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, rise_unit_test_probas)
Пример #4
0
def forecasting_example():
    name = "C:\\Users\\Tony\\OneDrive - University of East Anglia\\Research\\Alex " \
           "Mcgregor Grant\\randomNoise.csv"


    y = pd.read_csv(name, index_col=0, squeeze=True, dtype={1: np.float})
    forecast_horizon = np.arange(1, 2)
    forecaster = NaiveForecaster(strategy="last")
    forecaster.fit(y)
    y_pred = forecaster.predict(forecast_horizon)
    print("Next predicted value = ",y_pred)
    # https://github.com/alan-turing-institute/sktime/blob/main/examples/01_forecasting.ipynb
    #Reduce to a regression problem through windowing.
    ##Transform forecasting into regression

    np_y = y.to_numpy()
    v = sliding_window_view(y, 100)
    print("Window shape =",v.shape)
    v_3d = np.expand_dims(v, axis=1)
    print("Window shape =",v.shape)
    print(v_3d.shape)
    z = v[:,2]
    print(z.shape)
    regressor = CNNRegressor()
    classifier = CNNClassifier()
    regressor.fit(v_3d,z)
    p = regressor.predict(v_3d)
    #print(p)
    d = np.array([0.0])
    c = np.digitize(z,d)
    classifier = RandomIntervalSpectralForest()
    classifier.fit(v_3d,c)
    cls = classifier.predict(v_3d)
    print(cls)
Пример #5
0
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducability. You can set up bespoke classifier in many other ways.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed

    :return: A classifier.

    """
    name = cls.lower()
    # Distance based
    if name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resampleId)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resampleId)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resampleId)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resampleId)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resampleId)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Пример #6
0
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure as described in [1].

    Parameters
    ----------
    verbose                 : int, level of output printed to
    the console (for information only) (default = 0)
    n_jobs                  : int, optional (default=1)
    The number of jobs to run in parallel for both `fit` and `predict`.
    ``-1`` means using all processors.
    random_state            : int or None, seed for random, integer,
    optional (default to no seed)

    Attributes
    ----------
    n_classes               : extracted from the data

    Notes
    -----
    ..[1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and
    Matthew Middlehurst.
        "On the usage and performance of the Hierarchical Vote Collective of
            Transformation-based Ensembles version 1.0 (hive-cote v1. 0)"
        International Workshop on Advanced Analytics and Learning on Temporal
            Data 2020

    Java version
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java

    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
        "train_estimate": False,
        "contractable": False,
    }

    def __init__(
        self,
        stc_params=None,
        tsf_params=None,
        rise_params=None,
        cboss_params=None,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        if stc_params is None:
            stc_params = {"n_estimators": 500}
        if tsf_params is None:
            tsf_params = {"n_estimators": 500}
        if rise_params is None:
            rise_params = {"n_estimators": 500}
        if cboss_params is None:
            cboss_params = {}

        self.stc_params = stc_params
        self.tsf_params = tsf_params
        self.rise_params = rise_params
        self.cboss_params = cboss_params

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.stc = None
        self.tsf = None
        self.rise = None
        self.cboss = None

        self.stc_weight = 0
        self.tsf_weight = 0
        self.rise_weight = 0
        self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        """Fit a HIVE-COTEv1.0 classifier.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        self.stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                **self.stc_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight))  # noqa

        self.tsf = TimeSeriesForestClassifier(
            **self.tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self.tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight))  # noqa

        self.rise = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self.rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight))  # noqa

        self.cboss = ContractableBOSS(**self.cboss_params,
                                      random_state=self.random_state,
                                      n_jobs=self.n_jobs)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included) ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight))  # noqa

        self._is_fitted = True
        return self

    def predict(self, X):
        """Make predictions for all cases in X.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        rng = check_random_state(self.random_state)
        return np.array([
            self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
            for prob in self.predict_proba(X)
        ])

    def predict_proba(self, X):
        """Make class probability estimates on each case in X.

        Parameters
        ----------
        X - pandas dataframe of testing data of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape =
                [n_instances, num_classes] of probabilities
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        dists = np.add(
            dists,
            self.stc.predict_proba(X) *
            (np.ones(self.n_classes) * self.stc_weight),
        )
        dists = np.add(
            dists,
            self.tsf.predict_proba(X) *
            (np.ones(self.n_classes) * self.tsf_weight),
        )
        dists = np.add(
            dists,
            self.rise.predict_proba(X) *
            (np.ones(self.n_classes) * self.rise_weight),
        )
        dists = np.add(
            dists,
            self.cboss.predict_proba(X) *
            (np.ones(self.n_classes) * self.cboss_weight),
        )

        return dists / dists.sum(axis=1, keepdims=True)
Пример #7
0
    def fit(self, X, y):
        """Fit a HIVE-COTEv1.0 classifier.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        self.stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                **self.stc_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight))  # noqa

        self.tsf = TimeSeriesForestClassifier(
            **self.tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self.tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight))  # noqa

        self.rise = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self.rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight))  # noqa

        self.cboss = ContractableBOSS(**self.cboss_params,
                                      random_state=self.random_state,
                                      n_jobs=self.n_jobs)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included) ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight))  # noqa

        self._is_fitted = True
        return self
Пример #8
0
class HIVECOTEV1(BaseClassifier):
    """
    Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1
    as described in [1].

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure.


    Parameters
    ----------
    random_state            : int or None, seed for random, integer,
    optional (default to no seed)

    Attributes
    ----------
    n_classes               : extracted from the data

    Notes
    -----
    @article{bagnall2020usage,
      title={On the Usage and Performance of The Hierarchical Vote Collective of
      Transformation-based Ensembles version 1.0 (HIVE-COTE 1.0)},
      author={Bagnall, Anthony and Flynn, Michael and Large, James and Lines, Jason and
      Middlehurst, Matthew},
      journal={arXiv preprint arXiv:2004.06069},
      year={2020}
    }

    Java version
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java

    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
    }

    def __init__(
        self,
        random_state=None,
    ):
        self.random_state = random_state

        self.stc = None
        self.tsf = None
        self.rise = None
        self.cboss = None

        self.stc_weight = 0
        self.tsf_weight = 0
        self.rise_weight = 0
        self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        )
        self.stc.fit(X, y)
        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                random_state=self.random_state,
                time_contract_in_mins=60,
            ),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        self.tsf = TimeSeriesForest(random_state=self.random_state)
        self.tsf.fit(X, y)
        train_preds = cross_val_predict(
            TimeSeriesForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        self.rise = RandomIntervalSpectralForest(
            random_state=self.random_state)
        self.fit(X, y)
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        self.cboss = ContractableBOSS(random_state=self.random_state)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        return self

    def predict(self, X):
        rng = check_random_state(self.random_state)
        return np.array([
            self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
            for prob in self.predict_proba(X)
        ])

    def predict_proba(self, X):
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        dists = np.add(
            dists,
            self.stc.predict_proba(X) *
            (np.ones(self.n_classes) * self.stc_weight),
        )
        dists = np.add(
            dists,
            self.tsf.predict_proba(X) *
            (np.ones(self.n_classes) * self.tsf_weight),
        )
        dists = np.add(
            dists,
            self.rise.predict_proba(X) *
            (np.ones(self.n_classes) * self.rise_weight),
        )
        dists = np.add(
            dists,
            self.cboss.predict_proba(X) *
            (np.ones(self.n_classes) * self.cboss_weight),
        )

        return dists / dists.sum(axis=1, keepdims=True)
Пример #9
0
def set_classifier(cls, resample_id=None):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : String
        String indicating which classifier you want.
    resample_id : Random seed as an int, default=None
        Classifier random seed.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resample_id)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resample_id)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(random_state=resample_id)
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resample_id)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resample_id)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resample_id)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based: Removed because of soft dependency
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            classifier=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resample_id)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralForest(random_state=resample_id,
                                            n_estimators=500)
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resample_id,
                                          n_estimators=500)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resample_id,
                                       n_estimators=500)
    elif name == "stsf":
        return SupervisedTimeSeriesForest(random_state=resample_id,
                                          n_estimators=500)
    elif name == "drcif":
        return DrCIF(random_state=resample_id, n_estimators=500)
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(random_state=resample_id,
                                           n_estimators=500)
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Пример #10
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier, possibly seeded.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resample_id)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resample_id)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(random_state=resample_id,
                                          save_train_predictions=train_file)
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resample_id)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resample_id)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resample_id)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "summary":
        return SummaryClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "summary-intervals":
        return RandomIntervalClassifier(
            random_state=resample_id,
            interval_transformers=SummaryTransformer(
                summary_function=("mean", "std", "min", "max"),
                quantiles=(0.25, 0.5, 0.75),
            ),
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "summary-catch22":
        return RandomIntervalClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500))
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    elif name == "freshprince":
        return FreshPRINCE(random_state=resample_id,
                           save_transformed_data=train_file)
    # Hybrid
    elif name == "hc1" or name == "hivecotev1":
        return HIVECOTEV1(random_state=resample_id)
    elif name == "hc2" or name == "hivecotev2":
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralForest(random_state=resample_id,
                                            n_estimators=500)
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resample_id,
                                          n_estimators=500)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resample_id,
                                       n_estimators=500)
    elif name == "stsf" or name == "supervisedtimeseriesforest":
        return SupervisedTimeSeriesForest(random_state=resample_id,
                                          n_estimators=500)
    elif name == "drcif":
        return DrCIF(random_state=resample_id,
                     n_estimators=500,
                     save_transformed_data=train_file)
    # Kernel based
    elif name == "rocket":
        return RocketClassifier(random_state=resample_id)
    elif name == "mini-rocket":
        return RocketClassifier(random_state=resample_id,
                                rocket_transform="minirocket")
    elif name == "multi-rocket":
        return RocketClassifier(random_state=resample_id,
                                rocket_transform="multirocket")
    elif name == "arsenal":
        return Arsenal(random_state=resample_id,
                       save_transformed_data=train_file)
    elif name == "mini-arsenal":
        return Arsenal(
            random_state=resample_id,
            save_transformed_data=train_file,
            rocket_transform="minirocket",
        )
    elif name == "multi-arsenal":
        return Arsenal(
            random_state=resample_id,
            save_transformed_data=train_file,
            rocket_transform="multirocket",
        )
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            transform_limit_in_minutes=120,
            random_state=resample_id,
            save_transformed_data=train_file,
        )
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Пример #11
0
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure as described in [1]_.

    Parameters
    ----------
    stc_params : dict or None, default=None
        Parameters for the ShapeletTransformClassifier module. If None, uses the
        default parameters with a 2 hour transform contract.
    tsf_params : dict or None, default=None
        Parameters for the TimeSeriesForestClassifier module. If None, uses the default
        parameters with n_estimators set to 500.
    rise_params : dict or None, default=None
        Parameters for the RandomIntervalSpectralForest module. If None, uses the
        default parameters with n_estimators set to 500.
    cboss_params : dict or None, default=None
        Parameters for the ContractableBOSS module. If None, uses the default
        parameters.
    verbose : int, default=0
        Level of output printed to the console (for information only).
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, default=None
        Seed for random number generation.

    Attributes
    ----------
    n_classes_ : int
        The number of classes.
    classes_ : list
        The unique class labels.
    n_jobs_ : int
        The number of threads used.
    stc_weight_ : float
        The weight for STC probabilities.
    tsf_weight_ : float
        The weight for TSF probabilities.
    rise_weight_ : float
        The weight for RISE probabilities.
    cboss_weight_ : float
        The weight for cBOSS probabilities.

    See Also
    --------
    HIVECOTEV2, ShapeletTransformClassifier, TimeSeriesForestClassifier,
    RandomIntervalSpectralForest, ContractableBOSS

    Notes
    -----
    For the Java version, see
    `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java`_.

    References
    ----------
    .. [1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and
       Matthew Middlehurst. "On the usage and performance of the Hierarchical Vote
       Collective of Transformation-based Ensembles version 1.0 (hive-cote v1.0)"
       International Workshop on Advanced Analytics and Learning on Temporal Data 2020

    Examples
    --------
    >>> from sktime.classification.hybrid import HIVECOTEV1
    >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
    >>> clf = HIVECOTEV1(
    ...     stc_params={
    ...         "estimator": RotationForest(n_estimators=3),
    ...         "n_shapelet_samples": 500,
    ...         "max_shapelets": 20,
    ...         "batch_size": 100,
    ...     },
    ...     tsf_params={"n_estimators": 10},
    ...     rise_params={"n_estimators": 10},
    ...     cboss_params={"n_parameter_samples": 25, "max_ensemble_size": 5},
    ... )
    >>> clf.fit(X_train, y_train)
    HIVECOTEV1(...)
    >>> y_pred = clf.predict(X_test)
    """

    _tags = {
        "capability:multivariate": False,
        "capability:unequal_length": False,
        "capability:missing_values": False,
        "capability:train_estimate": False,
        "capability:contractable": False,
    }

    def __init__(
        self,
        stc_params=None,
        tsf_params=None,
        rise_params=None,
        cboss_params=None,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        self.stc_params = stc_params
        self.tsf_params = tsf_params
        self.rise_params = rise_params
        self.cboss_params = cboss_params

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.n_classes_ = 0
        self.classes_ = []
        self.n_jobs_ = n_jobs
        self.stc_weight_ = 0
        self.tsf_weight_ = 0
        self.rise_weight_ = 0
        self.cboss_weight_ = 0

        self._stc_params = stc_params
        self._tsf_params = tsf_params
        self._rise_params = rise_params
        self._cboss_params = cboss_params
        self._stc = None
        self._tsf = None
        self._rise = None
        self._cboss = None

        super(HIVECOTEV1, self).__init__()

    def _fit(self, X, y):
        """Fit HIVE-COTE 1.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        self.n_jobs_ = check_n_jobs(self.n_jobs)

        self.n_classes_ = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        # Default values from HC1 paper
        if self.stc_params is None:
            self._stc_params = {"transform_limit_in_minutes": 120}
        if self.tsf_params is None:
            self._tsf_params = {"n_estimators": 500}
        if self.rise_params is None:
            self._rise_params = {"n_estimators": 500}
        if self.cboss_params is None:
            self._cboss_params = {}

        # Cross-validation size for TSF and RISE
        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build TSF
        self._tsf = TimeSeriesForestClassifier(
            **self._tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find TSF weight using train set estimate found through CV
        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self._tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs_,
        )
        self.tsf_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight_))  # noqa

        # Build RISE
        self._rise = RandomIntervalSpectralForest(
            **self._rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find RISE weight using train set estimate found through CV
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self._rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs_,
        )
        self.rise_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight_))  # noqa

        # Build cBOSS
        self._cboss = ContractableBOSS(**self._cboss_params,
                                       random_state=self.random_state,
                                       n_jobs=self.n_jobs_)
        self._cboss.fit(X, y)

        # Find cBOSS weight using train set estimate
        train_probs = self._cboss._get_train_probs(X, y)
        train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included)",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight_))  # noqa

    def _predict(self, X):
        """Predicts labels for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        rng = check_random_state(self.random_state)
        return np.array([
            self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
            for prob in self.predict_proba(X)
        ])

    def _predict_proba(self, X):
        """Predicts labels probabilities for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        dists = np.zeros((X.shape[0], self.n_classes_))

        # Call predict proba on each classifier, multiply the probabilities by the
        # classifiers weight then add them to the current HC1 probabilities
        dists = np.add(
            dists,
            self._stc.predict_proba(X) *
            (np.ones(self.n_classes_) * self.stc_weight_),
        )
        dists = np.add(
            dists,
            self._tsf.predict_proba(X) *
            (np.ones(self.n_classes_) * self.tsf_weight_),
        )
        dists = np.add(
            dists,
            self._rise.predict_proba(X) *
            (np.ones(self.n_classes_) * self.rise_weight_),
        )
        dists = np.add(
            dists,
            self._cboss.predict_proba(X) *
            (np.ones(self.n_classes_) * self.cboss_weight_),
        )

        # Make each instances probability array sum to 1 and return
        return dists / dists.sum(axis=1, keepdims=True)
Пример #12
0
    def _fit(self, X, y):
        """Fit HIVE-COTE 1.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        self.n_jobs_ = check_n_jobs(self.n_jobs)

        self.n_classes_ = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        # Default values from HC1 paper
        if self.stc_params is None:
            self._stc_params = {"transform_limit_in_minutes": 120}
        if self.tsf_params is None:
            self._tsf_params = {"n_estimators": 500}
        if self.rise_params is None:
            self._rise_params = {"n_estimators": 500}
        if self.cboss_params is None:
            self._cboss_params = {}

        # Cross-validation size for TSF and RISE
        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build TSF
        self._tsf = TimeSeriesForestClassifier(
            **self._tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find TSF weight using train set estimate found through CV
        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self._tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs_,
        )
        self.tsf_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight_))  # noqa

        # Build RISE
        self._rise = RandomIntervalSpectralForest(
            **self._rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs_,
        )
        self._rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find RISE weight using train set estimate found through CV
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self._rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs_,
        )
        self.rise_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight_))  # noqa

        # Build cBOSS
        self._cboss = ContractableBOSS(**self._cboss_params,
                                       random_state=self.random_state,
                                       n_jobs=self.n_jobs_)
        self._cboss.fit(X, y)

        # Find cBOSS weight using train set estimate
        train_probs = self._cboss._get_train_probs(X, y)
        train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included)",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight_))  # noqa