def test_hivecote_v2_on_basic_motions():
    """Test of HIVECOTEV2 on basic motions data.

    Fits a small HIVE-COTE v2 on a fixed 15-case subsample of the BasicMotions
    training split and compares the probabilities predicted for 10 test cases
    with the stored reference values.
    """
    # load basic motions data (test labels are not needed by this test)
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, _ = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 15, replace=False)

    # train HIVE-COTE v2
    hc2 = HIVECOTEV2(
        random_state=0,
        stc_params={
            "estimator": RotationForest(n_estimators=3),
            "n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        drcif_params={"n_estimators": 10},
        arsenal_params={"num_kernels": 100, "n_estimators": 5},
        tde_params={
            "n_parameter_samples": 25,
            "max_ensemble_size": 5,
            "randomly_selected_params": 10,
        },
    )
    hc2.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same; the expected values must be the
    # HIVE-COTE v2 reference output, not the ShapeletTransformClassifier one
    probas = hc2.predict_proba(X_test.iloc[indices[:10]])
    testing.assert_array_equal(probas, hivecote_v2_basic_motions_probas)
def test_stc_on_unit_test_data():
    """Check ShapeletTransformClassifier output on the unit test dataset.

    Fits on the full training split, compares the probabilities predicted for
    10 fixed test cases with the stored reference values, then checks that the
    train estimate reaches an accuracy of at least 0.75.
    """
    # data and the fixed selection of test cases
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    rng = np.random.RandomState(0)
    indices = rng.choice(len(y_train), 10, replace=False)

    # small STC configuration so the test stays quick
    stc_settings = {
        "estimator": RotationForest(n_estimators=3),
        "max_shapelets": 20,
        "n_shapelet_samples": 500,
        "batch_size": 100,
        "random_state": 0,
        "save_transformed_data": True,
    }
    stc = ShapeletTransformClassifier(**stc_settings)
    stc.fit(X_train, y_train)

    # predicted probabilities must match the reference exactly
    selected_cases = X_test.iloc[indices]
    testing.assert_array_equal(stc.predict_proba(selected_cases), stc_unit_test_probas)

    # train estimate: map the most probable column back to its class label
    train_probas = stc._get_train_probs(X_train, y_train)
    best_column = np.argmax(train_probas, axis=1)
    train_preds = stc.classes_[best_column]
    train_accuracy = accuracy_score(y_train, train_preds)
    assert train_accuracy >= 0.75
def test_hivecote_v1_on_unit_test_data():
    """Check HIVECOTEV1 output on the unit test dataset.

    Fits a small HIVE-COTE v1 on 10 fixed training cases and compares the
    probabilities predicted for the same positions of the test split with the
    stored reference values, to two decimal places.
    """
    # data and the fixed selection of cases
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    rng = np.random.RandomState(0)
    indices = rng.choice(len(y_train), 10, replace=False)

    # reduced settings for each HIVE-COTE v1 component
    stc_settings = {
        "estimator": RotationForest(n_estimators=3),
        "n_shapelet_samples": 500,
        "max_shapelets": 20,
        "batch_size": 100,
    }
    cboss_settings = {"n_parameter_samples": 25, "max_ensemble_size": 5}
    hc1 = HIVECOTEV1(
        random_state=0,
        stc_params=stc_settings,
        tsf_params={"n_estimators": 10},
        rise_params={"n_estimators": 10},
        cboss_params=cboss_settings,
    )
    hc1.fit(X_train.iloc[indices], y_train[indices])

    # compare with the reference probabilities
    predicted = hc1.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(
        predicted, hivecote_v1_unit_test_probas, decimal=2
    )
def _fit(self, X, y):
    """Fit the TSFresh transform and the RotationForest on cases (X, y).

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Stores the training data dimensions, the fitted transformer and the
    fitted forest on self. The transformed training data is kept in
    ``transformed_data_`` only when ``save_transformed_data`` is set.
    """
    n_instances, n_dims, series_length = X.shape
    self.n_instances_ = n_instances
    self.n_dims_ = n_dims
    self.series_length_ = series_length

    n_threads = self._threads_to_use
    verbosity = self.verbose

    self._rotf = RotationForest(
        n_estimators=self.n_estimators,
        save_transformed_data=self.save_transformed_data,
        n_jobs=n_threads,
        random_state=self.random_state,
    )
    self._tsfresh = TSFreshFeatureExtractor(
        default_fc_parameters=self.default_fc_parameters,
        n_jobs=n_threads,
        chunksize=self.chunksize,
        show_warnings=verbosity > 1,
        disable_progressbar=verbosity < 1,
    )

    # extract features first, then build the forest on the feature table
    features = self._tsfresh.fit_transform(X, y)
    self._rotf.fit(features, y)

    if self.save_transformed_data:
        self.transformed_data_ = features

    return self
def _fit(self, X, y):
    """Fit the shapelet transform and the final estimator on cases (X, y).

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    When ``time_limit_in_minutes`` is positive it is split between the
    transform and the estimator; otherwise a positive
    ``transform_limit_in_minutes`` is used for the transform alone.
    """
    self._n_jobs = check_n_jobs(self.n_jobs)
    self.n_instances, self.n_dims, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

    if self.time_limit_in_minutes > 0:
        # contracting 2/3 transform (with 1/5 of that taken away for final
        # transform), 1/3 classifier
        third = self.time_limit_in_minutes / 3
        self._classifier_limit_in_minutes = third
        self._transform_limit_in_minutes = (third * 2) / 5 * 4
    elif self.transform_limit_in_minutes > 0:
        self._transform_limit_in_minutes = self.transform_limit_in_minutes

    self._transformer = RandomShapeletTransform(
        n_shapelet_samples=self.n_shapelet_samples,
        max_shapelets=self.max_shapelets,
        max_shapelet_length=self.max_shapelet_length,
        time_limit_in_minutes=self._transform_limit_in_minutes,
        contract_max_n_shapelet_samples=self.contract_max_n_shapelet_samples,
        # hand over the validated job count, the same value the estimator gets
        n_jobs=self._n_jobs,
        batch_size=self.batch_size,
        random_state=self.random_state,
    )

    self._estimator = _clone_estimator(
        RotationForest() if self.estimator is None else self.estimator,
        self.random_state,
    )

    if isinstance(self._estimator, RotationForest):
        self._estimator.save_transformed_data = self.save_transformed_data

    # only set these when the estimator exposes a non-None value for them
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._n_jobs

    estimator_limit = getattr(self._estimator, "time_limit_in_minutes", None)
    if estimator_limit is not None and self.time_limit_in_minutes > 0:
        self._estimator.time_limit_in_minutes = self._classifier_limit_in_minutes

    X_t = self._transformer.fit_transform(X, y).to_numpy()

    if self.save_transformed_data:
        self.transformed_data = X_t

    self._estimator.fit(X_t, y)

    return self
def test_contracted_stc_on_unit_test_data():
    """Test of contracted ShapeletTransformClassifier on unit test data.

    Only checks that fitting under a time contract completes; no predictions
    are compared.
    """
    # load unit test data; request the (X, y) tuple explicitly so the
    # unpacking does not depend on the loader's default return type
    X_train, y_train = load_unit_test(split="train", return_X_y=True)

    # train contracted STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(contract_max_n_estimators=3),
        max_shapelets=20,
        time_limit_in_minutes=0.25,
        contract_max_n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
    )
    stc.fit(X_train, y_train)
def _fit(self, X, y):
    """Fit the interval transform and the estimator on cases (X, y).

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Stores the fitted transformer and the fitted estimator on self.
    """
    # fall back to Catch22 when no interval transformers were supplied
    transformers = self.interval_transformers
    if transformers is None:
        transformers = Catch22(outlier_norm=True, replace_nans=True)

    n_threads = self._threads_to_use
    self._transformer = RandomIntervals(
        n_intervals=self.n_intervals,
        transformers=transformers,
        random_state=self.random_state,
        n_jobs=n_threads,
    )

    # fall back to a default RotationForest when no estimator was supplied
    template = self.estimator if self.estimator is not None else RotationForest()
    self._estimator = _clone_estimator(template, self.random_state)

    # only set n_jobs when the estimator exposes a non-None value for it
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = n_threads

    interval_features = self._transformer.fit_transform(X, y)
    self._estimator.fit(interval_features, y)

    return self
def test_stc_on_basic_motions():
    """Check ShapeletTransformClassifier output on the basic motions dataset.

    Fits on 15 fixed training cases and compares the probabilities predicted
    for 10 test cases with the stored reference values.
    """
    # data and the fixed selection of cases
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)
    rng = np.random.RandomState(4)
    indices = rng.choice(len(y_train), 15, replace=False)

    # small STC configuration so the test stays quick
    stc_settings = {
        "estimator": RotationForest(n_estimators=3),
        "max_shapelets": 20,
        "n_shapelet_samples": 500,
        "batch_size": 100,
        "random_state": 0,
    }
    stc = ShapeletTransformClassifier(**stc_settings)
    stc.fit(X_train.iloc[indices], y_train[indices])

    # predict for the first 10 selected positions and compare with the reference
    selected_cases = X_test.iloc[indices[:10]]
    predicted = stc.predict_proba(selected_cases)
    testing.assert_array_equal(predicted, stc_basic_motions_probas)
def test_contracted_hivecote_v2_on_unit_test_data():
    """Test of contracted HIVECOTEV2 on unit test data.

    Only checks that fitting under a time contract completes; no predictions
    are compared.
    """
    # load unit test data; request the (X, y) tuple explicitly so the
    # unpacking does not depend on the loader's default return type
    X_train, y_train = load_unit_test(split="train", return_X_y=True)

    # train contracted HIVE-COTE v2
    hc2 = HIVECOTEV2(
        time_limit_in_minutes=2,
        random_state=0,
        stc_params={
            "estimator": RotationForest(contract_max_n_estimators=3),
            "contract_max_n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        drcif_params={"contract_max_n_estimators": 10},
        arsenal_params={"contract_max_n_estimators": 5},
        tde_params={
            "contract_max_n_parameter_samples": 10,
            "max_ensemble_size": 5,
            "randomly_selected_params": 5,
        },
    )
    hc2.fit(X_train, y_train)
class FreshPRINCE(BaseClassifier):
    """Fresh Pipeline with RotatIoN forest Classifier.

    This classifier simply transforms the input data using the TSFresh [1]_
    transformer with comprehensive features and builds a RotationForest estimator
    using the transformed data.

    Parameters
    ----------
    default_fc_parameters : str, default="comprehensive"
        Set of TSFresh features to be extracted, options are "minimal", "efficient"
        or "comprehensive".
    n_estimators : int, default=200
        Number of estimators for the RotationForest ensemble.
    save_transformed_data : bool, default=False
        Keep the TSFresh-transformed training data from ``fit`` in
        ``transformed_data_``. Required by ``_get_train_probs``.
    verbose : int, default=0
        Level of output printed to the console (for information only)
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    chunksize : int or None, default=None
        Number of series processed in each parallel TSFresh job, should be optimised
        for efficient parallelisation.
    random_state : int or None, default=None
        Seed for random, integer.

    Attributes
    ----------
    n_classes_ : int
        Number of classes. Extracted from the data.
    classes_ : ndarray of shape (n_classes_)
        Holds the label for each class.
    n_instances_ : int
        Number of training cases seen in ``fit``.
    n_dims_ : int
        Number of dimensions per case seen in ``fit``.
    series_length_ : int
        Length of each series seen in ``fit``.
    transformed_data_ : list or transformed training data
        The transformed training data from ``fit``. Only populated when
        ``save_transformed_data`` is True, otherwise an empty list.

    See Also
    --------
    TSFreshFeatureExtractor, TSFreshClassifier, RotationForest

    References
    ----------
    .. [1] Christ, Maximilian, et al. "Time series feature extraction on basis of
        scalable hypothesis tests (tsfresh–a python package)." Neurocomputing 307
        (2018): 72-77.
        https://www.sciencedirect.com/science/article/pii/S0925231218304843

    Examples
    --------
    >>> from sktime.classification.feature_based import FreshPRINCE
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
    >>> clf = FreshPRINCE(
    ...     default_fc_parameters="minimal",
    ...     n_estimators=10,
    ... )
    >>> clf.fit(X_train, y_train)
    FreshPRINCE(...)
    >>> y_pred = clf.predict(X_test)
    """

    _tags = {
        "capability:multivariate": True,
        "capability:multithreading": True,
        "capability:train_estimate": True,
    }

    def __init__(
        self,
        default_fc_parameters="comprehensive",
        n_estimators=200,
        save_transformed_data=False,
        verbose=0,
        n_jobs=1,
        chunksize=None,
        random_state=None,
    ):
        self.default_fc_parameters = default_fc_parameters
        self.n_estimators = n_estimators

        self.save_transformed_data = save_transformed_data
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.chunksize = chunksize
        self.random_state = random_state

        # placeholders, overwritten in _fit
        self.n_instances_ = 0
        self.n_dims_ = 0
        self.series_length_ = 0
        self.transformed_data_ = []

        self._rotf = None
        self._tsfresh = None

        super().__init__()

    def _fit(self, X, y):
        """Fit a pipeline on cases (X,y), where y is the target variable.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # remembered so _get_train_probs can verify it is given the same data
        self.n_instances_, self.n_dims_, self.series_length_ = X.shape

        self._rotf = RotationForest(
            n_estimators=self.n_estimators,
            save_transformed_data=self.save_transformed_data,
            n_jobs=self._threads_to_use,
            random_state=self.random_state,
        )
        self._tsfresh = TSFreshFeatureExtractor(
            default_fc_parameters=self.default_fc_parameters,
            n_jobs=self._threads_to_use,
            chunksize=self.chunksize,
            show_warnings=self.verbose > 1,
            disable_progressbar=self.verbose < 1,
        )

        X_t = self._tsfresh.fit_transform(X, y)
        self._rotf.fit(X_t, y)

        if self.save_transformed_data:
            self.transformed_data_ = X_t

        return self

    def _predict(self, X):
        """Predict class values of n instances in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        return self._rotf.predict(self._tsfresh.transform(X))

    def _predict_proba(self, X):
        """Predict class probabilities for n instances in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        return self._rotf.predict_proba(self._tsfresh.transform(X))

    def _get_train_probs(self, X, y):
        """Return train probability estimates from the fitted RotationForest.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data used in fit. Only its shape is compared with
            the shape recorded in fit; the estimates themselves are produced
            from ``transformed_data_``.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        Whatever ``RotationForest._get_train_probs`` returns for the saved
        transformed training data and ``y``.

        Raises
        ------
        ValueError
            If the shape of X differs from the shape seen in fit, or if the
            classifier was built with ``save_transformed_data=False``.
        """
        self.check_is_fitted()
        X, y = check_X_y(X, y, coerce_to_numpy=True)

        n_instances, n_dims, series_length = X.shape

        if (
            n_instances != self.n_instances_
            or n_dims != self.n_dims_
            or series_length != self.series_length_
        ):
            raise ValueError(
                "n_instances, n_dims, series_length mismatch. X should be "
                "the same as the training data used in fit for generating train "
                "probabilities."
            )

        if not self.save_transformed_data:
            raise ValueError("Currently only works with saved transform data from fit.")

        return self._rotf._get_train_probs(self.transformed_data_, y)
"suppress_warnings": True, "max_p": 2, "max_q": 2, "seasonal": False, }, MultiplexForecaster: { "forecasters": [ ("Naive_mean", NaiveForecaster(strategy="mean")), ("Naive_last", NaiveForecaster(strategy="last")), ("Naive_drift", NaiveForecaster(strategy="drift")), ], "selected_forecaster": "Naive_mean", }, ShapeletTransformClassifier: { "estimator": RotationForest(n_estimators=3), "max_shapelets": 5, "n_shapelet_samples": 50, "batch_size": 20, }, ContractedShapeletTransform: { "time_contract_in_mins": 0.025 }, ShapeletTransform: { "max_shapelets_to_store_per_class": 1, "min_shapelet_length": 3, "max_shapelet_length": 4, }, RandomShapeletTransform: { "max_shapelets": 5, "n_shapelet_samples": 50,
"RocketClassifier - UnitTest", _reproduce_classification_unit_test( RocketClassifier(num_kernels=500, random_state=0) ), ) _print_array( "RocketClassifier - BasicMotions", _reproduce_classification_basic_motions( RocketClassifier(num_kernels=500, random_state=0) ), ) _print_array( "ShapeletTransformClassifier - UnitTest", _reproduce_classification_unit_test( ShapeletTransformClassifier( estimator=RotationForest(n_estimators=3), max_shapelets=20, n_shapelet_samples=500, batch_size=100, random_state=0, ) ), ) _print_array( "ShapeletTransformClassifier - BasicMotions", _reproduce_classification_basic_motions( ShapeletTransformClassifier( estimator=RotationForest(n_estimators=3), max_shapelets=20, n_shapelet_samples=500, batch_size=100,
def _fit(self, X, y):
    """Fit STC to training data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.

    When ``time_limit_in_minutes`` is positive it is split between the
    transform and the estimator; otherwise a positive
    ``transform_limit_in_minutes`` is used for the transform alone.
    """
    self.n_instances_, self.n_dims_, self.series_length_ = X.shape

    if self.time_limit_in_minutes > 0:
        # contracting 2/3 transform (with 1/5 of that taken away for final
        # transform), 1/3 classifier
        third = self.time_limit_in_minutes / 3
        self._classifier_limit_in_minutes = third
        self._transform_limit_in_minutes = (third * 2) / 5 * 4
    elif self.transform_limit_in_minutes > 0:
        self._transform_limit_in_minutes = self.transform_limit_in_minutes

    self._transformer = RandomShapeletTransform(
        n_shapelet_samples=self.n_shapelet_samples,
        max_shapelets=self.max_shapelets,
        max_shapelet_length=self.max_shapelet_length,
        time_limit_in_minutes=self._transform_limit_in_minutes,
        contract_max_n_shapelet_samples=self.contract_max_n_shapelet_samples,
        # same thread count as the estimator below, not the raw n_jobs value
        n_jobs=self._threads_to_use,
        batch_size=self.batch_size,
        random_state=self.random_state,
    )

    self._estimator = _clone_estimator(
        RotationForest() if self.estimator is None else self.estimator,
        self.random_state,
    )

    if isinstance(self._estimator, RotationForest):
        self._estimator.save_transformed_data = self.save_transformed_data

    # only set these when the estimator exposes a non-None value for them
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    estimator_limit = getattr(self._estimator, "time_limit_in_minutes", None)
    if estimator_limit is not None and self.time_limit_in_minutes > 0:
        self._estimator.time_limit_in_minutes = self._classifier_limit_in_minutes

    X_t = self._transformer.fit_transform(X, y).to_numpy()

    if self.save_transformed_data:
        self.transformed_data_ = X_t

    self._estimator.fit(X_t, y)

    return self