def fit(self, X, y):
    """Fit the HIVE-COTE v1.0 ensemble to training data.

    The four components (STC, TSF, RISE and cBOSS) are each fitted on the
    full training set. Every component is then weighted by an estimate of
    its train accuracy raised to the power of 4: STC, TSF and RISE use
    cross-validated predictions from a fresh, identically configured
    estimator, while cBOSS uses its own ``_get_train_probs`` estimate.

    Parameters
    ----------
    X : panel of univariate time series
        Training data; validated by ``check_X_y`` with
        ``enforce_univariate=True`` and ``coerce_to_numpy=True``.
    y : array-like
        The class labels.

    Returns
    -------
    self : object
        Reference to the fitted estimator.
    """
    X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

    # Use 10 folds unless the rarest class has fewer than 10 cases
    cv_size = 10
    _, counts = np.unique(y, return_counts=True)
    min_class = np.min(counts)
    if min_class < cv_size:
        cv_size = min_class

    # STC: fit on all data, weight from cross-validated predictions
    self.stc = ShapeletTransformClassifier(
        random_state=self.random_state,
        time_contract_in_mins=60,
    )
    self.stc.fit(X, y)
    train_preds = cross_val_predict(
        ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        ),
        X=X,
        y=y,
        cv=cv_size,
    )
    self.stc_weight = accuracy_score(y, train_preds) ** 4

    # TSF
    self.tsf = TimeSeriesForest(random_state=self.random_state)
    self.tsf.fit(X, y)
    train_preds = cross_val_predict(
        TimeSeriesForest(random_state=self.random_state),
        X=X,
        y=y,
        cv=cv_size,
    )
    self.tsf_weight = accuracy_score(y, train_preds) ** 4

    # RISE
    self.rise = RandomIntervalSpectralForest(random_state=self.random_state)
    # Fit the RISE component itself; calling self.fit here would recurse
    # without end and leave self.rise unfitted.
    self.rise.fit(X, y)
    train_preds = cross_val_predict(
        RandomIntervalSpectralForest(random_state=self.random_state),
        X=X,
        y=y,
        cv=cv_size,
    )
    self.rise_weight = accuracy_score(y, train_preds) ** 4

    # cBOSS provides its own train estimate, so no cross-validation is needed
    self.cboss = ContractableBOSS(random_state=self.random_state)
    self.cboss.fit(X, y)
    train_probs = self.cboss._get_train_probs(X)
    train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
    self.cboss_weight = accuracy_score(y, train_preds) ** 4

    # NOTE(review): assumes the base class's check_is_fitted() reads
    # `_is_fitted` - confirm against BaseClassifier.
    self._is_fitted = True
    return self
def test_contracted_stc_on_unit_test_data():
    """Check that a time-contracted STC can be fitted on the unit test data."""
    # training split of the unit test problem
    train_X, train_y = load_unit_test(split="train")

    # a small rotation forest and a quarter-minute contract keep the run short
    contracted_kwargs = {
        "estimator": RotationForest(contract_max_n_estimators=3),
        "max_shapelets": 20,
        "time_limit_in_minutes": 0.25,
        "contract_max_n_shapelet_samples": 500,
        "batch_size": 100,
        "random_state": 0,
    }
    classifier = ShapeletTransformClassifier(**contracted_kwargs)

    # the test passes as long as fitting completes without raising
    classifier.fit(train_X, train_y)
def test_stc_on_basic_motions():
    """Compare STC probabilities on basic motions with the stored reference."""
    # both splits of the basic motions problem; test labels are not needed
    train_X, train_y = load_basic_motions(split="train", return_X_y=True)
    test_X, _ = load_basic_motions(split="test", return_X_y=True)

    # fixed-seed subsample of 15 case positions, drawn without replacement
    sampler = np.random.RandomState(4)
    subset = sampler.choice(len(train_y), 15, replace=False)

    # fit a small STC on the subsample
    classifier = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
    )
    classifier.fit(train_X.iloc[subset], train_y[subset])

    # predictions for the first ten sampled positions must equal the reference
    first_ten = subset[:10]
    predicted = classifier.predict_proba(test_X.iloc[first_ten])
    testing.assert_array_equal(predicted, stc_basic_motions_probas)
def test_stc_train_estimate():
    """Check the shape and accuracy of the STC train estimate on unit test data."""
    # training split of the unit test problem
    train_X, train_y = load_unit_test(split="train")

    # tiny configuration; the transformed data is saved during fit
    classifier = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=2),
        max_shapelets=3,
        n_shapelet_samples=10,
        batch_size=5,
        random_state=0,
        save_transformed_data=True,
    )
    classifier.fit(train_X, train_y)

    # one probability row per training case, one column per class
    estimate = classifier._get_train_probs(train_X, train_y)
    assert estimate.shape == (20, 2)

    # the most probable class per case must reach at least 60% accuracy
    most_likely = np.argmax(estimate, axis=1)
    estimated_labels = classifier.classes_[most_likely]
    assert accuracy_score(train_y, estimated_labels) >= 0.6
def test_stc_on_unit_test_data():
    """Check STC probabilities and train estimate on the unit test data."""
    # both splits of the unit test problem; test labels are not needed
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, _ = load_unit_test(split="test", return_X_y=True)

    # fixed-seed choice of 10 case positions, drawn without replacement
    sampler = np.random.RandomState(0)
    subset = sampler.choice(len(train_y), 10, replace=False)

    # fit on the full training split, saving the transformed data
    classifier = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    classifier.fit(train_X, train_y)

    # probabilities for the chosen test cases must equal the stored reference
    predicted = classifier.predict_proba(test_X.iloc[subset])
    testing.assert_array_equal(predicted, stc_unit_test_probas)

    # the train estimate must reach at least 75% accuracy
    estimate = classifier._get_train_probs(train_X, train_y)
    most_likely = np.argmax(estimate, axis=1)
    estimated_labels = classifier.classes_[most_likely]
    assert accuracy_score(train_y, estimated_labels) >= 0.75
def fit(self, X, y):
    """Build the HIVE-COTE v1.0 ensemble from training data.

    STC, TSF, RISE and cBOSS are fitted in that order on the full training
    set. Each component weight is an estimate of its train accuracy raised
    to the power of 4; STC, TSF and RISE obtain that estimate from
    cross-validated predictions of a fresh estimator, cBOSS from its own
    ``_get_train_probs``. With ``verbose > 0`` a timestamp and the weight
    are printed after each step.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, enforce_univariate=True)

    labels, label_counts = np.unique(y, return_counts=True)
    self.n_classes = labels.shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

    # never ask for more folds than the rarest class has cases (max 10)
    n_folds = min(10, np.min(label_counts))

    log_progress = self.verbose > 0

    def timestamp():
        # wall-clock time in the format used by all progress messages
        return datetime.now().strftime("%H:%M:%S %d/%m/%Y")

    # ---- STC ----
    self.stc = ShapeletTransformClassifier(
        **self.stc_params,
        random_state=self.random_state,
    )
    self.stc.fit(X, y)
    if log_progress:
        print("STC ", timestamp())  # noqa

    stc_for_cv = ShapeletTransformClassifier(
        **self.stc_params,
        random_state=self.random_state,
    )
    cv_labels = cross_val_predict(
        stc_for_cv, X=X, y=y, cv=n_folds, n_jobs=self.n_jobs
    )
    self.stc_weight = accuracy_score(y, cv_labels) ** 4
    if log_progress:
        print("STC train estimate ", timestamp())  # noqa
        print("STC weight = " + str(self.stc_weight))  # noqa

    # ---- TSF ----
    self.tsf = TimeSeriesForestClassifier(
        **self.tsf_params,
        random_state=self.random_state,
        n_jobs=self.n_jobs,
    )
    self.tsf.fit(X, y)
    if log_progress:
        print("TSF ", timestamp())  # noqa

    tsf_for_cv = TimeSeriesForestClassifier(
        **self.tsf_params, random_state=self.random_state
    )
    cv_labels = cross_val_predict(
        tsf_for_cv, X=X, y=y, cv=n_folds, n_jobs=self.n_jobs
    )
    self.tsf_weight = accuracy_score(y, cv_labels) ** 4
    if log_progress:
        print("TSF train estimate ", timestamp())  # noqa
        print("TSF weight = " + str(self.tsf_weight))  # noqa

    # ---- RISE ----
    self.rise = RandomIntervalSpectralForest(
        **self.rise_params,
        random_state=self.random_state,
        n_jobs=self.n_jobs,
    )
    self.rise.fit(X, y)
    if log_progress:
        print("RISE ", timestamp())  # noqa

    rise_for_cv = RandomIntervalSpectralForest(
        **self.rise_params,
        random_state=self.random_state,
    )
    cv_labels = cross_val_predict(
        rise_for_cv, X=X, y=y, cv=n_folds, n_jobs=self.n_jobs
    )
    self.rise_weight = accuracy_score(y, cv_labels) ** 4
    if log_progress:
        print("RISE train estimate ", timestamp())  # noqa
        print("RISE weight = " + str(self.rise_weight))  # noqa

    # ---- cBOSS (train estimate comes from the fitted model itself) ----
    self.cboss = ContractableBOSS(
        **self.cboss_params,
        random_state=self.random_state,
        n_jobs=self.n_jobs,
    )
    self.cboss.fit(X, y)
    cboss_probs = self.cboss._get_train_probs(X)
    cboss_labels = self.cboss.classes_[np.argmax(cboss_probs, axis=1)]
    self.cboss_weight = accuracy_score(y, cboss_labels) ** 4
    if log_progress:
        print("cBOSS (estimate included) ", timestamp())  # noqa
        print("cBOSS weight = " + str(self.cboss_weight))  # noqa

    self._is_fitted = True
    return self
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier from its name.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want. Matching is
        case-insensitive.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Returns
    -------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    # All aliases below must be lower case: the lookup key is lower-cased here
    name = cls.lower()

    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    if name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    if name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    if name == "weasel":
        return WEASEL(random_state=resample_id)
    if name == "muse":
        return MUSE(random_state=resample_id)

    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    if name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    # This alias used to be spelled "proximityStump", which a lower-cased
    # name can never equal
    if name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resample_id)
    if name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    if name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    if name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    if name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    if name == "shapedtw":
        return ShapeDTW()

    # Feature based
    if name == "catch22":
        return Catch22Classifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    if name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )

    # Hybrid
    if name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    if name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)

    # Interval based
    if name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    if name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    if name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    if name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    if name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )

    # Kernel based
    if name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    if name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)

    # Shapelet based
    if name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    if name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])

    raise Exception("UNKNOWN CLASSIFIER")
class HIVECOTEV2(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V2.

    An ensemble of the STC, DrCIF, Arsenal and TDE classifiers from different feature
    representations using the CAWPE structure as described in [1]_.

    Parameters
    ----------
    stc_params : dict or None, default=None
        Parameters for the ShapeletTransformClassifier module. If None, uses the
        default parameters with a 2 hour transform contract.
    drcif_params : dict or None, default=None
        Parameters for the DrCIF module. If None, uses the default parameters with
        n_estimators set to 500.
    arsenal_params : dict or None, default=None
        Parameters for the Arsenal module. If None, uses the default parameters.
    tde_params : dict or None, default=None
        Parameters for the TemporalDictionaryEnsemble module. If None, uses the
        default parameters.
    time_limit_in_minutes : int, default=0
        Time contract to limit build time in minutes, overriding
        n_estimators/n_parameter_samples for each component.
        Default of 0 means n_estimators/n_parameter_samples for each component is used.
    save_component_probas : bool, default=False
        When predict/predict_proba is called, save each HIVE-COTEV2 component
        probability predictions in component_probas.
    verbose : int, default=0
        Level of output printed to the console (for information only).
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, default=None
        Seed for random number generation.

    Attributes
    ----------
    n_classes_ : int
        The number of classes.
    classes_ : list
        The unique class labels.
    stc_weight_ : float
        The weight for STC probabilities.
    drcif_weight_ : float
        The weight for DrCIF probabilities.
    arsenal_weight_ : float
        The weight for Arsenal probabilities.
    tde_weight_ : float
        The weight for TDE probabilities.
    component_probas : dict
        Only used if save_component_probas is true. Saved probability predictions for
        each HIVE-COTEV2 component.

    See Also
    --------
    HIVECOTEV1, ShapeletTransformClassifier, DrCIF, Arsenal, TemporalDictionaryEnsemble

    Notes
    -----
    The ``*_params`` dicts are copied at fit time and are never modified.

    For the Java version, see
    `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java`_.

    References
    ----------
    .. [1] Middlehurst, Matthew, James Large, Michael Flynn, Jason Lines, Aaron Bostrom,
       and Anthony Bagnall. "HIVE-COTE 2.0: a new meta ensemble for time series
       classification." Machine Learning (2021).

    Examples
    --------
    >>> from sktime.classification.hybrid import HIVECOTEV2
    >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
    >>> clf = HIVECOTEV2(
    ...     stc_params={
    ...         "estimator": RotationForest(n_estimators=3),
    ...         "n_shapelet_samples": 500,
    ...         "max_shapelets": 20,
    ...         "batch_size": 100,
    ...     },
    ...     drcif_params={"n_estimators": 10},
    ...     arsenal_params={"num_kernels": 100, "n_estimators": 5},
    ...     tde_params={
    ...         "n_parameter_samples": 25,
    ...         "max_ensemble_size": 5,
    ...         "randomly_selected_params": 10,
    ...     },
    ... )
    >>> clf.fit(X_train, y_train)
    HIVECOTEV2(...)
    >>> y_pred = clf.predict(X_test)
    """

    _tags = {
        "capability:multivariate": True,
        "capability:contractable": True,
        "capability:multithreading": True,
    }

    def __init__(
        self,
        stc_params=None,
        drcif_params=None,
        arsenal_params=None,
        tde_params=None,
        time_limit_in_minutes=0,
        save_component_probas=False,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        self.stc_params = stc_params
        self.drcif_params = drcif_params
        self.arsenal_params = arsenal_params
        self.tde_params = tde_params

        self.time_limit_in_minutes = time_limit_in_minutes

        self.save_component_probas = save_component_probas
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.stc_weight_ = 0
        self.drcif_weight_ = 0
        self.arsenal_weight_ = 0
        self.tde_weight_ = 0
        self.component_probas = {}

        # Placeholders only; _fit rebuilds these as private copies
        self._stc_params = stc_params
        self._drcif_params = drcif_params
        self._arsenal_params = arsenal_params
        self._tde_params = tde_params
        self._stc = None
        self._drcif = None
        self._arsenal = None
        self._tde = None

        super(HIVECOTEV2, self).__init__()

    @staticmethod
    def _resolve_params(params, default):
        """Return a private copy of ``params``, or ``default`` if it is None."""
        return default if params is None else dict(params)

    def _fit_component(self, label, estimator, X, y, **train_probs_kwargs):
        """Fit one component and return its weight (train accuracy ** 4).

        Parameters
        ----------
        label : str
            Component name used in the verbose console output.
        estimator : object
            Unfitted component; must provide ``fit``, ``classes_`` and
            ``_get_train_probs``.
        X, y : training data and class labels, as passed to ``_fit``.
        **train_probs_kwargs
            Extra keyword arguments forwarded to ``_get_train_probs``.
        """
        estimator.fit(X, y)

        if self.verbose > 0:
            print(label + " ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Weight from the component's own train set estimate
        train_probs = estimator._get_train_probs(X, y, **train_probs_kwargs)
        train_preds = estimator.classes_[np.argmax(train_probs, axis=1)]
        weight = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                label + " train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print(label + " weight = " + str(weight))  # noqa

        return weight

    def _fit(self, X, y):
        """Fit HIVE-COTE 2.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Default values from HC2 paper. Always work on private copies built
        # from the public hyper-parameters: the contract time written below
        # must not leak into the caller's dicts, and copies taken in __init__
        # would go stale after set_params.
        self._stc_params = self._resolve_params(
            self.stc_params, {"transform_limit_in_minutes": 120}
        )
        self._drcif_params = self._resolve_params(
            self.drcif_params, {"n_estimators": 500}
        )
        self._arsenal_params = self._resolve_params(self.arsenal_params, {})
        self._tde_params = self._resolve_params(self.tde_params, {})

        # If we are contracting split the contract time between each algorithm
        if self.time_limit_in_minutes > 0:
            # Leave 1/3 for train estimates
            ct = self.time_limit_in_minutes / 6
            self._stc_params["time_limit_in_minutes"] = ct
            self._drcif_params["time_limit_in_minutes"] = ct
            self._arsenal_params["time_limit_in_minutes"] = ct
            self._tde_params["time_limit_in_minutes"] = ct

        # Build STC and find its weight
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self.stc_weight_ = self._fit_component("STC", self._stc, X, y)

        # Build DrCIF and find its weight
        self._drcif = DrCIF(
            **self._drcif_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self.drcif_weight_ = self._fit_component("DrCIF", self._drcif, X, y)

        # Build Arsenal and find its weight
        self._arsenal = Arsenal(
            **self._arsenal_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self.arsenal_weight_ = self._fit_component("Arsenal", self._arsenal, X, y)

        # Build TDE and find its weight; TDE is asked for a "loocv" estimate
        self._tde = TemporalDictionaryEnsemble(
            **self._tde_params,
            save_train_predictions=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self.tde_weight_ = self._fit_component(
            "TDE", self._tde, X, y, train_estimate_method="loocv"
        )

        return self

    def _predict(self, X):
        """Predicts labels for sequences in X.

        Ties between equally probable classes are broken at random using
        ``random_state``.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        rng = check_random_state(self.random_state)
        return np.array(
            [
                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
                for prob in self.predict_proba(X)
            ]
        )

    def _predict_proba(self, X, return_component_probas=False):
        """Predicts labels probabilities for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.
        return_component_probas : bool, default=False
            Currently unused; kept for interface compatibility. Use the
            ``save_component_probas`` constructor parameter instead.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        dists = np.zeros((X.shape[0], self.n_classes_))

        # Call predict proba on each classifier, multiply the probabilities by the
        # classifiers weight then add them to the current HC2 probabilities
        stc_probas = self._stc.predict_proba(X)
        dists = dists + stc_probas * self.stc_weight_

        drcif_probas = self._drcif.predict_proba(X)
        dists = dists + drcif_probas * self.drcif_weight_

        arsenal_probas = self._arsenal.predict_proba(X)
        dists = dists + arsenal_probas * self.arsenal_weight_

        tde_probas = self._tde.predict_proba(X)
        dists = dists + tde_probas * self.tde_weight_

        if self.save_component_probas:
            self.component_probas = {
                "STC": stc_probas,
                "DrCIF": drcif_probas,
                "Arsenal": arsenal_probas,
                "TDE": tde_probas,
            }

        # Make each instances probability array sum to 1 and return
        return dists / dists.sum(axis=1, keepdims=True)
def _fit(self, X, y):
    """Fit HIVE-COTE 2.0 to training data.

    Each component (STC, DrCIF, Arsenal, TDE) is fitted on the training
    data and weighted by its train set accuracy estimate raised to the
    power of 4.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True. The public ``*_params``
    dicts are copied, never modified.
    """
    # Default values from HC2 paper. Always rebuild the private parameter
    # dicts as copies of the public hyper-parameters: the contract time
    # written below must not leak into the caller's dicts, and a dict kept
    # from construction time would go stale after set_params.
    if self.stc_params is None:
        self._stc_params = {"transform_limit_in_minutes": 120}
    else:
        self._stc_params = dict(self.stc_params)
    if self.drcif_params is None:
        self._drcif_params = {"n_estimators": 500}
    else:
        self._drcif_params = dict(self.drcif_params)
    if self.arsenal_params is None:
        self._arsenal_params = {}
    else:
        self._arsenal_params = dict(self.arsenal_params)
    if self.tde_params is None:
        self._tde_params = {}
    else:
        self._tde_params = dict(self.tde_params)

    # If we are contracting split the contract time between each algorithm
    if self.time_limit_in_minutes > 0:
        # Leave 1/3 for train estimates
        ct = self.time_limit_in_minutes / 6
        self._stc_params["time_limit_in_minutes"] = ct
        self._drcif_params["time_limit_in_minutes"] = ct
        self._arsenal_params["time_limit_in_minutes"] = ct
        self._tde_params["time_limit_in_minutes"] = ct

    # Build STC
    self._stc = ShapeletTransformClassifier(
        **self._stc_params,
        save_transformed_data=True,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._stc.fit(X, y)

    if self.verbose > 0:
        print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find STC weight using train set estimate
    train_probs = self._stc._get_train_probs(X, y)
    train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
    self.stc_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "STC train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("STC weight = " + str(self.stc_weight_))  # noqa

    # Build DrCIF
    self._drcif = DrCIF(
        **self._drcif_params,
        save_transformed_data=True,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._drcif.fit(X, y)

    if self.verbose > 0:
        print("DrCIF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find DrCIF weight using train set estimate
    train_probs = self._drcif._get_train_probs(X, y)
    train_preds = self._drcif.classes_[np.argmax(train_probs, axis=1)]
    self.drcif_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "DrCIF train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("DrCIF weight = " + str(self.drcif_weight_))  # noqa

    # Build Arsenal
    self._arsenal = Arsenal(
        **self._arsenal_params,
        save_transformed_data=True,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._arsenal.fit(X, y)

    if self.verbose > 0:
        print("Arsenal ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find Arsenal weight using train set estimate
    train_probs = self._arsenal._get_train_probs(X, y)
    train_preds = self._arsenal.classes_[np.argmax(train_probs, axis=1)]
    self.arsenal_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "Arsenal train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("Arsenal weight = " + str(self.arsenal_weight_))  # noqa

    # Build TDE
    self._tde = TemporalDictionaryEnsemble(
        **self._tde_params,
        save_train_predictions=True,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._tde.fit(X, y)

    if self.verbose > 0:
        print("TDE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find TDE weight using train set estimate
    train_probs = self._tde._get_train_probs(X, y, train_estimate_method="loocv")
    train_preds = self._tde.classes_[np.argmax(train_probs, axis=1)]
    self.tde_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "TDE train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("TDE weight = " + str(self.tde_weight_))  # noqa

    return self
RocketClassifier(num_kernels=500, random_state=0) ), ) _print_array( "RocketClassifier - BasicMotions", _reproduce_classification_basic_motions( RocketClassifier(num_kernels=500, random_state=0) ), ) _print_array( "ShapeletTransformClassifier - UnitTest", _reproduce_classification_unit_test( ShapeletTransformClassifier( estimator=RotationForest(n_estimators=3), max_shapelets=20, n_shapelet_samples=500, batch_size=100, random_state=0, ) ), ) _print_array( "ShapeletTransformClassifier - BasicMotions", _reproduce_classification_basic_motions( ShapeletTransformClassifier( estimator=RotationForest(n_estimators=3), max_shapelets=20, n_shapelet_samples=500, batch_size=100, random_state=0, )
"RocketClassifier - UnitTest", _reproduce_classification_unit_test( RocketClassifier(num_kernels=100, random_state=0)), ) _print_array( "RocketClassifier - BasicMotions", _reproduce_classification_basic_motions( RocketClassifier(num_kernels=100, random_state=0)), ) _print_array( "ShapeletTransformClassifier - UnitTest", _reproduce_classification_unit_test( ShapeletTransformClassifier( estimator=RandomForestClassifier(n_estimators=5), n_shapelet_samples=50, max_shapelets=10, batch_size=10, random_state=0, )), ) _print_array( "ShapeletTransformClassifier - BasicMotions", _reproduce_classification_basic_motions( ShapeletTransformClassifier( estimator=RandomForestClassifier(n_estimators=5), n_shapelet_samples=50, max_shapelets=10, batch_size=10, random_state=0, )), )
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different
    feature representations using the CAWPE structure, as described in [1]_.

    Parameters
    ----------
    random_state : int or None, default=None
        Seed for random number generation; None means no seed.

    Attributes
    ----------
    n_classes : int
        The number of classes, extracted from the data.
    classes_ : list
        The unique class labels.
    stc_weight, tsf_weight, rise_weight, cboss_weight : float
        The weight applied to each component's probabilities, set by ``fit``.

    Notes
    -----
    Java version:
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java

    References
    ----------
    .. [1] Bagnall, Anthony and Flynn, Michael and Large, James and Lines, Jason
       and Middlehurst, Matthew. "On the Usage and Performance of The
       Hierarchical Vote Collective of Transformation-based Ensembles
       version 1.0 (HIVE-COTE 1.0)." arXiv preprint arXiv:2004.06069 (2020).
    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
    }

    def __init__(
        self,
        random_state=None,
    ):
        self.random_state = random_state

        # Fitted components, created in fit
        self.stc = None
        self.tsf = None
        self.rise = None
        self.cboss = None

        # Component weights, estimated in fit
        self.stc_weight = 0
        self.tsf_weight = 0
        self.rise_weight = 0
        self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        """Fit the HIVE-COTE v1.0 ensemble to training data.

        Each component is fitted on the full training set and weighted by an
        estimate of its train accuracy raised to the power of 4. STC, TSF and
        RISE use cross-validated predictions of a fresh estimator; cBOSS uses
        its own ``_get_train_probs`` estimate.

        Parameters
        ----------
        X : panel of univariate time series
            Training data; validated by ``check_X_y`` with
            ``enforce_univariate=True`` and ``coerce_to_numpy=True``.
        y : array-like
            The class labels.

        Returns
        -------
        self : object
            Reference to the fitted estimator.
        """
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        # Use 10 folds unless the rarest class has fewer than 10 cases
        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        # STC
        self.stc = ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        )
        self.stc.fit(X, y)
        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                random_state=self.random_state,
                time_contract_in_mins=60,
            ),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.stc_weight = accuracy_score(y, train_preds) ** 4

        # TSF
        self.tsf = TimeSeriesForest(random_state=self.random_state)
        self.tsf.fit(X, y)
        train_preds = cross_val_predict(
            TimeSeriesForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.tsf_weight = accuracy_score(y, train_preds) ** 4

        # RISE
        self.rise = RandomIntervalSpectralForest(random_state=self.random_state)
        # Fit the RISE component itself; calling self.fit here would recurse
        # without end and leave self.rise unfitted.
        self.rise.fit(X, y)
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.rise_weight = accuracy_score(y, train_preds) ** 4

        # cBOSS provides its own train estimate, so no cross-validation
        self.cboss = ContractableBOSS(random_state=self.random_state)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds) ** 4

        # predict_proba calls check_is_fitted() before using the components.
        # NOTE(review): assumes the base class's check reads `_is_fitted` -
        # confirm against BaseClassifier.
        self._is_fitted = True
        return self

    def predict(self, X):
        """Predict class labels for the series in X.

        Ties between equally probable classes are broken at random using
        ``random_state``.

        Parameters
        ----------
        X : panel of univariate time series
            The data to make predictions for.

        Returns
        -------
        y : np.ndarray
            Predicted class labels, one per instance.
        """
        rng = check_random_state(self.random_state)
        return np.array(
            [
                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
                for prob in self.predict_proba(X)
            ]
        )

    def predict_proba(self, X):
        """Predict class probabilities for the series in X.

        The component probabilities are multiplied by the component weights,
        summed, and each row is normalised to sum to 1.

        Parameters
        ----------
        X : panel of univariate time series
            The data to make predictions for.

        Returns
        -------
        dists : np.ndarray of shape [n_instances, n_classes]
            Predicted probabilities.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)

        dists = np.zeros((X.shape[0], self.n_classes))

        dists = np.add(
            dists,
            self.stc.predict_proba(X) * (np.ones(self.n_classes) * self.stc_weight),
        )
        dists = np.add(
            dists,
            self.tsf.predict_proba(X) * (np.ones(self.n_classes) * self.tsf_weight),
        )
        dists = np.add(
            dists,
            self.rise.predict_proba(X) * (np.ones(self.n_classes) * self.rise_weight),
        )
        dists = np.add(
            dists,
            self.cboss.predict_proba(X)
            * (np.ones(self.n_classes) * self.cboss_weight),
        )

        # Make each instance's probability row sum to 1
        return dists / dists.sum(axis=1, keepdims=True)
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    An ensemble of the STC, TSF, RISE and cBOSS classifiers from different feature
    representations using the CAWPE structure as described in [1]_.

    Parameters
    ----------
    stc_params : dict or None, default=None
        Parameters for the ShapeletTransformClassifier module. If None, uses the
        default parameters with a 2 hour transform contract.
    tsf_params : dict or None, default=None
        Parameters for the TimeSeriesForestClassifier module. If None, uses the
        default parameters with n_estimators set to 500.
    rise_params : dict or None, default=None
        Parameters for the RandomIntervalSpectralForest module. If None, uses the
        default parameters with n_estimators set to 500.
    cboss_params : dict or None, default=None
        Parameters for the ContractableBOSS module. If None, uses the default
        parameters.
    verbose : int, default=0
        Level of output printed to the console (for information only).
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, default=None
        Seed for random number generation.

    Attributes
    ----------
    n_classes_ : int
        The number of classes.
    classes_ : list
        The unique class labels.
    stc_weight_ : float
        The weight for STC probabilities.
    tsf_weight_ : float
        The weight for TSF probabilities.
    rise_weight_ : float
        The weight for RISE probabilities.
    cboss_weight_ : float
        The weight for cBOSS probabilities.

    See Also
    --------
    HIVECOTEV2, ShapeletTransformClassifier, TimeSeriesForestClassifier,
    RandomIntervalSpectralForest, ContractableBOSS

    Notes
    -----
    For the Java version, see
    `https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java`_.

    References
    ----------
    .. [1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and
       Matthew Middlehurst. "On the usage and performance of the Hierarchical Vote
       Collective of Transformation-based Ensembles version 1.0 (hive-cote v1.0)"
       International Workshop on Advanced Analytics and Learning on Temporal Data
       2020

    Examples
    --------
    >>> from sktime.classification.hybrid import HIVECOTEV1
    >>> from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
    >>> from sktime.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
    >>> clf = HIVECOTEV1(
    ...     stc_params={
    ...         "estimator": RotationForest(n_estimators=3),
    ...         "n_shapelet_samples": 500,
    ...         "max_shapelets": 20,
    ...         "batch_size": 100,
    ...     },
    ...     tsf_params={"n_estimators": 10},
    ...     rise_params={"n_estimators": 10},
    ...     cboss_params={"n_parameter_samples": 25, "max_ensemble_size": 5},
    ... )
    >>> clf.fit(X_train, y_train)
    HIVECOTEV1(...)
    >>> y_pred = clf.predict(X_test)
    """

    _tags = {
        "capability:multithreading": True,
    }

    def __init__(
        self,
        stc_params=None,
        tsf_params=None,
        rise_params=None,
        cboss_params=None,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        self.stc_params = stc_params
        self.tsf_params = tsf_params
        self.rise_params = rise_params
        self.cboss_params = cboss_params

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        self.stc_weight_ = 0
        self.tsf_weight_ = 0
        self.rise_weight_ = 0
        self.cboss_weight_ = 0

        # Resolved per-component settings; recomputed at the start of _fit.
        self._stc_params = stc_params
        self._tsf_params = tsf_params
        self._rise_params = rise_params
        self._cboss_params = cboss_params

        # Fitted component classifiers (populated by _fit).
        self._stc = None
        self._tsf = None
        self._rise = None
        self._cboss = None

        super(HIVECOTEV1, self).__init__()

    def _fit(self, X, y):
        """Fit HIVE-COTE 1.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Resolve the component settings from the public parameters on every
        # fit, so a change made through set_params after construction (or
        # after an earlier fit) is not ignored in favour of stale values.
        # The defaults are the values from the HC1 paper.
        self._stc_params = (
            {"transform_limit_in_minutes": 120}
            if self.stc_params is None
            else self.stc_params
        )
        self._tsf_params = (
            {"n_estimators": 500} if self.tsf_params is None else self.tsf_params
        )
        self._rise_params = (
            {"n_estimators": 500} if self.rise_params is None else self.rise_params
        )
        self._cboss_params = {} if self.cboss_params is None else self.cboss_params

        # Cross-validation size for TSF and RISE
        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build TSF
        self._tsf = TimeSeriesForestClassifier(
            **self._tsf_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find TSF weight using train set estimate found through CV
        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(
                **self._tsf_params, random_state=self.random_state
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self._threads_to_use,
        )
        self.tsf_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight_))  # noqa

        # Build RISE
        self._rise = RandomIntervalSpectralEnsemble(
            **self._rise_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find RISE weight using train set estimate found through CV
        train_preds = cross_val_predict(
            RandomIntervalSpectralEnsemble(
                **self._rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self._threads_to_use,
        )
        self.rise_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight_))  # noqa

        # Build cBOSS
        self._cboss = ContractableBOSS(
            **self._cboss_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._cboss.fit(X, y)

        # Find cBOSS weight using train set estimate
        train_probs = self._cboss._get_train_probs(X, y)
        train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included)",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight_))  # noqa

        return self

    def _predict(self, X):
        """Predicts labels for sequences in X.

        The label with the highest ensemble probability is returned; ties are
        broken at random using a generator seeded with ``random_state``.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted class labels.
        """
        rng = check_random_state(self.random_state)
        return np.array([
            self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
            for prob in self.predict_proba(X)
        ])

    def _predict_proba(self, X):
        """Predicts labels probabilities for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The data to make predict probabilities for.

        Returns
        -------
        y : array-like, shape = [n_instances, n_classes_]
            Predicted probabilities using the ordering in classes_.
        """
        dists = np.zeros((X.shape[0], self.n_classes_))

        # Call predict proba on each classifier, multiply the probabilities by the
        # classifiers weight then add them to the current HC1 probabilities
        dists = np.add(
            dists,
            self._stc.predict_proba(X)
            * (np.ones(self.n_classes_) * self.stc_weight_),
        )
        dists = np.add(
            dists,
            self._tsf.predict_proba(X)
            * (np.ones(self.n_classes_) * self.tsf_weight_),
        )
        dists = np.add(
            dists,
            self._rise.predict_proba(X)
            * (np.ones(self.n_classes_) * self.rise_weight_),
        )
        dists = np.add(
            dists,
            self._cboss.predict_proba(X)
            * (np.ones(self.n_classes_) * self.cboss_weight_),
        )

        # Make each instances probability array sum to 1 and return
        return dists / dists.sum(axis=1, keepdims=True)
def _fit(self, X, y):
    """Fit HIVE-COTE 1.0 to training data.

    Each component (STC, TSF, RISE, cBOSS) is fitted on the full training
    data and given a weight equal to an estimate of its train accuracy
    raised to the power of 4. STC and cBOSS use their own
    ``_get_train_probs`` estimate; TSF and RISE use cross-validated
    predictions from a fresh estimator.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    # Resolve the component settings from the public parameters on every
    # fit, so a change made through set_params after construction (or after
    # an earlier fit) is not ignored in favour of stale values. The defaults
    # are the values from the HC1 paper.
    self._stc_params = (
        {"transform_limit_in_minutes": 120}
        if self.stc_params is None
        else self.stc_params
    )
    self._tsf_params = (
        {"n_estimators": 500} if self.tsf_params is None else self.tsf_params
    )
    self._rise_params = (
        {"n_estimators": 500} if self.rise_params is None else self.rise_params
    )
    self._cboss_params = {} if self.cboss_params is None else self.cboss_params

    # Cross-validation size for TSF and RISE
    cv_size = 10
    _, counts = np.unique(y, return_counts=True)
    min_class = np.min(counts)
    if min_class < cv_size:
        cv_size = min_class

    # Build STC
    self._stc = ShapeletTransformClassifier(
        **self._stc_params,
        save_transformed_data=True,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._stc.fit(X, y)

    if self.verbose > 0:
        print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find STC weight using train set estimate
    train_probs = self._stc._get_train_probs(X, y)
    train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
    self.stc_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "STC train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("STC weight = " + str(self.stc_weight_))  # noqa

    # Build TSF
    self._tsf = TimeSeriesForestClassifier(
        **self._tsf_params,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._tsf.fit(X, y)

    if self.verbose > 0:
        print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find TSF weight using train set estimate found through CV
    train_preds = cross_val_predict(
        TimeSeriesForestClassifier(
            **self._tsf_params, random_state=self.random_state
        ),
        X=X,
        y=y,
        cv=cv_size,
        n_jobs=self._threads_to_use,
    )
    self.tsf_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "TSF train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("TSF weight = " + str(self.tsf_weight_))  # noqa

    # Build RISE
    self._rise = RandomIntervalSpectralEnsemble(
        **self._rise_params,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._rise.fit(X, y)

    if self.verbose > 0:
        print("RISE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

    # Find RISE weight using train set estimate found through CV
    train_preds = cross_val_predict(
        RandomIntervalSpectralEnsemble(
            **self._rise_params,
            random_state=self.random_state,
        ),
        X=X,
        y=y,
        cv=cv_size,
        n_jobs=self._threads_to_use,
    )
    self.rise_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "RISE train estimate ",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("RISE weight = " + str(self.rise_weight_))  # noqa

    # Build cBOSS
    self._cboss = ContractableBOSS(
        **self._cboss_params,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )
    self._cboss.fit(X, y)

    # Find cBOSS weight using train set estimate
    train_probs = self._cboss._get_train_probs(X, y)
    train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)]
    self.cboss_weight_ = accuracy_score(y, train_preds) ** 4

    if self.verbose > 0:
        print(  # noqa
            "cBOSS (estimate included)",
            datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
        )
        print("cBOSS weight = " + str(self.cboss_weight_))  # noqa

    return self
class HIVECOTEV1(BaseClassifier):
    """Hierarchical Vote Collective of Transformation-based Ensembles (HIVE-COTE) V1.

    Combines the STC, TSF, RISE and cBOSS classifiers, each built on a
    different feature representation, using the CAWPE structure described
    in [1].

    Parameters
    ----------
    stc_params : dict or None, optional (default=None)
        Keyword arguments for ShapeletTransformClassifier. None is replaced
        by ``{"n_estimators": 500}``.
    tsf_params : dict or None, optional (default=None)
        Keyword arguments for TimeSeriesForestClassifier. None is replaced
        by ``{"n_estimators": 500}``.
    rise_params : dict or None, optional (default=None)
        Keyword arguments for RandomIntervalSpectralForest. None is replaced
        by ``{"n_estimators": 500}``.
    cboss_params : dict or None, optional (default=None)
        Keyword arguments for ContractableBOSS. None is replaced by ``{}``.
    verbose : int, optional (default=0)
        Level of output printed to the console (for information only).
    n_jobs : int, optional (default=1)
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int or None, optional (default=None)
        Seed for random number generation. None means no seed.

    Attributes
    ----------
    n_classes : int
        Number of distinct class labels, extracted from the data in ``fit``.
    classes_ : array-like
        Class labels extracted from the training labels in ``fit``.

    Notes
    -----
    .. [1] Anthony Bagnall, Michael Flynn, James Large, Jason Lines and
       Matthew Middlehurst.
       "On the usage and performance of the Hierarchical Vote Collective of
       Transformation-based Ensembles version 1.0 (hive-cote v1.0)"
       International Workshop on Advanced Analytics and Learning on Temporal
       Data 2020

    Java version
    https://github.com/uea-machine-learning/tsml/blob/master/src/main/java/
    tsml/classifiers/hybrids/HIVE_COTE.java
    """

    # Capability tags
    capabilities = {
        "multivariate": False,
        "unequal_length": False,
        "missing_values": False,
        "train_estimate": False,
        "contractable": False,
    }

    def __init__(
        self,
        stc_params=None,
        tsf_params=None,
        rise_params=None,
        cboss_params=None,
        verbose=0,
        n_jobs=1,
        random_state=None,
    ):
        # Substitute the default settings for any component left as None.
        self.stc_params = {"n_estimators": 500} if stc_params is None else stc_params
        self.tsf_params = {"n_estimators": 500} if tsf_params is None else tsf_params
        self.rise_params = (
            {"n_estimators": 500} if rise_params is None else rise_params
        )
        self.cboss_params = {} if cboss_params is None else cboss_params

        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        # Fitted components and their voting weights, populated by fit.
        self.stc = self.tsf = self.rise = self.cboss = None
        self.stc_weight = self.tsf_weight = 0
        self.rise_weight = self.cboss_weight = 0

        self.n_classes = 0
        self.classes_ = []

        super(HIVECOTEV1, self).__init__()

    def fit(self, X, y):
        """Fit a HIVE-COTEv1.0 classifier.

        Every component is trained on the full data. Its weight is an
        estimate of its train accuracy raised to the power of 4, obtained by
        cross-validation for STC, TSF and RISE and from cBOSS's own
        ``_get_train_probs`` for cBOSS.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        # At most 10 folds, fewer when the rarest class has under 10 cases.
        _, class_counts = np.unique(y, return_counts=True)
        cv_size = min(10, np.min(class_counts))

        # --- STC ---
        self.stc = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        self.stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        stc_for_cv = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        fold_preds = cross_val_predict(
            stc_for_cv, X=X, y=y, cv=cv_size, n_jobs=self.n_jobs
        )
        self.stc_weight = accuracy_score(y, fold_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight))  # noqa

        # --- TSF ---
        self.tsf = TimeSeriesForestClassifier(
            **self.tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        tsf_for_cv = TimeSeriesForestClassifier(
            **self.tsf_params, random_state=self.random_state
        )
        fold_preds = cross_val_predict(
            tsf_for_cv, X=X, y=y, cv=cv_size, n_jobs=self.n_jobs
        )
        self.tsf_weight = accuracy_score(y, fold_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight))  # noqa

        # --- RISE ---
        self.rise = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        rise_for_cv = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
        )
        fold_preds = cross_val_predict(
            rise_for_cv, X=X, y=y, cv=cv_size, n_jobs=self.n_jobs
        )
        self.rise_weight = accuracy_score(y, fold_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight))  # noqa

        # --- cBOSS (supplies its own train estimate) ---
        self.cboss = ContractableBOSS(
            **self.cboss_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.cboss.fit(X, y)
        cboss_probs = self.cboss._get_train_probs(X)
        cboss_preds = self.cboss.classes_[np.argmax(cboss_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, cboss_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included) ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight))  # noqa

        self._is_fitted = True
        return self

    def predict(self, X):
        """Make predictions for all cases in X.

        For each case the class with the highest ensemble probability is
        chosen; ties are broken at random with a generator seeded from
        ``random_state``.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        rng = check_random_state(self.random_state)
        labels = []
        for case_probs in self.predict_proba(X):
            best = np.flatnonzero(case_probs == case_probs.max())
            labels.append(self.classes_[int(rng.choice(best))])
        return np.array(labels)

    def predict_proba(self, X):
        """Make class probability estimates on each case in X.

        Adds up the component probabilities, each scaled by the component's
        weight, and normalises every row to sum to 1.

        Parameters
        ----------
        X - pandas dataframe of testing data of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances, num_classes] of
        probabilities
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        weighted_components = (
            (self.stc, self.stc_weight),
            (self.tsf, self.tsf_weight),
            (self.rise, self.rise_weight),
            (self.cboss, self.cboss_weight),
        )

        dists = np.zeros((X.shape[0], self.n_classes))
        for component, weight in weighted_components:
            scale = np.ones(self.n_classes) * weight
            dists = np.add(dists, component.predict_proba(X) * scale)

        row_totals = dists.sum(axis=1, keepdims=True)
        return dists / row_totals
def set_classifier(cls, resampleId=None):
    """Create a classifier with default settings from its name.

    Basic way of creating the classifier to build using the default
    settings. This set up is to help with batch jobs for multiple problems
    to facilitate easy reproducibility. You can set up bespoke classifiers
    in many other ways.

    Parameters
    ----------
    cls : str
        Name (or short alias) of the classifier to build. Matching is
        case-insensitive.
    resampleId : int or None, optional (default=None)
        Passed as ``random_state`` to the classifiers that are constructed
        with a seed here.

    Returns
    -------
    A classifier instance.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    name = cls.lower()

    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resampleId)
    if name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resampleId)
    # The alias must be lower case: ``name`` has been lower-cased, so a
    # mixed-case literal such as "proximityStump" could never match.
    if name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resampleId)
    if name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    if name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    if name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    if name in ("ee", "elasticensemble"):
        return ElasticEnsemble()
    if name == "shapedtw":
        return ShapeDTW()

    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resampleId)
    if name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resampleId)
    if name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(random_state=resampleId)
    if name == "weasel":
        return WEASEL(random_state=resampleId)
    if name == "muse":
        return MUSE(random_state=resampleId)

    # Interval based
    if name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralForest(random_state=resampleId)
    if name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resampleId)
    if name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resampleId)
    if name == "drcif":
        return DrCIF(random_state=resampleId)

    # Shapelet based
    if name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    if name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    if name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    if name == "arsenal":
        return Arsenal(random_state=resampleId)

    # Hybrid
    if name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    if name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)

    raise Exception("UNKNOWN CLASSIFIER")
def build_model(dataset, pipeline, experiment, current_target='class', test_size=0.3):
    """Fit and evaluate a ShapeletTransformClassifier on the BTC entry of a dataset.

    Creates the experiment's ``models`` and ``reports`` directories under
    ``./results/<dataset>_<pipeline>_<experiment>/``, sets up logging to
    ``model_build.log`` in that folder, loads the dataset index and, for the
    ``'BTC'`` entry only, fits a classifier on a chronological train split
    and prints its score and classification report for the remaining rows.

    Parameters
    ----------
    dataset : str
        Dataset name. If it contains a ``'.'``, the part before the first dot
        is used as the dataset name and the part after it as the index name;
        otherwise the index name is ``'index'``.
    pipeline : str
        Pipeline name; only used in paths and log messages here.
    experiment : str
        Experiment name; only used in paths and log messages here.
    current_target : str, optional (default='class')
        Column of the target CSV to predict.
    test_size : float, optional (default=0.3)
        Fraction of rows, taken from the end, that is held out for testing.

    Returns
    -------
    reports : ReportCollection
        The collection created for this experiment. Nothing in this function
        adds to it after construction.
    """
    results_dir = './results/{}_{}_{}'.format(dataset, pipeline, experiment)
    models_dir = results_dir + '/models/'
    reports_dir = results_dir + '/reports/'
    log_file = results_dir + '/model_build.log'

    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(reports_dir, exist_ok=True)

    # Setup logging
    logger.setup(filename=log_file,
                 filemode='w',
                 root_level=logging.DEBUG,
                 log_level=logging.DEBUG,
                 logger='build_model')

    # A dotted dataset name selects a specific index: "<dataset>.<index>".
    index_name = 'index'
    if '.' in dataset:
        splits = dataset.split(".")
        dataset = splits[0]
        index_name = splits[1]

    # Load the dataset index
    dataset_index = load_dataset(dataset, return_index=True, index_name=index_name)

    logger.info('Start experiment: {} using {} on {} with target {}'.format(
        experiment, pipeline, dataset, current_target))

    reports = ReportCollection(dataset, pipeline, experiment)

    # Only the BTC symbol is processed.
    for _sym, data in {'BTC': dataset_index['BTC']}.items():
        try:
            logger.info('Start processing: {}'.format(_sym))
            features = pd.read_csv(data['csv'],
                                   sep=',',
                                   encoding='utf-8',
                                   index_col='Date',
                                   parse_dates=True)
            targets = pd.read_csv(data['target_csv'],
                                  sep=',',
                                  encoding='utf-8',
                                  index_col='Date',
                                  parse_dates=True)

            # Drop columns whose values are all NaN, as well as rows with ANY
            # nan value, then replace infinity values with nan so that they
            # can later be imputed to a finite value
            features = features.dropna(
                axis='columns', how='all').dropna().replace([np.inf, -np.inf], np.nan)
            target = targets.loc[features.index][current_target]

            # Chronological split: the first (1 - test_size) share of the rows
            # is used for training and the remainder for testing.
            all_size = features.shape[0]
            train_size = int(all_size * (1 - test_size))

            # Keep only the columns whose name contains 'close'.
            # NOTE(review): detabularise presumably converts the table to the
            # nested time series format the classifier expects - confirm.
            features = detabularise(
                features[[c for c in features.columns if 'close' in c]])

            X_train = features.iloc[0:train_size]
            y_train = target.iloc[0:train_size]
            X_test = features.iloc[train_size:all_size]
            y_test = target.iloc[train_size:all_size]

            logger.info("Start Grid search")
            clf = ShapeletTransformClassifier(time_contract_in_mins=5)
            clf.fit(X_train, y_train)
            print('{} Score: {}'.format(_sym, clf.score(X_test, y_test)))
            pred = clf.predict(X_test)
            print(classification_report(y_test, pred))
            logger.info("End Grid search")

            logger.info("--- {} end ---".format(_sym))
        except Exception as e:
            # Best effort per symbol: log the failure and carry on.
            logger.error(
                "Exception while building model pipeline: {} dataset: {} symbol: {}\nException:\n{}"
                .format(pipeline, dataset, _sym, e))
            traceback.print_exc()

    return reports