def test_dunder_mul():
    """Test the mul dunder method.

    Two exponent transformers and a k-NN classifier are chained with ``*``
    under every bracketing. Each result must be a ``ClassifierPipeline`` and
    must predict the same labels as the bare classifier.
    """
    seed = 42
    y = _make_classification_y(n_instances=10, random_state=seed)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed, y=y)
    X_test = _make_panel_X(n_instances=5, n_timepoints=20, random_state=seed)

    # power 4 followed by power 0.25
    power_up = ExponentTransformer(power=4)
    power_down = ExponentTransformer(power=0.25)
    clf = KNeighborsTimeSeriesClassifier()

    # right-bracketed, left-bracketed and unbracketed composition
    pipelines = [
        power_up * (power_down * clf),
        (power_up * power_down) * clf,
        power_up * power_down * clf,
    ]

    for pipe in pipelines:
        assert isinstance(pipe, ClassifierPipeline)

    # reference predictions come from the classifier without any transformers
    expected = clf.fit(X, y).predict(X_test)

    for pipe in pipelines:
        _assert_array_almost_equal(expected, pipe.fit(X, y).predict(X_test))
def main():
    """Train a DTW k-NN time series classifier and pickle the fitted model.

    All settings are read from the module-level ``args`` namespace. Column 0
    of the generated training array is used as the label, the remaining
    columns as the series values. Any exception raised while fitting or
    saving is printed rather than propagated.
    """
    generator = DataGenerator(
        labeled_data_file=args.labeled_data_file,
        data_util_file=args.data_util_file,
        threshold=args.threshold,
        dt=args.dt,
        L=args.L,
        tmin=args.tmin,
        tmax=args.tmax,
    )
    # the test split is produced by the generator but not used here
    training_data, _ = generator.get_data(
        ts_nth_element=args.ts_nth_element, training_frac=0.7
    )

    classifier = KNeighborsTimeSeriesClassifier(
        n_neighbors=args.n_neighbors, verbose=1, metric="dtw"
    )
    labels = training_data[:, 0]
    series = detabularize(pd.DataFrame(training_data[:, 1:]))
    model_path = f"{args.save_file_name}.pickle"

    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            classifier = classifier.fit(series, labels)
        with open(model_path, 'wb') as model_file:
            pickle.dump(classifier, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
def test_mul_sklearn_autoadapt():
    """Test auto-adapter for sklearn in mul.

    An sklearn ``StandardScaler`` is placed between a sktime transformer and a
    classifier. Every bracketing of the ``*`` chain must yield a
    ``ClassifierPipeline``, and all of them must agree on their predictions.
    """
    seed = 42
    y = _make_classification_y(n_instances=10, random_state=seed)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed, y=y)
    X_test = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed)

    squarer = ExponentTransformer(power=2)
    scaler = StandardScaler()
    clf = KNeighborsTimeSeriesClassifier()

    pipelines = [
        squarer * (scaler * clf),
        (squarer * scaler) * clf,
        squarer * scaler * clf,
    ]

    for pipe in pipelines:
        assert isinstance(pipe, ClassifierPipeline)

    # the first pipeline provides the reference the other two are checked against
    reference_pipe, *other_pipes = pipelines
    expected = reference_pipe.fit(X, y).predict(X_test)

    for pipe in other_pipes:
        _assert_array_almost_equal(expected, pipe.fit(X, y).predict(X_test))
def get_test_params(cls, parameter_set="default"):
    """Return testing parameter settings for the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the set of test parameters to return, for use in tests. If no
        special parameters are defined for a value, will return `"default"` set.
        For classifiers, a "default" set of parameters should be provided for
        general testing, and a "results_comparison" set for comparing against
        previously recorded results if the general set does not produce suitable
        probabilities to compare against.

    Returns
    -------
    params : dict or list of dict, default={}
        Parameters to create testing instances of the class.
        Each dict are parameters to construct an "interesting" test instance, i.e.,
        `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
        `create_test_instance` uses the first (or only) dictionary in `params`.
    """
    # imported locally, as in the rest of this method's original design
    from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
    from sktime.transformations.series.exponent import ExponentTransformer

    # unnamed steps: two exponent transformers followed by a k-NN classifier
    steps = [ExponentTransformer(power=2), ExponentTransformer(power=0.5)]
    final_estimator = KNeighborsTimeSeriesClassifier()

    return {"transformers": steps, "classifier": final_estimator}
def fit(self, X, y):
    """Train the classifier.

    The training series are cut into sliding-window subsequences, each
    subsequence is mapped through the configured shape descriptor, and a
    k-NN time series classifier is fitted on the result.

    Parameters
    ----------
    X - pandas dataframe of training data of shape [n_instances,1].
    y - list of class labels of shape [n_instances].

    Returns
    -------
    self : the shapeDTW object

    Raises
    ------
    TypeError
        If ``shape_descriptor_function`` is not a string.
    """
    # Perform preprocessing on params.
    if not isinstance(self.shape_descriptor_function, str):
        # Built by concatenation: a backslash continuation inside the literal
        # would leak the continuation and indentation into the message.
        raise TypeError(
            "shape_descriptor_function must be an 'str'. Found '"
            + type(self.shape_descriptor_function).__name__
            + "' instead."
        )

    X, y = check_X_y(X, y, enforce_univariate=False)

    if self.metric_params is None:
        self.metric_params = {}

    # If the shape descriptor is 'compound',
    # calculate the appropriate weighting_factor
    if self.shape_descriptor_function == "compound":
        self._calculate_weighting_factor_value(X, y)

    # Fit the SlidingWindowSegmenter; it is kept on the instance so that
    # later calls to _preprocess segment data the same way.
    sw = SlidingWindowSegmenter(self.subsequence_length)
    sw.fit(X)
    self.sw = sw

    # Transform the training data.
    X = self._preprocess(X)

    # Fit the kNN classifier
    self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=self.n_neighbors)
    self.knn.fit(X, y)
    self.classes_ = self.knn.classes_

    return self
def fit(self, data):
    """Fit a 5-NN DTW classifier, optionally choosing the best CV fold model.

    With ``data.kfold > 1`` one model is fitted per entry of
    ``data.Xy_train`` and scored on that fold's validation part. The model
    with the lowest ``'mae'`` (regression) or the highest
    ``'balanced_acc_adj'`` (any other task type) is kept. Otherwise a single
    model is fitted on ``data.Xy_train`` and scored on ``data.Xy_val``.

    Returns
    -------
    dict with the retained model under ``'model'`` and its evaluation
    metrics under ``'metrics'``.
    """
    is_regression = data.tasktype == 'regression'

    if not (data.kfold > 1):
        # single train/validation split
        X_train, y_train = data.Xy_train
        X_val, y_val = data.Xy_val
        X_train = from_2d_array_to_nested(X_train)
        X_val = from_2d_array_to_nested(X_val)
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
        self.knn.fit(X_train, y_train)
        eval_metrics = weareval.eval_output(
            self.knn.predict(X_val), y_val, tasktype=data.tasktype
        )
        return {'model': self.knn, 'metrics': eval_metrics}

    fold_results = {}
    for fold in data.Xy_train.keys():
        (X_train, y_train), (X_val, y_val) = data.Xy_train[fold]
        X_train = from_2d_array_to_nested(X_train)
        X_val = from_2d_array_to_nested(X_val)
        model = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
        model.fit(X_train, y_train)
        eval_metrics = weareval.eval_output(
            model.predict(X_val), y_val, tasktype=data.tasktype
        )
        selection_key = 'mae' if is_regression else 'balanced_acc_adj'
        fold_results[fold] = {
            'model': model,
            'metric': eval_metrics[selection_key],
            'metrics': eval_metrics,
        }

    # retain only best model: smallest error for regression, largest score otherwise
    scores = {fold: outcome['metric'] for fold, outcome in fold_results.items()}
    choose = min if is_regression else max
    best_fold = choose(scores, key=scores.get)
    self.knn = fold_results[best_fold]['model']
    return {'model': self.knn, 'metrics': fold_results[best_fold]['metrics']}
def test_missing_unequal_tag_inference():
    """Test that ClassifierPipeline infers missing/unequal tags correctly."""
    knn = KNeighborsTimeSeriesClassifier()

    with_padding = (
        ExponentTransformer() * PaddingTransformer() * ExponentTransformer() * knn
    )
    without_padding = ExponentTransformer() * ExponentTransformer() * knn
    imputer_first = Imputer() * ExponentTransformer() * knn
    imputer_second = ExponentTransformer() * Imputer() * knn

    # unequal length capability is expected only when a padder is in the chain
    assert with_padding.get_tag("capability:unequal_length")
    assert not without_padding.get_tag("capability:unequal_length")

    # missing value capability is expected only when the imputer comes first
    assert imputer_first.get_tag("capability:missing_values")
    assert not imputer_second.get_tag("capability:missing_values")
def main():
    """Walk through loading ItalyPowerDemand and scoring a 1-NN DTW classifier.

    The ARFF files are read with scipy, split into features and target,
    converted to nested frames, and compared against the sktime loader
    before a k-NN classifier is fitted and scored on the test split.
    """
    # arff.loadarff returns a 2-tuple: the records first, their description second
    raw_train = arff.loadarff('ItalyPowerDemand_TRAIN.arff')
    raw_test = arff.loadarff('ItalyPowerDemand_TEST.arff')

    # Keep only the records and view them as tabular dataframes
    train_frame = pd.DataFrame(raw_train[0])
    test_frame = pd.DataFrame(raw_test[0])

    # Inspect the tabular training frame
    print(train_frame.head())
    print('\n Is nested the df above?', is_nested_dataframe(train_frame), '\n')

    # Separate features from the integer-cast target
    train_features = train_frame.drop('target', axis=1)
    train_target = train_frame['target'].astype(int)
    print(train_features.head(), train_target.head(), '\n')

    test_features = test_frame.drop('target', axis=1)
    test_target = test_frame['target'].astype(int)

    # Convert the tabular features into nested frames
    train_nested = detabularize(train_features)
    test_nested = detabularize(test_features)
    print(train_nested.head())
    print('Is nested the detabularized df above?', is_nested_dataframe(train_nested), '\n')

    # The same result is available in one call from the sktime loader
    loader_X, loader_y = load_from_arff_to_dataframe('ItalyPowerDemand_TRAIN.arff')
    print(train_nested.head(), loader_X.head(), type(train_target), type(loader_y))

    # Fit and score the classifier
    classifier = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    classifier.fit(train_nested, train_target)
    print('The score of the KNN classifier is:', round(classifier.score(test_nested, test_target), 4))
def test_stat():
    """Test sign ranks.

    Two strategies are run on the gunpoint training split through the
    orchestrator; the resulting accuracy ranks and sign test table are
    compared against fixed expected values.
    """
    gunpoint = load_gunpoint(split="train", return_X_y=False)
    dataset = RAMDataset(dataset=gunpoint, name="gunpoint")
    task = TSCTask(target="class_val")

    forest = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    forest_strategy = TSCStrategy(forest, name="tsf")
    neighbours = KNeighborsTimeSeriesClassifier()
    # the k-NN strategy is registered under the name "pf"
    neighbours_strategy = TSCStrategy(neighbours, name="pf")

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[neighbours_strategy, forest_strategy],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    evaluator = Evaluator(results)
    accuracy = PairwiseMetric(func=accuracy_score, name="accuracy")
    evaluator.evaluate(metric=accuracy)

    strategy_names = ("pf", "tsf")
    ranks = evaluator.rank(ascending=True)
    observed_ranks = [
        ranks.loc[ranks.strategy == name, "accuracy_mean_rank"].item()
        for name in strategy_names
    ]

    _, sign_test_df = evaluator.sign_test()
    observed_signs = [
        [sign_test_df[name][0], sign_test_df[name][1]] for name in strategy_names
    ]

    expected_ranks = [1, 2]
    expected_signs = [[1, 1], [1, 1]]
    np.testing.assert_equal(
        [observed_ranks, observed_signs], [expected_ranks, expected_signs]
    )
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings.

    This set up is to help with batch jobs for multiple problems to facilitate
    easy reproducibility. You can set up bespoke classifier in many other ways.
    The lookup is case-insensitive: ``cls`` is lower-cased before matching, so
    every alias below is spelled in lower case.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed
    :return: A classifier.
    :raises Exception: if ``cls`` does not match any known classifier name.
    """
    name = cls.lower()
    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resampleId)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resampleId)
    elif name in ("ps", "proximitystump"):
        # alias must be lower case; "proximityStump" could never match `name`
        return ProximityStump(random_state=resampleId)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resampleId)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resampleId)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
class ShapeDTW(BaseClassifier):
    """ShapeDTW classifier.

    ShapeDTW[1] works by initially extracting a set of subsequences
    describing local neighbourhoods around each data point in a time series.
    These subsequences are then passed into a shape descriptor function that
    transforms these local neighbourhoods into a new representation. This
    new representation is then sent into DTW with 1-NN.

    Parameters
    ----------
    n_neighbours                : int, set k for knn (default = 1).
                                  Stored on the instance as ``n_neighbors``.
    subsequence_length          : int, defines the length of the
                                  subsequences (default = 30).
    shape_descriptor_function   : string, defines the function to describe
                                  the set of subsequences
                                  (default = 'raw').

    The possible shape descriptor functions are as follows:

        - 'raw'                 : use the raw subsequence as the
                                  shape descriptor function.
                                - params = None

        - 'paa'                 : use PAA as the shape descriptor function.
                                - params = num_intervals_paa (default=8)

        - 'dwt'                 : use DWT (Discrete Wavelet Transform)
                                  as the shape descriptor function.
                                - params = num_levels_dwt (default=3)

        - 'slope'               : use the gradient of each subsequence
                                  fitted by a total least squares
                                  regression as the shape descriptor
                                  function.
                                - params = num_intervals_slope (default=8)

        - 'derivative'          : use the derivative of each subsequence
                                  as the shape descriptor function.
                                - params = None

        - 'hog1d'               : use a histogram of gradients in one
                                  dimension as the shape descriptor
                                  function.
                                - params = num_intervals_hog1d (default=2)
                                         = num_bins_hog1d (default=8)
                                         = scaling_factor_hog1d (default=0.1)

        - 'compound'            : use a combination of two shape
                                  descriptors simultaneously.
                                - params = weighting_factor (default=None)
                                  Defines how to scale values of a
                                  shape descriptor. If a value is not
                                  given, this value is tuned by 10-fold
                                  cross-validation on the training data.

    shape_descriptor_functions  : string list, only applicable when the
                                  shape_descriptor_function is set to
                                  'compound'. Use a list of shape
                                  descriptor functions at the same time.
                                  (default = ['raw','derivative'])

    metric_params               : dictionary for metric parameters
                                  (default = None).

    Notes
    -----
    ..[1] Jiaping Zhao and Laurent Itti, "shapeDTW: Shape Dynamic Time Warping",
        Pattern Recognition, 74, pp 171-184, 2018
        http://www.sciencedirect.com/science/article/pii/S0031320317303710,
    """

    def __init__(
        self,
        n_neighbours=1,
        subsequence_length=30,
        shape_descriptor_function="raw",
        shape_descriptor_functions=["raw", "derivative"],  # noqa from flake8 B006
        metric_params=None,
    ):
        self.n_neighbors = n_neighbours
        self.subsequence_length = subsequence_length
        self.shape_descriptor_function = shape_descriptor_function
        self.shape_descriptor_functions = shape_descriptor_functions
        self.metric_params = metric_params
        super(ShapeDTW, self).__init__()

    def _fit(self, X, y):
        """Train the classifier.

        Parameters
        ----------
        X - pandas dataframe of training data of shape [n_instances,1].
        y - list of class labels of shape [n_instances].

        Returns
        -------
        self : the shapeDTW object

        Raises
        ------
        TypeError
            If ``shape_descriptor_function`` is not a string.
        """
        # Perform preprocessing on params.
        if not isinstance(self.shape_descriptor_function, str):
            # Built by concatenation: a backslash continuation inside the
            # literal would leak the continuation into the message.
            raise TypeError(
                "shape_descriptor_function must be an 'str'. Found '"
                + type(self.shape_descriptor_function).__name__
                + "' instead."
            )

        if self.metric_params is None:
            self.metric_params = {}

        # If the shape descriptor is 'compound',
        # calculate the appropriate weighting_factor
        if self.shape_descriptor_function == "compound":
            self._calculate_weighting_factor_value(X, y)

        # Fit the SlidingWindowSegmenter and keep it for _preprocess
        sw = SlidingWindowSegmenter(self.subsequence_length)
        sw.fit(X)
        self.sw = sw

        # Transform the training data.
        X = self._preprocess(X)

        # Fit the kNN classifier
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=self.n_neighbors)
        self.knn.fit(X, y)
        self.classes_ = self.knn.classes_

        return self

    def _calculate_weighting_factor_value(self, X, y):
        """Calculate the appropriate weighting_factor.

        Check for the compound shape descriptor.
        If a value is given, the weighting_factor is set
        as the given value. If not, its tuned via
        a 10-fold cross-validation on the training data.

        Parameters
        ----------
        X - training data in a dataframe of shape [n_instances,1]
        y - training data classes of shape [n_instances].

        Raises
        ------
        ValueError
            If tuning is required and ``shape_descriptor_functions`` is not
            a sequence of exactly two names.
        """
        self.metric_params = {k.lower(): v for k, v in self.metric_params.items()}

        # Get the weighting_factor if one is provided
        provided = self.metric_params.get("weighting_factor")
        if provided is not None:
            self.weighting_factor = provided
            return

        # Tune it otherwise
        sdfs = self.shape_descriptor_functions
        if sdfs is None or len(sdfs) != 2:
            raise ValueError(
                "When using 'compound', "
                + "shape_descriptor_functions must be a "
                + "string array of length 2."
            )

        mp = self.metric_params
        candidate_weights = [0.1, 0.125, (1 / 6), 0.25, 0.5, 1, 2, 4, 6, 8, 10]
        # Each candidate replaces metric_params as a whole in the cloned
        # estimator, so the user's descriptor parameters are merged in;
        # otherwise tuning would silently run with default descriptors.
        self._param_matrix = {
            "metric_params": [
                {**mp, "weighting_factor": weight} for weight in candidate_weights
            ]
        }

        grid = GridSearchCV(
            estimator=ShapeDTW(
                n_neighbours=self.n_neighbors,
                subsequence_length=self.subsequence_length,
                shape_descriptor_function=self.shape_descriptor_function,
                shape_descriptor_functions=sdfs,
                metric_params=mp,
            ),
            param_grid=self._param_matrix,
            cv=KFold(n_splits=10, shuffle=True),
            scoring="accuracy",
        )
        grid.fit(X, y)
        self.weighting_factor = grid.best_params_["metric_params"]["weighting_factor"]

    def _preprocess(self, X):
        """Segment X into subsequences and apply the shape descriptor(s).

        Private method shared by fitting and prediction so that training
        and test data go through the same transformations.
        """
        X = self.sw.transform(X)
        # Feed X into the appropriate shape descriptor function
        return self._generate_shape_descriptors(X)

    def _predict_proba(self, X):
        """Perform predictions on the testing data X.

        This function returns the probabilities for each class.

        Parameters
        ----------
        X - pandas dataframe of testing data of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape =
                [n_instances, num_classes] of probabilities
        """
        # Transform the test data in the same way as the training data.
        X = self._preprocess(X)

        # Classify the test data
        return self.knn.predict_proba(X)

    def _predict(self, X):
        """Find predictions for all cases in X.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        # Transform the test data in the same way as the training data.
        X = self._preprocess(X)

        # Classify the test data
        return self.knn.predict(X)

    def _generate_shape_descriptors(self, data):
        """Generate shape descriptors.

        This function is used to convert a list of
        subsequences into a list of shape descriptors
        to be used for classification.

        Raises
        ------
        ValueError
            If 'compound' is selected and ``shape_descriptor_functions``
            does not name exactly two descriptors.
        """
        # Get the appropriate transformer objects
        if self.shape_descriptor_function != "compound":
            self.transformer = [self._get_transformer(self.shape_descriptor_function)]
        else:
            self.transformer = [
                self._get_transformer(x) for x in self.shape_descriptor_functions
            ]
            if len(self.transformer) != 2:
                raise ValueError(
                    "When using 'compound', "
                    + "shape_descriptor_functions must be a "
                    + "string array of length 2."
                )

        # To hold the result of each transformer
        dataFrames = []
        col_names = list(range(len(data.columns)))

        # Apply each transformer on the set of subsequences
        for t in self.transformer:
            if t is None:
                # 'raw' descriptor: do no transformations
                dataFrames.append(data)
            else:
                # Do the transformation and extract the resulting data frame.
                t.fit(data)
                dataFrames.append(t.transform(data))

        # Combine the arrays into one dataframe
        if self.shape_descriptor_function == "compound":
            result = self._combine_data_frames(
                dataFrames, self.weighting_factor, col_names
            )
        else:
            result = dataFrames[0]
            result.columns = col_names

        return result

    def _get_transformer(self, tName):
        """Extract the appropriate transformer.

        Parameters
        ----------
        self   : the ShapeDTW object.
        tName  : the name of the required transformer.

        Returns
        -------
        output : Base Transformer object corresponding to the class
                 (or classes if its a compound transformer) of the
                 required transformer. The transformer is
                 configured with the parameters given in self.metric_params.
                 ``None`` is returned for 'raw'.

        throws : ValueError if a shape descriptor doesn't exist.
        """
        parameters = self.metric_params
        tName = tName.lower()

        if parameters is None:
            parameters = {}

        parameters = {k.lower(): v for k, v in parameters.items()}

        self._check_metric_params(parameters)

        if tName == "raw":
            return None
        elif tName == "paa":
            num_intervals = parameters.get("num_intervals_paa")
            if num_intervals is None:
                return PAA()
            return PAA(num_intervals)
        elif tName == "dwt":
            num_levels = parameters.get("num_levels_dwt")
            if num_levels is None:
                return DWTTransformer()
            return DWTTransformer(num_levels)
        elif tName == "slope":
            num_intervals = parameters.get("num_intervals_slope")
            if num_intervals is None:
                return SlopeTransformer()
            return SlopeTransformer(num_intervals)
        elif tName == "derivative":
            return DerivativeSlopeTransformer()
        elif tName == "hog1d":
            # Forward only the parameters that were supplied, so the
            # transformer's own defaults apply to the rest.
            hog_kwargs = {
                "num_intervals": parameters.get("num_intervals_hog1d"),
                "num_bins": parameters.get("num_bins_hog1d"),
                "scaling_factor": parameters.get("scaling_factor_hog1d"),
            }
            supplied = {k: v for k, v in hog_kwargs.items() if v is not None}
            return HOG1DTransformer(**supplied)
        else:
            raise ValueError("Invalid shape desciptor function.")

    def _check_metric_params(self, parameters):
        """Check for an invalid metric_params.

        Raises
        ------
        ValueError
            If ``parameters`` contains a key that is not a known metric
            parameter name.
        """
        valid_metric_params = [
            "num_intervals_paa",
            "num_levels_dwt",
            "num_intervals_slope",
            "num_intervals_hog1d",
            "num_bins_hog1d",
            "scaling_factor_hog1d",
            "weighting_factor",
        ]

        for x in parameters:
            if x not in valid_metric_params:
                raise ValueError(
                    x
                    + " is not a valid metric parameter. "
                    + "Make sure the shape descriptor function"
                    + " name is at the end of the metric "
                    + "parameter name."
                )

    def _combine_data_frames(self, dataFrames, weighting_factor, col_names):
        """Combine two dataframes together into a single dataframe.

        Used when the shape_descriptor_function is set to "compound".
        For every column and instance, the first descriptor's values are
        followed by the second descriptor's values scaled by
        ``weighting_factor``.
        """
        first_desc = dataFrames[0]
        second_desc = dataFrames[1]

        # Convert the dataframes into arrays, one 2d array per column
        first_desc_array = [
            from_nested_to_2d_array(first_desc[x], return_numpy=True)
            for x in first_desc.columns
        ]
        second_desc_array = [
            from_nested_to_2d_array(second_desc[x], return_numpy=True)
            for x in second_desc.columns
        ]

        # Concatenate the arrays together
        res = []
        for x, first_col in enumerate(first_desc_array):
            dim1 = []
            for y, first_row in enumerate(first_col):
                dim2 = []
                dim2.extend(first_row)
                dim2.extend(second_desc_array[x][y] * weighting_factor)
                dim1.append(dim2)
            res.append(dim1)

        res = np.asarray(res)

        # Convert to pandas dataframe
        df = pd.DataFrame()

        for col in col_names:
            df[col] = [pd.Series(res[col][row]) for row in range(len(res[col]))]

        return df
# -------------- SETUP ------------------------------------------ # Dataset path folder DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets") # Datasets paths # [ [train set path, test set path, dataset name], ...] datasets_path = [[ "RacketSports/RacketSports_TRAIN.ts", "RacketSports/RacketSports_TEST.ts", "RacketSport" ]] # Setup classifier # [ [classifier, classifier name], ...] classifiers = [[ KNeighborsTimeSeriesClassifier(1, 'uniform', 'brute', 'dtw', None), "DTW-1NN" ], [ KNeighborsTimeSeriesClassifier(4, 'uniform', 'brute', 'dtw', None), "DTW-4NN" ]] # --------------- MAIN PROGRAM --------------------------------- # Load data # [ ((train_data, train_class), (test_data, test_class), dataset name), ...] data = [] for train_path, test_path, name in datasets_path: data += [ (load_from_tsfile_to_dataframe(os.path.join(DATA_PATH, train_path)),
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    The lookup is case-insensitive: ``cls`` is lower-cased before matching, so
    every alias below is spelled in lower case.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    elif name in ("ps", "proximitystump"):
        # alias must be lower case; "proximityStump" could never match `name`
        return ProximityStump(random_state=resample_id)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    elif name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
from sklearn.metrics.cluster import contingency_matrix
import numpy as np
import os
import sktime
import time

# -------------- SETUP -----------------

# Datasets path folder
DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets")

# Classifiers
# [ [classifier, classifier name], ...]
# Positional order is (n_neighbors, weights, algorithm, metric): the
# algorithm slot must be filled so that 'dtw' lands on the metric and is
# not taken as the neighbour search algorithm.
classifiers = [
    [KNeighborsTimeSeriesClassifier(1, 'uniform', 'brute', 'dtw'), "DTW-1NN"],
    [KNeighborsTimeSeriesClassifier(4, 'uniform', 'brute', 'dtw'), "DTW-4NN"]
]

# Number of cores to use (-1 -> all)
nb_jobs = -1

# Split strategy
nb_split = 10
cv = KFold(n_splits=nb_split)

# --------------- MAIN PROGRAM --------------------

# Display some info
# Count the entries found directly inside the datasets folder
nbDatasets = 0
for dataset in os.listdir(DATA_PATH):
    nbDatasets += 1
# Run the fit and predict for i, dataset in enumerate(datasets): print(f'Dataset: {i + 1}/{n_datasets} {dataset}') # pre-allocate results results = np.zeros(3) # load data train_file = os.path.join(data_path, f'{dataset}/{dataset}_TRAIN.ts') test_file = os.path.join(data_path, f'{dataset}/{dataset}_TEST.ts') x_train, y_train = load_from_tsfile_to_dataframe(train_file) x_test, y_test = load_from_tsfile_to_dataframe(test_file) tsf = KNeighborsTimeSeriesClassifier() # fit try: s = time.time() tsf.fit(x_train, y_train) results[0] = time.time() - s # predict s = time.time() y_pred = tsf.predict(x_test) results[1] = time.time() - s # catch and raise user exceptions except (KeyboardInterrupt, SystemExit): raise