class SAST(BaseEstimator, ClassifierMixin):
    """Scalable and Accurate Subsequence Transform (SAST) classifier.

    Randomly picks up to ``nb_inst_per_class`` reference series per class,
    extracts z-normalized subsequence candidates ("kernels") of the lengths
    in ``cand_length_list``, transforms the input by similarity to those
    kernels via ``apply_kernels``, and fits a downstream classifier on the
    transformed features.

    Parameters
    ----------
    cand_length_list : sequence of int
        Candidate subsequence lengths; lengths longer than the series are
        dropped at fit time.
    shp_step : int, default=1
        Stride between consecutive candidate start positions.
    nb_inst_per_class : int, default=1
        Maximum number of reference series drawn per class.
    random_state : int, None or np.random.RandomState, default=None
        Seed or generator used to choose the reference series.
    classifier : estimator or None, default=None
        Classifier fitted on the transformed features; defaults to a
        ``RandomForestClassifier`` when None.
    """

    def __init__(self, cand_length_list, shp_step=1, nb_inst_per_class=1,
                 random_state=None, classifier=None):
        super(SAST, self).__init__()
        self.cand_length_list = cand_length_list
        self.shp_step = shp_step
        self.nb_inst_per_class = nb_inst_per_class
        self.kernels_ = None          # z-normalized candidate matrix, NaN-padded
        self.kernel_orig_ = None      # not z-normalized kernels
        self.kernels_generators_ = {}  # class label -> chosen reference series
        self.random_state = np.random.RandomState(random_state) if not isinstance(
            random_state, np.random.RandomState) else random_state
        self.classifier = classifier

    def get_params(self, deep=True):
        return {
            'cand_length_list': self.cand_length_list,
            'shp_step': self.shp_step,
            'nb_inst_per_class': self.nb_inst_per_class,
            'classifier': self.classifier,
        }

    def init_sast(self, X, y):
        """Choose reference series per class and build the kernel matrix.

        Populates ``kernels_`` (z-normalized, NaN-padded to the longest
        candidate length), ``kernel_orig_`` and ``kernels_generators_``.

        Raises
        ------
        ValueError
            If every candidate length exceeds the series length.
        """
        self.cand_length_list = np.array(sorted(self.cand_length_list))

        assert self.cand_length_list.ndim == 1, 'Invalid shapelet length list: required list or tuple, or a 1d numpy array'

        if self.classifier is None:
            self.classifier = RandomForestClassifier(min_impurity_decrease=0.05, max_features=None)

        classes = np.unique(y)
        self.num_classes = classes.shape[0]

        candidates_ts = []
        for c in classes:
            X_c = X[y == c]
            # convert to int because if self.nb_inst_per_class is float,
            # the result of np.min() will be float
            cnt = np.min([self.nb_inst_per_class, X_c.shape[0]]).astype(int)
            choosen = self.random_state.permutation(X_c.shape[0])[:cnt]
            candidates_ts.append(X_c[choosen])
            self.kernels_generators_[c] = X_c[choosen]

        candidates_ts = np.concatenate(candidates_ts, axis=0)

        # Drop candidate lengths longer than the series themselves.
        self.cand_length_list = self.cand_length_list[self.cand_length_list <= X.shape[1]]
        if self.cand_length_list.size == 0:
            raise ValueError(
                'All candidate lengths exceed the series length '
                f'{X.shape[1]}; no subsequence can be extracted.')

        max_shp_length = max(self.cand_length_list)
        n, m = candidates_ts.shape

        # BUG FIX: count exactly the subsequences the loops below generate,
        # honouring self.shp_step. The previous formula (m - l + 1 per
        # length) assumed a step of 1, which over-allocated kernels_ and
        # left trailing all-NaN rows whenever shp_step > 1 — and those NaN
        # rows were then fed to apply_kernels.
        n_kernels = n * int(np.sum(
            [len(range(0, m - l + 1, self.shp_step)) for l in self.cand_length_list]))

        self.kernels_ = np.full((n_kernels, max_shp_length),
                                dtype=np.float32, fill_value=np.nan)
        self.kernel_orig_ = []

        k = 0
        for shp_length in self.cand_length_list:
            for i in range(candidates_ts.shape[0]):
                for j in range(0, candidates_ts.shape[1] - shp_length + 1, self.shp_step):
                    end = j + shp_length
                    can = np.squeeze(candidates_ts[i][j:end])
                    self.kernel_orig_.append(can)
                    # Shorter kernels keep NaN padding beyond shp_length.
                    self.kernels_[k, :shp_length] = znormalize_array(can)
                    k += 1

    def fit(self, X, y):
        X, y = check_X_y(X, y)  # check the shape of the data
        self.init_sast(X, y)  # randomly choose reference time series and generate kernels
        X_transformed = apply_kernels(X, self.kernels_)  # subsequence transform of X
        self.classifier.fit(X_transformed, y)  # fit the classifier
        return self

    def predict(self, X):
        check_is_fitted(self)  # make sure the classifier is fitted
        X = check_array(X)  # validate the shape of X
        X_transformed = apply_kernels(X, self.kernels_)  # subsequence transform of X
        return self.classifier.predict(X_transformed)

    def predict_proba(self, X):
        check_is_fitted(self)  # make sure the classifier is fitted
        X = check_array(X)  # validate the shape of X
        X_transformed = apply_kernels(X, self.kernels_)  # subsequence transform of X
        if isinstance(self.classifier, LinearClassifierMixin):
            # Linear classifiers (e.g. RidgeClassifier) lack predict_proba;
            # fall back to the logistic-link helper they provide.
            return self.classifier._predict_proba_lr(X_transformed)
        return self.classifier.predict_proba(X_transformed)