def _train_probas_for_estimator(self, y, idx):
    """Compute out-of-bag probability estimates for one ensemble member.

    Draws a bootstrap sample, fits a clone of the base estimator on the
    sampled (pre-transformed) data, and predicts probabilities for the
    out-of-bag instances.

    Parameters
    ----------
    y : array-like, shape = [n_instances]
        The class labels.
    idx : int
        Index of the estimator / stored transform to use.

    Returns
    -------
    list : [results, oob]
        results is an (n_instances, n_classes) array of accumulated OOB
        probabilities (zero rows for in-bag instances); oob is the list of
        out-of-bag instance indices.
    """
    # Derive a per-estimator seed; keep it inside the int32 range accepted
    # by check_random_state.
    rs = 255 if self.random_state == 0 else self.random_state
    rs = (
        None
        if self.random_state is None
        else (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max
    )
    rng = check_random_state(rs)

    indices = range(self.n_instances_)
    subsample = rng.choice(self.n_instances_, size=self.n_instances_)
    # `n not in ndarray` is a linear scan, making the OOB comprehension
    # O(n^2); hash-set membership makes it O(n).
    sampled = set(subsample.tolist())
    oob = [n for n in indices if n not in sampled]

    results = np.zeros((self.n_instances_, self.n_classes_))
    # Degenerate bootstrap covered every instance: nothing to score.
    if len(oob) == 0:
        return [results, oob]

    clf = _clone_estimator(self._base_estimator, rs)
    clf.fit(self.transformed_data_[idx][subsample], y[subsample])
    probas = clf.predict_proba(self.transformed_data_[idx][oob])

    # If the bootstrap missed a class, predict_proba has fewer columns;
    # re-align them to the full class set before accumulating.
    if probas.shape[1] != self.n_classes_:
        new_probas = np.zeros((probas.shape[0], self.n_classes_))
        for i, cls in enumerate(clf.classes_):
            cls_idx = self._class_dictionary[cls]
            new_probas[:, cls_idx] = probas[:, i]
        probas = new_probas

    for n, proba in enumerate(probas):
        results[oob[n]] += proba

    return [results, oob]
def fit(self, X, y):
    """Fit an estimator using transformed data from the MatrixProfile transformer.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, enforce_univariate=True)

    self.classes_ = np.unique(y)
    self.n_classes = self.classes_.shape[0]

    self._transformer = MatrixProfile(m=self.subsequence_length)
    self._estimator = _clone_estimator(
        KNeighborsClassifier(n_neighbors=1)
        if self.estimator is None
        else self.estimator,
        self.random_state,
    )

    # n_jobs is an int (or None), never callable — the original
    # `if callable(m)` test was always False, so the thread count was
    # never forwarded to the wrapped estimator. Test presence instead,
    # matching the sibling _fit implementations.
    m = getattr(self._estimator, "n_jobs", None)
    if m is not None:
        self._estimator.n_jobs = self.n_jobs

    X_t = self._transformer.fit_transform(X, y)
    self._estimator.fit(X_t, y)

    self._is_fitted = True
    return self
def _fit(self, X, y):
    """Fit a pipeline on cases (X,y), where y is the target variable.

    Builds a MatrixProfile transformer and a cloned estimator (1-NN by
    default), then fits the estimator on the transformed data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self._transformer = MatrixProfile(m=self.subsequence_length)

    base = self.estimator
    if base is None:
        base = KNeighborsClassifier(n_neighbors=1)
    self._estimator = _clone_estimator(base, self.random_state)

    # Forward the thread count when the estimator exposes n_jobs.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    self._estimator.fit(self._transformer.fit_transform(X, y), y)
    return self
def _fit(self, X, y):
    """Fit an estimator using transformed data from the Catch22 transformer.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, n_dims]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    labels = np.asarray(y)
    self.classes_ = class_distribution(labels.reshape(-1, 1))[0][0]
    self.n_classes = np.unique(y).shape[0]

    self._transformer = Catch22(outlier_norm=self.outlier_norm)
    base = (
        RandomForestClassifier(n_estimators=200)
        if self.estimator is None
        else self.estimator
    )
    self._estimator = _clone_estimator(base, self.random_state)

    # Forward the thread count when the estimator exposes n_jobs.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self.n_jobs

    # Catch22 can emit NaN/inf; sanitise to zeros before fitting.
    features = self._transformer.fit_transform(X, y)
    features = np.nan_to_num(features, False, 0, 0, 0)
    self._estimator.fit(features, y)
    return self
def _fit_estimator(self, X, y, idx):
    # Build one CIF ensemble member: choose a random attribute subsample,
    # random dimensions and random intervals, extract features per
    # interval, then fit a clone of the base estimator on them.
    c22 = Catch22(outlier_norm=True)

    # Per-estimator seed derived from the ensemble random_state and index.
    rs = 255 if self.random_state == 0 else self.random_state
    rs = None if self.random_state is None else rs * 37 * (idx + 1)
    rng = check_random_state(rs)

    # Built transposed: one row per (interval, attribute) pair, one
    # column per training instance.
    transformed_x = np.empty(
        shape=(self._att_subsample_size * self._n_intervals, self.n_instances),
        dtype=np.float32,
    )

    # 25 candidate attribute indices — presumably 22 Catch22 features plus
    # summary statistics; verify against _cif_feature.
    atts = rng.choice(25, self._att_subsample_size, replace=False)
    dims = rng.choice(self.n_dims, self._n_intervals, replace=True)
    intervals = np.zeros((self._n_intervals, 2), dtype=int)

    # Find the random intervals for classifier i and concatenate
    # features
    for j in range(0, self._n_intervals):
        if rng.random() < 0.5:
            # Draw the interval start first, then a length bounded by the
            # remaining series and _max_interval.
            intervals[j][0] = rng.randint(
                0, self.series_length - self._min_interval
            )
            len_range = min(
                self.series_length - intervals[j][0],
                self._max_interval,
            )
            # NOTE(review): unlike the else-branch there is no guard for
            # len_range == self._min_interval; rng.randint(0, 0) would
            # raise. Looks reachable only if _max_interval == _min_interval
            # — TODO confirm.
            length = (
                rng.randint(0, len_range - self._min_interval)
                + self._min_interval
            )
            intervals[j][1] = intervals[j][0] + length
        else:
            # Draw the interval end first, then a valid length (guarded
            # against a zero-width randint range).
            intervals[j][1] = (
                rng.randint(0, self.series_length - self._min_interval)
                + self._min_interval
            )
            len_range = min(intervals[j][1], self._max_interval)
            length = (
                rng.randint(0, len_range - self._min_interval)
                + self._min_interval
                if len_range - self._min_interval > 0
                else self._min_interval
            )
            intervals[j][0] = intervals[j][1] - length

        # Extract each sampled attribute over this interval/dimension.
        for a in range(0, self._att_subsample_size):
            transformed_x[self._att_subsample_size * j + a] = _cif_feature(
                X, intervals[j], dims[j], atts[a], c22
            )

    tree = _clone_estimator(self._base_estimator, random_state=rs)
    # Rows become instances; round and replace NaN/inf before fitting.
    transformed_x = transformed_x.T
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
    tree.fit(transformed_x, y)

    return [tree, intervals, dims, atts]
def fit(self, X, y=None):
    """Fit the random interval transform.

    Parameters
    ----------
    X : pandas DataFrame or 3d numpy array, input time series
    y : array_like, target values (optional, ignored)
    """
    X = check_X(X, coerce_to_numpy=True)
    _, n_dims, series_length = X.shape

    # Start from the user-supplied transformers. The original assigned
    # self._transformers only in the default (None) branch, so passing a
    # transformer raised AttributeError at the isinstance check below.
    self._transformers = self.transformers
    if self.transformers is None:
        self._transformers = [
            SummaryTransformer(
                summary_function=("mean", "std", "min", "max"),
                quantiles=(0.25, 0.5, 0.75),
            )
        ]
    if not isinstance(self._transformers, list):
        self._transformers = [self._transformers]

    # Clone each transformer and forward the thread count where supported.
    li = []
    for i in range(len(self._transformers)):
        li.append(
            _clone_estimator(
                self._transformers[i],
                self.random_state,
            )
        )
        m = getattr(li[i], "n_jobs", None)
        if m is not None:
            li[i].n_jobs = self.n_jobs
    self._transformers = li

    rng = check_random_state(self.random_state)
    self._dims = rng.choice(n_dims, self.n_intervals, replace=True)
    self._intervals = np.zeros((self.n_intervals, 2), dtype=int)

    # Randomly draw intervals of length >= 3, alternating between
    # start-first and end-first sampling for coverage.
    for i in range(0, self.n_intervals):
        if rng.random() < 0.5:
            self._intervals[i][0] = rng.randint(0, series_length - 3)
            length = (
                rng.randint(0, series_length - self._intervals[i][0] - 3) + 3
                if series_length - self._intervals[i][0] - 3 > 0
                else 3
            )
            self._intervals[i][1] = self._intervals[i][0] + length
        else:
            self._intervals[i][1] = rng.randint(0, series_length - 3) + 3
            length = (
                rng.randint(0, self._intervals[i][1] - 3) + 3
                if self._intervals[i][1] - 3 > 0
                else 3
            )
            self._intervals[i][0] = self._intervals[i][1] - length

    self._is_fitted = True
    return self
def _setup_classification_pipeline(self):
    """Set up the full signature method pipeline."""
    # Default to a random forest when no estimator has been supplied;
    # otherwise clone the user's estimator with our random state.
    if self.estimator is None:
        classifier = RandomForestClassifier(random_state=self.random_state)
    else:
        classifier = _clone_estimator(self.estimator, self.random_state)

    # Main classification pipeline: signature features -> classifier.
    steps = [
        ("signature_method", self.signature_method),
        ("classifier", classifier),
    ]
    self.pipeline = Pipeline(steps)
def _fit_estimator(self, X, X_cls_split, y, idx):
    # Fit one rotation-forest member: for each random attribute group, fit
    # a PCA on a class/instance subsample, then train a tree on the
    # concatenation of all PCA-transformed groups.
    rs = 255 if self.random_state == 0 else self.random_state
    # Per-estimator seed, kept inside int32 range.
    rs = (None if self.random_state is None else
          (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max)
    rng = check_random_state(rs)

    groups = self._generate_groups(rng)
    pcas = []

    # construct the slices to fit the PCAs too.
    for group in groups:
        # Pick a random non-empty subset of classes for this group.
        classes = rng.choice(
            range(self.n_classes),
            size=rng.randint(1, self.n_classes + 1),
            replace=False,
        )

        # randomly add the classes with the randomly selected attributes.
        X_t = np.zeros((0, len(group)))
        for cls_idx in classes:
            c = X_cls_split[cls_idx]
            X_t = np.concatenate((X_t, c[:, group]), axis=0)

        # Subsample instances; despite the name, remove_proportion is the
        # fraction KEPT here (at least one row) — TODO confirm intent.
        sample_ind = rng.choice(
            X_t.shape[0],
            max(1, int(X_t.shape[0] * self.remove_proportion)),
            replace=False,
        )
        X_t = X_t[sample_ind]

        # try to fit the PCA if it fails, remake it, and add 10 random data instances.
        while True:
            # ignore err state on PCA because we account if it fails.
            with np.errstate(divide="ignore", invalid="ignore"):
                # differences between os occasionally. seems to happen when there
                # are low amounts of cases in the fit
                pca = PCA(random_state=rs).fit(X_t)

            # All-NaN explained variance marks a failed fit; pad with
            # random rows and retry.
            if not np.isnan(pca.explained_variance_ratio_).all():
                break
            X_t = np.concatenate(
                (X_t, rng.random_sample((10, X_t.shape[1]))), axis=0
            )

        pcas.append(pca)

    # merge all the pca_transformed data into one instance and build a classifier on it.
    X_t = np.concatenate(
        [pcas[i].transform(X[:, group]) for i, group in enumerate(groups)],
        axis=1)
    tree = _clone_estimator(self._base_estimator, random_state=rs)
    tree.fit(X_t, y)

    # Transformed data is only retained when requested.
    return tree, pcas, groups, X_t if self.save_transformed_data else None
def fit(self, X, y):
    """Build a forest of trees from the training set (X, y).

    Parameters
    ----------
    X : np.ndarray or pd.DataFrame
        Panel training data.
    y : np.ndarray
        The class labels.

    Returns
    -------
    self : object
        A fitted instance of the classifier
    """
    X, y = check_X_y(
        X,
        y,
        enforce_univariate=not self.capabilities["multivariate"],
        coerce_to_numpy=True,
    )
    # Drop the singleton dimension axis: (n_instances, 1, m) -> (n_instances, m).
    X = X.squeeze(1)

    n_instances, self.series_length = X.shape

    n_jobs = check_n_jobs(self.n_jobs)

    rng = check_random_state(self.random_state)

    self.n_classes = np.unique(y).shape[0]

    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

    # sqrt(series_length) heuristic for the number of intervals, clamped
    # to at least 1; min_interval cannot exceed the series length.
    self.n_intervals = int(math.sqrt(self.series_length))
    if self.n_intervals == 0:
        self.n_intervals = 1
    if self.series_length < self.min_interval:
        self.min_interval = self.series_length

    # One random interval set per ensemble member.
    self.intervals_ = [
        _get_intervals(self.n_intervals, self.min_interval,
                       self.series_length, rng)
        for _ in range(self.n_estimators)
    ]

    # Fit ensemble members in parallel, each on its own interval set.
    self.estimators_ = Parallel(n_jobs=n_jobs)(
        delayed(_fit_estimator)(_clone_estimator(self.base_estimator, rng),
                                X, y, self.intervals_[i])
        for i in range(self.n_estimators))

    self._is_fitted = True
    return self
def _fit(self, X, y):
    """Fit the shapelet transform pipeline to training data.

    Runs the RandomShapeletTransform (optionally under a time contract),
    then fits the wrapped estimator (RotationForest by default) on the
    transformed data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.
    """
    self._n_jobs = check_n_jobs(self.n_jobs)

    self.n_instances, self.n_dims, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

    if self.time_limit_in_minutes > 0:
        # contracting 2/3 transform (with 1/5 of that taken away for final
        # transform), 1/3 classifier
        third = self.time_limit_in_minutes / 3
        self._classifier_limit_in_minutes = third
        self._transform_limit_in_minutes = (third * 2) / 5 * 4
    elif self.transform_limit_in_minutes > 0:
        self._transform_limit_in_minutes = self.transform_limit_in_minutes

    self._transformer = RandomShapeletTransform(
        n_shapelet_samples=self.n_shapelet_samples,
        max_shapelets=self.max_shapelets,
        max_shapelet_length=self.max_shapelet_length,
        time_limit_in_minutes=self._transform_limit_in_minutes,
        contract_max_n_shapelet_samples=self.contract_max_n_shapelet_samples,
        n_jobs=self.n_jobs,
        batch_size=self.batch_size,
        random_state=self.random_state,
    )

    self._estimator = _clone_estimator(
        RotationForest() if self.estimator is None else self.estimator,
        self.random_state,
    )

    if isinstance(self._estimator, RotationForest):
        self._estimator.save_transformed_data = self.save_transformed_data

    # Forward thread count / classifier contract where supported.
    m = getattr(self._estimator, "n_jobs", None)
    if m is not None:
        self._estimator.n_jobs = self._n_jobs

    m = getattr(self._estimator, "time_limit_in_minutes", None)
    if m is not None and self.time_limit_in_minutes > 0:
        self._estimator.time_limit_in_minutes = self._classifier_limit_in_minutes

    X_t = self._transformer.fit_transform(X, y).to_numpy()
    if self.save_transformed_data:
        self.transformed_data = X_t

    self._estimator.fit(X_t, y)

    # Sibling _fit implementations return self; the original fell off the
    # end and implicitly returned None.
    return self
def _fit(self, X, y):
    """Fit a pipeline on cases (X,y), where y is the target variable.

    Builds a TSFresh feature extractor (relevant-feature variant when
    configured) and fits a cloned estimator on the extracted features.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    extractor_cls = (
        TSFreshRelevantFeatureExtractor
        if self.relevant_feature_extractor
        else TSFreshFeatureExtractor
    )
    self._transformer = extractor_cls(
        default_fc_parameters=self.default_fc_parameters,
        n_jobs=self._threads_to_use,
        chunksize=self.chunksize,
    )

    base = self.estimator
    if base is None:
        base = RandomForestClassifier(n_estimators=200)
    self._estimator = _clone_estimator(base, self.random_state)

    # Verbosity: <2 silences warnings, <1 also hides the progress bar.
    if self.verbose < 2:
        self._transformer.show_warnings = False
        if self.verbose < 1:
            self._transformer.disable_progressbar = True

    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    self._estimator.fit(self._transformer.fit_transform(X, y), y)
    return self
def _fit_estimator(self, X, y, i):
    """Clone and fit the base estimator on series truncated to the i-th
    classification point.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.
    i : int
        Index into self._classification_points selecting the truncation
        length (and salting the per-estimator seed).

    Returns
    -------
    estimator : the fitted clone.
    """
    rs = 255 if self.random_state == 0 else self.random_state
    rs = None if self.random_state is None else rs * 37 * (i + 1)
    rng = check_random_state(rs)

    estimator = _clone_estimator(
        CanonicalIntervalForest() if self.estimator is None else self.estimator,
        rng,
    )

    # Forward the thread count BEFORE fitting — the original assigned
    # n_jobs after fit(), so it never affected the training run.
    m = getattr(estimator, "n_jobs", None)
    if m is not None:
        estimator.n_jobs = self._threads_to_use

    estimator.fit(X[:, :, : self._classification_points[i]], y)

    return estimator
def fit(self, X, y):
    """Fit an estimator using transformed data from the TSFresh transformer.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, n_dims]
        Nested dataframe with univariate time-series in cells.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y)

    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    self.n_classes = np.unique(y).shape[0]

    self._transformer = (TSFreshRelevantFeatureExtractor(
        default_fc_parameters=self.default_fc_parameters,
        n_jobs=self.n_jobs,
        chunksize=self.chunksize,
    ) if self.relevant_feature_extractor else TSFreshFeatureExtractor(
        default_fc_parameters=self.default_fc_parameters,
        n_jobs=self.n_jobs,
        chunksize=self.chunksize,
    ))

    self._estimator = _clone_estimator(
        RandomForestClassifier(n_estimators=200)
        if self.estimator is None else self.estimator,
        self.random_state,
    )

    # Verbosity: <2 silences warnings, <1 also hides the progress bar.
    if self.verbose < 2:
        self._transformer.show_warnings = False
        if self.verbose < 1:
            self._transformer.disable_progressbar = True

    # n_jobs is an int (or None), never callable — the original
    # `if callable(m)` check was always False, so the thread count was
    # never forwarded. Test presence instead.
    m = getattr(self._estimator, "n_jobs", None)
    if m is not None:
        self._estimator.n_jobs = self.n_jobs

    X_t = self._transformer.fit_transform(X, y)
    self._estimator.fit(X_t, y)

    self._is_fitted = True
    return self
def _fit(self, X, y):
    """Fit a pipeline on cases (X,y), where y is the target variable.

    Extracts summary statistics per series and fits a cloned estimator
    (random forest by default) on the resulting feature table.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self._transformer = SummaryTransformer(
        summary_function=self.summary_functions,
        quantiles=self.summary_quantiles,
    )

    base = self.estimator
    if base is None:
        base = RandomForestClassifier(n_estimators=200)
    self._estimator = _clone_estimator(base, self.random_state)

    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    X_t = self._transformer.fit_transform(X, y)
    # More rows than cases means a long-form multivariate output;
    # fold it back to one row per case.
    if X_t.shape[0] > len(y):
        X_t = X_t.to_numpy().reshape((len(y), -1))
    self._transform_atts = X_t.shape[1]

    self._estimator.fit(X_t, y)
    return self
def _train_probas_for_estimator(self, y, idx):
    """Compute out-of-bag probability estimates for one ensemble member.

    Parameters
    ----------
    y : array-like, shape = [n_instances]
        The class labels.
    idx : int
        Index of the estimator / stored transform to use.

    Returns
    -------
    list : [results, oob]
        results is an (n_instances, n_classes) array of accumulated OOB
        probabilities; oob is the list of out-of-bag instance indices.
    """
    rs = 255 if self.random_state == 0 else self.random_state
    # Keep the derived seed inside int32 range (the unbounded product
    # rs * 37 * (idx + 1) can exceed what check_random_state accepts),
    # matching the sibling implementation.
    rs = (
        None
        if self.random_state is None
        else (rs * 37 * (idx + 1)) % np.iinfo(np.int32).max
    )
    rng = check_random_state(rs)

    indices = range(self.n_instances)
    subsample = rng.choice(self.n_instances, size=self.n_instances)
    # Hash-set membership: `n not in ndarray` is a linear scan (O(n^2)).
    sampled = set(subsample.tolist())
    oob = [n for n in indices if n not in sampled]

    results = np.zeros((self.n_instances, self.n_classes))
    # Degenerate bootstrap covered every instance: nothing to score.
    if len(oob) == 0:
        return [results, oob]

    clf = _clone_estimator(self._base_estimator, rs)
    clf.fit(self.transformed_data[idx][subsample], y[subsample])
    probas = clf.predict_proba(self.transformed_data[idx][oob])

    # If the bootstrap missed a class, predict_proba has fewer columns
    # than n_classes and the accumulation below would raise; re-align
    # columns to the full class set first.
    if probas.shape[1] != self.n_classes:
        new_probas = np.zeros((probas.shape[0], self.n_classes))
        for i, cls in enumerate(clf.classes_):
            new_probas[:, self._class_dictionary[cls]] = probas[:, i]
        probas = new_probas

    for n, proba in enumerate(probas):
        results[oob[n]] += proba

    return [results, oob]
def _fit(self, X, y):
    """Fit a pipeline on cases (X,y), where y is the target variable.

    Extracts features over random intervals (Catch22 by default) and fits
    a cloned estimator (RotationForest by default) on them.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    if self.interval_transformers is None:
        per_interval = Catch22(outlier_norm=True, replace_nans=True)
    else:
        per_interval = self.interval_transformers

    self._transformer = RandomIntervals(
        n_intervals=self.n_intervals,
        transformers=per_interval,
        random_state=self.random_state,
        n_jobs=self._threads_to_use,
    )

    base = RotationForest() if self.estimator is None else self.estimator
    self._estimator = _clone_estimator(base, self.random_state)

    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    self._estimator.fit(self._transformer.fit_transform(X, y), y)
    return self
def _get_train_probs(self, X, y):
    """Return train-set probability estimates for the fitted pipeline.

    Uses RotationForest's own OOB estimates when possible, otherwise
    cross-validated predict_proba on the saved transformed data.
    """
    self.check_is_fitted()
    X, y = check_X_y(X, y, coerce_to_pandas=True)

    n_instances, n_dims = X.shape
    if n_instances != self.n_instances_ or n_dims != self.n_dims_:
        raise ValueError(
            "n_instances, n_dims mismatch. X should be "
            "the same as the training data used in fit for generating train "
            "probabilities.")
    if not self.save_transformed_data:
        raise ValueError(
            "Currently only works with saved transform data from fit.")

    # RotationForest (the default) can produce its own train estimates.
    if self.estimator is None or isinstance(self.estimator, RotationForest):
        return self._estimator._get_train_probs(self.transformed_data_, y)

    m = getattr(self._estimator, "predict_proba", None)
    if not callable(m):
        raise ValueError("Estimator must have a predict_proba method.")

    # Cap CV folds by the rarest class count.
    cv_size = 10
    _, counts = np.unique(y, return_counts=True)
    min_class = np.min(counts)
    if min_class < cv_size:
        cv_size = min_class

    estimator = _clone_estimator(self.estimator, self.random_state)
    return cross_val_predict(
        estimator,
        X=self.transformed_data_,
        y=y,
        cv=cv_size,
        method="predict_proba",
        n_jobs=self._threads_to_use,
    )
def _fit(self, X, y):
    """Fit a pipeline on cases (X,y), where y is the target variable.

    Extracts Catch22 features and fits a cloned estimator (random forest
    by default) on them.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self._transformer = Catch22(outlier_norm=self.outlier_norm)

    base = self.estimator
    if base is None:
        base = RandomForestClassifier(n_estimators=200)
    self._estimator = _clone_estimator(base, self.random_state)

    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use

    # Catch22 can emit NaN/inf; sanitise to zeros before fitting.
    features = self._transformer.fit_transform(X, y)
    features = np.nan_to_num(features, False, 0, 0, 0)
    self._estimator.fit(features, y)
    return self
def _fit_estimator(self, X, X_p, X_d, y, idx):
    # Build one DrCIF ensemble member over the three representations
    # (base series, periodogram, first differences): sample attributes,
    # dimensions and intervals, extract features, then fit the tree.
    c22 = Catch22(outlier_norm=True)
    T = [X, X_p, X_d]

    # Per-estimator seed derived from the ensemble random_state and index.
    rs = 255 if self.random_state == 0 else self.random_state
    rs = None if self.random_state is None else rs * 37 * (idx + 1)
    rng = check_random_state(rs)

    # Built transposed: one row per (interval, attribute) pair.
    transformed_x = np.empty(
        shape=(self._att_subsample_size * self.total_intervals,
               self.n_instances),
        dtype=np.float32,
    )

    # 29 candidate attribute indices — presumably 22 Catch22 features plus
    # 7 summary stats; verify against _drcif_feature.
    atts = rng.choice(29, self._att_subsample_size, replace=False)
    dims = rng.choice(self.n_dims, self.total_intervals, replace=True)
    intervals = np.zeros((self.total_intervals, 2), dtype=int)

    p = 0  # next row in transformed_x
    j = 0  # next interval slot (global across representations)
    for r in range(0, len(T)):
        transform_length = T[r].shape[2]

        # Find the random intervals for classifier i, transformation r
        # and concatenate features
        for _ in range(0, self._n_intervals[r]):
            if rng.random() < 0.5:
                # Draw the interval start first, then a bounded length.
                intervals[j][0] = rng.randint(
                    0, transform_length - self._min_interval[r]
                )
                len_range = min(
                    transform_length - intervals[j][0],
                    self._max_interval[r],
                )
                # NOTE(review): no guard for a zero-width randint range
                # here, unlike the else-branch — TODO confirm it is
                # unreachable when _max_interval[r] > _min_interval[r].
                length = (
                    rng.randint(0, len_range - self._min_interval[r])
                    + self._min_interval[r]
                )
                intervals[j][1] = intervals[j][0] + length
            else:
                # Draw the interval end first, then a guarded length.
                intervals[j][1] = (
                    rng.randint(0, transform_length - self._min_interval[r])
                    + self._min_interval[r]
                )
                len_range = min(intervals[j][1], self._max_interval[r])
                length = (
                    rng.randint(0, len_range - self._min_interval[r])
                    + self._min_interval[r]
                    if len_range - self._min_interval[r] > 0
                    else self._min_interval[r]
                )
                intervals[j][0] = intervals[j][1] - length

            # Extract each sampled attribute over this interval/dimension.
            for a in range(0, self._att_subsample_size):
                transformed_x[p] = _drcif_feature(
                    T[r], intervals[j], dims[j], atts[a], c22
                )
                p += 1

            j += 1

    tree = _clone_estimator(self._base_estimator, random_state=rs)
    # Rows become instances; round and replace NaN/inf before fitting.
    transformed_x = transformed_x.T
    transformed_x = transformed_x.round(8)
    transformed_x = np.nan_to_num(transformed_x, False, 0, 0, 0)
    tree.fit(transformed_x, y)

    return [
        tree,
        intervals,
        dims,
        atts,
        transformed_x if self.save_transformed_data else None,
    ]
def _fit(self, X, y):
    """Fit Arsenal to training data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self.n_instances_, self.n_dims_, self.series_length_ = X.shape

    time_limit = self.time_limit_in_minutes * 60
    start_time = time.time()
    train_time = 0

    # Select the base rocket transform; multivariate input gets the
    # multivariate variant where one exists.
    if self.rocket_transform == "rocket":
        base_rocket = Rocket(num_kernels=self.num_kernels)
    elif self.rocket_transform == "minirocket":
        if self.n_dims_ > 1:
            base_rocket = MiniRocketMultivariate(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
            )
        else:
            base_rocket = MiniRocket(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
            )
    elif self.rocket_transform == "multirocket":
        if self.n_dims_ > 1:
            base_rocket = MultiRocketMultivariate(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
                n_features_per_kernel=self.n_features_per_kernel,
            )
        else:
            base_rocket = MultiRocket(
                num_kernels=self.num_kernels,
                max_dilations_per_kernel=self.max_dilations_per_kernel,
                n_features_per_kernel=self.n_features_per_kernel,
            )
    else:
        raise ValueError(f"Invalid Rocket transformer: {self.rocket_transform}")

    if time_limit > 0:
        # Contract mode: grow the ensemble in batches of _threads_to_use
        # members until the time budget or the member cap is hit.
        self.n_estimators = 0
        self.estimators_ = []
        self.transformed_data_ = []

        while (
            train_time < time_limit
            and self.n_estimators < self.contract_max_n_estimators
        ):
            fit = Parallel(n_jobs=self._threads_to_use)(
                delayed(self._fit_estimator)(
                    _clone_estimator(
                        base_rocket,
                        # Per-member seed: remap 0 -> 255 then salt by
                        # member index.
                        None
                        if self.random_state is None
                        else (255 if self.random_state == 0
                              else self.random_state)
                        * 37
                        * (i + 1),
                    ),
                    X,
                    y,
                )
                for i in range(self._threads_to_use)
            )

            estimators, transformed_data = zip(*fit)

            self.estimators_ += estimators
            self.transformed_data_ += transformed_data

            self.n_estimators += self._threads_to_use
            train_time = time.time() - start_time
    else:
        # Fixed-size mode: fit exactly n_estimators members in parallel.
        fit = Parallel(n_jobs=self._threads_to_use)(
            delayed(self._fit_estimator)(
                _clone_estimator(
                    base_rocket,
                    None
                    if self.random_state is None
                    else (255 if self.random_state == 0
                          else self.random_state)
                    * 37
                    * (i + 1),
                ),
                X,
                y,
            )
            for i in range(self.n_estimators)
        )

        self.estimators_, self.transformed_data_ = zip(*fit)

    # Weight each member by its pipeline classifier's CV accuracy
    # (steps[1][1] is the classifier stage; best_score_ set during fit).
    self.weights_ = []
    self._weight_sum = 0
    for rocket_pipeline in self.estimators_:
        weight = rocket_pipeline.steps[1][1].best_score_
        self.weights_.append(weight)
        self._weight_sum += weight

    return self
def _fit(self, X, y):
    """Fit the Rocket ensemble to training data.

    Fits n_estimators Rocket pipelines (or grows the ensemble under a
    time contract), then weights each member by its classifier's
    cross-validation accuracy.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.
    """
    self._n_jobs = check_n_jobs(self.n_jobs)

    self.n_instances, self.n_dims, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for index, classVal in enumerate(self.classes_):
        self._class_dictionary[classVal] = index

    time_limit = self.time_limit_in_minutes * 60
    start_time = time.time()
    train_time = 0

    base_rocket = Rocket(num_kernels=self.num_kernels)

    if time_limit > 0:
        # Contract mode: grow the ensemble in batches of _n_jobs members
        # until the time budget or the member cap is hit.
        self.n_estimators = 0
        self.estimators_ = []
        self.transformed_data = []

        while (
            train_time < time_limit
            and self.n_estimators < self.contract_max_n_estimators
        ):
            fit = Parallel(n_jobs=self._n_jobs)(
                delayed(self._fit_estimator)(
                    _clone_estimator(
                        base_rocket,
                        # Per-member seed: remap 0 -> 255, salt by index.
                        None
                        if self.random_state is None
                        else (255 if self.random_state == 0
                              else self.random_state)
                        * 37
                        * (i + 1),
                    ),
                    X,
                    y,
                )
                for i in range(self._n_jobs)
            )

            estimators, transformed_data = zip(*fit)

            self.estimators_ += estimators
            self.transformed_data += transformed_data

            self.n_estimators += self._n_jobs
            train_time = time.time() - start_time
    else:
        # Fixed-size mode: fit exactly n_estimators members in parallel.
        fit = Parallel(n_jobs=self._n_jobs)(
            delayed(self._fit_estimator)(
                _clone_estimator(
                    base_rocket,
                    None
                    if self.random_state is None
                    else (255 if self.random_state == 0
                          else self.random_state)
                    * 37
                    * (i + 1),
                ),
                X,
                y,
            )
            for i in range(self.n_estimators)
        )

        self.estimators_, self.transformed_data = zip(*fit)

    # Weight each member by its pipeline classifier's CV accuracy
    # (steps[1][1] is the classifier stage).
    self.weights = []
    self._weight_sum = 0
    for rocket_pipeline in self.estimators_:
        weight = rocket_pipeline.steps[1][1].best_score_
        self.weights.append(weight)
        self._weight_sum += weight

    # The parallel sibling implementation returns self; the original fell
    # off the end and implicitly returned None.
    return self
def _fit_estimator(self, X, y, i):
    """Fit the classifier and one-class filter for the i-th threshold.

    Fits a clone of the base estimator on series truncated to the i-th
    classification point, derives train-set probability estimates, and
    (when enough correctly-classified cases exist) fits a grid-searched
    one-class SVM over their probability features.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.
    i : int
        Index into self._classification_points selecting the truncation
        length (and salting the per-estimator seed).

    Returns
    -------
    tuple : (estimator, one_class_classifier, train_probas, train_preds)
        one_class_classifier is None when fewer than two correctly
        classified cases were available.
    """
    rs = 255 if self.random_state == 0 else self.random_state
    rs = None if self.random_state is None else rs * 37 * (i + 1)
    rng = check_random_state(rs)

    default = MUSE() if X.shape[1] > 1 else WEASEL()
    estimator = _clone_estimator(
        default if self.estimator is None else self.estimator,
        rng,
    )
    m = getattr(estimator, "n_jobs", None)
    if m is not None:
        estimator.n_jobs = self._threads_to_use

    # fit estimator for this threshold
    estimator.fit(X[:, :, : self._classification_points[i]], y)

    # get train set probability estimates for this estimator
    if callable(getattr(estimator, "_get_train_probs", None)) and (
        getattr(estimator, "_save_transformed_data", False)
        or getattr(estimator, "_save_train_predictions", False)
    ):
        train_probas = estimator._get_train_probs(X, y)
    else:
        cv_size = 5
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class
        train_probas = cross_val_predict(
            estimator, X, y=y, cv=cv_size, method="predict_proba"
        )

    # Break probability ties randomly when deriving train predictions.
    train_preds = [
        int(rng.choice(np.flatnonzero(prob == prob.max())))
        for prob in train_probas
    ]

    # create train set for the one class classifier using train probas with the
    # minimum difference to the predicted probability
    train_probas = self._generate_one_class_features(X, train_preds, train_probas)
    X_oc = []
    # Loop variable renamed from `i`, which shadowed the threshold-index
    # parameter of the same name.
    for n in range(len(X)):
        if train_preds[n] == self._class_dictionary[y[n]]:
            X_oc.append(train_probas[n])

    # fit one class classifier and grid search parameters if a grid is provided
    one_class_classifier = None
    if len(X_oc) > 1:
        one_class_classifier = (
            OneClassSVM(tol=self._svm_tol, nu=self._svm_nu)
            if self.one_class_classifier is None
            else _clone_estimator(self.one_class_classifier, random_state=rs)
        )
        param_grid = (
            {"gamma": self._svm_gammas}
            if self.one_class_classifier is None
            and self.one_class_param_grid is None
            else self.one_class_param_grid
        )

        cv_size = min(len(X_oc), 10)
        gs = GridSearchCV(
            estimator=one_class_classifier,
            param_grid=param_grid,
            scoring="accuracy",
            cv=cv_size,
        )
        gs.fit(X_oc, np.ones(len(X_oc)))
        one_class_classifier = gs.best_estimator_

    return estimator, one_class_classifier, train_probas, train_preds
def _fit(self, X, y):
    """Fit STC to training data.

    Runs the RandomShapeletTransform (optionally under a split time
    contract) and fits the wrapped estimator (RotationForest by default)
    on the transformed data.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : Reference to self.

    Notes
    -----
    Changes state by creating a fitted model that updates attributes
    ending in "_" and sets is_fitted flag to True.
    """
    self.n_instances_, self.n_dims_, self.series_length_ = X.shape

    # Split a full contract: 2/3 for the transform (with 1/5 of that kept
    # back for the final transform) and 1/3 for the classifier.
    if self.time_limit_in_minutes > 0:
        per_third = self.time_limit_in_minutes / 3
        self._classifier_limit_in_minutes = per_third
        self._transform_limit_in_minutes = (per_third * 2) / 5 * 4
    elif self.transform_limit_in_minutes > 0:
        self._transform_limit_in_minutes = self.transform_limit_in_minutes

    self._transformer = RandomShapeletTransform(
        n_shapelet_samples=self.n_shapelet_samples,
        max_shapelets=self.max_shapelets,
        max_shapelet_length=self.max_shapelet_length,
        time_limit_in_minutes=self._transform_limit_in_minutes,
        contract_max_n_shapelet_samples=self.contract_max_n_shapelet_samples,
        n_jobs=self.n_jobs,
        batch_size=self.batch_size,
        random_state=self.random_state,
    )

    base = RotationForest() if self.estimator is None else self.estimator
    self._estimator = _clone_estimator(base, self.random_state)

    if isinstance(self._estimator, RotationForest):
        self._estimator.save_transformed_data = self.save_transformed_data

    # Forward thread count / classifier contract where supported.
    if getattr(self._estimator, "n_jobs", None) is not None:
        self._estimator.n_jobs = self._threads_to_use
    has_limit = getattr(self._estimator, "time_limit_in_minutes", None)
    if has_limit is not None and self.time_limit_in_minutes > 0:
        self._estimator.time_limit_in_minutes = self._classifier_limit_in_minutes

    X_t = self._transformer.fit_transform(X, y).to_numpy()
    if self.save_transformed_data:
        self.transformed_data_ = X_t

    self._estimator.fit(X_t, y)
    return self