def _get_random_walk():
    numpy.random.seed(0)
    # Generate a random walk time series
    n_ts, sz, d = 1, 100, 1
    dataset = random_walks(n_ts=n_ts, sz=sz, d=d)
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    return scaler.fit_transform(dataset)
def perform_sax(dataset, gram_number, symbols, segments):
    scaler = TimeSeriesScalerMeanVariance(
        mu=0., std=np.std(dataset))  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=segments,
                                         alphabet_size_avg=symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # Convert result to strings
    df_sax = pd.DataFrame(sax_dataset_inv[0])
    sax_series = df_sax[0]

    # Convert sax from numeric to characters
    sax_values = sax_series.unique()
    alphabet = 'abcdefghijklmnopqrstuvw'
    sax_dict = {x: alphabet[i] for i, x in enumerate(sax_values)}
    sax_list = [sax_dict[x] for x in sax_series]

    # Convert the list of characters to n_grams based on input parameter
    tri = n_grams(gram_number, sax_list)
    return tri
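# A minimal usage sketch (not part of the original sources): it assumes the
# n_grams() helper called by perform_sax above is importable in this scope and
# reuses _get_random_walk() defined earlier in this file.
def _example_perform_sax():
    data = _get_random_walk()  # one scaled random-walk series, shape (1, 100, 1)
    trigrams = perform_sax(data, gram_number=3, symbols=8, segments=20)
    print(list(trigrams)[:5])  # first few symbol n-grams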
def getNormalize(data_baseline):
    data_baseline = TimeSeriesScalerMeanVariance().fit_transform(data_baseline)
    data_baseline = data_baseline.reshape(
        (data_baseline.shape[0], data_baseline.shape[1]))
    return data_baseline
def _update_centroids(self, X):
    for k in range(self.n_clusters):
        self.cluster_centers_[k] = self._shape_extraction(X, k)
    self.cluster_centers_ = TimeSeriesScalerMeanVariance(
        mu=0., std=1.).fit_transform(self.cluster_centers_)
    self._norms_centroids = numpy.linalg.norm(self.cluster_centers_,
                                              axis=(1, 2))
def reshape_data(self):
    ts_value = self.input_df.T.values
    ts_value = ts_value.reshape(ts_value.shape[0], ts_value.shape[1], 1)
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    data_scaled = scaler.fit_transform(ts_value)
    data_scaled = np.nan_to_num(data_scaled)
    self.data_scaled = data_scaled
def test_single_value_ts_no_nan():
    X = to_time_series_dataset([[1, 1, 1, 1]])

    standard_scaler = TimeSeriesScalerMeanVariance()
    assert np.sum(np.isnan(standard_scaler.fit_transform(X))) == 0

    minmax_scaler = TimeSeriesScalerMinMax()
    assert np.sum(np.isnan(minmax_scaler.fit_transform(X))) == 0
def getStdData(originData):
    n_paa_segments = 120  # split each day into 4 parts, aggregating every 6 hours into one segment
    paa_data = PiecewiseAggregateApproximation(
        n_segments=n_paa_segments).fit_transform(originData)
    # Mean-variance normalization
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dataset = scaler.fit_transform(paa_data)
    dataset = dataset.reshape(dataset.shape[0], dataset.shape[1])
    return dataset
def normalize(df):
    df_normalized = df.copy()
    normalize = TimeSeriesScalerMeanVariance(mu=0, std=1)
    for col in df:
        # fit_transform returns shape (1, sz, 1); keep the scaled 1-D values
        df_normalized[col] = normalize.fit_transform(df_normalized[col])[0].ravel()
    return df_normalized
def _transform(self, X, y=None):
    n_ts, sz, d = X.shape
    if d > 1:
        raise NotImplementedError("We currently don't support using "
                                  "multi-dimensional matrix profiles "
                                  "from the stumpy library.")
    output_size = sz - self.subsequence_length + 1
    X_transformed = np.empty((n_ts, output_size, 1))

    if self.implementation == "stump":
        if not STUMPY_INSTALLED:
            raise ImportError(stumpy_msg)
        for i_ts in range(n_ts):
            result = stumpy.stump(T_A=X[i_ts, :, 0].ravel(),
                                  m=self.subsequence_length)
            X_transformed[i_ts, :, 0] = result[:, 0].astype(float)
    elif self.implementation == "gpu_stump":
        if not STUMPY_INSTALLED:
            raise ImportError(stumpy_msg)
        for i_ts in range(n_ts):
            result = stumpy.gpu_stump(T_A=X[i_ts, :, 0].ravel(),
                                      m=self.subsequence_length)
            X_transformed[i_ts, :, 0] = result[:, 0].astype(float)
    elif self.implementation == "numpy":
        scaler = TimeSeriesScalerMeanVariance()
        band_width = int(np.ceil(self.subsequence_length / 4))
        for i_ts in range(n_ts):
            segments = _series_to_segments(X[i_ts], self.subsequence_length)
            if self.scale:
                segments = scaler.fit_transform(segments)
            n_segments = segments.shape[0]
            segments_2d = segments.reshape((-1, self.subsequence_length * d))
            dists = squareform(pdist(segments_2d, "euclidean"))
            # Exclude trivial matches inside the exclusion zone around the diagonal
            band = (np.tri(n_segments, n_segments, band_width, dtype=bool) &
                    ~np.tri(n_segments, n_segments, -(band_width + 1),
                            dtype=bool))
            dists[band] = np.inf
            X_transformed[i_ts] = dists.min(axis=1, keepdims=True)
    else:
        available_implementations = ["numpy", "stump", "gpu_stump"]
        raise ValueError(
            'This "{}" matrix profile implementation is not'
            ' recognized. Available implementations are {}.'.format(
                self.implementation, available_implementations))
    return X_transformed
def cor(x, y):
    """
    Correlation-based distance (COR) between two multivariate time series
    given as arrays of shape (timesteps, dim)
    """
    scaler = TimeSeriesScalerMeanVariance()
    x_norm = scaler.fit_transform(x)
    y_norm = scaler.fit_transform(y)
    pcc = np.mean(x_norm * y_norm)  # Pearson correlation coefficient
    d = np.sqrt(2.0 * (1.0 - pcc + 1e-9))  # correlation-based distance
    return np.sum(d)
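# Illustrative call to cor() with synthetic data (an assumption made for
# illustration, not part of the original code): two multivariate series of
# shape (timesteps, dim).
def _example_cor():
    rng = np.random.RandomState(0)
    x = rng.randn(100, 2)
    y = rng.randn(100, 2)
    print(cor(x, y))  # correlation-based distance between the two series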
def saa_pax(dataset, title):
    """
    Show the graph of PAA and SAX of time series data
    :param dataset: time series of a stock
    :return:
    """
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
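# Hedged usage sketch: drive saa_pax() with the random-walk helper defined
# earlier in this file; the title string is arbitrary.
def _example_saa_pax():
    saa_pax(_get_random_walk(), "random walk")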
def ApplyPaa(n_paa_segments, df, ckt):
    circuito = ckt
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    scaler = TimeSeriesScalerMeanVariance()
    dadosPaa = df
    for i in range(0, len(df)):
        dataset = scaler.fit_transform(df[i])
        dadosPaa[i] = paa.inverse_transform(paa.fit_transform(dataset))[0]
    dadosPaa = dadosPaa.T
    return dadosPaa
def standardize(self, data):
    """
    Standardize the time series: mean 0 and standard deviation 1.
    Data: dataframe
    """
    # prepare data for standardization
    values = data["Valeur"]
    values = values.values.reshape((len(values), 1))
    # train the standardization
    t = TimeSeriesScalerMeanVariance().fit_transform(values)
    print(t)
    # flatten the (n, 1, 1) output back to 1-D before storing it in the frame
    data["valeur"] = t.ravel()
    return data
def check_classifiers_classes(name, classifier_orig):
    # Case of shapelet models
    if name == 'SerializableShapeletModel':
        raise SkipTest('Skipping check_classifiers_classes for shapelets'
                       ' due to convergence issues...')
    elif name == 'ShapeletModel':
        X_multiclass, y_multiclass = _create_large_ts_dataset()
        classifier_orig = clone(classifier_orig)
        classifier_orig.max_iter = 1000
    else:
        X_multiclass, y_multiclass = _create_small_ts_dataset()

    X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass,
                                         random_state=7)

    scaler = TimeSeriesScalerMeanVariance()
    X_multiclass = scaler.fit_transform(X_multiclass)
    X_multiclass = np.reshape(X_multiclass, (X_multiclass.shape[0],
                                             X_multiclass.shape[1]))

    X_binary = X_multiclass[y_multiclass != 2]
    y_binary = y_multiclass[y_multiclass != 2]

    X_multiclass = pairwise_estimator_convert_X(X_multiclass, classifier_orig)
    X_binary = pairwise_estimator_convert_X(X_binary, classifier_orig)

    labels_multiclass = ["one", "two", "three"]
    labels_binary = ["one", "two"]

    y_names_multiclass = np.take(labels_multiclass, y_multiclass)
    y_names_binary = np.take(labels_binary, y_binary)

    problems = [(X_binary, y_binary, y_names_binary)]
    if not classifier_orig._get_tags()['binary_only']:
        problems.append((X_multiclass, y_multiclass, y_names_multiclass))

    for X, y, y_names in problems:
        for y_names_i in [y_names, y_names.astype('O')]:
            y_ = choose_check_classifiers_labels(name, y, y_names_i)
            check_classifiers_predictions(X, y_, name, classifier_orig)

    labels_binary = [-1, 1]
    y_names_binary = np.take(labels_binary, y_binary)
    y_binary = choose_check_classifiers_labels(name, y_binary, y_names_binary)
    check_classifiers_predictions(X_binary, y_binary, name, classifier_orig)
def fit(self, X, y=None):
    """Compute k-Shape clustering.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    """
    X_ = to_time_series_dataset(X)
    X_ = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_)
    assert X_.shape[-1] == 1, \
        "kShape is supposed to work on monomodal data, " \
        "provided data has dimension %d" % X_.shape[-1]

    rs = check_random_state(self.random_state)

    best_correct_centroids = None
    min_inertia = numpy.inf
    n_successful = 0
    n_attempts = 0
    while n_successful < self.n_init and n_attempts < self.max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(X_, rs)
            if self.inertia_ < min_inertia:
                best_correct_centroids = self.cluster_centers_.copy()
                min_inertia = self.inertia_
            n_successful += 1
        except EmptyClusterError:
            if self.verbose:
                print("Resumed because of empty cluster")
    self._post_fit(X_, best_correct_centroids, min_inertia)
    return self
def load_casas(dataset, fixed_length):
    X = np.load('./npy/{}-x-noidle.npy'.format(dataset), allow_pickle=True)
    if -1 * (X.shape[0] % fixed_length) != 0:
        X = X[:-1 * (X.shape[0] % fixed_length)]
    X = X.reshape(-1, fixed_length, 1)
    print(X.shape)

    Y = np.load('./npy/{}-y-noidle.npy'.format(dataset), allow_pickle=True)
    if -1 * (Y.shape[0] % fixed_length) != 0:
        Y = Y[:-1 * (Y.shape[0] % fixed_length)]
    Y = Y.reshape(-1, fixed_length, 1)
    print(Y.shape)
    Y = np.array(Y, dtype=int)

    # Label each fixed-length window with its most frequent activity
    y = []
    for i in range(X.shape[0]):
        y.append(np.argmax(np.bincount(Y[i].flatten())))
    print(Counter(y))

    X = np.array(X, dtype=object)
    y = np.array(y, dtype=object)
    y = y.reshape(-1, 1)

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    dictActivities = np.load('./npy/{}-labels-noidle.npy'.format(dataset),
                             allow_pickle=True).item()

    X_scaled = TimeSeriesScalerMeanVariance().fit_transform(X)
    return X_scaled, y, dictActivities
def preprocessing(lc_data, windowSize=50):
    lc_nor = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(
        [lc_data['instances']])
    lc_data['lc_nor'] = lc_nor

    paa = ut_gen.genListPAA(instances_nor=lc_nor,
                            windowSize=windowSize,
                            timestamp=lc_data['timestamp'])
    lc_data['paa'] = paa

    sax = ut_gen.genListSAX(instances_nor=lc_nor,
                            windowSize=windowSize,
                            timestamp=lc_data['timestamp'],
                            n_sax_symbols=n_sax_symbols)
    lc_data['sax'] = sax

    one_sax = ut_gen.genList1D_SAX(instances_nor=lc_nor,
                                   windowSize=windowSize,
                                   timestamp=lc_data['timestamp'],
                                   n_sax_symbols_slope=n_sax_symbols_slope,
                                   n_sax_symbols_avg=n_sax_symbols_avg)
    lc_data['one_sax'] = one_sax

    corePlot = sketchDyBinService(windowSize=windowSize, initialBin=5,
                                  isOnline=False)
    sketchInstances = corePlot.sketchMode(instances=list(lc_nor[0].ravel()))
    lc_data['dy_bin'] = {
        'sketchInstances': sketchInstances,
        'timestamp': lc_data['timestamp']
    }
def processData(self, transmission: Transmission):
    self.t = transmission
    self.set_data_column_combo_box()
    if not self.ctrls['Apply'].isChecked():
        return

    self.t = transmission.copy()

    mu = self.ctrls['mu'].value()
    std = self.ctrls['std'].value()

    params = {
        'data_column': self.data_column,
        'mu': mu,
        'std': std,
        'units': self.t.last_unit
    }

    output_column = '_SCALER_MEAN_VARIANCE'

    self.t.df[output_column] = self.t.df[self.data_column].apply(
        lambda a: TimeSeriesScalerMeanVariance(
            mu=mu, std=std).fit_transform(a)[:, :, 0])

    self.t.history_trace.add_operation(data_block_id='all',
                                       operation='scaler_mean_variance',
                                       parameters=params)
    self.t.last_output = output_column

    return self.t
def update_plot_means(self, *args, **kwargs):
    """Update the means plot"""
    padded = self.pad_input_data(self.input_arrays, 'fill-size')
    scaled = TimeSeriesScalerMeanVariance().fit_transform(padded)[:, :, 0]

    if self.control_widget.ui.radioButtonXZeroZero.isChecked():
        xzero = 'zero'
    elif self.control_widget.ui.radioButtonXZeroMaxima.isChecked():
        xzero = 'maxima'
    else:
        raise ValueError("Must select an option for 'set x = 0 at'")

    if self.control_widget.ui.comboBoxErrorBand.currentText() == 'standard deviation':
        ci = 'sd'
    elif self.control_widget.ui.comboBoxErrorBand.currentText() == 'confidence interval':
        ci = 95
    elif self.control_widget.ui.comboBoxErrorBand.currentText() == 'None':
        ci = None

    self.plot_means.set_plots(scaled, self.n_clusters, self.y_pred,
                              xzero_pos=xzero, error_band=ci)
    self.plot_means.show()
def check_clustering(name, clusterer_orig, readonly_memmap=False):
    clusterer = clone(clusterer_orig)
    X, y = _create_small_ts_dataset()
    X, y = shuffle(X, y, random_state=7)
    X = TimeSeriesScalerMeanVariance().fit_transform(X)
    rng = np.random.RandomState(42)
    X_noise = X + (rng.randn(*X.shape) / 5)

    n_samples, n_features, dim = X.shape

    # catch deprecation and neighbors warnings
    if hasattr(clusterer, "n_clusters"):
        clusterer.set_params(n_clusters=3)
    set_random_state(clusterer)

    # fit
    clusterer.fit(X)
    # with lists
    clusterer.fit(X.tolist())

    pred = clusterer.labels_
    assert_equal(pred.shape, (n_samples,))
    assert_greater(adjusted_rand_score(pred, y), 0.4)
    if clusterer._get_tags()['non_deterministic']:
        return
    set_random_state(clusterer)
    with warnings.catch_warnings(record=True):
        pred2 = clusterer.fit_predict(X)
    assert_array_equal(pred, pred2)

    # fit_predict(X) and labels_ should be of type int
    assert pred.dtype in [np.dtype('int32'), np.dtype('int64')]
    assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')]

    # Add noise to X to test the possible values of the labels
    labels = clusterer.fit_predict(X_noise)

    # There should be at least one sample in every original cluster
    labels_sorted = np.unique(labels)
    assert_array_equal(labels_sorted, np.arange(0, 3))

    # Labels should be at most n_clusters - 1
    if hasattr(clusterer, 'n_clusters'):
        n_clusters = getattr(clusterer, 'n_clusters')
        assert_greater_equal(n_clusters - 1, labels_sorted[-1])
def load_tslearn_data():
    """
    Time series data with variable length
    """
    X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
    X_train = X_train[y_train < 4]  # Keep first 3 classes
    np.random.shuffle(X_train)
    # Keep only 50 time series
    X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train[:50])
    # Make time series shorter
    X_train = TimeSeriesResampler(sz=40).fit_transform(X_train)
    X_train = X_train.reshape(50, -1)
    return X_train
def test_variable_length_knn():
    X = to_time_series_dataset([[1, 2, 3, 4],
                                [1, 2, 3],
                                [9, 8, 7, 6, 5, 2],
                                [8, 7, 6, 5, 3]])
    y = [0, 0, 1, 1]

    clf = KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    clf = KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    scaler = TimeSeriesScalerMeanVariance()
    clf = KNeighborsTimeSeriesClassifier(metric="sax", n_neighbors=1,
                                         metric_params={'n_segments': 2})
    X_transf = scaler.fit_transform(X)
    clf.fit(X_transf, y)
    assert_allclose(clf.predict(X_transf), [0, 0, 1, 1])
def load_ucr(dataset='CBF'):
    X_train, y_train, X_test, y_test = ucr.load_dataset(dataset)
    X = np.concatenate((X_train, X_test))
    y = np.concatenate((y_train, y_test))

    if dataset == 'HandMovementDirection':  # this one has special labels
        y = [yy[0] for yy in y]
    y = LabelEncoder().fit_transform(y)  # sometimes labels are strings or start from 1
    assert y.min() == 0  # assert labels are integers and start from 0

    # preprocess data (standardization)
    X_scaled = TimeSeriesScalerMeanVariance().fit_transform(X)
    return X_scaled, y
def get_distance_matrix(numpy_array):
    sc = TimeSeriesScalerMeanVariance()
    X_s = sc.fit_transform(to_time_series_dataset(numpy_array))
    size = len(X_s)
    idx = [(i, j) for i in range(0, size) for j in range(i + 1, size)]

    def calc_dtw(my_idx):
        i, j = my_idx
        return dtw(X_s[i], X_s[j])

    # Note: mapping a locally defined function requires fork-based
    # multiprocessing; it cannot be pickled with the "spawn" start method.
    with mp.Pool(mp.cpu_count() - 1) as p:
        distances = p.map(calc_dtw, idx)

    dm = np.zeros(shape=(size, size))
    for (i, j), v in zip(idx, distances):
        dm[i, j] = v
        dm[j, i] = v
    return dm
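# Small sketch of calling get_distance_matrix(); the toy series below are
# made up for illustration and are not part of the original code.
def _example_get_distance_matrix():
    toy_series = [[1.0, 2.0, 3.0, 4.0],
                  [2.0, 3.0, 4.0, 5.0],
                  [9.0, 8.0, 7.0, 6.0]]
    return get_distance_matrix(toy_series)  # 3x3 symmetric DTW distance matrix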
def _transform(self, X, y=None):
    n_ts, sz, d = X.shape
    output_size = sz - self.subsequence_length + 1
    X_transformed = numpy.empty((n_ts, output_size, 1))
    scaler = TimeSeriesScalerMeanVariance()
    for i_ts in range(n_ts):
        Xi = X[i_ts]
        elem_size = Xi.strides[0]
        segments = as_strided(
            Xi, strides=(elem_size, elem_size, Xi.strides[1]),
            shape=(Xi.shape[0] - self.subsequence_length + 1,
                   self.subsequence_length, d),
            writeable=False)
        if self.scale:
            segments = scaler.fit_transform(segments)
        segments_2d = segments.reshape((-1, self.subsequence_length * d))
        dists = squareform(pdist(segments_2d, "euclidean"))
        numpy.fill_diagonal(dists, numpy.inf)
        X_transformed[i_ts] = dists.min(axis=1, keepdims=True)
    return X_transformed
def approximate(self, series: np.ndarray, window: int = 1,
                should_fit: bool = True) -> np.ndarray:
    # series is already in batches
    debug('TSLearnApproximatorWrapper.approximate: series shape {}'.format(
        series.shape))
    debug(f'TSLearnApproximatorWrapper.approximate: param series \n{series}')

    ts_representation = list()
    for segment in series:
        if isinstance(self.transformer, SymbolicAggregateApproximation) or \
                isinstance(self.transformer, OneD_SymbolicAggregateApproximation):
            logger.info("Scaling the data so that they follow a normal distribution.")
            scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
            segment = scaler.fit_transform(segment)
        ts_representation.append(self.transformer.fit_transform(segment))

    debug('TSLearnApproximatorWrapper.approximate: ts_representation shape {}'
          .format(np.shape(ts_representation)))
    ts_representation = np.reshape(
        ts_representation,
        (np.shape(ts_representation)[0],
         np.shape(ts_representation)[1] * np.shape(ts_representation)[2]))
    debug('TSLearnApproximatorWrapper.approximate: ts_representation \n{}'.format(
        ts_representation))
    debug('TSLearnApproximatorWrapper.approximate: ts_representation shape {}'
          .format(ts_representation.shape))
    return ts_representation
def test_serialize_kshape():
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    time_series = rng.randn(n, sz, d)
    X = TimeSeriesScalerMeanVariance().fit_transform(time_series)

    ks = KShape(n_clusters=3, verbose=True)

    _check_not_fitted(ks)

    ks.fit(X)

    _check_params_predict(ks, X, ['predict'])
def ApplyPaa(n_paa_segments, df):
    '''
    Applies PAA to the given dataframe.

    :param n_paa_segments: number of PAA segments used for data reduction
    :param df: dataframe holding the data to which PAA should be applied
    :return: df after PAA has been applied
    '''
    df = df.values.T.tolist()
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dadosPaa = scaler.fit_transform(df)
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    dadosPaa = paa.inverse_transform(paa.fit_transform(dadosPaa))
    df = pd.DataFrame()
    for i in range(len(dadosPaa.T)):
        for j in range(len(dadosPaa.T[0])):
            df[j] = dadosPaa.T[i][j]
    return df
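# Sketch of applying ApplyPaa() above to a synthetic DataFrame (an assumption
# made for illustration): three columns, each treated as one series of
# 96 samples.
def _example_apply_paa():
    example_df = pd.DataFrame(np.random.randn(96, 3))
    return ApplyPaa(8, example_df)  # PAA reconstruction of each scaled series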
def Preprocess(self, x=None):
    """
    Reshape the data into shape (batch, len(data) // time_span).
    """
    if x is None:
        self.n_data = len(self.data) // self.time_span
        self.n_use = self.time_span * self.n_data
        ts = self.data.loc[:self.data.index[self.n_use - 1]]
        ts = np.array(ts.TEMPERATURE).reshape(1, -1)
        ts = TimeSeriesScalerMeanVariance().fit_transform(ts)
        ts = np.array(ts).reshape(self.n_data, -1)
        ts = TimeSeriesResampler(sz=self.batch).fit_transform(ts)
        self.ts = ts
    else:
        self.x_data = len(x) // self.time_span
        self.x_use = self.time_span * self.x_data
        ts = x.loc[:x.index[self.x_use - 1]]
        ts = np.array(ts.TEMPERATURE).reshape(1, -1)
        ts = TimeSeriesScalerMeanVariance().fit_transform(ts)
        ts = np.array(ts).reshape(self.x_data, -1)
        ts = TimeSeriesResampler(sz=self.batch).fit_transform(ts)
        return ts
def test_kshape():
    n, sz, d = 15, 10, 3
    rng = np.random.RandomState(0)
    time_series = rng.randn(n, sz, d)
    time_series = TimeSeriesScalerMeanVariance().fit_transform(time_series)

    ks = KShape(n_clusters=3, n_init=1, verbose=False,
                random_state=rng).fit(time_series)
    dists = ks._cross_dists(time_series)
    np.testing.assert_allclose(ks.labels_, dists.argmin(axis=1))
    np.testing.assert_allclose(ks.labels_, ks.predict(time_series))

    assert KShape(n_clusters=101, verbose=False,
                  random_state=rng).fit(time_series)._X_fit is None