def test_accurate_results():
    """Check WEASEL against a reference transformation assembled by hand.

    For every window size the time series are reshaped into consecutive
    windows, each window is turned into a word with
    ``SymbolicFourierApproximation``, the words of a sample are joined into
    a sentence, unigrams and bigrams are counted with ``CountVectorizer``
    and only the columns whose chi2 statistic exceeds 2 are kept. The kept
    columns of all window sizes are stacked side by side, and the expected
    vocabulary maps each stacked column index to
    ``"<window_size> <n-gram>"``.

    The output and ``vocabulary_`` of ``WEASEL`` must match this reference
    both for ``fit_transform`` and for ``fit`` followed by ``transform``.
    """
    # (window_size, n_windows) pairs used to cut each series by reshaping.
    window_layout = ((5, 40), (10, 20))

    expected_vocabulary = {}
    # Start from an empty (n_samples, 0) block so the stacked matrix has the
    # right number of rows whatever the number of selected columns.
    column_blocks = [csr_matrix((n_samples, 0), dtype=np.int64)]

    for window_size, n_windows in window_layout:
        n_subseries = n_samples * n_windows
        X_windows = (
            X.reshape(n_samples, n_windows, window_size)
            .reshape(n_subseries, window_size)
        )

        sfa = SymbolicFourierApproximation(
            n_coefs=4, drop_sum=True, anova=True, norm_mean=True,
            norm_std=True, n_bins=3, strategy='entropy', alphabet=None
        )
        # Every window inherits the label of the series it was cut from.
        window_labels = np.repeat(y, n_windows)
        symbols = sfa.fit_transform(X_windows, window_labels)

        # One word per window, then one space-separated sentence per sample.
        words = np.asarray([''.join(row) for row in symbols])
        words = words.reshape(n_samples, n_windows)
        sentences = np.asarray([' '.join(row) for row in words])

        vectorizer = CountVectorizer(ngram_range=(1, 2))
        counts = vectorizer.fit_transform(sentences)
        chi2_scores, _ = chi2(counts, y)
        kept_columns = np.flatnonzero(chi2_scores > 2)
        column_blocks.append(counts[:, kept_columns])

        # Columns of this window size are appended after the existing ones.
        index_to_ngram = {
            index: ngram for ngram, index in vectorizer.vocabulary_.items()
        }
        offset = len(expected_vocabulary)
        for position, column in enumerate(kept_columns, start=offset):
            expected_vocabulary[position] = (
                str(window_size) + " " + index_to_ngram[column]
            )

    arr_desired = hstack(column_blocks).toarray()

    weasel = WEASEL(
        word_size=4, n_bins=3, window_sizes=[5, 10],
        window_steps=None, anova=True, drop_sum=True, norm_mean=True,
        norm_std=True, strategy='entropy', chi2_threshold=2, alphabet=None
    )

    # Accurate results for fit_transform
    arr_fit_transform = weasel.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_fit_transform, arr_desired,
                               atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == expected_vocabulary

    # Accurate results for fit followed by transform
    arr_fit_then_transform = weasel.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_fit_then_transform, arr_desired,
                               atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == expected_vocabulary
def test_fit_transform(params):
    """Check that ``fit_transform`` agrees with ``fit`` then ``transform``.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``WEASEL``; ``sparse=False`` is added
        so that both outputs can be compared with ``assert_allclose``.
    """
    one_shot = WEASEL(**params, sparse=False)
    two_step = WEASEL(**params, sparse=False)

    result_one_shot = one_shot.fit_transform(X, y)
    result_two_step = two_step.fit(X, y).transform(X)

    # Both code paths must produce the same transformed array ...
    np.testing.assert_allclose(
        result_one_shot, result_two_step, atol=1e-5, rtol=0.
    )

    # ... and learn the same vocabulary.
    assert one_shot.vocabulary_ == two_step.vocabulary_
This example shows how the WEASEL algorithm transforms a time series of real numbers into a sequence of frequencies of words. It is implemented as :class:`pyts.transformation.WEASEL`. """ import numpy as np import matplotlib.pyplot as plt from pyts.datasets import load_gunpoint from pyts.transformation import WEASEL # Toy dataset X_train, _, y_train, _ = load_gunpoint(return_X_y=True) # WEASEL transformation weasel = WEASEL(word_size=2, n_bins=2, window_sizes=[12, 36], sparse=False) X_weasel = weasel.fit_transform(X_train, y_train) # Visualize the transformation for the first time series plt.figure(figsize=(8, 5)) vocabulary_length = len(weasel.vocabulary_) width = 0.3 plt.bar(np.arange(vocabulary_length) - width / 2, X_weasel[y_train == 1][0], width=width, label='First time series in class 1') plt.bar(np.arange(vocabulary_length) + width / 2, X_weasel[y_train == 2][0], width=width, label='First time series in class 2') plt.xticks(np.arange(vocabulary_length), np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)), fontsize=12, rotation=60) y_max = np.max(np.concatenate([X_weasel[y_train == 1][0], X_weasel[y_train == 2][0]]))
def test_sparse_dense(sparse, instance):
    """Check the type of the transformed output.

    Parameters
    ----------
    sparse : object
        Value passed as the ``sparse`` argument of ``WEASEL``.
    instance : type or tuple of types
        Type that the transformed output must be an instance of.
    """
    weasel = WEASEL(strategy='quantile', sparse=sparse)

    # Path 1: fit, then transform.
    fit_then_transform = weasel.fit(X, y).transform(X)
    assert isinstance(fit_then_transform, instance)

    # Path 2: fit_transform in a single call.
    fit_transform = weasel.fit_transform(X, y)
    assert isinstance(fit_transform, instance)
def test_parameter_check(params, error, err_msg):
    """Check that fitting with the given parameters raises the given error.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to ``WEASEL``.
    error : type
        Exception type expected from ``fit``.
    err_msg : str
        Expected error message; it is escaped so that it is matched
        literally rather than as a regular expression.
    """
    weasel = WEASEL(**params)
    literal_pattern = re.escape(err_msg)
    with pytest.raises(error, match=literal_pattern):
        weasel.fit(X, y)
import numpy as np
import matplotlib.pyplot as plt
from pyts.transformation import WEASEL

# Parameters
n_samples = 100
n_timestamps = 300
n_classes = 2

# Toy dataset: Gaussian noise with random integer labels (fixed seed)
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)

# WEASEL transformation, converted to a dense array for plotting
weasel = WEASEL(word_size=2, n_bins=2, window_sizes=[12, 36])
X_weasel = weasel.fit_transform(X, y).toarray()

# Visualize the transformation for the first two time series
plt.figure(figsize=(12, 8))
vocabulary_length = len(weasel.vocabulary_)
width = 0.3

# The two series are drawn side by side around each word position.
bar_positions = np.arange(vocabulary_length)
half_width = width / 2
plt.bar(bar_positions - half_width, X_weasel[0],
        width=width, label='First time series')
plt.bar(bar_positions + half_width, X_weasel[1],
        width=width, label='Second time series')

# Label every column with the word stored in the vocabulary for its index.
column_indices = np.arange(X_weasel[0].size)
word_labels = np.vectorize(weasel.vocabulary_.get)(column_indices)
plt.xticks(np.arange(vocabulary_length), word_labels,
           fontsize=12, rotation=60)

# One tick per integer below the largest plotted value plus one.
y_tick_stop = np.max(X_weasel[:2] + 1)
plt.yticks(np.arange(y_tick_stop), fontsize=12)
plt.xlabel("Words", fontsize=18)
return_path=True) DTW_Classic_train.append(dtw_classic) DTW_Classic_train = np.array(DTW_Classic_train) DTW_Classic_train.resize(y_train.shape[0], int(len(DTW_Classic_train) / y_train.shape[0])) DTW_Classic_test = np.array(DTW_Classic_test) DTW_Classic_test.resize(y_test.shape[0], int(len(DTW_Classic_test) / y_test.shape[0])) train_concat_mv = TimeSeriesScalerMeanVariance().fit_transform( DTW_Classic_train) test_concat_mv = TimeSeriesScalerMeanVariance().fit_transform(DTW_Classic_test) train_concat_mv.resize(DTW_Classic_train.shape[0], DTW_Classic_train.shape[1]) test_concat_mv.resize(DTW_Classic_test.shape[0], DTW_Classic_test.shape[1]) #SVM clf = svm.SVC(gamma='scale') clf.fit(DTW_Classic_train, y_train) print('Accuracy: ', clf.score(DTW_Classic_test, y_test)) #WEASEL weasel_adiac = WEASEL(word_size=5, window_sizes=np.arange(6, X_train.shape[1])) pipeline_adiac = Pipeline([("weasel", weasel_adiac), ("clf", clf)]) accuracy_adiac = pipeline_adiac.fit(X_train, y_train).score(X_test, y_test) print("Accuracy on the testing set: {0:.3f}".format(accuracy_adiac))
:class:`pyts.transformation.WEASEL`. """ import numpy as np import matplotlib.pyplot as plt from pyts.transformation import WEASEL # Parameters n_samples, n_features = 100, 144 n_classes = 2 # Toy dataset rng = np.random.RandomState(41) X = rng.randn(n_samples, n_features) y = rng.randint(n_classes, size=n_samples) # WEASEL transformation weasel = WEASEL(n_coefs=2, window_sizes=[12, 24, 36], pvalue_threshold=0.2) X_weasel = weasel.fit_transform(X, y).toarray() # Visualize the transformation for the first time series plt.figure(figsize=(12, 8)) plt.bar(np.arange(X_weasel[0].size), X_weasel[0]) plt.xticks(np.arange(X_weasel[0].size), np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)), fontsize=12, rotation=60) plt.xlabel("Words", fontsize=18) plt.ylabel("Frequencies", fontsize=18) plt.show()
print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS)) error_BOSSVS_list.append(1 - accuracy_BOSSVS) else: clf_bossvs = BOSSVS(word_size=5, n_bins=4, norm_mean=False, drop_sum=False, window_size=40) accuracy_BOSSVS = clf_bossvs.fit(X_train, y_train).score(X_test, y_test) print('Accuracy BOSSVS: ', accuracy_BOSSVS) print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS)) error_BOSSVS_list.append(1 - accuracy_BOSSVS) #WEASEL weasel = WEASEL(word_size=3, window_sizes=np.arange(4, X_train.shape[1])) clf_weasel = LogisticRegression(penalty='l2', C=1, fit_intercept=True, solver='liblinear', multi_class='ovr') pipeline = Pipeline([("weasel", weasel), ("clf", clf_weasel)]) accuracy_weasel = pipeline.fit(X_train, y_train).score(X_test, y_test) print('Accuracy WEASEL: ', accuracy_weasel) print("Error rate WEASEL {0:.4f}".format(1 - accuracy_weasel)) error_WEASEL_list.append(1 - accuracy_weasel) print() er_final = [