def test_accurate_results_without_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    boss = BOSS(
        word_size=4, n_bins=3, window_size=100, window_step=100,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False
    )

    # Reproduce the expected bag of words by hand: two non-overlapping
    # windows of length 100 per sample (8 samples -> 16 windows).
    windows = X.reshape(8, 2, 100).reshape(16, 100)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    labels = np.repeat(y, 2)
    symbols = sfa.fit_transform(windows, labels)

    # One word per window, then one space-separated document per sample.
    words = np.asarray(
        [''.join(symbols[idx]) for idx in range(16)]
    ).reshape(8, 2)
    documents = np.asarray([' '.join(words[idx]) for idx in range(8)])

    vectorizer = CountVectorizer()
    arr_desired = vectorizer.fit_transform(documents).toarray()
    vocabulary_desired = {index: word
                          for word, index in vectorizer.vocabulary_.items()}

    # fit_transform must match the hand-computed result.
    arr_actual = boss.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0)
    assert boss.vocabulary_ == vocabulary_desired

    # fit followed by transform must give the same answer.
    arr_actual = boss.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_actual, arr_desired, atol=1e-5, rtol=0)
    assert boss.vocabulary_ == vocabulary_desired
def test_accurate_results():
    """Test that the actual results are the expected ones."""
    expected_features = csr_matrix((n_samples, 0), dtype=np.int64)
    expected_vocabulary = {}

    weasel = WEASEL(
        word_size=4, n_bins=3, window_sizes=[5, 10], window_steps=None,
        anova=True, drop_sum=True, norm_mean=True, norm_std=True,
        strategy='entropy', chi2_threshold=2, alphabet=None
    )

    # Build the expected output one window size at a time and stack the
    # surviving features horizontally, mirroring what WEASEL does.
    for window_size, n_windows in zip([5, 10], [40, 20]):
        windows = X.reshape(n_samples, n_windows, window_size)
        windows = windows.reshape(n_samples * n_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=4, drop_sum=True, anova=True, norm_mean=True,
            norm_std=True, n_bins=3, strategy='entropy', alphabet=None
        )
        labels = np.repeat(y, n_windows)
        symbols = sfa.fit_transform(windows, labels)

        # One word per window, then one document per sample.
        words = np.asarray(
            [''.join(symbols[idx]) for idx in range(n_samples * n_windows)]
        ).reshape(n_samples, n_windows)
        documents = np.asarray(
            [' '.join(words[idx]) for idx in range(n_samples)])

        vectorizer = CountVectorizer(ngram_range=(1, 2))
        counts = vectorizer.fit_transform(documents)

        # Keep only features whose chi2 statistic exceeds the threshold.
        chi2_statistics, _ = chi2(counts, y)
        relevant = np.where(chi2_statistics > 2)[0]
        expected_features = hstack([expected_features, counts[:, relevant]])

        # Extend the vocabulary, prefixing each word with its window size.
        offset = len(expected_vocabulary)
        index_to_word = {index: word
                         for word, index in vectorizer.vocabulary_.items()}
        for position, feature_index in enumerate(relevant):
            expected_vocabulary[position + offset] = (
                str(window_size) + " " + index_to_word[feature_index]
            )

    arr_desired = expected_features.toarray()

    # Accurate results for fit_transform
    arr_actual_1 = weasel.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_actual_1, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == expected_vocabulary

    # Accurate results for fit followed by transform
    arr_actual_2 = weasel.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_actual_2, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == expected_vocabulary
def test_actual_results_no_numerosity_reduction():
    """Test that the actual results are the expected ones."""
    bossvs = BOSSVS(
        word_size=4, n_bins=3, window_size=10, window_step=10,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
        strategy='quantile', alphabet=None, numerosity_reduction=False,
        use_idf=True, smooth_idf=False, sublinear_tf=True
    )

    # Hand-computed expectation: two non-overlapping windows of length 10
    # per sample, turned into words, then one document per sample.
    windows = X.reshape(8, 2, 10).reshape(16, 10)
    sfa = SymbolicFourierApproximation(
        n_coefs=4, drop_sum=False, anova=False, norm_mean=False,
        norm_std=False, n_bins=3, strategy='quantile', alphabet=None
    )
    labels = np.repeat(y, 2)
    symbols = sfa.fit_transform(windows, labels)

    words = np.asarray(
        [''.join(symbols[idx]) for idx in range(16)]
    ).reshape(8, 2)
    documents = np.asarray([' '.join(words[idx]) for idx in range(8)])

    # Merge the documents of each class into a single per-class document.
    class_documents = np.array(
        [' '.join(documents[y == label]) for label in range(2)])

    tfidf = TfidfVectorizer(
        norm=None, use_idf=True, smooth_idf=False, sublinear_tf=True
    )
    tfidf_desired = tfidf.fit_transform(class_documents).toarray()

    # Vocabulary
    vocabulary_desired = {index: word
                          for word, index in tfidf.vocabulary_.items()}

    # Tf-idf
    tfidf_actual = bossvs.fit(X, y).tfidf_

    # Decision function: cosine similarity to each class vector.
    decision_function_actual = bossvs.decision_function(X)
    decision_function_desired = cosine_similarity(
        tfidf.transform(documents), tfidf_desired)

    # Predictions: the most similar class.
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(
        tfidf_actual, tfidf_desired, atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired,
        atol=1e-5, rtol=0)
    np.testing.assert_allclose(
        y_pred_actual, y_pred_desired, atol=1e-5, rtol=0)
class Data_Transformer():
    # Symbolic approximations of the train and test sets.
    #
    # NOTE(review): each transformer is fitted on the training set only and
    # then applied to the test set. The original code called fit_transform on
    # the test set as well, which refits the discretization on test data —
    # that leaks test information and makes train/test symbols incomparable
    # (the bin edges differ between the two sets).
    SAX = SymbolicAggregateApproximation(strategy='uniform',
                                         alphabet='ordinal')
    Xtr_SAX = SAX.fit_transform(Xtr)
    Xte_SAX = SAX.transform(Xte)

    SFA = SymbolicFourierApproximation(alphabet='ordinal')
    Xtr_SFA = SFA.fit_transform(Xtr)
    Xte_SFA = SFA.transform(Xte)
def sfa(train_vec_samples, test_vec_samples, n_components=100, n_bins=2,
        alphabet='ordinal'):
    '''
    Apply SFA to reduce the dimensionality of input vectors.

    The approximator is fitted on the training samples only, then used to
    transform both sets, so the test set is discretized with the bins
    learned from the training data.

    :param train_vec_samples: training vectors (2D array; presumably
        (n_samples, n_features) — confirm against callers).
    :param test_vec_samples: test vectors with the same second dimension.
    :param n_components: number of Fourier coefficients to keep.
    :param n_bins: number of bins used to discretize each coefficient.
    :param alphabet: alphabet for the output symbols ('ordinal' -> integers).
    :return: tuple (sfa_train_samples, sfa_test_samples).
    '''
    # Use a name distinct from the function itself: the original local
    # `sfa` shadowed the enclosing function name.
    approximator = SymbolicFourierApproximation(
        n_coefs=n_components, n_bins=n_bins, alphabet=alphabet)
    approximator.fit(train_vec_samples)
    sfa_train_samples = approximator.transform(train_vec_samples)
    sfa_test_samples = approximator.transform(test_vec_samples)
    return sfa_train_samples, sfa_test_samples
def test_actual_results(params):
    """Test that the actual results are the expected ones."""
    expected = _compute_expected_results(X, y, **params)
    transformer = SymbolicFourierApproximation(**params)
    np.testing.assert_array_equal(transformer.fit_transform(X, y), expected)
def test_fit_transform(params):
    """Test that fit and transform yield the same results as fit_transform."""
    fitted_then_transformed = SymbolicFourierApproximation(
        **params).fit(X, y).transform(X)
    fit_transformed = SymbolicFourierApproximation(**params).fit_transform(X, y)
    np.testing.assert_array_equal(fitted_then_transformed, fit_transformed)
import numpy as np
import pytest
from sklearn.base import clone

from pyts.classification import SAXVSM
from pyts.datasets import load_gunpoint, load_basic_motions
from pyts.multivariate.image import JointRecurrencePlot
from pyts.multivariate.classification import MultivariateClassifier
from pyts.approximation import SymbolicFourierApproximation

# Univariate and multivariate fixtures shared by the tests below.
X_uni, _, y_uni, _ = load_gunpoint(return_X_y=True)
X_multi, _, y_multi, _ = load_basic_motions(return_X_y=True)


@pytest.mark.parametrize('estimator, X, y', [
    (SymbolicFourierApproximation(n_bins=2), X_uni, None),
    (SymbolicFourierApproximation(n_bins=2, strategy='entropy'), X_uni, y_uni)
])
def test_univariate_transformer_mixin(estimator, X, y):
    """fit_transform must agree with fit followed by transform."""
    fit_transformed = clone(estimator).fit_transform(X, y)
    fitted_then_transformed = clone(estimator).fit(X, y).transform(X)
    np.testing.assert_array_equal(fit_transformed, fitted_then_transformed)


@pytest.mark.parametrize('estimator, X, y',
                         [(JointRecurrencePlot(), X_multi, None),
                          (JointRecurrencePlot(), X_multi, y_multi)])
def test_multivariate_transformer_mixin(estimator, X, y):
    jrp_1 = clone(estimator)
    jrp_2 = clone(estimator)
    # NOTE(review): the body appears truncated here — the two clones are
    # created but never exercised or compared; confirm the missing
    # assertions against the full file.
from pyts.approximation import SymbolicFourierApproximation
from pyts.datasets import load_gunpoint

# Load the GunPoint training set; only the time series are used here
# (the remaining splits/labels are discarded).
X, _, _, _ = load_gunpoint(return_X_y=True)

# Keep 4 Fourier coefficients per series and discretize them into symbols.
transformer = SymbolicFourierApproximation(n_coefs=4)
X_new = transformer.fit_transform(X)