示例#1
0
def test_accurate_results():
    """Check the WEASEL output against a step-by-step reference computation.

    For each window size the reference is rebuilt by hand: every series is
    cut into consecutive windows, each window is turned into a word with
    SymbolicFourierApproximation, the words of a sample are joined into one
    sentence, unigrams and bigrams are counted, and only the features whose
    chi2 statistic is greater than 2 are kept. The kept columns and their
    names are accumulated over the window sizes and compared with what
    WEASEL returns.
    """
    weasel = WEASEL(
        word_size=4, n_bins=3, window_sizes=[5, 10],
        window_steps=None, anova=True, drop_sum=True, norm_mean=True,
        norm_std=True, strategy='entropy', chi2_threshold=2, alphabet=None
    )

    # Reference results, grown one window size at a time
    vocabulary_ = {}
    X_features = csr_matrix((n_samples, 0), dtype=np.int64)

    for window_size, n_windows in ((5, 40), (10, 20)):
        n_total_windows = n_samples * n_windows

        # One row per window, the windows of a sample staying contiguous
        X_windowed = X.reshape(n_samples, n_windows, window_size)
        X_windowed = X_windowed.reshape(n_total_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=4, drop_sum=True, anova=True, norm_mean=True,
            norm_std=True, n_bins=3, strategy='entropy', alphabet=None
        )
        # Every window inherits the label of the sample it comes from
        X_sfa = sfa.fit_transform(X_windowed, np.repeat(y, n_windows))

        # One word per window, then one space-separated sentence per sample
        words = np.asarray([''.join(symbols) for symbols in X_sfa])
        sentences = np.asarray(
            [' '.join(sample_words)
             for sample_words in words.reshape(n_samples, n_windows)]
        )

        vectorizer = CountVectorizer(ngram_range=(1, 2))
        X_counts = vectorizer.fit_transform(sentences)
        chi2_statistics = chi2(X_counts, y)[0]
        relevant_features = np.flatnonzero(chi2_statistics > 2)
        X_features = hstack([X_features, X_counts[:, relevant_features]])

        # Name each kept column "<window size> <n-gram>", numbering the
        # columns after the ones already collected
        index_to_ngram = {
            index: ngram for ngram, index in vectorizer.vocabulary_.items()
        }
        offset = len(vocabulary_)
        for key, idx in enumerate(relevant_features, start=offset):
            vocabulary_[key] = str(window_size) + " " + index_to_ngram[idx]

    arr_desired = X_features.toarray()

    # Accurate results for fit_transform
    arr_actual_1 = weasel.fit_transform(X, y).toarray()
    np.testing.assert_allclose(arr_actual_1, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == vocabulary_

    # Accurate results for fit followed by transform
    arr_actual_2 = weasel.fit(X, y).transform(X).toarray()
    np.testing.assert_allclose(arr_actual_2, arr_desired, atol=1e-5, rtol=0)
    assert weasel.vocabulary_ == vocabulary_
示例#2
0
def test_fit_transform(params):
    """Check that ``fit_transform`` agrees with ``fit`` then ``transform``."""
    # Two independent estimators built from the same parameters
    one_step = WEASEL(**params, sparse=False)
    two_steps = WEASEL(**params, sparse=False)

    X_one_step = one_step.fit_transform(X, y)
    fitted = two_steps.fit(X, y)
    X_two_steps = fitted.transform(X)

    # Both code paths must produce the same array ...
    np.testing.assert_allclose(
        X_one_step, X_two_steps, atol=1e-5, rtol=0.
    )

    # ... and learn the same vocabulary
    assert one_step.vocabulary_ == two_steps.vocabulary_
示例#3
0
This example shows how the WEASEL algorithm transforms a time series of
real numbers into a sequence of frequencies of words. It is implemented
as :class:`pyts.transformation.WEASEL`.
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.datasets import load_gunpoint
from pyts.transformation import WEASEL

# GunPoint dataset: the first and third returned values are kept as the
# training series and labels, the second and fourth are discarded
X_train, _, y_train, _ = load_gunpoint(return_X_y=True)

# WEASEL transformation
# NOTE(review): sparse=False presumably yields a dense array, which is why no
# .toarray() call is needed before indexing X_weasel below -- confirm
weasel = WEASEL(word_size=2, n_bins=2, window_sizes=[12, 36], sparse=False)
X_weasel = weasel.fit_transform(X_train, y_train)

# Compare the first transformed series labelled 1 with the first one
# labelled 2, drawn as two bars per word
plt.figure(figsize=(8, 5))
vocabulary_length = len(weasel.vocabulary_)
width = 0.3
# The two bar series are shifted by -width/2 and +width/2 so that they sit
# side by side around each tick
plt.bar(np.arange(vocabulary_length) - width / 2, X_weasel[y_train == 1][0],
        width=width, label='First time series in class 1')
plt.bar(np.arange(vocabulary_length) + width / 2, X_weasel[y_train == 2][0],
        width=width, label='First time series in class 2')
# Tick labels: the vocabulary_ entry looked up for every column index
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)),
           fontsize=12, rotation=60)
# Largest value over the two plotted rows
y_max = np.max(np.concatenate([X_weasel[y_train == 1][0],
                               X_weasel[y_train == 2][0]]))
示例#4
0
def test_sparse_dense(sparse, instance):
    """Check that the ``sparse`` flag controls the type of the output."""
    transformer = WEASEL(strategy='quantile', sparse=sparse)

    # fit followed by transform
    X_two_steps = transformer.fit(X, y).transform(X)
    assert isinstance(X_two_steps, instance)

    # fit_transform in a single call
    X_one_step = transformer.fit_transform(X, y)
    assert isinstance(X_one_step, instance)
示例#5
0
def test_parameter_check(params, error, err_msg):
    """Check that fitting with invalid parameters raises the expected error."""
    estimator = WEASEL(**params)
    # The message is matched literally, hence the escaping
    expected_pattern = re.escape(err_msg)
    with pytest.raises(error, match=expected_pattern):
        estimator.fit(X, y)
示例#6
0
import numpy as np
import matplotlib.pyplot as plt
from pyts.transformation import WEASEL

# Parameters: size of the toy dataset and number of distinct labels
n_samples, n_timestamps = 100, 300
n_classes = 2

# Toy dataset: Gaussian noise with random integer labels in [0, n_classes),
# drawn from a seeded generator so that the figure is reproducible
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)
y = rng.randint(n_classes, size=n_samples)

# WEASEL transformation, converted to a dense array for plotting
weasel = WEASEL(word_size=2, n_bins=2, window_sizes=[12, 36])
X_weasel = weasel.fit_transform(X, y).toarray()

# Visualize the transformation for the first two time series, drawn as two
# bars per word
plt.figure(figsize=(12, 8))
vocabulary_length = len(weasel.vocabulary_)
width = 0.3
# The two bar series are shifted by -width/2 and +width/2 so that they sit
# side by side around each tick
plt.bar(np.arange(vocabulary_length) - width / 2, X_weasel[0],
        width=width, label='First time series')
plt.bar(np.arange(vocabulary_length) + width / 2, X_weasel[1],
        width=width, label='Second time series')
# Tick labels: the vocabulary_ entry looked up for every column index
plt.xticks(np.arange(vocabulary_length),
           np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)),
           fontsize=12, rotation=60)
# Unit-spaced ticks starting at 0 and stopping before (largest value of the
# two plotted rows) + 1
plt.yticks(np.arange(np.max(X_weasel[:2] + 1)), fontsize=12)
plt.xlabel("Words", fontsize=18)
示例#7
0
                                        return_path=True)
        DTW_Classic_train.append(dtw_classic)

# Convert the collected DTW results to arrays and resize them in place to
# (number of labels, len(array) / number of labels truncated to an int)
DTW_Classic_train = np.array(DTW_Classic_train)
DTW_Classic_train.resize(y_train.shape[0],
                         int(len(DTW_Classic_train) / y_train.shape[0]))
DTW_Classic_test = np.array(DTW_Classic_test)
DTW_Classic_test.resize(y_test.shape[0],
                        int(len(DTW_Classic_test) / y_test.shape[0]))

# Scale each set with its own freshly fitted scaler (the test set is not
# scaled with the scaler fitted on the training set)
train_concat_mv = TimeSeriesScalerMeanVariance().fit_transform(
    DTW_Classic_train)
test_concat_mv = TimeSeriesScalerMeanVariance().fit_transform(DTW_Classic_test)

# Resize the scaled arrays in place to the 2-D shape of the unscaled ones
# NOTE(review): presumably the scaler output has an extra dimension -- confirm
train_concat_mv.resize(DTW_Classic_train.shape[0], DTW_Classic_train.shape[1])
test_concat_mv.resize(DTW_Classic_test.shape[0], DTW_Classic_test.shape[1])

# SVM
# NOTE(review): the classifier is fitted and scored on the unscaled
# DTW_Classic_* arrays; train_concat_mv / test_concat_mv are not used in the
# visible code -- confirm that this is intended

clf = svm.SVC(gamma='scale')
clf.fit(DTW_Classic_train, y_train)

print('Accuracy: ', clf.score(DTW_Classic_test, y_test))
# WEASEL with every window size from 6 up to X_train.shape[1] - 1
weasel_adiac = WEASEL(word_size=5, window_sizes=np.arange(6, X_train.shape[1]))

# The final step reuses the `clf` object created in the SVM section above
pipeline_adiac = Pipeline([("weasel", weasel_adiac), ("clf", clf)])

accuracy_adiac = pipeline_adiac.fit(X_train, y_train).score(X_test, y_test)

print("Accuracy on the testing set: {0:.3f}".format(accuracy_adiac))
示例#8
0
:class:`pyts.transformation.WEASEL`.
"""

import numpy as np
import matplotlib.pyplot as plt
from pyts.transformation import WEASEL

# Parameters: size of the toy dataset and number of distinct labels
n_samples, n_features = 100, 144
n_classes = 2

# Toy dataset: Gaussian noise with random integer labels in [0, n_classes),
# drawn from a seeded generator so that the figure is reproducible
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_features)
y = rng.randint(n_classes, size=n_samples)

# WEASEL transformation, converted to a dense array for plotting
# NOTE(review): the keywords n_coefs and pvalue_threshold differ from the
# word_size / chi2_threshold keywords used by other WEASEL calls; presumably
# this targets a different pyts release -- confirm
weasel = WEASEL(n_coefs=2, window_sizes=[12, 24, 36], pvalue_threshold=0.2)
X_weasel = weasel.fit_transform(X, y).toarray()

# Visualize the transformation for the first time series: one bar per column
plt.figure(figsize=(12, 8))
plt.bar(np.arange(X_weasel[0].size), X_weasel[0])
# Tick labels: the vocabulary_ entry looked up for every column index
plt.xticks(np.arange(X_weasel[0].size),
           np.vectorize(weasel.vocabulary_.get)(np.arange(X_weasel[0].size)),
           fontsize=12,
           rotation=60)
plt.xlabel("Words", fontsize=18)
plt.ylabel("Frequencies", fontsize=18)
plt.show()
示例#9
0
            print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
            error_BOSSVS_list.append(1 - accuracy_BOSSVS)
        else:
            clf_bossvs = BOSSVS(word_size=5,
                                n_bins=4,
                                norm_mean=False,
                                drop_sum=False,
                                window_size=40)
            accuracy_BOSSVS = clf_bossvs.fit(X_train,
                                             y_train).score(X_test, y_test)
            print('Accuracy BOSSVS: ', accuracy_BOSSVS)
            print("Error rate BOSSVS {0:.4f}".format(1 - accuracy_BOSSVS))
            error_BOSSVS_list.append(1 - accuracy_BOSSVS)

    # WEASEL with every window size from 4 up to X_train.shape[1] - 1,
    # followed by a logistic regression
    weasel = WEASEL(word_size=3, window_sizes=np.arange(4, X_train.shape[1]))
    clf_weasel = LogisticRegression(penalty='l2',
                                    C=1,
                                    fit_intercept=True,
                                    solver='liblinear',
                                    multi_class='ovr')
    pipeline = Pipeline([("weasel", weasel), ("clf", clf_weasel)])

    # Fit on the training set, score on the test set, and record the error
    # rate (1 - accuracy)
    accuracy_weasel = pipeline.fit(X_train, y_train).score(X_test, y_test)
    print('Accuracy WEASEL: ', accuracy_weasel)
    print("Error rate WEASEL {0:.4f}".format(1 - accuracy_weasel))
    error_WEASEL_list.append(1 - accuracy_weasel)

    # Blank line in the printed output
    print()

er_final = [