Example #1
0
    def _explain(self, X_specimens):
        """Build a ``TimeExplanation`` per specimen from tf-idf word weights.

        Each specimen is re-encoded as a bag-of-words sentence (numerosity
        reduction disabled so every window produces a word), and every word's
        class-contrast tf-idf weight is spread over the timestamps its window
        covers.

        Parameters
        ----------
        X_specimens : array-like of shape (n_specimens, n_timestamps)
            Time series to explain.

        Returns
        -------
        list of TimeExplanation
            One explanation (specimen, per-output impact matrix) per input row.
        """
        from pyts.bag_of_words import BagOfWords

        X_specimens = np.asarray(X_specimens)

        n_timestamps = X_specimens.shape[1]
        n_outputs = self.model.tfidf_.shape[0]
        vocabulary = self.model._tfidf.vocabulary_
        # L2-normalize each class row of the tf-idf matrix.
        row_norms = np.linalg.norm(self.model.tfidf_, axis=1, keepdims=True)
        tfidf_unit = self.model.tfidf_ / row_norms

        # Contrast matrix: 1 on the diagonal, -1/(n_outputs - 1) elsewhere,
        # weighing each output against the average of all the others.
        off_diag = -1 / (n_outputs - 1)
        contrast = np.where(np.eye(n_outputs, dtype=bool), 1.0, off_diag)

        word_impacts = contrast @ tfidf_unit

        # Numerosity reduction stays off so that every window yields a word.
        bag = BagOfWords(self.model.window_size,
                         self.model.window_step,
                         numerosity_reduction=False)
        sentences = bag.transform(self.model._sax.transform(X_specimens))

        window = self.model.window_size
        step = self.model.window_step
        explanations = []

        for specimen, sentence in zip(X_specimens, sentences):
            words = sentence.split(" ")

            # Per-word impact vectors; optionally divided by the word's
            # occurrence count so repeated words do not dominate.
            contribs = {}
            for word, count in zip(*np.unique(words, return_counts=True)):
                if word not in vocabulary:
                    continue
                scale = count if self.divide_spread else 1
                contribs[word] = word_impacts[:, vocabulary[word]] / scale

            # Accumulate each word's impact over the timestamps it covers.
            impacts = np.zeros((n_outputs, n_timestamps))
            offset = 0
            for word in words:
                if word in contribs:
                    impacts[:, offset:offset + window] += (
                        contribs[word][:, np.newaxis])
                offset += step

            explanations.append(TimeExplanation(specimen, impacts))

        return explanations
Example #2
0
# Author: Johann Faouzi <*****@*****.**>
# License: BSD-3-Clause

import matplotlib.pyplot as plt
import numpy as np
from pyts.bag_of_words import BagOfWords
from pyts.datasets import load_gunpoint

# Load the dataset and perform the transformation
X, _, _, _ = load_gunpoint(return_X_y=True)
window_size, word_size = 30, 5
# window_step == window_size -> non-overlapping windows;
# numerosity_reduction=False keeps every word, one per window.
bow = BagOfWords(window_size=window_size,
                 word_size=word_size,
                 window_step=window_size,
                 numerosity_reduction=False)
X_bow = bow.transform(X)

# Plot the considered subseries
plt.figure(figsize=(10, 4))
# Window boundaries along the time axis (integer indices).
splits_series = np.linspace(0,
                            X.shape[1],
                            1 + X.shape[1] // window_size,
                            dtype='int64')
# Each segment's end is extended by one sample (clipped to the series
# length) so adjacent segments share their boundary point when drawn.
for start, end in zip(splits_series[:-1],
                      np.clip(splits_series[1:] + 1, 0, X.shape[1])):
    plt.plot(np.arange(start, end), X[0, start:end], 'o-', lw=1, ms=1)

# Plot the corresponding letters
# word_size letters per window -> word_size * n_windows boundaries;
# the letter-position computation continues below this excerpt.
splits_letters = np.linspace(0, X.shape[1],
                             1 + word_size * X.shape[1] // window_size)
splits_letters = ((splits_letters[:-1] + splits_letters[1:]) /
Example #3
0
# Toy example: transform a discretized time series into a bag of words
# and plot the series without numerosity reduction.
import numpy as np  # was missing: np is used below (RandomState, array, ...)
import matplotlib.pyplot as plt
from pyts.bag_of_words import BagOfWords

# Parameters
n_samples, n_timestamps = 100, 48
n_bins = 4

# Toy dataset: random ordinal symbols in [0, n_bins) mapped onto letters.
rng = np.random.RandomState(42)
alphabet = np.array(['a', 'b', 'c', 'd'])
X_ordinal = rng.randint(n_bins, size=(n_samples, n_timestamps))
X_alphabet = alphabet[X_ordinal]

# Bag-of-words transformation (keep every word: no numerosity reduction)
bow = BagOfWords(window_size=2, numerosity_reduction=False)
X_bow = bow.transform(X_alphabet)
words = np.asarray(X_bow[0].split(' '))
# Boolean mask marking positions where a word differs from its predecessor
# (the first word always counts as "different").
different_words_idx = np.r_[True, words[1:] != words[:-1]]

# Show the results
plt.figure(figsize=(16, 7))
plt.suptitle('Transforming a discretized time series into a bag of words',
             fontsize=20,
             y=0.9)

plt.subplot(121)
plt.plot(X_ordinal[0], 'o', scalex=0.2)
plt.yticks(np.arange(4), alphabet)
plt.xticks([], [])
plt.yticks(fontsize=16)
plt.title('Without numerosity reduction', fontsize=16)
Example #4
0
# km = KMeans(
#     n_clusters=3, init='random',
#     n_init=10, max_iter=300,
#     tol=1e-04, random_state=0
# )
# y_km = km.fit(data_train)
from collections import Counter

# Load the Venice water-level data and the gunpoint benchmark.
instances = readData("./venezia/Punta_Salute_1983_2015/Punta_Salute_2015.csv")

data_train, data_test, target_train, target_test = load_gunpoint(return_X_y=True)


# Bag-of-words transform: non-overlapping windows (step == size);
# numerosity_reduction=False keeps one word per window.
window_size, word_size = 50, 5
bow = BagOfWords(window_size=window_size, word_size=word_size,
                 window_step=window_size, numerosity_reduction=False)
X_bow = bow.transform(data_train)
test_bow = bow.transform(data_test)


# Per-series word-frequency dictionaries (test list filled elsewhere).
frequencyDictListX = []
frequencyDictListTest = []

# Count word occurrences per training sentence. Counter replaces the
# original manual loop, whose `elif word in frequencyDict.keys()` branch
# was redundant (always true once the `not in` test failed).
for sentence in X_bow:
    frequencyDictListX.append(dict(Counter(sentence.split(" "))))
Example #5
0
def test_parameter_check_bag_of_words(params, error, err_msg):
    """Invalid parameters must raise `error` whose message matches `err_msg`."""
    transformer = BagOfWords(**params)
    pattern = re.escape(err_msg)
    with pytest.raises(error, match=pattern):
        transformer.transform(X_bow)