def _explain(self, X_specimens):
    """Compute a per-timestamp impact map for every specimen.

    Each specimen is converted to a sentence of SAX words (one word per
    sliding window, without numerosity reduction). Every word known to the
    model's vocabulary adds its per-output impact to the timestamps covered
    by its window; unknown words add nothing.

    Parameters
    ----------
    X_specimens : array-like
        Input series; converted with ``np.asarray`` and read as 2D
        (``shape[1]`` is used as the number of timestamps).

    Returns
    -------
    list
        One ``TimeExplanation(x_specimen, impacts)`` per specimen, where
        ``impacts`` has shape ``(n_model_outputs, n_timestamps)``.
    """
    from pyts.bag_of_words import BagOfWords

    X_specimens = np.asarray(X_specimens)
    n_timestamps = X_specimens.shape[1]

    tfidf = self.model.tfidf_
    n_model_outputs = tfidf.shape[0]
    vocabulary = self.model._tfidf.vocabulary_

    # Scale each output's tf-idf row to unit Euclidean norm.
    row_norms = np.linalg.norm(tfidf, axis=1, keepdims=True)
    unit_tfidf = tfidf / row_norms

    # Contrast matrix: 1 on the diagonal, -1/(n-1) elsewhere, e.g. for n=4:
    #    1   -1/3 -1/3 -1/3
    #  -1/3    1  -1/3 -1/3
    #  -1/3  -1/3   1  -1/3
    #  -1/3  -1/3 -1/3   1
    # so each output's row becomes "own weight minus mean of the others".
    off_diagonal = -1 / (n_model_outputs - 1)
    contrast = np.full((n_model_outputs, n_model_outputs), off_diagonal)
    np.fill_diagonal(contrast, 1)
    word_impacts = contrast @ unit_tfidf

    window_size = self.model.window_size
    window_step = self.model.window_step

    # Numerosity reduction must stay off: the i-th word has to correspond
    # to the i-th window so that offsets map back onto timestamps.
    bag = BagOfWords(window_size, window_step, numerosity_reduction=False)
    sentences = bag.transform(self.model._sax.transform(X_specimens))

    explanations = []
    for x_specimen, sentence in zip(X_specimens, sentences):
        tokens = sentence.split(" ")

        # Per-word contribution, optionally divided by how many times the
        # word occurs in this sentence.
        unique_words, occurrences = np.unique(tokens, return_counts=True)
        contribution_of = {}
        for unique_word, n_occurrences in zip(unique_words, occurrences):
            if unique_word not in vocabulary:
                continue
            divisor = n_occurrences if self.divide_spread else 1
            column = word_impacts[:, vocabulary[unique_word]]
            contribution_of[unique_word] = column / divisor

        impacts = np.zeros((n_model_outputs, n_timestamps))
        offset = 0
        for token in tokens:
            contribution = contribution_of.get(token)
            if contribution is not None:
                # Broadcast the per-output vector over the window's span.
                impacts[:, offset:offset + window_size] += (
                    contribution[:, np.newaxis])
            offset += window_step

        explanations.append(TimeExplanation(x_specimen, impacts))

    return explanations
# Author: Johann Faouzi <*****@*****.**> # License: BSD-3-Clause import matplotlib.pyplot as plt import numpy as np from pyts.bag_of_words import BagOfWords from pyts.datasets import load_gunpoint # Load the dataset and perform the transformation X, _, _, _ = load_gunpoint(return_X_y=True) window_size, word_size = 30, 5 bow = BagOfWords(window_size=window_size, word_size=word_size, window_step=window_size, numerosity_reduction=False) X_bow = bow.transform(X) # Plot the considered subseries plt.figure(figsize=(10, 4)) splits_series = np.linspace(0, X.shape[1], 1 + X.shape[1] // window_size, dtype='int64') for start, end in zip(splits_series[:-1], np.clip(splits_series[1:] + 1, 0, X.shape[1])): plt.plot(np.arange(start, end), X[0, start:end], 'o-', lw=1, ms=1) # Plot the corresponding letters splits_letters = np.linspace(0, X.shape[1], 1 + word_size * X.shape[1] // window_size) splits_letters = ((splits_letters[:-1] + splits_letters[1:]) /
import matplotlib.pyplot as plt
from pyts.bag_of_words import BagOfWords

# NOTE(review): `np` is used below but its import is not visible in this
# excerpt -- confirm that `import numpy as np` precedes these lines.

# Parameters
n_samples, n_timestamps = 100, 48
n_bins = 4

# Toy dataset
# Random integer symbols in [0, n_bins), drawn with a fixed seed, then mapped
# to letters by indexing into `alphabet`.
rng = np.random.RandomState(42)
alphabet = np.array(['a', 'b', 'c', 'd'])
X_ordinal = rng.randint(n_bins, size=(n_samples, n_timestamps))
X_alphabet = alphabet[X_ordinal]

# Bag-of-words transformation
bow = BagOfWords(window_size=2, numerosity_reduction=False)
X_bow = bow.transform(X_alphabet)
# Words of the first sample, split on single spaces.
words = np.asarray(X_bow[0].split(' '))
# Boolean mask: True for the first word and for every word that differs
# from the word immediately before it.
different_words_idx = np.r_[True, words[1:] != words[:-1]]

# Show the results
plt.figure(figsize=(16, 7))
plt.suptitle('Transforming a discretized time series into a bag of words',
             fontsize=20, y=0.9)
plt.subplot(121)
# NOTE(review): matplotlib documents `scalex` as a bool; 0.2 is merely
# truthy here -- confirm the intent.
plt.plot(X_ordinal[0], 'o', scalex=0.2)
# Label the y axis with the letters instead of the ordinal values, hide the
# x ticks, then enlarge the y tick labels.
plt.yticks(np.arange(4), alphabet)
plt.xticks([], [])
plt.yticks(fontsize=16)
plt.title('Without numerosity reduction', fontsize=16)
# Disabled KMeans experiment, kept for reference:
# km = KMeans(
#     n_clusters=3, init='random',
#     n_init=10, max_iter=300,
#     tol=1e-04, random_state=0
# )
# y_km = km.fit(data_train)

instances = readData("./venezia/Punta_Salute_1983_2015/Punta_Salute_2015.csv")
data_train, data_test, target_train, target_test = load_gunpoint(
    return_X_y=True)

# Non-overlapping windows: the step equals the window size.
window_size, word_size = 50, 5
bow = BagOfWords(
    window_size=window_size,
    word_size=word_size,
    window_step=window_size,
    numerosity_reduction=False,
)
X_bow = bow.transform(data_train)
test_bow = bow.transform(data_test)

# One {word: occurrence count} dictionary per training sentence.
frequencyDictListX = []
frequencyDictListTest = []
for i, sentence in enumerate(X_bow):
    localCodeWordArr = sentence.split(" ")
    frequencyDict = {}
    for word in localCodeWordArr:
        frequencyDict[word] = frequencyDict.get(word, 0) + 1
    frequencyDictListX.append(frequencyDict)
def test_parameter_check_bag_of_words(params, error, err_msg):
    """Check that invalid parameters are rejected when transforming.

    A ``BagOfWords`` built from ``params`` must raise ``error`` from
    ``transform``, with a message matching ``err_msg`` literally.
    """
    # Escape the message so it is matched as plain text, not as a regex.
    expected_message = re.escape(err_msg)
    transformer = BagOfWords(**params)
    with pytest.raises(error, match=expected_message):
        transformer.transform(X_bow)