Example #1
import matplotlib.pyplot as plt
import numpy as np
from pyts.classification import LearningShapelets
from pyts.datasets import load_gunpoint
from pyts.utils import windowed_view

# Load the data set and fit the classifier
X, _, y, _ = load_gunpoint(return_X_y=True)
clf = LearningShapelets(random_state=42, tol=0.01)
clf.fit(X, y)

# Select two shapelets
shapelets = np.asarray([clf.shapelets_[0, -9], clf.shapelets_[0, -12]])

# Derive the distances between the time series and the shapelets
shapelet_size = shapelets.shape[1]
X_window = windowed_view(X, window_size=shapelet_size, window_step=1)
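# X_window has shape (n_samples, n_windows, shapelet_size); for each time series,
# keep the minimal mean squared distance between any window and each shapelet.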
X_dist = np.mean(
    (X_window[:, :, None] - shapelets[None, :]) ** 2, axis=3).min(axis=1)

plt.figure(figsize=(14, 4))

# Plot the two shapelets
plt.subplot(1, 2, 1)
plt.plot(shapelets[0])
plt.plot(shapelets[1])
plt.title('Two learned shapelets', fontsize=14)

# Plot the distances
plt.subplot(1, 2, 2)
for color, label in zip('br', (1, 2)):
    plt.scatter(X_dist[y == label, 0], X_dist[y == label, 1],
                c=color, label='Class {}'.format(label))
plt.legend()
plt.show()
Example #2
# `params` and `arr_desired` are supplied as test parameters (e.g. via a
# pytest.mark.parametrize decorator that is not shown in this excerpt).
def test_accurate_results(params, arr_desired):
    """Test that the actual results are the expected ones."""
    arr_actual = windowed_view(**params)
    np.testing.assert_array_equal(arr_actual, arr_desired)
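
For context, here is a minimal, self-contained sketch of the kind of input/output pair such a parametrized test would check; the concrete values below are illustrative and not taken from the pyts test suite.

import numpy as np
from pyts.utils import windowed_view

def test_windowed_view_example():
    """Hypothetical case: compare windowed_view against a hand-computed result."""
    X = np.arange(6).reshape(1, 6)  # one series of length 6
    arr_actual = windowed_view(X, window_size=3, window_step=1)
    arr_desired = [[[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]]
    np.testing.assert_array_equal(arr_actual, arr_desired)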
Example #3
    def _explain(self, X_specimens):
        from pyts.utils import windowed_view

        X_specimens = np.asarray(X_specimens)
        n_specimens, size_x = X_specimens.shape
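        # Each window of length w contributes w // 2 Fourier frequency bins, so the
        # largest window size determines the number of global frequency bins.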
        n_freq_bins = max(self.model._window_sizes) // 2
        time_domain = self.domain.startswith("t")

        overall_y_preds = []
        overall_impacts = []

        for (window_size, window_step, sfa, vectorizer, relevant_features) \
                in zip(self.model._window_sizes, self.model._window_steps, self.model._sfa_list,
                       self.model._vectorizer_list, self.model._relevant_features_list):
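            # Number of sliding windows that fit into a series of length size_x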
            n_windows = (size_x - window_size + window_step) // window_step
            X_windowed = windowed_view(X_specimens,
                                       window_size=window_size,
                                       window_step=window_step)
            X_windowed = X_windowed.reshape(n_specimens * n_windows,
                                            window_size)
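            # Transform each window into a sequence of SFA symbols (joined into words below)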
            X_sfa = sfa.transform(X_windowed)

            X_word = np.array(
                ["".join(X_sfa[i]) for i in range(n_specimens * n_windows)])
            X_word = X_word.reshape(n_specimens, n_windows)

            # Predictions
            X_bow = np.asarray(
                [" ".join(X_word[i]) for i in range(n_specimens)])
            overall_y_preds.append(
                vectorizer.transform(X_bow)[:, relevant_features])

            # Impacts

            # 1. Create an array of pairs:
            #    (ngram length, numba dict from ngrams of that length to actual model outputs)
            ngram_range = range(vectorizer.ngram_range[0],
                                vectorizer.ngram_range[1] + 1)
            ngramlen_to_ngram_to_modelout = {
                ngram_len: optional_numba_dict("unicode_type", "int64")
                for ngram_len in ngram_range
            }
            for ngram, ngram_idx in vectorizer.vocabulary_.items():
                find = np.where(relevant_features == ngram_idx)[0]
                if find.size != 0:
                    ngram_len = ngram.count(" ") + 1
                    ngramlen_to_ngram_to_modelout[ngram_len][ngram] = find[0]
            ngramlen_to_ngram_to_modelout = optional_numba_list(
                ngramlen_to_ngram_to_modelout.items())

            if time_domain:
                # Dummy data to make numba not complain.
                global_freq_bins = np.zeros((1, 2))
            else:
                # 2. If drop_sum is False, retroactively drop the sum from the support indices.
                win_freq_bins = np.copy(sfa.support_)
                if not self.model.drop_sum:
                    win_freq_bins = win_freq_bins[win_freq_bins != 0]
                    win_freq_bins -= 1
                #    Also convert the support indices (two consecutive indices represent the real and imag parts
                #    of one bin's output) to bin indices by dividing by 2 and rounding down.
                win_freq_bins //= 2
                #    Convert the support bin indices for this window to global support bin indices along with a weight
                #    for each index which is smaller than 1 when the local bin doesn't fully cover the respective
                #    global bin.
                n_win_freq_bins = window_size // 2
                bin_split = _soft_range_split(n_freq_bins, n_win_freq_bins)
                global_freq_bins = np.vstack(
                    [bin_split[freq_bin] for freq_bin in win_freq_bins])

            # 3. Compute a sparse representation of the impacts.
            rowptr, cols, data = \
                _weasel_impacts_csr(time_domain,
                                    n_specimens, size_x, n_freq_bins,
                                    window_size, window_step, n_windows,
                                    ngramlen_to_ngram_to_modelout, global_freq_bins,
                                    X_word,
                                    np.array(" "))

            # 4. Construct the sparse matrix object.
            impacts_shape = (n_specimens, len(relevant_features) * size_x *
                             (1 if time_domain else n_freq_bins))
            overall_impacts.append(
                sparse.csr_matrix((data, cols, rowptr), impacts_shape))

        overall_y_preds = sparse.hstack(overall_y_preds, format="csr")
        overall_impacts = sparse.hstack(overall_impacts, format="csr")

        if not getattr(self.model, "sparse", True):
            overall_y_preds = overall_y_preds.toarray()

        n_model_outputs = overall_y_preds.shape[1]
        if time_domain:
            constr = TimeExplanation
            kwargs = {}
        else:
            constr = FreqExplanation
            kwargs = {
                "freq_slicing":
                Slicing(bin_rate=size_x,
                        n_slices=n_freq_bins,
                        cont_interval=(0, 0.5))
            }
        return [
            constr(x_specimen,
                   impact_row.reshape((n_model_outputs, -1)).tocsr(),
                   y_pred=y_pred,
                   **kwargs) for x_specimen, y_pred, impact_row in zip(
                       X_specimens, overall_y_preds, overall_impacts)
        ]
Example #4
# `params`, `error` and `err_msg` are supplied as test parameters (e.g. via a
# pytest.mark.parametrize decorator that is not shown in this excerpt).
def test_parameter_check(params, error, err_msg):
    """Test parameter validation in segmentation."""
    with pytest.raises(error, match=re.escape(err_msg)):
        windowed_view(**params)