Example #1
import time

import numpy as np
from tslearn.shapelets import ShapeletModel


def fit_lts(X_train, y_train, X_test, y_test, shap_dict, reg, max_it, shap_out_path, pred_out_path, timing_out_path):
    # Fit an LTS model, report metrics on the test set, and write the
    # predictions, shapelets and training time to disk (fit_lr is a
    # project-local helper)
    # NOTE: `verbose_level` is the pre-0.3 tslearn spelling; newer releases
    # call this parameter `verbose` (see Example #2)
    clf = ShapeletModel(n_shapelets_per_size=shap_dict,
                        max_iter=max_it, verbose_level=0, batch_size=1,
                        optimizer='sgd', weight_regularizer=reg)

    start = time.time()
    clf.fit(
        np.reshape(
            X_train, 
            (X_train.shape[0], X_train.shape[1], 1)
        ), 
        y_train
    )
    learning_time = time.time() - start

    with open(shap_out_path, 'w+') as ofp:
        for shap in clf.shapelets_:
            ofp.write(str(np.reshape(shap, (-1))) + '\n')

    with open(timing_out_path, 'w+') as ofp:
        ofp.write(str(learning_time))

    X_distances_train = clf.transform(X_train)
    X_distances_test = clf.transform(X_test)

    fit_lr(X_distances_train, y_train, X_distances_test, y_test, pred_out_path)
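
A minimal usage sketch (not part of the original project): the dataset comes from tslearn's CachedDatasets, while the shapelet-size dictionary and the output paths are hypothetical choices.

from tslearn.datasets import CachedDatasets

X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# fit_lts expects 2-D inputs of shape (n_series, series_length)
fit_lts(X_train[:, :, 0], y_train, X_test[:, :, 0], y_test,
        shap_dict={10: 2, 20: 2},  # hypothetical: two shapelets each of lengths 10 and 20
        reg=0.01, max_it=200,
        shap_out_path="shapelets.txt",
        pred_out_path="predictions.txt",
        timing_out_path="timing.txt")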
Example #2
import numpy as np
import pytest

from tslearn.utils import to_time_series_dataset


def test_shapelet_lengths():
    pytest.importorskip('tensorflow')
    from tslearn.shapelets import ShapeletModel

    # Test variable-length
    y = [0, 1]
    time_series = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    clf = ShapeletModel(n_shapelets_per_size={3: 1},
                        max_iter=1,
                        verbose=0,
                        random_state=0)
    clf.fit(time_series, y)

    weights_shapelet = [np.array([[1, 2, 3]])]
    clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
    tr = clf.transform(time_series)
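    # Expected: an exact match (0.) for [1, 2, 3, 4, 5]; aligning [3, 2, 1]
    # with the shapelet [1, 2, 3] gives a mean squared difference of
    # (4 + 0 + 4) / 3 = 8/3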
    np.testing.assert_allclose(tr,
                               np.array([[0.], [8. / 3]]))

    # Test max_size to predict longer series than those passed at fit time
    y = [0, 1]
    time_series = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    clf = ShapeletModel(n_shapelets_per_size={3: 1},
                        max_iter=1,
                        verbose=0,
                        max_size=6,
                        random_state=0)
    clf.fit(time_series[:, :-1], y)  # Fit with size 4
    weights_shapelet = [np.array([[1, 2, 3]])]
    clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
    tr = clf.transform(time_series)
    np.testing.assert_allclose(tr,
                               np.array([[0.], [8. / 3]]))
Example #3
import time

import numpy as np
from tslearn.shapelets import (ShapeletModel,
                               grabocka_params_to_shapelet_size_dict)


def lts_discovery(X_train, y_train, X_test, y_test, nr_shap, l, r, reg, max_it, shap_out_path, pred_out_path, timing_out_path):
    # Fit an LTS model with heuristically sized shapelets, report metrics on
    # the test set, and write the predictions, shapelets and training time to
    # disk (fit_lr is a project-local helper)
    shapelet_dict = grabocka_params_to_shapelet_size_dict(
        X_train.shape[0], X_train.shape[1], int(nr_shap * X_train.shape[1]), l, r
    )

    clf = ShapeletModel(n_shapelets_per_size=shapelet_dict,
                        max_iter=max_it, verbose_level=0, batch_size=1,
                        optimizer='sgd', weight_regularizer=reg)

    start = time.time()
    clf.fit(
        np.reshape(
            X_train, 
            (X_train.shape[0], X_train.shape[1], 1)
        ), 
        y_train
    )
    learning_time = time.time() - start

    print('Learning shapelets took {}s'.format(learning_time))

    with open(shap_out_path, 'w+') as ofp:
        for shap in clf.shapelets_:
            ofp.write(str(np.reshape(shap, (-1))) + '\n')

    with open(timing_out_path, 'w+') as ofp:
        ofp.write(str(learning_time))

    X_distances_train = clf.transform(X_train)
    X_distances_test = clf.transform(X_test)

    fit_lr(X_distances_train, y_train, X_distances_test, y_test, pred_out_path)
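
For intuition, in current tslearn the heuristic takes (n_ts, ts_sz, n_classes, l, r) and returns a {shapelet_length: count} dictionary. A standalone sketch with made-up dimensions (all numbers hypothetical):

from tslearn.shapelets import grabocka_params_to_shapelet_size_dict

# 100 series of length 50, 3 classes; shapelet lengths are the multiples
# l * ts_sz, 2 * l * ts_sz, ..., r * l * ts_sz
sizes = grabocka_params_to_shapelet_size_dict(n_ts=100, ts_sz=50,
                                              n_classes=3, l=0.1, r=2)
print(sizes)  # e.g. {5: ..., 10: ...} -- one entry per shapelet length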
Example #4
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values
        Parameters
        ----------
        X : {array-like}
            Training data. Shape [n_samples, n_features].
        y : {array-like, sparse matrix}
            Target values of shape = [n_samples] or [n_samples, n_outputs]
        """

        self.X = X
        self.y = y

        n_shapelets_per_size = self.shapelet_model_params.get(
            "n_shapelets_per_size", "heuristic")
        if n_shapelets_per_size == "heuristic":
            n_ts, ts_sz = X.shape[:2]
            n_classes = len(set(y))
            n_shapelets_per_size = grabocka_params_to_shapelet_size_dict(
                n_ts=n_ts,
                ts_sz=ts_sz,
                n_classes=n_classes,
                l=self.shapelet_model_params.get("l", 0.1),
                r=self.shapelet_model_params.get("r", 2))

        shp_clf = ShapeletModel(
            n_shapelets_per_size=n_shapelets_per_size,
            optimizer=self.shapelet_model_params.get("optimizer", "sgd"),
            weight_regularizer=self.shapelet_model_params.get(
                "weight_regularizer", .01),
            max_iter=self.shapelet_model_params.get("max_iter", 100),
            random_state=self.random_state,
            verbose=self.shapelet_model_params.get("verbose", 0))

        shp_clf.fit(X, y)
        X_transformed = shp_clf.transform(X)
        self.X_transformed = X_transformed

        if self.tau is not None:
            self.X_thresholded = 1 * (self.X_transformed < self.tau)
            clf = DecisionTreeClassifier()
            param_grid = self.decision_tree_grid_search_params
            grid = GridSearchCV(clf,
                                param_grid=param_grid,
                                scoring='accuracy',
                                n_jobs=-1,
                                verbose=0)
            grid.fit(self.X_thresholded, y)
        else:
            grids = []
            grids_scores = []
            for quantile in self.tau_quantiles:
                _X_thresholded = 1 * (self.X_transformed < (np.quantile(
                    self.X_transformed, quantile)))
                clf = DecisionTreeClassifier()
                param_grid = self.decision_tree_grid_search_params
                grid = GridSearchCV(clf,
                                    param_grid=param_grid,
                                    scoring='accuracy',
                                    n_jobs=-1,
                                    verbose=0)
                grid.fit(_X_thresholded, y)
                grids.append(grid)
                grids_scores.append(grid.best_score_)
            grid = grids[np.argmax(np.array(grids_scores))]
            best_quantile = self.tau_quantiles[np.argmax(
                np.array(grids_scores))]
            self.tau = np.quantile(self.X_transformed, best_quantile)
            self.X_thresholded = 1 * (self.X_transformed < self.tau)

        clf = DecisionTreeClassifier(**grid.best_params_)
        clf.fit(self.X_thresholded, y)
        if self.prune_duplicate_tree_leaves:
            # FIXME: does this influence the .tree_ properties?
            prune_duplicate_leaves(clf)

        self.decision_tree = clf
        self.decision_tree_explorable = NewTree(clf)
        self.decision_tree_explorable.build_tree()
        self._shapelet_model = shp_clf
        self._build_tree_graph()

        return self
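
The core step in this fit is binarizing the shapelet-distance matrix before growing the decision tree. A minimal standalone sketch of that thresholding (the distances and the quantile are made up):

import numpy as np

X_transformed = np.array([[0.2, 1.5],
                          [0.9, 0.1]])     # hypothetical shapelet distances
tau = np.quantile(X_transformed, 0.5)      # threshold at the median (0.55)
X_thresholded = 1 * (X_transformed < tau)  # 1 means "close to the shapelet"
print(X_thresholded)                       # [[1 0]
                                           #  [0 1]]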
Example #5
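        # (Excerpt from inside a random hyper-parameter search: K, L, R,
        # _lambda and lts_results are defined in the surrounding loop.)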
        n_iterations = np.random.choice([2000, 5000, 10000])

        shapelet_dict = grabocka_params_to_shapelet_size_dict(
            X_train.shape[0], X_train.shape[1], int(K * X_train.shape[1]), L,
            R)
        clf = ShapeletModel(n_shapelets_per_size=shapelet_dict,
                            max_iter=n_iterations,
                            verbose_level=0,
                            batch_size=1,
                            optimizer='sgd',
                            weight_regularizer=_lambda)

        clf.fit(np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)),
                y_train)

        X_distances_train = clf.transform(X_train)
        X_distances_test = clf.transform(X_test)

        # liblinear supports both the l1 and l2 penalties searched here (the
        # lbfgs default in recent scikit-learn rejects l1)
        lr = GridSearchCV(LogisticRegression(solver='liblinear'), {
            'penalty': ['l1', 'l2'],
            'C': [0.001, 0.01, 0.1, 1.0, 10.0]
        })
        lr.fit(X_distances_train, y_train)

        acc = accuracy_score(y_test, lr.predict(X_distances_test))

        print([K, L, R, _lambda, n_iterations], acc)

        lts_results.append([K, L, R, _lambda, n_iterations, acc])

        # Sample random hyper-parameters for GENDIS
Example #6
import numpy
import matplotlib.pyplot as plt

from tslearn.shapelets import ShapeletModel, grabocka_params_to_shapelet_size_dict

# Choose the number and lengths of shapelets with the Grabocka heuristic
# (assumes X_train / y_train were loaded earlier, e.g. from a tslearn dataset)
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=X_train.shape[0],
                                                       ts_sz=X_train.shape[1],
                                                       n_classes=len(set(y_train)),
                                                       l=0.125,
                                                       r=1)

# Define the model and fit it using the training data
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        weight_regularizer=.01,
                        max_iter=100,
                        verbose=0,
                        random_state=42)
shp_clf.fit(X_train, y_train)

# Get the number of extracted shapelets, the (minimal) distances from
# each of the timeseries to each of the shapelets, and the corresponding
# locations (index) where the minimal distance was found
n_shapelets = sum(shapelet_sizes.values())
distances = shp_clf.transform(X_train)
predicted_locations = shp_clf.locate(X_train)

plt.figure()
plt.title("Example locations of shapelet matches "
          "({} shapelets extracted)".format(n_shapelets))

# Plot the training series that best matches the shapelets overall
test_ts_id = numpy.argmin(numpy.sum(distances, axis=1))
plt.plot(X_train[test_ts_id].ravel())

# Plot each shapelet at its best-matching location in that series
for idx_shp, shp in enumerate(shp_clf.shapelets_):
    t0 = predicted_locations[test_ts_id, idx_shp]
    plt.plot(numpy.arange(t0, t0 + len(shp)), shp, linewidth=2)

plt.show()
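
As a follow-up, the fitted model is also a classifier in its own right; a one-line sketch, assuming a held-out X_test shaped like X_train:

y_pred = shp_clf.predict(X_test)  # X_test is hypothetical here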