def fit_lts(X_train, y_train, X_test, y_test, shap_dict, reg, max_it,
            shap_out_path, pred_out_path, timing_out_path):
    """Train an LTS shapelet model, save its artefacts and fit the follow-up classifier.

    Parameters
    ----------
    X_train, X_test : array-like
        Training / test series. ``X_train`` must expose ``.shape`` with at
        least two entries; it is reshaped to ``(shape[0], shape[1], 1)``
        before being passed to ``ShapeletModel.fit``.
    y_train, y_test : array-like
        Labels matching ``X_train`` / ``X_test``.
    shap_dict : dict
        Forwarded as ``n_shapelets_per_size``.
    reg : float
        Forwarded as ``weight_regularizer``.
    max_it : int
        Forwarded as ``max_iter``.
    shap_out_path : str
        File receiving one flattened shapelet per line.
    pred_out_path : str
        Forwarded untouched to ``fit_lr``.
    timing_out_path : str
        File receiving the measured fitting time in seconds.
    """
    model = ShapeletModel(
        n_shapelets_per_size=shap_dict,
        max_iter=max_it,
        verbose_level=0,
        batch_size=1,
        optimizer='sgd',
        weight_regularizer=reg,
    )

    # The reshape is deliberately kept inside the timed region.
    t_begin = time.time()
    train_3d = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    model.fit(train_3d, y_train)
    learning_time = time.time() - t_begin

    # One line per shapelet, each flattened to 1-D before stringification.
    with open(shap_out_path, 'w+') as shap_file:
        shap_file.writelines(
            str(np.reshape(shapelet, -1)) + '\n'
            for shapelet in model.shapelets_
        )

    with open(timing_out_path, 'w+') as timing_file:
        timing_file.write(str(learning_time))

    # Distances to the learned shapelets become the features for fit_lr.
    train_features = model.transform(X_train)
    test_features = model.transform(X_test)
    fit_lr(train_features, y_train, test_features, y_test, pred_out_path)
def test_shapelet_lengths():
    """Check ``ShapeletModel.transform`` on variable-length input and with ``max_size``."""
    pytest.importorskip('tensorflow')
    from tslearn.shapelets import ShapeletModel

    # Scenario 1: variable-length series at both fit and transform time.
    labels = [0, 1]
    dataset = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    model = ShapeletModel(
        n_shapelets_per_size={3: 1},
        max_iter=1,
        verbose=0,
        random_state=0,
    )
    model.fit(dataset, labels)
    # Overwrite the learned shapelet so the expected distances are known.
    forced_weights = [np.array([[1, 2, 3]])]
    model.set_weights(forced_weights, layer_name="shapelets_0_0")
    distances = model.transform(dataset)
    np.testing.assert_allclose(distances, np.array([[0.], [8. / 3]]))

    # Scenario 2: max_size lets the model transform series longer than
    # those it was fitted on.
    labels = [0, 1]
    dataset = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    model = ShapeletModel(
        n_shapelets_per_size={3: 1},
        max_iter=1,
        verbose=0,
        max_size=6,
        random_state=0,
    )
    truncated = dataset[:, :-1]  # Fit with size 4
    model.fit(truncated, labels)
    forced_weights = [np.array([[1, 2, 3]])]
    model.set_weights(forced_weights, layer_name="shapelets_0_0")
    distances = model.transform(dataset)
    np.testing.assert_allclose(distances, np.array([[0.], [8. / 3]]))
def lts_discovery(X_train, y_train, X_test, y_test, nr_shap, l, r, reg,
                  max_it, shap_out_path, pred_out_path, timing_out_path):
    """Derive shapelet sizes, train an LTS model, save its artefacts and call ``fit_lr``.

    Parameters
    ----------
    X_train, X_test : array-like
        Training / test series. ``X_train`` must expose ``.shape`` with at
        least two entries; it is reshaped to ``(shape[0], shape[1], 1)``
        before being passed to ``ShapeletModel.fit``.
    y_train, y_test : array-like
        Labels matching ``X_train`` / ``X_test``.
    nr_shap : float
        Multiplied by ``X_train.shape[1]`` and truncated to ``int``; the
        result is the third positional argument of
        ``grabocka_params_to_shapelet_size_dict``.
    l, r :
        Fourth and fifth positional arguments of
        ``grabocka_params_to_shapelet_size_dict``.
    reg : float
        Forwarded as ``weight_regularizer``.
    max_it : int
        Forwarded as ``max_iter``.
    shap_out_path : str
        File receiving one flattened shapelet per line.
    pred_out_path : str
        Forwarded untouched to ``fit_lr``.
    timing_out_path : str
        File receiving the measured fitting time in seconds.
    """
    n_rows, n_cols = X_train.shape[0], X_train.shape[1]
    sizes = grabocka_params_to_shapelet_size_dict(
        n_rows, n_cols, int(nr_shap * n_cols), l, r
    )

    model = ShapeletModel(
        n_shapelets_per_size=sizes,
        max_iter=max_it,
        verbose_level=0,
        batch_size=1,
        optimizer='sgd',
        weight_regularizer=reg,
    )

    # The reshape is deliberately kept inside the timed region.
    t_begin = time.time()
    train_3d = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    model.fit(train_3d, y_train)
    learning_time = time.time() - t_begin

    print('Learning shapelets took {}s'.format(learning_time))

    # One line per shapelet, each flattened to 1-D before stringification.
    with open(shap_out_path, 'w+') as shap_file:
        shap_file.writelines(
            str(np.reshape(shapelet, -1)) + '\n'
            for shapelet in model.shapelets_
        )

    with open(timing_out_path, 'w+') as timing_file:
        timing_file.write(str(learning_time))

    # Distances to the learned shapelets become the features for fit_lr.
    train_features = model.transform(X_train)
    test_features = model.transform(X_test)
    fit_lr(train_features, y_train, test_features, y_test, pred_out_path)
def fit(self, X, y):
    """Learn shapelets, binarise the shapelet distances and fit a decision tree.

    Parameters
    ----------
    X : array-like
        Training data, passed as-is to ``ShapeletModel.fit``. When the
        shapelet sizes are chosen heuristically, ``X.shape[:2]`` is read
        as ``(n_ts, ts_sz)``.
    y : array-like
        Target values; must be iterable so the number of classes can be
        counted with ``len(set(y))``.

    Returns
    -------
    self
        The fitted estimator. ``self.tau`` is overwritten when it was
        ``None`` on entry.
    """
    self.X = X
    self.y = y

    # --- 1. Shapelet model ------------------------------------------------
    sizes = self.shapelet_model_params.get(
        "n_shapelets_per_size", "heuristic")
    if sizes == "heuristic":
        n_ts, ts_sz = X.shape[:2]
        sizes = grabocka_params_to_shapelet_size_dict(
            n_ts=n_ts,
            ts_sz=ts_sz,
            n_classes=len(set(y)),
            l=self.shapelet_model_params.get("l", 0.1),
            r=self.shapelet_model_params.get("r", 2))

    shp_clf = ShapeletModel(
        n_shapelets_per_size=sizes,
        optimizer=self.shapelet_model_params.get("optimizer", "sgd"),
        weight_regularizer=self.shapelet_model_params.get(
            "weight_regularizer", .01),
        max_iter=self.shapelet_model_params.get("max_iter", 100),
        random_state=self.random_state,
        verbose=self.shapelet_model_params.get("verbose", 0))
    shp_clf.fit(X, y)
    self.X_transformed = shp_clf.transform(X)

    # --- 2. Threshold selection + tree hyper-parameter search -------------
    def _search_tree(binary_features):
        # Grid-search a fresh decision tree on already-thresholded features.
        search = GridSearchCV(
            DecisionTreeClassifier(),
            param_grid=self.decision_tree_grid_search_params,
            scoring='accuracy',
            n_jobs=-1,
            verbose=0)
        search.fit(binary_features, y)
        return search

    if self.tau is not None:
        # A threshold was supplied: use it directly.
        self.X_thresholded = 1 * (self.X_transformed < self.tau)
        best_search = _search_tree(self.X_thresholded)
    else:
        # No threshold: try every candidate quantile of the distances and
        # keep the one whose grid search scores best.
        searches = []
        scores = []
        for q in self.tau_quantiles:
            cutoff = np.quantile(self.X_transformed, q)
            candidate = _search_tree(1 * (self.X_transformed < cutoff))
            searches.append(candidate)
            scores.append(candidate.best_score_)
        winner = np.argmax(np.array(scores))
        best_search = searches[winner]
        self.tau = np.quantile(
            self.X_transformed, self.tau_quantiles[winner])

    # Binarise with the final threshold (same value as above when tau was
    # supplied by the caller).
    self.X_thresholded = 1 * (self.X_transformed < self.tau)

    # --- 3. Final tree ----------------------------------------------------
    tree = DecisionTreeClassifier(**best_search.best_params_)
    tree.fit(self.X_thresholded, y)
    if self.prune_duplicate_tree_leaves:
        prune_duplicate_leaves(
            tree)  # FIXME: does it influence the .tree properties?

    self.decision_tree = tree
    self.decision_tree_explorable = NewTree(tree)
    self.decision_tree_explorable.build_tree()
    self._shapelet_model = shp_clf
    self._build_tree_graph()
    return self
# One evaluation step for LTS. K, L, R, _lambda, X_train, y_train, X_test,
# y_test and lts_results are defined outside this excerpt.

# Draw the number of training iterations at random from three fixed options.
n_iterations = np.random.choice([2000, 5000, 10000])
# Derive the shapelet-size dictionary; the third argument is K scaled by the
# second dimension of X_train and truncated to an int.
shapelet_dict = grabocka_params_to_shapelet_size_dict(
    X_train.shape[0], X_train.shape[1], int(K * X_train.shape[1]), L, R)
clf = ShapeletModel(n_shapelets_per_size=shapelet_dict,
                    max_iter=n_iterations, verbose_level=0, batch_size=1,
                    optimizer='sgd', weight_regularizer=_lambda)
# The model is fitted on X_train reshaped to (shape[0], shape[1], 1).
clf.fit(np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1)),
        y_train)
# The transform output is used as the feature matrix for the classifier below.
X_distances_train = clf.transform(X_train)
X_distances_test = clf.transform(X_test)
# Grid-search a logistic regression over penalty type and C.
# NOTE(review): 'l1' may need a solver that supports it in recent
# scikit-learn releases -- confirm against the installed version.
lr = GridSearchCV(LogisticRegression(), {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1.0, 10.0]
})
lr.fit(X_distances_train, y_train)
# Score on the test split and record the sampled configuration with it.
acc = accuracy_score(y_test, lr.predict(X_distances_test))
print([K, L, R, _lambda, n_iterations], acc)
lts_results.append([K, L, R, _lambda, n_iterations, acc])
# Sample random hyper-parameters for GENDIS
l=0.125, r=1)  # tail of a call opened before this excerpt; left untouched

# Define the model and fit it using the training data
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        weight_regularizer=.01, max_iter=100, verbose=0,
                        random_state=42)
shp_clf.fit(X_train, y_train)

# Get the number of extracted shapelets, the (minimal) distances from
# each of the timeseries to each of the shapelets, and the corresponding
# locations (index) where the minimal distance was found
n_shapelets = sum(shapelet_sizes.values())
distances = shp_clf.transform(X_train)
predicted_locations = shp_clf.locate(X_train)

plt.figure()
plt.title("Example locations of shapelet matches "
          "({} shapelets extracted)".format(n_shapelets))

# Pick the series from X_train whose distances, summed along axis 1, are
# smallest, and plot it (presumably axis 1 indexes shapelets -- confirm).
test_ts_id = numpy.argmin(numpy.sum(distances, axis=1))
plt.plot(X_train[test_ts_id].ravel())

# Plot the shapelets on their best-matching locations: each shapelet is
# drawn over the x-range starting at its located index t0.
for idx_shp, shp in enumerate(shp_clf.shapelets_):
    t0 = predicted_locations[test_ts_id, idx_shp]
    plt.plot(numpy.arange(t0, t0 + len(shp)), shp, linewidth=2)