def predict(self, X):
    """Predict the class of every time series in a dataset.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, ) or (n_ts, n_classes)
        The matrix returned by ``predict_proba`` when ``categorical_y_`` is
        set; otherwise that matrix mapped back through
        ``label_binarizer_.inverse_transform``.
    """
    check_is_fitted(self, '_X_fit')
    dataset = to_time_series_dataset(check_array(X, allow_nd=True))
    dataset = check_dims(dataset, X_fit=self._X_fit)
    probas = self.predict_proba(dataset)
    if not self.categorical_y_:
        # Labels were binarized at fit time: convert back to label space.
        return self.label_binarizer_.inverse_transform(probas)
    return probas
def predict_proba(self, X):
    """Predict class probabilities for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_classes),
        Class probability matrix.
    """
    check_is_fitted(self, '_X_fit')
    dataset = to_time_series_dataset(check_array(X, allow_nd=True))
    dataset = check_dims(dataset, self._X_fit)
    n_series, length, _ = dataset.shape

    # The underlying model takes one (n_ts, sz, 1) input per dimension.
    per_dim_inputs = []
    for dim in range(self.d_):
        per_dim_inputs.append(
            dataset[:, :, dim].reshape((n_series, length, 1)))

    probas = self.model_.predict(per_dim_inputs,
                                 batch_size=self.batch_size,
                                 verbose=self.verbose)

    single_column = probas.shape[1] == 1
    if not (single_column and len(self.classes_) == 2):
        return probas
    # Two classes but a single output column: add the complementary column
    # in front so that the result has one column per class.
    return numpy.hstack((1 - probas, probas))
def fit(self, X, y):
    """Fit the model using X as training data and y as target values

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Training data.
    y : array-like, shape (n_ts, ) or (n_ts, dim_y)
        Target values.

    Returns
    -------
    KNeighborsTimeSeriesRegressor
        The fitted estimator

    Notes
    -----
    For a metric listed in ``VARIABLE_LENGTH_METRICS`` the ``metric``
    attribute is switched to ``"precomputed"`` only for the duration of the
    call. It is put back before returning, including when an exception is
    raised, and a ``_ts_metric`` left over from an earlier fit no longer
    overrides a metric that is not ``"precomputed"``.
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        self._ts_metric = self.metric
        self.metric = "precomputed"

    # Decide once whether this call runs in time-series-metric mode, so the
    # restoration below only happens when the metric was really swapped.
    # NOTE(review): an explicit metric="precomputed" combined with a stale
    # ``_ts_metric`` is still treated as time-series mode, as before.
    ts_mode = self.metric == "precomputed" and hasattr(self, '_ts_metric')

    try:
        X = check_array(X,
                        allow_nd=True,
                        force_all_finite=(self.metric != "precomputed"))
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=None)
        if ts_mode:
            self._ts_fit = X
            self._d = X.shape[2]
            # Square all-zero matrix with one row per training series is
            # what gets handed to the parent ``fit`` in this mode.
            self._X_fit = numpy.zeros((self._ts_fit.shape[0],
                                       self._ts_fit.shape[0]))
        else:
            self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
        super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
    finally:
        # Never leave the user-facing hyper-parameter as "precomputed".
        if ts_mode:
            self.metric = self._ts_metric
    return self
def locate(self, X):
    """Compute shapelet match location for a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Location of the shapelet matches for the provided time series.

    Examples
    --------
    >>> X = numpy.zeros((3, 10, 1))
    >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
    >>> y = [1, 0, 0]
    >>> # Data is all zeros except a motif 1-2-3 in the first time series
    >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
    ...                     verbose=0)
    >>> _ = clf.fit(X, y)
    >>> weights_shapelet = [
    ...     numpy.array([[1, 2, 3]])
    ... ]
    >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
    >>> clf.locate(X)
    array([[4],
           [0],
           [0]])
    """
    # Same validation sequence as ``transform``: fail with a clear
    # not-fitted error first, then convert, then compare the dimensions with
    # the training data.
    check_is_fitted(self, '_X_fit')
    X = check_array(X, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=self._X_fit)
    n_ts, sz, _ = X.shape
    # The locator model takes one (n_ts, sz, 1) input per dimension.
    locations = self.locator_model_.predict(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
        batch_size=self.batch_size, verbose=self.verbose)
    # ``numpy.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    return locations.astype(int)
def predict_proba(self, X):
    """Predict the class probabilities for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    array, shape = (n_ts, n_classes)
        Array of predicted class probabilities

    Raises
    ------
    ValueError
        If the recorded time series metric is neither ``"dtw"`` nor
        ``"softdtw"``.

    Notes
    -----
    For a metric listed in ``VARIABLE_LENGTH_METRICS`` the ``metric``
    attribute is temporarily set to ``"precomputed"``; it is restored before
    returning, including when an exception is raised.
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        self._ts_metric = self.metric
        self.metric = "precomputed"
        try:
            if self.metric_params is None:
                metric_params = {}
            else:
                metric_params = self.metric_params.copy()
            # These two are taken from the estimator's own attributes and
            # must not be forwarded twice.
            metric_params.pop("n_jobs", None)
            metric_params.pop("verbose", None)

            check_is_fitted(self, '_ts_fit')
            X = check_array(X, allow_nd=True, force_all_finite=False)
            X = to_time_series_dataset(X)
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                               verbose=self.verbose, **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
        finally:
            # Always hand the original metric back, even on failure.
            self.metric = self._ts_metric
    else:
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X_ = to_sklearn_dataset(X)
        X_ = check_dims(X_, self._X_fit, extend=False)
        return super(KNeighborsTimeSeriesClassifier,
                     self).predict_proba(X_)
def predict(self, X):
    """Assign each time series in X to its closest cluster.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    validated = check_array(X, force_all_finite='allow-nan', allow_nd=True)
    check_is_fitted(self, 'cluster_centers_')
    # Dimensions are compared with the fitted centroids before conversion.
    series = to_time_series_dataset(
        check_dims(validated, self.cluster_centers_))
    # Assignment only: the fitted attributes are left untouched.
    return self._assign(series, update_class_attributes=False)
def _preprocess_sklearn(self, X, y=None, fit_time=False):
    """Validate the inputs and build the array given to the estimator.

    Parameters
    ----------
    X : array-like
        Time series dataset.
    y : array-like or None (default: None)
        Targets; validated together with ``X`` when provided.
    fit_time : bool (default: False)
        When True, ``_X_fit``, ``gamma_`` and ``classes_`` are (re)set from
        the given data.

    Returns
    -------
    sklearn_X, or the pair (sklearn_X, y) when ``y`` is provided
        ``sklearn_X`` is the output of ``cdist_gak`` for kernels listed in
        ``VARIABLE_LENGTH_METRICS`` and the output of
        ``_prepare_ts_datasets_sklearn`` otherwise.
    """
    variable_length = self.kernel in VARIABLE_LENGTH_METRICS
    has_target = y is not None

    if has_target:
        X, y = check_X_y(X, y, allow_nd=True,
                         force_all_finite=not variable_length)
    else:
        X = check_array(X, allow_nd=True,
                        force_all_finite=not variable_length)
    ts_data = to_time_series_dataset(check_dims(X, X_fit=None))

    if fit_time:
        self._X_fit = ts_data
        self.gamma_ = (gamma_soft_dtw(ts_data) if self.gamma == "auto"
                       else self.gamma)
        self.classes_ = numpy.unique(y)

    if not variable_length:
        self.estimator_kernel_ = self.kernel
        features = _prepare_ts_datasets_sklearn(ts_data)
    else:
        assert self.kernel == "gak"
        self.estimator_kernel_ = "precomputed"
        # At fit time the kernel is computed on the data alone; otherwise
        # against the stored training series.
        gak_inputs = (ts_data,) if fit_time else (ts_data, self._X_fit)
        features = cdist_gak(*gak_inputs,
                             sigma=numpy.sqrt(self.gamma_ / 2.),
                             n_jobs=self.n_jobs,
                             verbose=self.verbose)

    return (features, y) if has_target else features
def predict(self, X):
    """Assign each time series in X to its closest cluster.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    queries = check_array(X, force_all_finite=False, allow_nd=True)
    check_is_fitted(self, '_X_fit')
    queries = check_dims(queries, self._X_fit)

    # Kernel between the query series and the series stored at fit time.
    kernel = self._get_kernel(queries, self._X_fit)

    # One row per query, one column per cluster; presumably filled in place
    # by ``_compute_dist`` since its return value is not used.
    distances = numpy.zeros((queries.shape[0], self.n_clusters))
    self._compute_dist(kernel, distances)
    return numpy.argmin(distances, axis=1)
def transform(self, X):
    """Compute the shapelet transform of a set of time series.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    Returns
    -------
    array of shape=(n_ts, n_shapelets)
        Shapelet-Transform of the provided time series.
    """
    check_is_fitted(self, '_X_fit')
    dataset = to_time_series_dataset(check_array(X, allow_nd=True))
    dataset = check_dims(dataset, X_fit=self._X_fit)
    n_series, length, _ = dataset.shape

    # The transformer model takes one (n_ts, sz, 1) input per dimension.
    per_dim_inputs = []
    for dim in range(self.d_):
        per_dim_inputs.append(
            dataset[:, :, dim].reshape((n_series, length, 1)))

    return self.transformer_model_.predict(per_dim_inputs,
                                           batch_size=self.batch_size,
                                           verbose=self.verbose)
def predict(self, X):
    """Assign each time series in X to its closest cluster.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset to predict.

    Returns
    -------
    labels : array of shape=(n_ts, )
        Index of the cluster each sample belongs to.
    """
    validated = check_array(X, allow_nd=True)
    check_is_fitted(self,
                    ['cluster_centers_', 'norms_', 'norms_centroids_'])
    series = to_time_series_dataset(validated)
    # Called for its validation against the fitted centroids only; the
    # value it returns is not used further.
    check_dims(validated, self.cluster_centers_)

    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    normalized = scaler.fit_transform(series)
    return self._cross_dists(normalized).argmin(axis=1)
def fit(self, X, y):
    """Learn time-series shapelets.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y : array-like of shape=(n_ts, )
        Time series labels.

    Returns
    -------
    self
        The fitted estimator. ``n_iter_`` holds the number of loss values
        recorded in the training history, i.e. one per epoch run.
    """
    if self.verbose_level is not None:
        warnings.warn(
            "'verbose_level' is deprecated in version 0.2 and will be "
            "removed in 0.4. Use 'verbose' instead.",
            DeprecationWarning, stacklevel=2)
        self.verbose = self.verbose_level

    X, y = check_X_y(X, y, allow_nd=True)
    X = to_time_series_dataset(X)
    X = check_dims(X, X_fit=None)

    set_random_seed(seed=self.random_state)
    numpy.random.seed(seed=self.random_state)

    n_ts, sz, d = X.shape
    self._X_fit = X

    # Reset everything a previous call to ``fit`` may have left behind.
    self.model_ = None
    self.transformer_model_ = None
    self.locator_model_ = None
    self.categorical_y_ = False
    self.label_binarizer_ = None

    self.d_ = d

    if y.ndim == 1 or y.shape[1] == 1:
        # Plain label vector: binarize it for training.
        self.label_binarizer_ = LabelBinarizer().fit(y)
        y_ = self.label_binarizer_.transform(y)
        self.classes_ = self.label_binarizer_.classes_
    else:
        # Target already given as a matrix with one column per class.
        y_ = y
        self.categorical_y_ = True
        self.classes_ = numpy.unique(y)
        assert y_.shape[1] != 2, ("Binary classification case, " +
                                  "monodimensional y should be passed.")

    if y_.ndim == 1 or y_.shape[1] == 1:
        n_classes = 2
    else:
        n_classes = y_.shape[1]

    if self.n_shapelets_per_size is None:
        sizes = grabocka_params_to_shapelet_size_dict(
            n_ts, sz, n_classes, self.shapelet_length, self.total_lengths)
        self.n_shapelets_per_size_ = sizes
    else:
        self.n_shapelets_per_size_ = self.n_shapelets_per_size

    self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_classes)
    self.transformer_model_.compile(loss="mean_squared_error",
                                    optimizer=self.optimizer)
    self.locator_model_.compile(loss="mean_squared_error",
                                optimizer=self.optimizer)
    self._set_weights_false_conv(d=d)
    # The model takes one (n_ts, sz, 1) input per dimension.
    self.model_.fit(
        [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)], y_,
        batch_size=self.batch_size, epochs=self.max_iter,
        verbose=self.verbose)
    # ``history.history`` maps each tracked quantity to its per-epoch
    # values, so its own length is the number of tracked quantities, not
    # the number of epochs. Count the recorded losses instead.
    epoch_losses = self.model_.history.history.get("loss", [])
    self.n_iter_ = len(epoch_losses)
    return self
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
    """Finds the K-neighbors of a point.

    Returns indices of and distances to the neighbors of each point.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        The query time series.
        If not provided, neighbors of each indexed point are returned.
        In this case, the query point is not considered its own neighbor.
    n_neighbors : int
        Number of neighbors to get (default is the value passed to the
        constructor).
    return_distance : boolean, optional. Defaults to True.
        If False, distances will not be returned

    Returns
    -------
    dist : array
        Array representing the distance to points, only present if
        return_distance=True
    ind : array
        Indices of the nearest points in the population matrix.

    Raises
    ------
    ValueError
        If the recorded time series metric is neither ``"dtw"`` nor
        ``"softdtw"``.

    Notes
    -----
    For a metric listed in ``VARIABLE_LENGTH_METRICS`` the ``metric``
    attribute is temporarily set to ``"precomputed"``; it is restored before
    returning, including when an exception is raised.
    """
    if self.metric in VARIABLE_LENGTH_METRICS:
        self._ts_metric = self.metric
        self.metric = "precomputed"
        try:
            if self.metric_params is None:
                metric_params = {}
            else:
                metric_params = self.metric_params.copy()
            # These two are taken from the estimator's own attributes and
            # must not be forwarded twice.
            metric_params.pop("n_jobs", None)
            metric_params.pop("verbose", None)

            check_is_fitted(self, '_ts_fit')
            # NOTE(review): X goes straight to check_array without a None
            # guard, so X=None does not appear to be supported in this
            # branch - confirm.
            X = check_array(X, allow_nd=True, force_all_finite=False)
            X = to_time_series_dataset(X)
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                               verbose=self.verbose, **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)
        finally:
            # Always hand the original metric back, even on failure.
            self.metric = self._ts_metric
    else:
        check_is_fitted(self, '_X_fit')
        if X is None:
            X_ = None
        else:
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
        return KNeighborsTimeSeriesMixin.kneighbors(
            self,
            X=X_,
            n_neighbors=n_neighbors,
            return_distance=return_distance)
def fit(self, X, y=None, sample_weight=None):
    """Compute kernel k-means clustering.

    Initialisations are repeated until ``n_init`` of them have succeeded or
    ``max(n_init, 10)`` attempts have been made; an attempt that raises
    ``EmptyClusterError`` does not count as a success. The labels and
    inertia of the successful run with the lowest inertia are kept.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.
    y
        Ignored
    sample_weight : array-like of shape=(n_ts, ) or None (default: None)
        Weights to be given to time series in the learning process. By
        default, all time series weights are equal.

    Returns
    -------
    self
        The estimator itself.
    """
    X = check_array(X, allow_nd=True, force_all_finite=False)
    X = check_dims(X, X_fit=None)

    if sample_weight is not None:
        sample_weight = check_array(sample_weight, ensure_2d=False)

    # Upper bound on the number of initialisations tried, failures included.
    max_attempts = max(self.n_init, 10)

    # Reset the attributes this method is responsible for.
    self.labels_ = None
    self.inertia_ = None
    self.sample_weight_ = None
    self._X_fit = None
    # n_iter_ will contain the number of iterations the most
    # successful run required.
    self.n_iter_ = 0

    n_samples = X.shape[0]
    # Kernel of the dataset with itself; computed once and reused by every
    # initialisation.
    K = self._get_kernel(X)
    # Uniform weights when none are provided.
    sw = (sample_weight if sample_weight is not None
          else numpy.ones(n_samples))
    self.sample_weight_ = sw
    rs = check_random_state(self.random_state)

    # Best run seen so far.
    last_correct_labels = None
    min_inertia = numpy.inf
    n_attempts = 0
    n_successful = 0
    while n_successful < self.n_init and n_attempts < max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(K, rs)
            if self.inertia_ < min_inertia:
                last_correct_labels = self.labels_
                min_inertia = self.inertia_
                self.n_iter_ = self._iter
            n_successful += 1
        except EmptyClusterError:
            # The attempt is counted but not the success; try again.
            if self.verbose:
                print("Resumed because of empty cluster")
    if n_successful > 0:
        # Expose the best run rather than the last one.
        self.labels_ = last_correct_labels
        self.inertia_ = min_inertia
        # NOTE(review): the training data is stored only when at least one
        # initialisation succeeded - confirm this nesting is intended.
        self._X_fit = X
    return self