Пример #1
0
    def predict(self, X):
        """Predict the target for the provided data.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, ) or (n_ts, dim_y)
            Array of predicted targets
        """
        if self.metric in TSLEARN_VALID_METRICS:
            check_is_fitted(self, '_ts_fit')
            X = to_time_series_dataset(X)
            X = check_dims(X, X_fit_dims=self._ts_fit.shape, extend=True,
                           check_n_features_only=True)
            X_ = self._precompute_cross_dist(X)
            try:
                return super().predict(X_)
            finally:
                # Put the user-facing metric back even when the parent
                # prediction raises, so that the estimator is never left
                # with a modified ``metric`` hyper-parameter.
                # NOTE(review): presumably `_precompute_cross_dist` is what
                # switches `self.metric` -- confirm against its definition.
                self.metric = self._ts_metric

        # Metric handled natively by scikit-learn: work on the flattened
        # representation of the time series.
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X_ = to_sklearn_dataset(X)
        X_ = check_dims(X_, X_fit_dims=self._X_fit.shape, extend=False)
        return super().predict(X_)
Пример #2
0
    def inverse_transform(self, X):
        """Rebuild time series from their 1d-SAX representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz_sax, 2 * d)
            A dataset of SAX series.

        Returns
        -------
        numpy.ndarray of shape (n_ts, sz_original_ts, d)
            A dataset of time series corresponding to the provided
            representation.
        """
        self._is_fitted()
        sax_data = check_array(X, allow_nd=True)
        # Only the feature count is validated: it must be twice the number
        # of features recorded at fit time.
        expected_dims = (None, None, 2 * self._X_fit_dims_[-1])
        sax_data = check_dims(sax_data, X_fit_dims=expected_dims,
                              check_n_features_only=True)
        reconstructed = inv_transform_1d_sax(
            sax_data,
            breakpoints_avg_middle_=self.breakpoints_avg_middle_,
            breakpoints_slope_middle_=self.breakpoints_slope_middle_,
            original_size=self._X_fit_dims_[1]
        )
        return self._unscale(reconstructed)
Пример #3
0
    def fit(self, X, y=None, sample_weight=None):
        """Run kernel k-means clustering on a time series dataset.

        Several initialisations are tried (at most ``max(n_init, 10)``
        attempts); the labels of the run with the lowest inertia are kept.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        y
            Ignored
        sample_weight : array-like of shape=(n_ts, ) or None (default: None)
            Weights to be given to time series in the learning process. By
            default, all time series weights are equal.
        """
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = check_dims(X)

        sample_weight = _check_sample_weight(sample_weight=sample_weight, X=X)

        attempt_budget = max(self.n_init, 10)

        # Reset everything a previous call to fit may have stored.
        self.labels_ = None
        self.inertia_ = None
        self.sample_weight_ = None
        self._X_fit = None
        # n_iter_ ends up holding the iteration count of the best run.
        self.n_iter_ = 0

        n_samples = X.shape[0]
        K = self._get_kernel(X)
        if sample_weight is None:
            self.sample_weight_ = numpy.ones(n_samples)
        else:
            self.sample_weight_ = sample_weight
        rs = check_random_state(self.random_state)

        best_labels = None
        best_inertia = numpy.inf
        attempts = 0
        successes = 0
        while successes < self.n_init and attempts < attempt_budget:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (successes + 1))
            attempts += 1
            try:
                self._fit_one_init(K, rs)
            except EmptyClusterError:
                # A failed attempt counts against the budget but not
                # against the number of successful initialisations.
                if self.verbose:
                    print("Resumed because of empty cluster")
                continue
            if self.inertia_ < best_inertia:
                best_labels = self.labels_
                best_inertia = self.inertia_
                self.n_iter_ = self._iter
            successes += 1

        if successes > 0:
            self.labels_ = best_labels
            self.inertia_ = best_inertia
            self._X_fit = X
        return self
Пример #4
0
    def predict(self, X):
        """Predict the output for a given set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, ) or (n_ts, n_classes), depending on the shape
        of the label vector provided at training time.
            The output of ``predict_proba`` when ``categorical_y_`` is set,
            otherwise that output mapped back through ``label_binarizer_``.
        """
        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=self._X_fit)

        probas = self.predict_proba(X)
        if not self.categorical_y_:
            # Convert the categorical matrix back to the original labels.
            return self.label_binarizer_.inverse_transform(probas)
        return probas
Пример #5
0
    def transform(self, X, y=None, **kwargs):
        """Will normalize (min-max) each of the timeseries. IMPORTANT: this
        transformation is completely stateless, and is applied to each of
        the timeseries individually.

        Series that are constant along the time axis (for a given feature)
        have a zero range; they are mapped to the lower bound of
        ``value_range`` instead of producing NaN through a 0 / 0 division.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset to be rescaled.

        Returns
        -------
        numpy.ndarray
            Rescaled time series dataset.

        Raises
        ------
        ValueError
            If the lower bound of ``value_range`` is not strictly smaller
            than its upper bound.
        """
        value_range = self.value_range

        if value_range[0] >= value_range[1]:
            raise ValueError("Minimum of desired range must be smaller"
                             " than maximum. Got %s." % str(value_range))

        check_is_fitted(self, '_X_fit_dims')
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X_ = to_time_series_dataset(X)
        X_ = check_dims(X_, X_fit_dims=self._X_fit_dims, extend=False)
        # nan-aware reductions over the time axis; the new axis restores a
        # shape that broadcasts against X_.
        min_t = numpy.nanmin(X_, axis=1)[:, numpy.newaxis, :]
        max_t = numpy.nanmax(X_, axis=1)[:, numpy.newaxis, :]
        range_t = max_t - min_t
        # The numerator is zero wherever the range is zero, so dividing by 1
        # there yields value_range[0] rather than NaN.
        range_t[range_t == 0.] = 1.
        nomin = (X_ - min_t) * (value_range[1] - value_range[0])
        X_ = nomin / range_t + value_range[0]
        return X_
Пример #6
0
    def fit(self, X, y=None):
        """Fit the model using X as training data

        When ``metric`` is one of ``TSLEARN_VALID_METRICS`` the parent
        estimator is fitted in "precomputed" mode on a placeholder square
        matrix and the time series themselves are kept in ``_ts_fit``.
        The ``metric`` attribute is temporarily switched during fitting and
        is always restored, even if fitting fails.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        """
        if self.metric in TSLEARN_VALID_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"
        # Decided once, up-front: a `_ts_metric` left over from an earlier
        # fit must not override a metric the user has changed since.
        uses_ts_metric = (self.metric == "precomputed"
                          and hasattr(self, '_ts_metric'))

        try:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=(self.metric != "precomputed"))
            X = to_time_series_dataset(X)
            X = check_dims(X)
            if uses_ts_metric:
                self._ts_fit = X
                self._d = X.shape[2]
                # Placeholder of the right shape for the parent estimator.
                self._X_fit = numpy.zeros((self._ts_fit.shape[0],
                                           self._ts_fit.shape[0]))
            else:
                self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
            super().fit(self._X_fit, y)
        finally:
            if uses_ts_metric:
                self.metric = self._ts_metric
        return self
Пример #7
0
    def transform(self, X):
        """Compute the shapelet transform of a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Shapelet-Transform of the provided time series.
        """
        check_is_fitted(self, '_X_fit_dims')
        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = self._preprocess_series(X)
        X = check_dims(X, X_fit_dims=self._X_fit_dims,
                       check_n_features_only=True)
        self._check_series_length(X)

        n_ts, sz, _ = X.shape
        # The transformer model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = [X[:, :, di].reshape((n_ts, sz, 1))
                          for di in range(self.d_)]
        return self.transformer_model_.predict(
            per_dim_inputs,
            batch_size=self.batch_size, verbose=self.verbose
        )
Пример #8
0
    def predict(self, X):
        """Predict class for a given set of time series.

        The predicted class of a series is the entry of ``classes_`` whose
        column has the highest value in the output of ``predict_proba``.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, )
            Predicted class label for each time series.
        """
        check_is_fitted(self, '_X_fit_dims')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit_dims=self._X_fit_dims)

        best_columns = self.predict_proba(X).argmax(axis=1)
        labels = [self.classes_[col] for col in best_columns]
        return numpy.array(labels)
Пример #9
0
    def predict_proba(self, X):
        """Predict class probability for a given set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_classes),
            Class probability matrix.
        """
        check_is_fitted(self, '_X_fit_dims')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit_dims=self._X_fit_dims)

        n_ts, sz, _ = X.shape
        # The model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = [X[:, :, di].reshape((n_ts, sz, 1))
                          for di in range(self.d_)]
        probas = self.model_.predict(
            per_dim_inputs,
            batch_size=self.batch_size, verbose=self.verbose
        )

        single_output = probas.shape[1] == 1
        if single_output and len(self.classes_) == 2:
            # Two classes but a single output column: add the complementary
            # column so that one column per class is returned.
            probas = numpy.hstack((1 - probas, probas))

        return probas
Пример #10
0
    def predict_class_and_earliness(self, X):
        """
        Provide predicted class as well as prediction timestamps.

        Prediction timestamps are timestamps at which a prediction is made in
        early classification setting.

        Parameters
        ----------
        X : array-like of shape (n_series, n_timestamps, n_features)
            Vector to be scored, where `n_series` is the number of time series,
            `n_timestamps` is the number of timestamps in the series
            and `n_features` is the number of features recorded at each
            timestamp.

        Returns
        -------
        array, shape (n_series,)
            Predicted classes.
        array-like of shape (n_series, )
            Prediction timestamps.
        """
        X = check_array(X, allow_nd=True)
        check_is_fitted(self, '_X_fit_dims')
        X = check_dims(X, X_fit_dims=self._X_fit_dims,
                       check_n_features_only=True)

        classes, timestamps = [], []
        # Each series is handled independently by the single-series routine.
        for series in X:
            predicted_class, timestamp = self._predict_single_series(series)
            classes.append(predicted_class)
            timestamps.append(timestamp)
        return np.array(classes), np.array(timestamps)
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        When ``metric`` is one of ``VARIABLE_LENGTH_METRICS`` the parent
        estimator is fitted in "precomputed" mode on a placeholder square
        matrix and the time series themselves are kept in ``_ts_fit``.
        The ``metric`` attribute is temporarily switched during fitting and
        is always restored, even if fitting fails.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, ) or (n_ts, dim_y)
            Target values.

        Returns
        -------
        KNeighborsTimeSeriesRegressor
            The fitted estimator
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"
        # Decided once, up-front: a `_ts_metric` left over from an earlier
        # fit must not override a metric the user has changed since.
        uses_ts_metric = (self.metric == "precomputed"
                          and hasattr(self, '_ts_metric'))

        try:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=(self.metric != "precomputed"))
            X = to_time_series_dataset(X)
            X = check_dims(X, X_fit=None)
            if uses_ts_metric:
                self._ts_fit = X
                self._d = X.shape[2]
                # Placeholder of the right shape for the parent estimator.
                self._X_fit = numpy.zeros(
                    (self._ts_fit.shape[0], self._ts_fit.shape[0]))
            else:
                self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
            super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
        finally:
            if uses_ts_metric:
                self.metric = self._ts_metric
        return self
Пример #12
0
    def locate(self, X):
        """Compute shapelet match location for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Location of the shapelet matches for the provided time series.

        Examples
        --------
        >>> from tslearn.generators import random_walk_blobs
        >>> X = numpy.zeros((3, 10, 1))
        >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
        >>> y = [1, 0, 0]
        >>> # Data is all zeros except a motif 1-2-3 in the first time series
        >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
        ...                     verbose=0)
        >>> _ = clf.fit(X, y)
        >>> weights_shapelet = [
        ...     numpy.array([[1, 2, 3]])
        ... ]
        >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
        >>> clf.locate(X)
        array([[4],
               [0],
               [0]])
        """
        # Convert to a validated time series dataset first, then compare its
        # dimensions with the training data.
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=self._X_fit)
        n_ts, sz, _ = X.shape
        locations = self.locator_model_.predict(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
            batch_size=self.batch_size, verbose=self.verbose
        )
        # `numpy.int` was a plain alias of the builtin `int` and has been
        # removed from NumPy; the builtin gives the same dtype.
        return locations.astype(int)
Пример #13
0
    def fit(self, X, y):
        """Learn time-series shapelets.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        y : array-like of shape=(n_ts, )
            Time series labels.

        Returns
        -------
        self
            The fitted estimator.
        """
        if self.verbose_level is not None:
            warnings.warn(
                "'verbose_level' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use 'verbose' instead.",
                DeprecationWarning, stacklevel=2)
            self.verbose = self.verbose_level

        X, y = check_X_y(X, y, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X)

        set_random_seed(seed=self.random_state)
        numpy.random.seed(seed=self.random_state)

        n_ts, sz, d = X.shape
        self._X_fit_dims = X.shape

        # Models are rebuilt from scratch by `_set_model_layers` below.
        self.model_ = None
        self.transformer_model_ = None
        self.locator_model_ = None
        self.d_ = d

        y_ = self._preprocess_labels(y)
        n_labels = len(self.classes_)

        if self.n_shapelets_per_size is None:
            sizes = grabocka_params_to_shapelet_size_dict(n_ts, sz, n_labels,
                                                          self.shapelet_length,
                                                          self.total_lengths)
            self.n_shapelets_per_size_ = sizes
        else:
            self.n_shapelets_per_size_ = self.n_shapelets_per_size

        self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_labels)
        self._set_weights_false_conv(d=d)
        # The model takes one (n_ts, sz, 1) input per dimension.
        self.model_.fit(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)], y_,
            batch_size=self.batch_size, epochs=self.max_iter,
            verbose=self.verbose
        )
        # `history.history` is a dict with one entry per tracked quantity;
        # the number of epochs run is the length of the "loss" list, not the
        # number of entries in the dict.
        self.n_iter_ = len(self.model_.history.history.get("loss", []))
        return self
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        For metrics in ``VARIABLE_LENGTH_METRICS`` the cross-distance matrix
        between ``X`` and the training series is computed first and passed
        to the parent estimator in "precomputed" mode. The ``metric``
        attribute is temporarily switched for that purpose and is always
        restored, even if an error is raised.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, n_classes)
            Array of predicted class probabilities

        Raises
        ------
        ValueError
            If the metric is in ``VARIABLE_LENGTH_METRICS`` but is neither
            "dtw" nor "softdtw".
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"
            try:
                if self.metric_params is None:
                    metric_params = {}
                else:
                    # n_jobs / verbose are taken from the estimator itself,
                    # so they are dropped from the copied metric params.
                    metric_params = self.metric_params.copy()
                    metric_params.pop("n_jobs", None)
                    metric_params.pop("verbose", None)
                check_is_fitted(self, '_ts_fit')
                X = check_array(X, allow_nd=True, force_all_finite=False)
                X = to_time_series_dataset(X)
                if self._ts_metric == "dtw":
                    X_ = cdist_dtw(X,
                                   self._ts_fit,
                                   n_jobs=self.n_jobs,
                                   verbose=self.verbose,
                                   **metric_params)
                elif self._ts_metric == "softdtw":
                    X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
                else:
                    raise ValueError("Invalid metric recorded: %s" %
                                     self._ts_metric)
                return super(KNeighborsTimeSeriesClassifier,
                             self).predict_proba(X_)
            finally:
                # Never leave the estimator with metric == "precomputed".
                self.metric = self._ts_metric

        check_is_fitted(self, '_X_fit')
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X_ = to_sklearn_dataset(X)
        X_ = check_dims(X_, self._X_fit, extend=False)
        return super(KNeighborsTimeSeriesClassifier,
                     self).predict_proba(X_)
Пример #15
0
    def locate(self, X):
        """Compute shapelet match location for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Location of the shapelet matches for the provided time series.
        """
        # Convert to a validated time series dataset first, then compare its
        # dimensions with the training data.
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=self._X_fit)
        n_ts, sz, _ = X.shape
        locations = self.locator_model_.predict(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
            batch_size=self.batch_size, verbose=self.verbose
        )
        # `numpy.int` was a plain alias of the builtin `int` and has been
        # removed from NumPy; the builtin gives the same dtype.
        return locations.astype(int)
Пример #16
0
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        """Validate the inputs and build the matrix fed to the estimator.

        Parameters
        ----------
        X : array-like
            Time series dataset.
        y : array-like or None (default: None)
            Targets; validated together with ``X`` when provided.
        fit_time : bool (default: False)
            Whether this is called while fitting. If so, the training
            series, ``gamma_`` and ``classes_`` are stored on the estimator.

        Returns
        -------
        sklearn_X, or the tuple (sklearn_X, y) when ``y`` is not None
            ``sklearn_X`` is a precomputed kernel matrix for kernels in
            ``VARIABLE_LENGTH_METRICS`` and the output of
            ``to_sklearn_dataset`` otherwise.
        """
        variable_length = self.kernel in VARIABLE_LENGTH_METRICS
        # Non-finite values are only tolerated for variable-length kernels.
        finite_only = not variable_length
        if y is not None:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=finite_only)
        else:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=finite_only)
        X = to_time_series_dataset(X)

        if not fit_time:
            check_is_fitted(self, ['svm_estimator_', '_X_fit'])
            X = check_dims(X,
                           X_fit_dims=self._X_fit.shape,
                           extend=True,
                           check_n_features_only=variable_length)
        else:
            self._X_fit = X
            if self.gamma == "auto":
                self.gamma_ = gamma_soft_dtw(X)
            else:
                self.gamma_ = self.gamma
            self.classes_ = numpy.unique(y)

        if not variable_length:
            self.estimator_kernel_ = self.kernel
            sklearn_X = to_sklearn_dataset(X)
        else:
            assert self.kernel == "gak"
            self.estimator_kernel_ = "precomputed"
            sigma = numpy.sqrt(self.gamma_ / 2.)
            if fit_time:
                # Kernel matrix of the training set against itself.
                sklearn_X = cdist_gak(X,
                                      sigma=sigma,
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
            else:
                # Kernel matrix of the new series against the training set.
                sklearn_X = cdist_gak(X,
                                      self._X_fit,
                                      sigma=sigma,
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)

        if y is None:
            return sklearn_X
        return sklearn_X, y
Пример #17
0
    def fit(self, X, y):
        """Learn time-series shapelets.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        y : array-like of shape=(n_ts, )
            Time series labels.

        Returns
        -------
        self
            The fitted estimator.
        """
        X, y = check_X_y(X, y, allow_nd=True, force_all_finite=False)
        X = self._preprocess_series(X)
        X = check_dims(X)
        self._check_series_length(X)

        # Seed both numpy and tensorflow from the same random state.
        numpy.random.seed(seed=self.random_state)
        tf.random.set_seed(seed=self.random_state)
        n_ts, sz, d = X.shape
        self._X_fit_dims = X.shape

        # Models are rebuilt from scratch by `_set_model_layers` below.
        self.model_ = None
        self.transformer_model_ = None
        self.locator_model_ = None
        self.d_ = d

        encoded_labels = self._preprocess_labels(y)
        n_labels = len(self.classes_)

        if self.n_shapelets_per_size is not None:
            self.n_shapelets_per_size_ = self.n_shapelets_per_size
        else:
            # No explicit sizes given: derive them with the helper.
            self.n_shapelets_per_size_ = (
                grabocka_params_to_shapelet_size_dict(n_ts,
                                                      self._min_sz_fit,
                                                      n_labels,
                                                      self.shapelet_length,
                                                      self.total_lengths))

        self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_labels)
        self._set_weights_false_conv(d=d)
        # The model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = [X[:, :, di].reshape((n_ts, sz, 1))
                          for di in range(d)]
        training_log = self.model_.fit(
            per_dim_inputs, encoded_labels,
            batch_size=self.batch_size, epochs=self.max_iter,
            verbose=self.verbose
        )
        self.history_ = training_log.history
        self.n_iter_ = len(self.history_.get("loss", []))
        return self
Пример #18
0
    def fit(self, X, y=None):
        """Fit a Matrix Profile representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Unused.

        Returns
        -------
        MatrixProfile
            self
        """
        # Non-finite values are accepted at validation time.
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        dataset = check_dims(dataset)
        return self._fit(dataset)
Пример #19
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit a SAX representation and transform the data accordingly.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Unused.

        Returns
        -------
        numpy.ndarray of integers with shape (n_ts, n_segments, d)
            SAX-Transformed dataset
        """
        # Non-finite values are accepted at validation time.
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        dataset = check_dims(dataset)
        fitted = self._fit(dataset)
        return fitted._transform(dataset)
Пример #20
0
    def fit(self, X, y=None):
        """Fit a 1d-SAX representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Unused.

        Returns
        -------
        OneD_SymbolicAggregateApproximation
            self
        """
        # Non-finite values are accepted at validation time.
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        dataset = check_dims(dataset)
        return self._fit(dataset)
Пример #21
0
    def transform(self, X, y=None):
        """Transform a dataset of time series into its SAX representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Forwarded unchanged to ``_transform``.

        Returns
        -------
        numpy.ndarray of integers with shape (n_ts, n_segments, d)
            SAX-Transformed dataset
        """
        self._is_fitted()
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        # Only the number of features has to match the training data.
        fit_dims = tuple(self._X_fit_dims_)
        dataset = check_dims(dataset, X_fit_dims=fit_dims,
                             check_n_features_only=True)
        return self._transform(dataset, y)
Пример #22
0
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        # NaN entries are allowed by the input validation.
        X = check_array(X, allow_nd=True, force_all_finite='allow-nan')
        check_is_fitted(self, 'cluster_centers_')
        X = check_dims(X, self.cluster_centers_)
        dataset = to_time_series_dataset(X)
        # Assignment only: the fitted attributes are left untouched.
        return self._assign(dataset, update_class_attributes=False)
Пример #23
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit the estimator and transform a dataset of time series into its
        Matrix Profile representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Unused.

        Returns
        -------
        numpy.ndarray of shape (n_ts, output_size, 1)
            Matrix-Profile-Transformed dataset. `output_size` is equal to
            `sz - subsequence_length + 1`
        """
        # Non-finite values are accepted at validation time.
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        dataset = check_dims(dataset)
        fitted = self._fit(dataset)
        return fitted._transform(dataset)
Пример #24
0
    def inverse_transform(self, X):
        """Rebuild time series from their PAA representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz_paa, d)
            A dataset of PAA series.

        Returns
        -------
        numpy.ndarray of shape (n_ts, sz_original_ts, d)
            A dataset of time series corresponding to the provided
            representation.
        """
        self._is_fitted()
        paa_data = check_array(X, allow_nd=True, force_all_finite=False)
        paa_data = check_dims(paa_data)
        # The target length is the series length recorded at fit time.
        original_size = self._X_fit_dims_[1]
        return inv_transform_paa(paa_data, original_size=original_size)
Пример #25
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit a 1d-SAX representation and transform the data accordingly.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset
        y
            Unused.

        Returns
        -------
        numpy.ndarray of integers with shape (n_ts, n_segments, 2 * d)
            1d-SAX-Transformed dataset. The order of the last dimension is:
            first d elements represent average values
            (standard SAX symbols) and the last d are for slopes
        """
        # Non-finite values are accepted at validation time.
        dataset = check_array(X, allow_nd=True, force_all_finite=False)
        dataset = check_dims(dataset)
        fitted = self._fit(dataset)
        return fitted._transform(dataset)
Пример #26
0
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        When ``metric`` is one of ``TSLEARN_VALID_METRICS`` the parent
        estimator is fitted in "precomputed" mode on a placeholder square
        matrix and the time series themselves are kept in ``_ts_fit``
        (after SAX preprocessing when the metric is "sax"). The ``metric``
        attribute is temporarily switched during fitting and is always
        restored, even if fitting fails.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, )
            Target values.

        Returns
        -------
        KNeighborsTimeSeriesClassifier
            The fitted estimator
        """
        if self.metric in TSLEARN_VALID_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"
        # Decided once, up-front: a `_ts_metric` left over from an earlier
        # fit must not override a metric the user has changed since.
        uses_ts_metric = (self.metric == "precomputed"
                          and hasattr(self, '_ts_metric'))

        try:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=(self.metric != "precomputed"))
            X = to_time_series_dataset(X)
            X = check_dims(X)
            if uses_ts_metric:
                self._ts_fit = X
                if self._ts_metric == 'sax':
                    # Reset the stored statistics before preprocessing the
                    # training set.
                    self._sax_mu = None
                    self._sax_sigma = None
                    if self.metric_params is not None:
                        self._ts_fit = self._sax_preprocess(
                            X, **self.metric_params)
                    else:
                        self._ts_fit = self._sax_preprocess(X)

                self._d = X.shape[2]
                # Placeholder of the right shape for the parent estimator.
                self._X_fit = numpy.zeros(
                    (self._ts_fit.shape[0], self._ts_fit.shape[0]))
            else:
                self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
            super().fit(self._X_fit, y)
        finally:
            if uses_ts_metric:
                self.metric = self._ts_metric
        return self
Пример #27
0
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        # NaN entries are allowed by the input validation.
        X = check_array(X, allow_nd=True, force_all_finite='allow-nan')
        check_is_fitted(self, 'cluster_centers_')
        center_dims = self.cluster_centers_.shape
        # Every dimension must match for the euclidean metric; otherwise
        # only the number of features is compared.
        features_only = self.metric != "euclidean"
        X = check_dims(X, X_fit_dims=center_dims,
                       extend=True,
                       check_n_features_only=features_only)
        # Assignment only: the fitted attributes are left untouched.
        return self._assign(X, update_class_attributes=False)
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        X = check_array(X, allow_nd=True)
        check_is_fitted(self, '_X_fit')
        dataset = to_time_series_dataset(X)
        # Dimension check against the training data; its return value is
        # not needed since `dataset` was built above.
        check_dims(X, self._X_fit)
        # Series are rescaled to zero mean / unit variance before the
        # cross-distances are computed.
        scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
        dataset = scaler.fit_transform(dataset)
        cross_dists = self._cross_dists(dataset)
        return cross_dists.argmin(axis=1)
Пример #29
0
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        """Validate the inputs and build the matrix fed to the estimator.

        Parameters
        ----------
        X : array-like
            Time series dataset.
        y : array-like or None (default: None)
            Targets; validated together with ``X`` when provided.
        fit_time : bool (default: False)
            Whether this is called while fitting. If so, the training
            series, ``gamma_`` and ``classes_`` are stored on the estimator.

        Returns
        -------
        sklearn_X, or the tuple (sklearn_X, y) when ``y`` is not None
            ``sklearn_X`` is a precomputed kernel matrix for kernels in
            ``VARIABLE_LENGTH_METRICS`` and the output of
            ``_prepare_ts_datasets_sklearn`` otherwise.
        """
        variable_length = self.kernel in VARIABLE_LENGTH_METRICS
        # Non-finite values are only tolerated for variable-length kernels.
        finite_only = not variable_length
        if y is not None:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=finite_only)
        else:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=finite_only)
        X = check_dims(X, X_fit=None)
        X = to_time_series_dataset(X)

        if fit_time:
            self._X_fit = X
            self.gamma_ = gamma_soft_dtw(X)
            self.classes_ = numpy.unique(y)

        if not variable_length:
            self.estimator_kernel_ = self.kernel
            sklearn_X = _prepare_ts_datasets_sklearn(X)
        else:
            assert self.kernel == "gak"
            self.estimator_kernel_ = "precomputed"
            sigma = numpy.sqrt(self.gamma_ / 2.)
            if fit_time:
                # Kernel matrix of the training set against itself.
                sklearn_X = cdist_gak(X,
                                      sigma=sigma,
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)
            else:
                # Kernel matrix of the new series against the training set.
                sklearn_X = cdist_gak(X,
                                      self._X_fit,
                                      sigma=sigma,
                                      n_jobs=self.n_jobs,
                                      verbose=self.verbose)

        if y is None:
            return sklearn_X
        return sklearn_X, y
Пример #30
0
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        X = check_array(X, allow_nd=True, force_all_finite=False)
        check_is_fitted(self, '_X_fit')
        X = check_dims(X, self._X_fit)
        # Kernel between the new series and the training series.
        cross_kernel = self._get_kernel(X, self._X_fit)
        distances = numpy.zeros((X.shape[0], self.n_clusters))
        # `_compute_dist` is called for its effect on `distances`; its
        # return value is not used.
        self._compute_dist(cross_kernel, distances)
        return distances.argmin(axis=1)