示例#1
0
    def predict(self, X):
        """Predict the output of the fitted model for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, ) or (n_ts, n_classes)
            When ``categorical_y_`` is set, the class probability matrix
            returned by ``predict_proba`` is handed back unchanged.
            Otherwise that matrix is mapped back to labels through
            ``label_binarizer_.inverse_transform``.
        """
        check_is_fitted(self, '_X_fit')
        validated = check_array(X, allow_nd=True)
        dataset = to_time_series_dataset(validated)
        dataset = check_dims(dataset, X_fit=self._X_fit)

        probas = self.predict_proba(dataset)
        if not self.categorical_y_:
            # Labels were binarized at training time: undo that encoding.
            return self.label_binarizer_.inverse_transform(probas)
        return probas
示例#2
0
    def predict_proba(self, X):
        """Predict class probabilities for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_classes)
            Class probability matrix. When the underlying model emits a
            single column and two classes are known, the complementary
            column ``1 - p`` is prepended so that two columns are returned.
        """
        check_is_fitted(self, '_X_fit')
        dataset = check_array(X, allow_nd=True)
        dataset = to_time_series_dataset(dataset)
        dataset = check_dims(dataset, self._X_fit)
        n_ts, sz, _ = dataset.shape

        # The model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = []
        for dim in range(self.d_):
            per_dim_inputs.append(dataset[:, :, dim].reshape((n_ts, sz, 1)))
        probas = self.model_.predict(per_dim_inputs,
                                     batch_size=self.batch_size,
                                     verbose=self.verbose)

        single_column = probas.shape[1] == 1
        if single_column and len(self.classes_) == 2:
            probas = numpy.hstack((1 - probas, probas))
        return probas
示例#3
0
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, ) or (n_ts, dim_y)
            Target values.

        Returns
        -------
        KNeighborsTimeSeriesRegressor
            The fitted estimator
        """
        # Decide on the code path from the metric requested for *this* call.
        # Relying on ``hasattr(self, '_ts_metric')`` would pick up a stale
        # value left by a previous fit and silently overwrite ``self.metric``
        # after the estimator has been reconfigured.
        variable_length = self.metric in VARIABLE_LENGTH_METRICS
        if variable_length:
            self._ts_metric = self.metric
            # The parent class is fitted as if distances were precomputed.
            self.metric = "precomputed"

        try:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=(self.metric != "precomputed"))
            X = to_time_series_dataset(X)
            X = check_dims(X, X_fit=None)
            if variable_length:
                self._ts_fit = X
                self._d = X.shape[2]
                n_ts = X.shape[0]
                # Placeholder square matrix handed to the parent ``fit``;
                # the time series themselves are kept in ``_ts_fit``.
                self._X_fit = numpy.zeros((n_ts, n_ts))
            else:
                self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
            super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
        finally:
            # Always hand the user-facing metric back, even when validation
            # or the parent fit raised.
            if variable_length:
                self.metric = self._ts_metric
        return self
示例#4
0
    def locate(self, X):
        """Compute shapelet match location for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Location of the shapelet matches for the provided time series.

        Examples
        --------
        >>> from tslearn_cuda.not_used.tslearn import random_walk_blobs
        >>> X = numpy.zeros((3, 10, 1))
        >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
        >>> y = [1, 0, 0]
        >>> # Data is all zeros except a motif 1-2-3 in the first time series
        >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
        ...                     verbose=0)
        >>> _ = clf.fit(X, y)
        >>> weights_shapelet = [
        ...     numpy.array([[1, 2, 3]])
        ... ]
        >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
        >>> clf.locate(X)
        array([[4],
               [0],
               [0]])
        """
        # NOTE(review): this first check_dims runs on the raw input, before
        # check_array; it looks redundant with the call below -- confirm
        # before removing.
        X = check_dims(X, X_fit=self._X_fit)
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=self._X_fit)
        n_ts, sz, d = X.shape
        # The locator model takes one (n_ts, sz, 1) input per dimension.
        locations = self.locator_model_.predict(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
            batch_size=self.batch_size,
            verbose=self.verbose)
        # ``numpy.int`` was only an alias of the builtin ``int`` and has been
        # removed from recent NumPy releases; the builtin yields the same
        # dtype.
        return locations.astype(int)
示例#5
0
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, n_classes)
            Array of predicted class probabilities

        Raises
        ------
        ValueError
            If the metric is listed in ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric not in VARIABLE_LENGTH_METRICS:
            check_is_fitted(self, '_X_fit')
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)

        # Variable-length metric: distances to the training series are
        # computed here and fed to the parent class as precomputed.
        self._ts_metric = self.metric
        self.metric = "precomputed"
        try:
            if self.metric_params is None:
                metric_params = {}
            else:
                # n_jobs / verbose come from the estimator itself, so they
                # are dropped from the user-supplied metric parameters.
                metric_params = {
                    key: value
                    for key, value in self.metric_params.items()
                    if key not in ("n_jobs", "verbose")
                }
            check_is_fitted(self, '_ts_fit')
            X = check_array(X, allow_nd=True, force_all_finite=False)
            X = to_time_series_dataset(X)
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                               verbose=self.verbose, **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
        finally:
            # Restore the user-facing metric even if something above raised;
            # otherwise the estimator would be left as "precomputed".
            self.metric = self._ts_metric
示例#6
0
    def predict(self, X):
        """Assign each time series in X to a cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Result of ``_assign`` on the validated dataset, called with
            ``update_class_attributes=False``.
        """
        # NaN entries are tolerated by the validation step.
        validated = check_array(X,
                                allow_nd=True,
                                force_all_finite='allow-nan')
        check_is_fitted(self, 'cluster_centers_')
        validated = check_dims(validated, self.cluster_centers_)
        return self._assign(to_time_series_dataset(validated),
                            update_class_attributes=False)
示例#7
0
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        """Validate the inputs and build the matrix for the wrapped estimator.

        Parameters
        ----------
        X : array-like
            Time series dataset.
        y : array-like or None (default: None)
            Targets; validated together with X when provided.
        fit_time : bool (default: False)
            When True, ``_X_fit``, ``gamma_`` and ``classes_`` are stored on
            the estimator and, for the "gak" kernel, the kernel matrix of X
            against itself is computed. When False, the "gak" kernel matrix
            is computed between X and the stored ``_X_fit``.

        Returns
        -------
        sklearn_X, or the tuple (sklearn_X, y) when y was provided.
        """
        uses_ts_kernel = self.kernel in VARIABLE_LENGTH_METRICS
        has_targets = y is not None

        # Non-finite values are only accepted for variable-length kernels.
        if has_targets:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=not uses_ts_kernel)
        else:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=not uses_ts_kernel)
        X = to_time_series_dataset(check_dims(X, X_fit=None))

        if fit_time:
            self._X_fit = X
            self.gamma_ = (gamma_soft_dtw(X) if self.gamma == "auto"
                           else self.gamma)
            self.classes_ = numpy.unique(y)

        if uses_ts_kernel:
            assert self.kernel == "gak"
            self.estimator_kernel_ = "precomputed"
            # At fit time the kernel is computed on X alone, otherwise
            # between X and the training set.
            operands = (X,) if fit_time else (X, self._X_fit)
            sklearn_X = cdist_gak(*operands,
                                  sigma=numpy.sqrt(self.gamma_ / 2.),
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose)
        else:
            self.estimator_kernel_ = self.kernel
            sklearn_X = _prepare_ts_datasets_sklearn(X)

        return (sklearn_X, y) if has_targets else sklearn_X
示例#8
0
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            For each sample, the index of the column holding the smallest
            value in the distance matrix filled by ``_compute_dist``.
        """
        queries = check_array(X, allow_nd=True, force_all_finite=False)
        check_is_fitted(self, '_X_fit')
        queries = check_dims(queries, self._X_fit)

        kernel = self._get_kernel(queries, self._X_fit)
        # One row per query series, one column per cluster; the buffer is
        # passed to _compute_dist, which is expected to fill it in place.
        distances = numpy.zeros((queries.shape[0], self.n_clusters))
        self._compute_dist(kernel, distances)
        return distances.argmin(axis=1)
示例#9
0
    def transform(self, X):
        """Compute the shapelet transform of a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Output of ``transformer_model_.predict`` for the provided
            time series.
        """
        check_is_fitted(self, '_X_fit')
        dataset = check_array(X, allow_nd=True)
        dataset = to_time_series_dataset(dataset)
        dataset = check_dims(dataset, X_fit=self._X_fit)
        n_ts, sz, _ = dataset.shape

        # The model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = []
        for dim in range(self.d_):
            per_dim_inputs.append(dataset[:, :, dim].reshape((n_ts, sz, 1)))
        return self.transformer_model_.predict(per_dim_inputs,
                                               batch_size=self.batch_size,
                                               verbose=self.verbose)
示例#10
0
    def predict(self, X):
        """Assign each time series in X to its closest cluster.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            For each sample, the index of the smallest entry in the matrix
            returned by ``_cross_dists``.
        """
        X = check_array(X, allow_nd=True)
        fitted_attributes = ['cluster_centers_', 'norms_', 'norms_centroids_']
        check_is_fitted(self, fitted_attributes)

        dataset = to_time_series_dataset(X)

        # Called for its dimension check against the centroids; its return
        # value is not used further.
        check_dims(X, self.cluster_centers_)

        scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
        normalized = scaler.fit_transform(dataset)
        return self._cross_dists(normalized).argmin(axis=1)
示例#11
0
    def fit(self, X, y):
        """Learn time-series shapelets.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        y : array-like of shape=(n_ts, )
            Time series labels.

        Returns
        -------
        self
            The fitted estimator.
        """
        if self.verbose_level is not None:
            warnings.warn(
                "'verbose_level' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use 'verbose' instead.",
                DeprecationWarning,
                stacklevel=2)
            self.verbose = self.verbose_level

        X, y = check_X_y(X, y, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=None)

        set_random_seed(seed=self.random_state)
        numpy.random.seed(seed=self.random_state)

        n_ts, sz, d = X.shape
        self._X_fit = X

        # Reset everything learnt by a previous call to fit.
        self.model_ = None
        self.transformer_model_ = None
        self.locator_model_ = None
        self.categorical_y_ = False
        self.label_binarizer_ = None
        self.d_ = d

        if y.ndim == 1 or y.shape[1] == 1:
            # Plain label vector: binarize it for the network.
            self.label_binarizer_ = LabelBinarizer().fit(y)
            y_ = self.label_binarizer_.transform(y)
            self.classes_ = self.label_binarizer_.classes_
        else:
            # y is already a multi-column target matrix: use it as is.
            y_ = y
            self.categorical_y_ = True
            self.classes_ = numpy.unique(y)
            assert y_.shape[1] != 2, ("Binary classification case, " +
                                      "monodimensional y should be passed.")

        # A single target column is treated as the two-class case.
        if y_.ndim == 1 or y_.shape[1] == 1:
            n_classes = 2
        else:
            n_classes = y_.shape[1]

        if self.n_shapelets_per_size is None:
            sizes = grabocka_params_to_shapelet_size_dict(
                n_ts, sz, n_classes, self.shapelet_length, self.total_lengths)
            self.n_shapelets_per_size_ = sizes
        else:
            self.n_shapelets_per_size_ = self.n_shapelets_per_size

        self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_classes)
        self.transformer_model_.compile(loss="mean_squared_error",
                                        optimizer=self.optimizer)
        self.locator_model_.compile(loss="mean_squared_error",
                                    optimizer=self.optimizer)
        self._set_weights_false_conv(d=d)
        # The model takes one (n_ts, sz, 1) input per dimension.
        self.model_.fit(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)],
            y_,
            batch_size=self.batch_size,
            epochs=self.max_iter,
            verbose=self.verbose)
        # ``history.history`` is a dict mapping each tracked metric name to
        # its per-epoch values, so its own length is the number of metrics.
        # The number of epochs actually run is the length of the loss list.
        self.n_iter_ = len(self.model_.history.history.get("loss", []))
        return self
示例#12
0
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Finds the K-neighbors of a point.

        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            The query time series.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
        n_neighbors : int
            Number of neighbors to get (default is the value passed to the
            constructor).
        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the distance to points, only present if
            return_distance=True
        ind : array
            Indices of the nearest points in the population matrix.

        Raises
        ------
        ValueError
            If the metric is listed in ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric not in VARIABLE_LENGTH_METRICS:
            check_is_fitted(self, '_X_fit')
            if X is None:
                X_ = None
            else:
                X = check_array(X, allow_nd=True)
                X = to_time_series_dataset(X)
                X_ = to_sklearn_dataset(X)
                X_ = check_dims(X_, self._X_fit, extend=False)
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)

        # Variable-length metric: distances to the training series are
        # computed here and fed to the mixin as precomputed.
        # NOTE(review): on this path X goes straight to check_array, so
        # X=None does not appear to be supported here -- confirm.
        self._ts_metric = self.metric
        self.metric = "precomputed"
        try:
            if self.metric_params is None:
                metric_params = {}
            else:
                # n_jobs / verbose come from the estimator itself, so they
                # are dropped from the user-supplied metric parameters.
                metric_params = {
                    key: value
                    for key, value in self.metric_params.items()
                    if key not in ("n_jobs", "verbose")
                }
            check_is_fitted(self, '_ts_fit')
            X = check_array(X, allow_nd=True, force_all_finite=False)
            X = to_time_series_dataset(X)
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                               verbose=self.verbose, **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)
        finally:
            # Restore the user-facing metric even if something above raised;
            # otherwise the estimator would be left as "precomputed".
            self.metric = self._ts_metric
示例#13
0
    def fit(self, X, y=None, sample_weight=None):
        """Compute kernel k-means clustering.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        y
            Ignored
        sample_weight : array-like of shape=(n_ts, ) or None (default: None)
            Weights to be given to time series in the learning process. By
            default, all time series weights are equal.

        Returns
        -------
        self
            The estimator. ``labels_``, ``inertia_`` and ``_X_fit`` are only
            set from a run when at least one initialization succeeded.
        """
        X = check_dims(check_array(X, allow_nd=True, force_all_finite=False),
                       X_fit=None)

        if sample_weight is not None:
            sample_weight = check_array(sample_weight, ensure_2d=False)

        # Failed initializations are retried, within this overall budget.
        attempt_budget = max(self.n_init, 10)

        self.labels_ = None
        self.inertia_ = None
        self.sample_weight_ = None
        self._X_fit = None
        # n_iter_ ends up holding the iteration count of the best run.
        self.n_iter_ = 0

        kernel = self._get_kernel(X)
        if sample_weight is None:
            self.sample_weight_ = numpy.ones(X.shape[0])
        else:
            self.sample_weight_ = sample_weight
        rng = check_random_state(self.random_state)

        best_labels = None
        best_inertia = numpy.inf
        attempts = 0
        successes = 0
        while successes < self.n_init and attempts < attempt_budget:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (successes + 1))
            attempts += 1
            try:
                self._fit_one_init(kernel, rng)
            except EmptyClusterError:
                # This attempt is discarded; it still counts in the budget.
                if self.verbose:
                    print("Resumed because of empty cluster")
                continue
            if self.inertia_ < best_inertia:
                best_labels = self.labels_
                best_inertia = self.inertia_
                self.n_iter_ = self._iter
            successes += 1

        if successes > 0:
            self.labels_ = best_labels
            self.inertia_ = best_inertia
            self._X_fit = X
        return self