示例#1
0
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, n_classes)
            Array of predicted class probabilities

        Raises
        ------
        ValueError
            If the metric belongs to ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            # The parent estimator is queried with a precomputed
            # cross-distance matrix, so the metric is swapped temporarily.
            self.metric = "precomputed"
            try:
                if self.metric_params is None:
                    metric_params = {}
                else:
                    metric_params = self.metric_params.copy()
                    # n_jobs / verbose are taken from the estimator itself,
                    # they must not be forwarded twice.
                    metric_params.pop("n_jobs", None)
                    metric_params.pop("verbose", None)
                check_is_fitted(self, '_ts_fit')
                X = check_array(X, allow_nd=True, force_all_finite=False)
                X = to_time_series_dataset(X)
                if self._ts_metric == "dtw":
                    X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                                   verbose=self.verbose, **metric_params)
                elif self._ts_metric == "softdtw":
                    X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
                else:
                    raise ValueError("Invalid metric recorded: %s" %
                                     self._ts_metric)
                return super(KNeighborsTimeSeriesClassifier,
                             self).predict_proba(X_)
            finally:
                # Always restore the user-facing metric, even when one of
                # the steps above raised; otherwise the estimator would be
                # left configured with metric="precomputed".
                self.metric = self._ts_metric
        else:
            check_is_fitted(self, '_X_fit')
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
示例#2
0
    def predict(self, X):
        """Predict class for a given set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, ) or (n_ts, n_classes), depending on the shape
        of the label vector provided at training time.
            Output of ``predict_proba`` mapped back through the fitted label
            binarizer, or the raw class probability matrix when
            ``categorical_y_`` is set.
        """
        check_is_fitted(self, '_X_fit')
        dataset = to_time_series_dataset(check_array(X, allow_nd=True))
        dataset = check_dims(dataset, X_fit=self._X_fit)

        probas = self.predict_proba(dataset)
        if not self.categorical_y_:
            return self.label_binarizer_.inverse_transform(probas)
        return probas
示例#3
0
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, ) or (n_ts, dim_y)
            Target values.

        Returns
        -------
        KNeighborsTimeSeriesRegressor
            The fitted estimator
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"

        # Decide once whether this fit runs in variable-length mode. Checking
        # ``hasattr(self, '_ts_metric')`` again after fitting would pick up a
        # stale value left by an earlier fit and silently overwrite a metric
        # the user changed in between.
        variable_length = (self.metric == "precomputed"
                           and hasattr(self, '_ts_metric'))
        try:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=(self.metric != "precomputed"))
            X = to_time_series_dataset(X)
            X = check_dims(X, X_fit=None)
            if variable_length:
                self._ts_fit = X
                self._d = X.shape[2]
                # Placeholder of the right shape for the parent ``fit``;
                # distances are computed against ``_ts_fit`` elsewhere.
                self._X_fit = numpy.zeros((self._ts_fit.shape[0],
                                           self._ts_fit.shape[0]))
            else:
                self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
            super(KNeighborsTimeSeriesRegressor, self).fit(self._X_fit, y)
        finally:
            # Restore the user-facing metric even if fitting failed.
            if variable_length:
                self.metric = self._ts_metric
        return self
示例#4
0
    def predict_proba(self, X):
        """Predict class probability for a given set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_classes),
            Class probability matrix.
        """
        check_is_fitted(self, '_X_fit')
        dataset = check_array(X, allow_nd=True)
        dataset = to_time_series_dataset(dataset)
        dataset = check_dims(dataset, self._X_fit)
        n_ts, sz, _ = dataset.shape

        # The model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = [dataset[:, :, dim].reshape((n_ts, sz, 1))
                          for dim in range(self.d_)]
        probas = self.model_.predict(per_dim_inputs,
                                     batch_size=self.batch_size,
                                     verbose=self.verbose)

        # A single output column with two known classes is expanded into
        # two complementary columns.
        single_column = probas.shape[1] == 1
        if single_column and len(self.classes_) == 2:
            probas = numpy.hstack((1 - probas, probas))

        return probas
示例#5
0
    def transform(self, X, **kwargs):
        """Resample every time series to ``self.sz_`` timestamps.

        Each dimension of each series is linearly interpolated ("slinear")
        on a regular grid over [0, 1].

        Parameters
        ----------
        X : array-like
            Time series dataset to be resampled.

        Returns
        -------
        numpy.ndarray
            Resampled time series dataset.
        """
        dataset = to_time_series_dataset(X)
        n_ts, length, d = dataset.shape
        same_length = check_equal_size(dataset)
        resampled = numpy.empty((n_ts, self.sz_, d))
        # The target grid is identical for every series.
        target_grid = numpy.linspace(0, 1, self.sz_)
        for idx in range(n_ts):
            if not same_length:
                # Use the actual length of this series.
                length = ts_size(dataset[idx])
            source_grid = numpy.linspace(0, 1, length)
            for dim in range(d):
                interpolator = interp1d(source_grid,
                                        dataset[idx, :length, dim],
                                        kind="slinear")
                resampled[idx, :, dim] = interpolator(target_grid)
        return resampled
示例#6
0
 def support_vectors_time_series_(self, X):
     """Return the support vectors of the fitted SVM, grouped by class.

     Parameters
     ----------
     X : array-like
         Time series dataset indexed by ``svm_estimator_.support_``
         (presumably the training set -- confirm against callers).

     Returns
     -------
     list
         One entry per class, each holding the time series of ``X`` that
         are support vectors for that class.
     """
     dataset = to_time_series_dataset(X)
     per_class = []
     start = 0
     for n_sv in self.svm_estimator_.n_support_:
         stop = start + n_sv
         per_class.append(dataset[self.svm_estimator_.support_[start:stop]])
         start = stop
     return per_class
示例#7
0
    def fit(self, X, y=None):
        """Compute k-Shape clustering.

        Runs up to ``n_init`` successful initialisations (with at most
        ``max(n_init, 10)`` attempts) and keeps the centroids of the run
        with the lowest inertia.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        y
            Ignored
        """
        X = check_array(X, allow_nd=True)

        attempts_cap = max(self.n_init, 10)

        # Reset the fitted state.
        self.labels_ = None
        self.inertia_ = numpy.inf
        self.cluster_centers_ = None

        self.norms_ = 0.
        self.norms_centroids_ = 0.

        self.n_iter_ = 0

        dataset = to_time_series_dataset(X)
        self._X_fit = dataset
        self.norms_ = numpy.linalg.norm(dataset, axis=(1, 2))

        _check_initial_guess(self.init, self.n_clusters)

        rng = check_random_state(self.random_state)

        best_centroids = None
        best_inertia = numpy.inf
        successes = 0
        attempts = 0
        while successes < self.n_init and attempts < attempts_cap:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (successes + 1))
            attempts += 1
            try:
                self._fit_one_init(dataset, rng)
            except EmptyClusterError:
                # A failed init counts as an attempt, not as a success.
                if self.verbose:
                    print("Resumed because of empty cluster")
                continue
            if self.inertia_ < best_inertia:
                best_centroids = self.cluster_centers_.copy()
                best_inertia = self.inertia_
                self.n_iter_ = self._iter
            successes += 1
        # NOTE(review): the norms are computed from the centroids left by
        # the last init, not from ``best_centroids`` -- confirm intended.
        self.norms_centroids_ = numpy.linalg.norm(self.cluster_centers_,
                                                  axis=(1, 2))
        self._post_fit(dataset, best_centroids, best_inertia)
        return self
示例#8
0
def _prepare_ts_datasets_sklearn(X):
    """Flatten a time series dataset into a 2D array for sklearn.

    Each time series becomes one row; its timestamps and dimensions are
    collapsed into a single axis.

    Examples
    --------
    >>> X = to_time_series_dataset([[1, 2, 3], [2, 2, 3]])
    >>> _prepare_ts_datasets_sklearn(X).shape
    (2, 3)
    """
    dataset = to_time_series_dataset(X)
    n_series, _, _ = dataset.shape
    flat_shape = (n_series, -1)
    return dataset.reshape(flat_shape)
def test_variable_length_knn():
    """1-NN with DTW / soft-DTW recovers labels on variable-length data."""
    series = to_time_series_dataset([[1, 2, 3, 4],
                                     [1, 2, 3],
                                     [2, 5, 6, 7, 8, 9],
                                     [3, 5, 6, 7, 8]])
    labels = [0, 0, 1, 1]
    for metric in ("dtw", "softdtw"):
        knn = KNeighborsTimeSeriesClassifier(metric=metric, n_neighbors=1)
        knn.fit(series, labels)
        assert_allclose(knn.predict(series), [0, 0, 1, 1])
示例#10
0
    def fit(self, X, y=None):
        """Compute k-means clustering.

        Runs up to ``n_init`` successful initialisations (with at most
        ``max(n_init, 10)`` attempts) and keeps the centroids of the run
        with the lowest inertia.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        y
            Ignored
        """

        X = check_array(X, allow_nd=True, force_all_finite='allow-nan')

        # Reset the fitted state.
        self.labels_ = None
        self.inertia_ = numpy.inf
        self.cluster_centers_ = None
        self._X_fit = None
        self._squared_inertia = True

        self.n_iter_ = 0

        attempts_cap = max(self.n_init, 10)

        dataset = to_time_series_dataset(X)
        rng = check_random_state(self.random_state)
        # Squared norm of each flattened series, i.e. its squared Euclidean
        # distance to the origin, laid out as a single row.
        flattened = dataset.reshape((dataset.shape[0], -1))
        origin = numpy.zeros((1, dataset.shape[1] * dataset.shape[2]))
        x_squared_norms = cdist(flattened,
                                origin,
                                metric="sqeuclidean").reshape((1, -1))
        _check_initial_guess(self.init, self.n_clusters)

        best_centroids = None
        best_inertia = numpy.inf
        successes = 0
        attempts = 0
        while successes < self.n_init and attempts < attempts_cap:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (successes + 1))
            attempts += 1
            try:
                self._fit_one_init(dataset, x_squared_norms, rng)
            except EmptyClusterError:
                # A failed init counts as an attempt, not as a success.
                if self.verbose:
                    print("Resumed because of empty cluster")
                continue
            if self.inertia_ < best_inertia:
                best_centroids = self.cluster_centers_.copy()
                best_inertia = self.inertia_
                self.n_iter_ = self._iter
            successes += 1
        self._post_fit(dataset, best_centroids, best_inertia)
        return self
示例#11
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit a SAX representation and transform the data accordingly.

        ``y`` and ``fit_params`` are accepted but not used by this method.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset

        Returns
        -------
        numpy.ndarray of integers with shape (n_ts, n_segments, d)
            SAX-Transformed dataset
        """
        dataset = to_time_series_dataset(X)
        fitted = self.fit(dataset)
        return fitted._transform(dataset)
示例#12
0
    def fit(self, X, y=None):
        """Fit a PAA representation.

        The input is converted to a time series dataset and handed, together
        with ``y``, to ``_fit``.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset

        Returns
        -------
        PiecewiseAggregateApproximation
            self
        """
        dataset = to_time_series_dataset(X)
        fitted = self._fit(dataset, y)
        return fitted
def test_variable_length_svm():
    """GAK-kernel SVC / SVR accept variable-length time series."""
    series = to_time_series_dataset([[1, 2, 3, 4],
                                     [1, 2, 3],
                                     [2, 5, 6, 7, 8, 9],
                                     [3, 5, 6, 7, 8]])
    labels = [0, 0, 1, 1]
    rng = np.random.RandomState(0)
    svc = TimeSeriesSVC(kernel="gak", random_state=rng)
    svc.fit(series, labels)
    assert_allclose(svc.predict(series), [0, 0, 1, 1])

    # Regression: the first two targets are negative, the last two positive,
    # and predictions must fall on the matching side of zero.
    targets = [-1., -1.3, 3.2, 4.1]
    svr = TimeSeriesSVR(kernel="gak")
    svr.fit(series, targets)
    negative_side = series[:2]
    assert_array_less(svr.predict(negative_side), 0.)
    positive_side = series[2:]
    assert_array_less(-svr.predict(positive_side), 0.)
示例#14
0
    def inverse_transform(self, X):
        """Compute time series corresponding to given PAA representations.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz_paa, d)
            A dataset of PAA series.

        Returns
        -------
        numpy.ndarray of shape (n_ts, sz_original_ts, d)
            A dataset of time series corresponding to the provided
            representation.
        """
        paa_series = to_time_series_dataset(X)
        # Expand back to the size recorded in ``size_fitted_``.
        target_size = self.size_fitted_
        return inv_transform_paa(paa_series, original_size=target_size)
示例#15
0
    def fit_transform(self, X, y=None, **fit_params):
        """Fit a 1d-SAX representation and transform the data accordingly.

        ``y`` and ``fit_params`` are accepted but not used by this method.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset

        Returns
        -------
        numpy.ndarray of integers with shape (n_ts, n_segments, 2 * d)
            1d-SAX-Transformed dataset. The order of the last dimension is:
            first d elements represent average values
            (standard SAX symbols) and the last d are for slopes
        """
        dataset = to_time_series_dataset(X)
        fitted = self.fit(dataset)
        return fitted._transform(dataset)
def test_variable_length_clustering():
    """Clustering estimators can be fitted on variable-length series."""
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    series = to_time_series_dataset([[1, 2, 3, 4],
                                     [1, 2, 3],
                                     [2, 5, 6, 7, 8, 9],
                                     [3, 5, 6, 7, 8]])
    rng = np.random.RandomState(0)

    # Each estimator is built and fitted in turn, sharing the same RNG.
    configurations = (
        (GlobalAlignmentKernelKMeans, {}),
        (TimeSeriesKMeans, {"metric": "dtw"}),
        (TimeSeriesKMeans, {"metric": "softdtw"}),
    )
    for estimator_cls, extra_params in configurations:
        model = estimator_cls(n_clusters=2, random_state=rng, **extra_params)
        model.fit(series)
示例#17
0
    def fit(self, X, y=None):
        """Fit a 1d-SAX representation.

        Stores the breakpoints and bin medians for the average-value
        alphabet, then delegates to ``_fit``. ``y`` is not used.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset

        Returns
        -------
        OneD_SymbolicAggregateApproximation
            self
        """

        alphabet_size = self.alphabet_size_avg
        self.breakpoints_avg_ = _breakpoints(alphabet_size)
        self.breakpoints_avg_middle_ = _bin_medians(alphabet_size)
        return self._fit(to_time_series_dataset(X))
示例#18
0
    def transform(self, X, y=None):
        """Transform a dataset of time series into its PAA representation.

        Parameters
        ----------
        X : array-like of shape (n_ts, sz, d)
            Time series dataset

        Returns
        -------
        numpy.ndarray of shape (n_ts, n_segments, d)
            PAA-Transformed dataset

        Raises
        ------
        NotFittedError
            If ``_is_fitted()`` reports that the model has not been fitted.
        """
        if self._is_fitted():
            dataset = to_time_series_dataset(X)
            return self._transform(dataset, y)
        raise NotFittedError("Model not fitted.")
示例#19
0
    def predict(self, X):
        """Predict the closest cluster each time series in X belongs to.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        validated = check_array(X, allow_nd=True,
                                force_all_finite='allow-nan')
        check_is_fitted(self, 'cluster_centers_')
        # Dimensions are checked against the fitted centroids.
        validated = check_dims(validated, self.cluster_centers_)
        dataset = to_time_series_dataset(validated)
        # Assignment only: fitted attributes are not updated.
        return self._assign(dataset, update_class_attributes=False)
示例#20
0
    def _preprocess_sklearn(self, X, y=None, fit_time=False):
        """Validate the inputs and build the matrix given to the estimator.

        For a kernel listed in ``VARIABLE_LENGTH_METRICS`` (only "gak" is
        accepted) a precomputed GAK matrix is built; otherwise the dataset
        is flattened with ``_prepare_ts_datasets_sklearn``.

        Parameters
        ----------
        X : array-like
            Time series dataset.
        y : array-like or None
            Targets; validated together with ``X`` when provided.
        fit_time : bool
            When True, ``_X_fit``, ``gamma_`` and ``classes_`` are stored and
            the GAK matrix is computed from ``X`` alone; otherwise it is
            computed between ``X`` and ``_X_fit``.

        Returns
        -------
        array, or tuple (array, y) when ``y`` is not None
        """
        variable_length = self.kernel in VARIABLE_LENGTH_METRICS
        # Non-finite values are only tolerated for variable-length kernels.
        force_all_finite = not variable_length
        if y is not None:
            X, y = check_X_y(X,
                             y,
                             allow_nd=True,
                             force_all_finite=force_all_finite)
        else:
            X = check_array(X,
                            allow_nd=True,
                            force_all_finite=force_all_finite)
        X = check_dims(X, X_fit=None)
        X = to_time_series_dataset(X)

        if fit_time:
            self._X_fit = X
            self.gamma_ = (gamma_soft_dtw(X) if self.gamma == "auto"
                           else self.gamma)
            self.classes_ = numpy.unique(y)

        if not variable_length:
            self.estimator_kernel_ = self.kernel
            sklearn_X = _prepare_ts_datasets_sklearn(X)
        else:
            assert self.kernel == "gak"
            self.estimator_kernel_ = "precomputed"
            gak_options = dict(sigma=numpy.sqrt(self.gamma_ / 2.),
                               n_jobs=self.n_jobs,
                               verbose=self.verbose)
            if fit_time:
                sklearn_X = cdist_gak(X, **gak_options)
            else:
                sklearn_X = cdist_gak(X, self._X_fit, **gak_options)

        return sklearn_X if y is None else (sklearn_X, y)
示例#21
0
    def transform(self, X, y=None, **kwargs):
        """Will normalize (min-max) each of the timeseries. IMPORTANT: this
        transformation is applied to each of the timeseries individually.

        A series that is constant along the time axis has a zero range; it
        is mapped to the lower bound of ``value_range`` instead of producing
        NaN through a division by zero.

        Note that when the deprecated ``min_`` / ``max_`` attributes are set,
        ``self.value_range`` is overwritten accordingly.

        Parameters
        ----------
        X : array-like
            Time series dataset to be rescaled.

        Returns
        -------
        numpy.ndarray
            Rescaled time series dataset.

        Raises
        ------
        ValueError
            If the lower bound of the value range is not strictly smaller
            than the upper bound.
        """
        if self.min_ is not None:
            warnings.warn(
                "'min' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use value_range instead.",
                DeprecationWarning,
                stacklevel=2)
            self.value_range = (self.min_, self.value_range[1])

        if self.max_ is not None:
            warnings.warn(
                "'max' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use value_range instead.",
                DeprecationWarning,
                stacklevel=2)
            self.value_range = (self.value_range[0], self.max_)

        if self.value_range[0] >= self.value_range[1]:
            raise ValueError("Minimum of desired range must be smaller"
                             " than maximum. Got %s." % str(self.value_range))

        X_ = to_time_series_dataset(X)
        min_t = numpy.nanmin(X_, axis=1)[:, numpy.newaxis, :]
        max_t = numpy.nanmax(X_, axis=1)[:, numpy.newaxis, :]
        range_t = max_t - min_t
        # Constant series: the numerator is 0 everywhere, so dividing by 1
        # yields value_range[0] rather than NaN.
        range_t[range_t == 0.] = 1.
        nomin = (X_ - min_t) * (self.value_range[1] - self.value_range[0])
        X_ = nomin / range_t + self.value_range[0]
        return X_
示例#22
0
    def locate(self, X):
        """Compute shapelet match location for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Location of the shapelet matches for the provided time series.

        Raises
        ------
        NotFittedError
            If the model has not been fitted yet.

        Examples
        --------
        >>> from tslearn_cuda.not_used.tslearn import random_walk_blobs
        >>> X = numpy.zeros((3, 10, 1))
        >>> X[0, 4:7, 0] = numpy.array([1, 2, 3])
        >>> y = [1, 0, 0]
        >>> # Data is all zeros except a motif 1-2-3 in the first time series
        >>> clf = ShapeletModel(n_shapelets_per_size={3: 1}, max_iter=0,
        ...                     verbose=0)
        >>> _ = clf.fit(X, y)
        >>> weights_shapelet = [
        ...     numpy.array([[1, 2, 3]])
        ... ]
        >>> clf.set_weights(weights_shapelet, layer_name="shapelets_0_0")
        >>> clf.locate(X)
        array([[4],
               [0],
               [0]])
        """
        # Fail with a clear "not fitted" error instead of an AttributeError
        # on ``_X_fit``.
        check_is_fitted(self, '_X_fit')
        # NOTE(review): dimensions are checked both before and after the
        # conversion below; the first call looks redundant -- confirm before
        # removing it.
        X = check_dims(X, X_fit=self._X_fit)
        X = check_array(X, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=self._X_fit)
        n_ts, sz, d = X.shape
        # The locator model takes one (n_ts, sz, 1) input per dimension.
        locations = self.locator_model_.predict(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d_)],
            batch_size=self.batch_size,
            verbose=self.verbose)
        # ``numpy.int`` was a deprecated alias of the builtin ``int`` and has
        # been removed from NumPy; the builtin gives the same dtype.
        return locations.astype(int)
示例#23
0
    def transform(self, X, **kwargs):
        """Rescale each time series to mean ``mu_`` and std ``std_``.

        Statistics are computed per series and per dimension along the time
        axis, ignoring NaNs. Series with zero standard deviation are only
        shifted, not scaled.

        Parameters
        ----------
        X
            Time series dataset to be rescaled

        Returns
        -------
        numpy.ndarray
            Rescaled time series dataset
        """
        dataset = to_time_series_dataset(X)
        series_mean = numpy.nanmean(dataset, axis=1, keepdims=True)
        series_std = numpy.nanstd(dataset, axis=1, keepdims=True)
        # Avoid dividing by zero for constant series.
        series_std[series_std == 0.] = 1.

        centered = dataset - series_mean
        return centered * self.std_ / series_std + self.mu_
示例#24
0
    def transform(self, X):
        """Generate shapelet transform for a set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, n_shapelets)
            Shapelet-Transform of the provided time series.
        """
        check_is_fitted(self, '_X_fit')
        dataset = to_time_series_dataset(check_array(X, allow_nd=True))
        dataset = check_dims(dataset, X_fit=self._X_fit)
        n_ts, sz, _ = dataset.shape
        # The transformer model takes one (n_ts, sz, 1) input per dimension.
        per_dim_inputs = [dataset[:, :, dim].reshape((n_ts, sz, 1))
                          for dim in range(self.d_)]
        return self.transformer_model_.predict(per_dim_inputs,
                                               batch_size=self.batch_size,
                                               verbose=self.verbose)
示例#25
0
def euclidean_barycenter(X, weights=None):
    """Standard Euclidean barycenter computed from a set of time series.

    The barycenter is the (optionally weighted) average of the series taken
    over the dataset axis.

    Parameters
    ----------
    X : array-like, shape=(n_ts, sz, d)
        Time series dataset.

    weights: None or array
        Weights of each X[i]. Must be the same size as len(X).
        If None, uniform weights are used.

    Returns
    -------
    numpy.array of shape (sz, d)
        Barycenter of the provided time series dataset.

    Notes
    -----
        This method requires a dataset of equal-sized time series

    Examples
    --------
    >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
    >>> bar = euclidean_barycenter(time_series)
    >>> bar.shape
    (4, 1)
    >>> bar
    array([[1. ],
           [2. ],
           [3.5],
           [4.5]])
    """
    dataset = to_time_series_dataset(X)
    n_series = dataset.shape[0]
    weights = _set_weights(weights, n_series)
    barycenter = numpy.average(dataset, axis=0, weights=weights)
    return barycenter
示例#26
0
    def predict(self, X):
        """Predict the closest cluster each time series in X belongs to.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset to predict.

        Returns
        -------
        labels : array of shape=(n_ts, )
            Index of the cluster each sample belongs to.
        """
        X = check_array(X, allow_nd=True)
        check_is_fitted(self,
                        ['cluster_centers_', 'norms_', 'norms_centroids_'])

        dataset = to_time_series_dataset(X)

        # Called for validation against the fitted centroids; its return
        # value is not needed afterwards.
        check_dims(X, self.cluster_centers_)

        # Series are z-normalised before distances are computed.
        scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
        normalized = scaler.fit_transform(dataset)
        return self._cross_dists(normalized).argmin(axis=1)
def test_variable_cross_val():
    """Supervised estimators run through cross-validation on
    variable-length time series."""
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    series = to_time_series_dataset([[1, 2, 3, 4],
                                     [1, 2, 3],
                                     [1, 2, 3, 4],
                                     [1, 2, 3],
                                     [2, 5, 6, 7, 8, 9],
                                     [3, 5, 6, 7, 8],
                                     [2, 5, 6, 7, 8, 9],
                                     [3, 5, 6, 7, 8]])
    labels = [0, 0, 0, 0, 1, 1, 1, 1]
    rng = np.random.RandomState(0)

    cv = KFold(n_splits=2, shuffle=True)
    # TODO: cannot test for clustering methods since they don't have a
    # score method yet
    estimators = [
        TimeSeriesSVC(kernel="gak", random_state=rng),
        TimeSeriesSVR(kernel="gak"),
        KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1),
        KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1),
    ]
    for estimator in estimators:
        cross_val_score(estimator, X=series, y=labels, cv=cv)
示例#28
0
 def __init__(self, X):
     """Store ``X`` converted by ``to_time_series_dataset`` as ``X_``."""
     dataset = to_time_series_dataset(X)
     self.X_ = dataset
示例#29
0
    def fit(self, X, y):
        """Learn time-series shapelets.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        y : array-like of shape=(n_ts, )
            Time series labels.

        Returns
        -------
        self
            The fitted model.
        """
        if self.verbose_level is not None:
            warnings.warn(
                "'verbose_level' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use 'verbose' instead.",
                DeprecationWarning,
                stacklevel=2)
            self.verbose = self.verbose_level

        X, y = check_X_y(X, y, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X, X_fit=None)

        set_random_seed(seed=self.random_state)
        numpy.random.seed(seed=self.random_state)

        n_ts, sz, d = X.shape
        self._X_fit = X

        # Reset the fitted state.
        self.model_ = None
        self.transformer_model_ = None
        self.locator_model_ = None
        self.categorical_y_ = False
        self.label_binarizer_ = None
        self.d_ = d

        if y.ndim == 1 or y.shape[1] == 1:
            # Plain label vector: binarize it for the network.
            self.label_binarizer_ = LabelBinarizer().fit(y)
            y_ = self.label_binarizer_.transform(y)
            self.classes_ = self.label_binarizer_.classes_
        else:
            # y is used as provided and predictions are returned in the
            # same matrix form.
            y_ = y
            self.categorical_y_ = True
            self.classes_ = numpy.unique(y)
            assert y_.shape[1] != 2, ("Binary classification case, " +
                                      "monodimensional y should be passed.")

        if y_.ndim == 1 or y_.shape[1] == 1:
            n_classes = 2
        else:
            n_classes = y_.shape[1]

        if self.n_shapelets_per_size is None:
            sizes = grabocka_params_to_shapelet_size_dict(
                n_ts, sz, n_classes, self.shapelet_length, self.total_lengths)
            self.n_shapelets_per_size_ = sizes
        else:
            self.n_shapelets_per_size_ = self.n_shapelets_per_size

        self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_classes)
        self.transformer_model_.compile(loss="mean_squared_error",
                                        optimizer=self.optimizer)
        self.locator_model_.compile(loss="mean_squared_error",
                                    optimizer=self.optimizer)
        self._set_weights_false_conv(d=d)
        # The model takes one (n_ts, sz, 1) input per dimension.
        self.model_.fit(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)],
            y_,
            batch_size=self.batch_size,
            epochs=self.max_iter,
            verbose=self.verbose)
        # ``history.history`` maps each metric name to its per-epoch values,
        # so its own length is the number of metrics. The number of epochs
        # run is the length of the "loss" series (0 when no epoch ran).
        self.n_iter_ = len(self.model_.history.history.get("loss", []))
        return self
示例#30
0
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Finds the K-neighbors of a point.

        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            The query time series.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
        n_neighbors : int
            Number of neighbors to get (default is the value passed to the
            constructor).
        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the distance to points, only present if
            return_distance=True
        ind : array
            Indices of the nearest points in the population matrix.

        Raises
        ------
        ValueError
            If the metric belongs to ``VARIABLE_LENGTH_METRICS`` but is
            neither "dtw" nor "softdtw".
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            # Neighbors are searched in a precomputed cross-distance matrix,
            # so the metric is swapped temporarily.
            self.metric = "precomputed"
            try:
                if self.metric_params is None:
                    metric_params = {}
                else:
                    metric_params = self.metric_params.copy()
                    # n_jobs / verbose are taken from the estimator itself,
                    # they must not be forwarded twice.
                    metric_params.pop("n_jobs", None)
                    metric_params.pop("verbose", None)
                check_is_fitted(self, '_ts_fit')
                # NOTE(review): unlike the fixed-length branch below, X=None
                # is passed straight to check_array here and does not appear
                # to be supported for variable-length metrics -- confirm.
                X = check_array(X, allow_nd=True, force_all_finite=False)
                X = to_time_series_dataset(X)
                if self._ts_metric == "dtw":
                    X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs,
                                   verbose=self.verbose, **metric_params)
                elif self._ts_metric == "softdtw":
                    X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
                else:
                    raise ValueError("Invalid metric recorded: %s" %
                                     self._ts_metric)
                return KNeighborsTimeSeriesMixin.kneighbors(
                    self,
                    X=X_,
                    n_neighbors=n_neighbors,
                    return_distance=return_distance)
            finally:
                # Always restore the user-facing metric, even when one of
                # the steps above raised; otherwise the estimator would be
                # left configured with metric="precomputed".
                self.metric = self._ts_metric
        else:
            check_is_fitted(self, '_X_fit')
            if X is None:
                X_ = None
            else:
                X = check_array(X, allow_nd=True)
                X = to_time_series_dataset(X)
                X_ = to_sklearn_dataset(X)
                X_ = check_dims(X_, self._X_fit, extend=False)
            return KNeighborsTimeSeriesMixin.kneighbors(
                self,
                X=X_,
                n_neighbors=n_neighbors,
                return_distance=return_distance)