Example #1
def euclidean_barycenter(X, weights=None):
    """Standard Euclidean barycenter computed from a set of time series.

    Parameters
    ----------
    X : array-like, shape=(n_ts, sz, d)
        Time series dataset.

    weights : None or array
        Weights of each X[i]. Must have the same length as X.

    Returns
    -------
    numpy.array of shape (sz, d)
        Barycenter of the provided time series dataset.

    Note
    ----
        This method requires a dataset of equal-sized time series.

    Examples
    --------
    >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
    >>> bar = euclidean_barycenter(time_series)
    >>> bar.shape
    (4, 1)
    >>> bar  # doctest: +ELLIPSIS
    array([[ 1. ],
           [ 2. ],
           [ 3.5],
           [ 4.5]])
    """
    X_ = to_time_series_dataset(X)
    weights = _set_weights(weights, X_.shape[0])
    return numpy.average(X_, axis=0, weights=weights)
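A minimal usage sketch for the weights argument, under the same imports as above; the result is ordinary weighted averaging.

import numpy
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 4, 5]])
# Give the second series three times the weight of the first.
bar = numpy.average(X, axis=0, weights=[1., 3.])
print(bar.ravel())  # [1.   2.   3.75 4.75]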
Example #2
    def predict(self, X):
        """Predict class probability for a given set of time series.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        array of shape=(n_ts, ) or (n_ts, n_classes)
            Predicted classes, or the class probability matrix if a
            categorical target was provided at training time.
        """
        X_ = to_time_series_dataset(X)
        n_ts, sz, d = X_.shape
        categorical_preds = self.model.predict(
            [X_[:, :, di].reshape((n_ts, sz, 1)) for di in range(self.d)],
            batch_size=self.batch_size,
            verbose=self.verbose_level)
        if self.categorical_y:
            return categorical_preds
        else:
            if categorical_preds.shape[1] == 2:
                categorical_preds = categorical_preds[:, 0]
            return self.label_binarizer.inverse_transform(categorical_preds)
Example #3
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, ) or (n_ts, dim_y)
            Target values.

        Returns
        -------
        KNeighborsTimeSeriesRegressor
            The fitted estimator
        """
        if self.metric in TSLEARN_VALID_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"

        X = check_array(X,
                        allow_nd=True,
                        force_all_finite=(self.metric != "precomputed"))
        X = to_time_series_dataset(X)
        X = check_dims(X)
        if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
            self._ts_fit = X
            self._d = X.shape[2]
            self._X_fit = numpy.zeros(
                (self._ts_fit.shape[0], self._ts_fit.shape[0]))
        else:
            self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
        super().fit(self._X_fit, y)
        if hasattr(self, '_ts_metric'):
            self.metric = self._ts_metric
        return self
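A hedged usage sketch, assuming this fit belongs to tslearn.neighbors.KNeighborsTimeSeriesRegressor (as the return annotation suggests); toy data only.

from tslearn.neighbors import KNeighborsTimeSeriesRegressor
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3], [1, 2, 3, 4], [9, 9, 8, 7]])
y = [1.0, 1.2, 5.0]
# DTW handles the variable-length series above.
reg = KNeighborsTimeSeriesRegressor(n_neighbors=1, metric="dtw").fit(X, y)
print(reg.predict(to_time_series_dataset([[1, 2, 3, 3]])))  # nearest series is [1, 2, 3] -> [1.]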
Example #4
    def fit(self, X):
        """Compute the barycenter from a dataset of time series.

        Parameters
        ----------
        X : array-like, shape=(n_ts, sz, d)
            Time series dataset.

        Returns
        -------
        numpy.array of shape (barycenter_size, d) or (sz, d) if barycenter_size is None
            DBA barycenter of the provided time series dataset.
        """
        X_ = to_time_series_dataset(X)
        if self.barycenter_size is None:
            self.barycenter_size = X_.shape[1]
        self.weights = _set_weights(self.weights, X_.shape[0])
        if self.init_barycenter is None:
            barycenter = self._init_avg(X_)
        else:
            barycenter = self.init_barycenter
        cost_prev, cost = numpy.inf, numpy.inf
        for it in range(self.max_iter):
            assign = self._petitjean_assignment(X_, barycenter)
            cost = self._petitjean_cost(X_, barycenter, assign)
            if self.verbose:
                print("[DBA] epoch %d, cost: %.3f" % (it + 1, cost))
            barycenter = self._petitjean_update_barycenter(X_, assign)
            if abs(cost_prev - cost) < self.tol:
                break
            elif cost_prev < cost:
                warnings.warn("DBA loss is increasing while it should not be.", ConvergenceWarning)
            else:
                cost_prev = cost
        return barycenter
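A hedged driver for the same DBA procedure through the functional interface tslearn.barycenters.dtw_barycenter_averaging, which wraps this class; parameter names mirror the attributes used in the loop above.

from tslearn.barycenters import dtw_barycenter_averaging
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1., 2., 3., 4.], [1., 2., 2., 4.]])
bar = dtw_barycenter_averaging(X, max_iter=10, tol=1e-5)
print(bar.shape)  # (4, 1): one barycenter value per timestamp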
Example #5
    def _precompute_cross_dist(self, X, other_X=None):
        if other_X is None:
            other_X = self._ts_fit

        self._ts_metric = self.metric
        self.metric = "precomputed"

        metric_params = self._get_metric_params()

        X = check_array(X, allow_nd=True, force_all_finite=False)
        X = to_time_series_dataset(X)

        if self._ts_metric == "dtw":
            X_ = cdist_dtw(X, other_X, n_jobs=self.n_jobs, **metric_params)
        elif self._ts_metric == "softdtw":
            X_ = cdist_soft_dtw(X, other_X, **metric_params)
        elif self._ts_metric == "sax":
            X = self._sax_preprocess(X, **metric_params)
            X_ = cdist_sax(X,
                           self._sax.breakpoints_avg_,
                           self._sax.size_fitted_,
                           other_X,
                           n_jobs=self.n_jobs)
        else:
            raise ValueError("Invalid metric recorded: %s" % self._ts_metric)

        return X_
Example #6
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.

        Returns
        -------
        array, shape = (n_ts, n_classes)
            Array of predicted class probabilities
        """
        if self.metric in TSLEARN_VALID_METRICS:
            check_is_fitted(self, '_ts_fit')
            X = check_dims(X,
                           X_fit_dims=self._ts_fit.shape,
                           extend=True,
                           check_n_features_only=True)
            X_ = self._precompute_cross_dist(X)
            pred = super().predict_proba(X_)
            self.metric = self._ts_metric
            return pred
        else:
            check_is_fitted(self, '_X_fit')
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, X_fit_dims=self._X_fit.shape, extend=False)
            return super().predict_proba(X_)
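A hedged usage sketch, assuming this method belongs to tslearn.neighbors.KNeighborsTimeSeriesClassifier.

from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3], [1, 2, 3, 4], [9, 8, 7, 6]])
y = [0, 0, 1]
clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="dtw").fit(X, y)
# With all three neighbors voting, the query below should lean towards class 0.
print(clf.predict_proba(to_time_series_dataset([[1, 2, 3]])))  # approx [[0.67 0.33]]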
Example #7
    def fit(self, X, y=None):
        """Fit the model using X as training data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        """
        if self.metric in TSLEARN_VALID_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"

        X = check_array(X,
                        allow_nd=True,
                        force_all_finite=(self.metric != "precomputed"))
        X = to_time_series_dataset(X)
        X = check_dims(X)
        if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
            self._ts_fit = X
            self._d = X.shape[2]
            self._X_fit = numpy.zeros(
                (self._ts_fit.shape[0], self._ts_fit.shape[0]))
        else:
            self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
        super().fit(self._X_fit, y)
        if hasattr(self, '_ts_metric'):
            self.metric = self._ts_metric
        return self
Example #8
    def do_kmeans(days, km_size):
        """
        From a time series dataset (a DataFrame of days called ``days``),
        create ``km_size`` clusters using the k-means algorithm.

        Parameters
        ----------
          * days: time series to cluster
          * km_size: number of clusters to build

        Returns
        -------
          * km: fitted k-means estimator, holding information about the algorithm
          * y_pred: cluster labels assigned to the series
        """
        # Arrange data for our lib
        unq = days["n_day_"].unique()
        values = [days[days["n_day_"] == l]["val_"].values for l in unq]
        formatted_dataset = to_time_series_dataset(values)

        # Configure our kmeans
        km = TimeSeriesKMeans(n_clusters=km_size,
                              metric="euclidean",
                              random_state=42,
                              verbose=False)

        y_pred = km.fit_predict(formatted_dataset)

        return km, y_pred
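A minimal driver for do_kmeans, with a made-up frame in the "n_day_"/"val_" layout the helper expects.

import pandas as pd

days = pd.DataFrame({
    "n_day_": [0, 0, 0, 1, 1, 1, 2, 2, 2],
    "val_":   [1., 2., 3., 1., 2., 3., 9., 8., 7.],
})
km, y_pred = do_kmeans(days, km_size=2)
print(y_pred)  # one cluster label per day, e.g. [0 0 1]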
Example #9
def cdist_gak(dataset1, dataset2=None, sigma=1.):
    """Compute cross-similarity matrix using Global Alignment kernel (GAK).

    GAK was originally presented in [1]_.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series
    sigma : float (default 1.)
        Bandwidth of the internal gaussian kernel used for GAK

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 1. , 0.656...],
           [ 0.656..., 1. ]])
    >>> cdist_gak([[1, 2, 2], [1., 2., 3., 4.]], [[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 0.710...,  0.297...],
           [ 0.656...,  1.        ]])

    See Also
    --------
    gak : Compute Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    dataset1 = to_time_series_dataset(dataset1)
    self_similarity = False
    if dataset2 is None:
        dataset2 = dataset1
        self_similarity = True
    else:
        dataset2 = to_time_series_dataset(dataset2)
    return cycdist_normalized_gak(dataset1,
                                  dataset2,
                                  sigma,
                                  self_similarity=self_similarity)
Example #10
    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Finds the K-neighbors of a point.

        Returns indices of and distances to the neighbors of each point.

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            The query time series.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.
        n_neighbors : int
            Number of neighbors to get (default is the value passed to the constructor).
        return_distance : boolean, optional. Defaults to True.
            If False, distances will not be returned

        Returns
        -------
        dist : array
            Array representing the distance to points, only present if return_distance=True
        ind : array
            Indices of the nearest points in the population matrix.
        """
        self_neighbors = False
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        if X is None:
            X = self._fit_X
            self_neighbors = True
        else:
            X = to_time_series_dataset(X)
        if self.metric == "dtw":
            cdist_fun = cdist_dtw
        elif self.metric in ["euclidean", "sqeuclidean", "cityblock"]:
            cdist_fun = lambda X, Xp: scipy_cdist(
                X.reshape((X.shape[0], -1)),
                Xp.reshape((Xp.shape[0], -1)),
                metric=self.metric)
        else:
            raise ValueError(
                "Unrecognized time series metric string: %s (should be one of 'dtw', 'euclidean', "
                "'sqeuclidean' or 'cityblock')" % self.metric)
        full_dist_matrix = cdist_fun(X, self._fit_X)
        ind = numpy.argsort(full_dist_matrix, axis=1)

        if self_neighbors:
            ind = ind[:, 1:]
        if n_neighbors > full_dist_matrix.shape[1]:
            n_neighbors = full_dist_matrix.shape[1]
        ind = ind[:, :n_neighbors]

        n_ts = X.shape[0]
        sample_range = numpy.arange(n_ts)[:, None]
        dist = full_dist_matrix[sample_range, ind]

        if return_distance:
            return dist, ind
        else:
            return ind
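A hedged usage sketch, assuming this method lives on tslearn.neighbors.KNeighborsTimeSeries (the unsupervised nearest-neighbors estimator).

from tslearn.neighbors import KNeighborsTimeSeries
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 4, 5], [9, 9, 9, 9]])
knn = KNeighborsTimeSeries(n_neighbors=1, metric="dtw").fit(X)
dist, ind = knn.kneighbors(to_time_series_dataset([[1, 2, 3, 5]]))
print(ind)  # index of the closest training series, here [[0]] or [[1]]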
Example #11
def run():
    parser = cli_parser()
    args = parser.parse_args()

    nii = image.index_img(args.input, slice(0, 30))
    masker = input_data.NiftiMasker()
    data = masker.fit_transform(nii)
    ds = to_time_series_dataset(data.T[::80, :])

    model = TimeSeriesKMeans(n_clusters=2, metric="dtw", max_iter=15)
    model.fit(ds)

    all_series = to_time_series_dataset(data.T)  # renamed: avoid shadowing built-in all()

    mask = model.predict(all_series)
    mask_nii = masker.inverse_transform(mask)
    mask_nii.to_filename(args.output)  # was mask.nii.to_filename, an AttributeError
Example #12
File: svm.py  Project: lejafar/tslearn
 def fit(self, X, y, sample_weight=None):
     sklearn_X = _prepare_ts_datasets_sklearn(X)
     if self.kernel == "gak" and self.gamma == "auto":
         self.gamma = gamma_soft_dtw(to_time_series_dataset(X))
         self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma)
     super(TimeSeriesSVC, self).fit(sklearn_X, y, sample_weight=sample_weight)
     self.kernel = _sparse_kernel_func_gak(sz=self.sz, d=self.d, gamma=self.gamma, slice_support_vectors=self.support_)
     return self
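A hedged sketch of the public API this override serves, tslearn.svm.TimeSeriesSVC with the GAK kernel.

from tslearn.svm import TimeSeriesSVC
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3], [1, 2, 4], [8, 8, 7], [9, 8, 7]])
y = [0, 0, 1, 1]
# gamma="auto" triggers the gamma_soft_dtw heuristic seen in the fit above.
clf = TimeSeriesSVC(kernel="gak", gamma="auto").fit(X, y)
print(clf.predict(to_time_series_dataset([[1, 2, 3]])))  # expected: [0]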
Example #13
def test_single_value_ts_no_nan():
    X = to_time_series_dataset([[1, 1, 1, 1]])

    standard_scaler = TimeSeriesScalerMeanVariance()
    assert np.sum(np.isnan(standard_scaler.fit_transform(X))) == 0

    minmax_scaler = TimeSeriesScalerMinMax()
    assert np.sum(np.isnan(minmax_scaler.fit_transform(X))) == 0
Example #14
 def support_vectors_time_series_(self, X):
     X_ = to_time_series_dataset(X)
     sv = []
     idx_start = 0
     for cl in range(len(self.n_support_)):
         indices = self.support_[idx_start:idx_start + self.n_support_[cl]]
         sv.append(X_[indices])
         idx_start += self.n_support_[cl]
     return sv
Example #15
File: neighbors.py  Project: wgova/tslearn
    def _sax_preprocess(self, X, n_segments=10, alphabet_size_avg=4):
        # Now SAX-transform the time series
        if not hasattr(self, '_sax') or self._sax is None:
            self._sax = SymbolicAggregateApproximation(
                n_segments=n_segments, alphabet_size_avg=alphabet_size_avg)

        X = to_time_series_dataset(X)
        X = self._sax.fit_transform(X)

        return X
Example #16
    def predict_proba(self, X):
        """Predict the class probabilities for the provided data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Test samples.
        """
        if self.metric in VARIABLE_LENGTH_METRICS:
            self._ts_metric = self.metric
            self.metric = "precomputed"

            if self.metric_params is None:
                metric_params = {}
            else:
                metric_params = self.metric_params.copy()
                if "n_jobs" in metric_params.keys():
                    del metric_params["n_jobs"]
            check_is_fitted(self, '_ts_fit')
            X = check_array(X, allow_nd=True, force_all_finite=False)
            X = to_time_series_dataset(X)
            if self._ts_metric == "dtw":
                X_ = cdist_dtw(X,
                               self._ts_fit,
                               n_jobs=self.n_jobs,
                               **metric_params)
            elif self._ts_metric == "softdtw":
                X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params)
            else:
                raise ValueError("Invalid metric recorded: %s" %
                                 self._ts_metric)
            pred = super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
            self.metric = self._ts_metric
            return pred
        else:
            check_is_fitted(self, '_X_fit')
            X = check_array(X, allow_nd=True)
            X = to_time_series_dataset(X)
            X_ = to_sklearn_dataset(X)
            X_ = check_dims(X_, self._X_fit, extend=False)
            return super(KNeighborsTimeSeriesClassifier,
                         self).predict_proba(X_)
Example #17
    def fit(self, X, y=None):
        """Fit the model using X as training data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        """
        self._fit_X = to_time_series_dataset(X)
        return self
Example #18
def dataImport(name):

    if not os.path.exists("../Classifier/TimeSeriesFiles/" + name):
        url = "http://www.timeseriesclassification.com/Downloads/%s.zip" % name
        extract_from_zip_url(url,
                             "../Classifier/TimeSeriesFiles/" + name + "/",
                             verbose=False)

    data_train = numpy.loadtxt("../Classifier/TimeSeriesFiles/" + name + "/" +
                               name + "_TRAIN.txt")
    data_test = numpy.loadtxt("../Classifier/TimeSeriesFiles/" + name + "/" +
                              name + "_TEST.txt")
    X_train = to_time_series_dataset(data_train[:, 1:])
    y_train = data_train[:, 0].astype(int)  # numpy.int was removed in NumPy 1.24
    X_test = to_time_series_dataset(data_test[:, 1:])
    y_test = data_test[:, 0].astype(int)
    X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
    X_test = TimeSeriesScalerMinMax().fit_transform(X_test)
    return X_train, y_train, X_test, y_test
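Example call, assuming the UCR archive is reachable and using "ECG200" as an illustrative dataset name.

X_train, y_train, X_test, y_test = dataImport("ECG200")
print(X_train.shape, y_train.shape)  # e.g. (100, 96, 1) (100,)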
Example #19
def clustering(df, n_cluster: int = 2, metric: str = 'softdtw', init='k-means++', random_state=1234, verbose=False,
               n_init=1):

    tsk = TimeSeriesKMeans(n_clusters=n_cluster, metric=metric, init=init, random_state=random_state, verbose=verbose,
                           n_init=n_init)
    df = np.roll(df, -6, axis=0)
    M = to_time_series_dataset(df.T)

    cluster_labels = tsk.fit_predict(M)

    return cluster_labels
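A minimal driver, assuming df stores one series per column (the function clusters df.T); the Euclidean metric keeps this toy run fast.

import numpy as np
import pandas as pd

t = np.linspace(0, 6, 24)
df = pd.DataFrame({"a": np.sin(t), "b": np.sin(t) + 0.1, "c": np.cos(t)})
labels = clustering(df, n_cluster=2, metric="euclidean")
print(labels)  # e.g. [0 0 1]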
Example #20
def test_variable_length_knn():
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [2, 5, 6, 7, 8, 9],
                                [3, 5, 6, 7, 8]])
    y = [0, 0, 1, 1]
    clf = KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    clf = KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])
Example #21
    def fit(self, X, y=None):
        """Fit the model using X as training data

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        """
        X = check_array(X, allow_nd=True)
        self._X_fit = to_time_series_dataset(X)
        return self
Example #22
    def fit(self, X, y=None):
        """Compute k-Shape clustering.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.

        y
            Ignored
        """
        X = check_array(X, allow_nd=True)

        max_attempts = max(self.n_init, 10)

        self.labels_ = None
        self.inertia_ = numpy.inf
        self.cluster_centers_ = None

        self.norms_ = 0.
        self.norms_centroids_ = 0.

        self.n_iter_ = 0

        X_ = to_time_series_dataset(X)
        self._X_fit = X_
        self.norms_ = numpy.linalg.norm(X_, axis=(1, 2))

        _check_initial_guess(self.init, self.n_clusters)

        rs = check_random_state(self.random_state)

        best_correct_centroids = None
        min_inertia = numpy.inf
        n_successful = 0
        n_attempts = 0
        while n_successful < self.n_init and n_attempts < max_attempts:
            try:
                if self.verbose and self.n_init > 1:
                    print("Init %d" % (n_successful + 1))
                n_attempts += 1
                self._fit_one_init(X_, rs)
                if self.inertia_ < min_inertia:
                    best_correct_centroids = self.cluster_centers_.copy()
                    min_inertia = self.inertia_
                    self.n_iter_ = self._iter
                n_successful += 1
            except EmptyClusterError:
                if self.verbose:
                    print("Resumed because of empty cluster")
        self.norms_centroids_ = numpy.linalg.norm(self.cluster_centers_,
                                                  axis=(1, 2))
        self._post_fit(X_, best_correct_centroids, min_inertia)
        return self
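A hedged usage sketch, assuming this fit belongs to tslearn.clustering.KShape; inputs are z-normalized first, as k-Shape expects.

from tslearn.clustering import KShape
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1., 2., 3., 4.], [2., 3., 4., 5.],
                            [4., 3., 2., 1.], [5., 4., 3., 2.]])
X = TimeSeriesScalerMeanVariance().fit_transform(X)
ks = KShape(n_clusters=2, n_init=1, random_state=0).fit(X)
print(ks.labels_)  # rising series vs. falling series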
Example #23
def cluster_examples(
    train: pd.DataFrame,
    test: pd.DataFrame,
    dataset_name: str,
    nclusters: int = 5,
    n_examples: int = 3,
):
    """Explore:
    -cluster series and look at examples from each cluster
    """
    clusterer = TimeSeriesKMeans(n_clusters=nclusters)

    group_cols = [train.columns[c] for c in grouping_cols[dataset_name]]
    train_groups = train.groupby(group_cols)
    test_groups = test.groupby(group_cols)
    max_l = max(
        [len(trg) + len(teg) for (_, trg), (_, teg) in zip(train_groups, test_groups)]
    )

    timeseries = []
    keys = []
    for (group_name, train_group), (_, test_group) in zip(train_groups, test_groups):
        t_values = train_group.iloc[:, target_cols[dataset_name]].astype(float)
        # Series.append was removed in pandas 2.0; use pd.concat instead.
        t_values = pd.concat(
            [t_values, test_group.iloc[:, target_cols[dataset_name]].astype(float)],
            ignore_index=True,
        )
        t_padded = pd.concat(
            [t_values, pd.Series([np.nan] * (max_l - t_values.shape[0]))],
            ignore_index=True,
        )
        t_padded = t_padded.interpolate()
        assert len(t_padded) == max_l
        timeseries.append(t_padded)
        keys.append(group_name)

    timeseries_dataset = ts_utils.to_time_series_dataset(timeseries)
    clusters = clusterer.fit_predict(timeseries_dataset)

    plot_hist(clusters, "Distribution of Clusters")

    for i in range(nclusters):
        print(f"Looking at examples from cluster {i}")
        idxs = np.where(clusters == i)[0]
        examples = np.random.choice(idxs, size=n_examples, replace=False)
        for j, ex in enumerate(examples):
            query_list = [
                f'{grp_col}=="{key}"' for grp_col, key in zip(group_cols, keys[ex])
            ]
            values = train.query(" & ".join(query_list)).iloc[
                :, target_cols[dataset_name]
            ]
            # values = values.append(
            #     test.query(' & '.join(query_list)).iloc[:, target_cols[dataset_name]]
            # )
            plot_ts(values, f"Example {j} of cluster {i}")
Example #24
    def apply_clustering(days, clust):
        """
        Apply a fitted clustering model to the given dataset.
        """
        # Arrange data for our lib
        unq = days["n_day_"].unique()
        values = [days[days["n_day_"] == l]["val_"].values for l in unq]
        formatted_dataset = to_time_series_dataset(values)

        y_pred = clust.predict(formatted_dataset)

        return y_pred
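How this helper composes with do_kmeans from Example #8, using hypothetical days_train / days_new frames in the same "n_day_"/"val_" layout.

km, _ = do_kmeans(days_train, km_size=2)  # fit clusters on one period
y_new = apply_clustering(days_new, km)    # assign new days to those clusters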
Example #25
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : array-like, shape (n_ts, sz, d)
            Training data.
        y : array-like, shape (n_ts, )
            Target values.
        """
        self._fit_X = to_time_series_dataset(X)
        self._fit_y = numpy.array(y)
        return self
Example #26
def _prepare_ts_datasets_sklearn(X):
    """Prepare time series datasets for sklearn.

    Examples
    --------
    >>> X = to_time_series_dataset([[1, 2, 3], [2, 2, 3]])
    >>> _prepare_ts_datasets_sklearn(X).shape
    (2, 3)
    """
    sklearn_X = to_time_series_dataset(X)
    n_ts, sz, d = sklearn_X.shape
    return sklearn_X.reshape((n_ts, -1))
Example #27
    def load_dataset(self, dataset_name):
        """Load a dataset from the UCR/UEA archive from its name.

        Parameters
        ----------
        dataset_name : str
            Name of the dataset. Should be in the list returned by `list_datasets`

        Returns
        -------
        numpy.ndarray of shape (n_ts_train, sz, d) or None
            Training time series. None if unsuccessful.
        numpy.ndarray of integers with shape (n_ts_train, ) or None
            Training labels. None if unsuccessful.
        numpy.ndarray of shape (n_ts_test, sz, d) or None
            Test time series. None if unsuccessful.
        numpy.ndarray of integers with shape (n_ts_test, ) or None
            Test labels. None if unsuccessful.

        Examples
        --------
        >>> X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset("TwoPatterns")
        >>> print(X_train.shape)
        (1000, 128, 1)
        >>> print(y_train.shape)
        (1000,)
        """
        full_path = os.path.join(self._data_dir, dataset_name)
        if os.path.isdir(full_path) and dataset_name not in self._ignore_list:
            if os.path.exists(os.path.join(full_path, self._filenames.get(dataset_name, dataset_name) + "_TRAIN.txt")):
                fname_train = self._filenames.get(dataset_name, dataset_name) + "_TRAIN.txt"
                fname_test = self._filenames.get(dataset_name, dataset_name) + "_TEST.txt"
                data_train = numpy.loadtxt(os.path.join(full_path, fname_train), delimiter=",")
                data_test = numpy.loadtxt(os.path.join(full_path, fname_test), delimiter=",")
                X_train = to_time_series_dataset(data_train[:, 1:])
                y_train = data_train[:, 0].astype(int)  # numpy.int was removed in NumPy 1.24
                X_test = to_time_series_dataset(data_test[:, 1:])
                y_test = data_test[:, 0].astype(int)
                return X_train, y_train, X_test, y_test
        return None, None, None, None
Example #28
def convert_mus_data_to_time_series(ts_dict):
    ts_dict[MUS1] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS1]))
    ts_dict[MUS2] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS2]))
    ts_dict[MUS3] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS3]))
    ts_dict[MUS4] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS4]))
    ts_dict[MUS5] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS5]))
    ts_dict[MUS6] = np.nan_to_num(to_time_series_dataset(ts_dict[MUS6]))
Example #29
    def fit(self, X, y):
        """Learn time-series shapelets.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        y : array-like of shape=(n_ts, )
            Time series labels.
        """
        if self.verbose_level is not None:
            warnings.warn(
                "'verbose_level' is deprecated in version 0.2 and will be "
                "removed in 0.4. Use 'verbose' instead.",
                DeprecationWarning, stacklevel=2)
            self.verbose = self.verbose_level

        X, y = check_X_y(X, y, allow_nd=True)
        X = to_time_series_dataset(X)
        X = check_dims(X)

        set_random_seed(seed=self.random_state)
        numpy.random.seed(seed=self.random_state)

        n_ts, sz, d = X.shape
        self._X_fit_dims = X.shape

        self.model_ = None
        self.transformer_model_ = None
        self.locator_model_ = None
        self.d_ = d

        y_ = self._preprocess_labels(y)
        n_labels = len(self.classes_)

        if self.n_shapelets_per_size is None:
            sizes = grabocka_params_to_shapelet_size_dict(n_ts, sz, n_labels,
                                                          self.shapelet_length,
                                                          self.total_lengths)
            self.n_shapelets_per_size_ = sizes
        else:
            self.n_shapelets_per_size_ = self.n_shapelets_per_size

        self._set_model_layers(X=X, ts_sz=sz, d=d, n_classes=n_labels)
        self._set_weights_false_conv(d=d)
        self.model_.fit(
            [X[:, :, di].reshape((n_ts, sz, 1)) for di in range(d)], y_,
            batch_size=self.batch_size, epochs=self.max_iter,
            verbose=self.verbose
        )
        self.n_iter_ = len(self.model_.history.history)
        return self
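A hedged usage sketch, assuming this fit belongs to a shapelet learner such as tslearn.shapelets.LearningShapelets (named ShapeletModel in older releases) and that a Keras backend is installed.

from tslearn.shapelets import LearningShapelets
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3, 4, 3], [1, 2, 4, 4, 3],
                            [5, 4, 3, 2, 1], [5, 5, 3, 2, 2]])
y = [0, 0, 1, 1]
# Fix two shapelets of length 3 instead of the grabocka heuristic above.
model = LearningShapelets(n_shapelets_per_size={3: 2}, max_iter=50,
                          random_state=0, verbose=0)
model.fit(X, y)
print(model.predict(X))  # should recover [0 0 1 1] on this toy set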
Example #30
def sigma_gak(dataset, n_samples=100, random_state=None):
    r"""Compute sigma value to be used for GAK.

    This method was originally presented in [1]_.

    Parameters
    ----------
    dataset
        A dataset of time series
    n_samples : int (default: 100)
        Number of samples on which median distance should be estimated
    random_state : integer or numpy.RandomState or None (default: None)
        The generator used to draw the samples. If an integer is given, it
        fixes the seed. Defaults to the global numpy random number generator.

    Returns
    -------
    float
        Suggested bandwidth (:math:`\sigma`) for the Global Alignment kernel

    Examples
    --------
    >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]]
    >>> sigma_gak(dataset=dataset,
    ...           n_samples=200,
    ...           random_state=0)  # doctest: +ELLIPSIS
    2.0...

    See Also
    --------
    gak : Compute Global Alignment kernel
    cdist_gak : Compute cross-similarity matrix using Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    random_state = check_random_state(random_state)
    dataset = to_time_series_dataset(dataset)
    n_ts, sz, d = dataset.shape
    if not check_equal_size(dataset):
        sz = numpy.min([ts_size(ts) for ts in dataset])
    if n_ts * sz < n_samples:
        replace = True
    else:
        replace = False
    sample_indices = random_state.choice(n_ts * sz,
                                         size=n_samples,
                                         replace=replace)
    dists = pdist(dataset[:, :sz, :].reshape((-1, d))[sample_indices],
                  metric="euclidean")
    return numpy.median(dists) * numpy.sqrt(sz)