示例#1
0
def kshape_grid_iter(X_partitioned: List[np.ndarray],
                     kshape_kwargs: dict) -> Tuple[KShape, int]:
    """Fit one KShape model seeded with a random row from every partition.

    One row is drawn at random from each array in ``X_partitioned`` and used
    as an initial centroid, so the model is built with
    ``len(X_partitioned)`` clusters. The model is then fitted on, and used to
    predict, the vertical stack of all partitions.

    Parameters
    ----------
    X_partitioned : list of np.ndarray
        Arrays to draw seeds from; they are stacked with ``np.vstack`` to
        form the training data.
        NOTE(review): the ``init`` reshaping below yields
        ``(n_clusters, sz, 1)`` only for 2-D partitions of shape
        ``(n_samples, sz)`` - confirm against callers.
    kshape_kwargs : dict
        Extra keyword arguments forwarded to ``KShape``. Must not contain
        ``n_clusters``, ``init``, ``verbose`` or ``random_state``, which are
        set here.

    Returns
    -------
    (KShape, int)
        The fitted model and the number of distinct labels it predicts on
        the stacked data.

    Raises
    ------
    ValueError
        From ``np.random.randint`` if a partition has no rows.
    """
    # ``randint`` excludes its upper bound, so pass the row count itself:
    # ``shape[0] - 1`` would never select the last row and would raise for a
    # partition holding a single row.
    seed_ixs = [np.random.randint(0, X.shape[0]) for X in X_partitioned]
    centroid_seeds = np.array(
        [X[seed] for X, seed in zip(X_partitioned, seed_ixs)])
    # Append a trailing axis: (k, sz) -> (1, k, sz) -> (sz, k, 1) -> (k, sz, 1).
    init = np.swapaxes(np.array([centroid_seeds]).T, 0, 1)

    kshape = KShape(n_clusters=len(seed_ixs),
                    init=init,
                    verbose=True,
                    random_state=None,
                    **kshape_kwargs)

    X = np.vstack(X_partitioned)

    print('** Fitting ks model **')
    kshape.fit(X)

    print('** Predicting **')
    n_clusters_out = np.unique(kshape.predict(X)).size

    # until the tslearn hyper-param json issue is released in the latest pypi version
    kshape.init = kshape.init.tolist()

    return kshape, n_clusters_out
示例#2
0
def test_serialize_kshape():
    """Run the not-fitted and predict checks around a KShape fit.

    A seeded random dataset of 15 series (length 10, 3 dimensions) is
    scaled with ``TimeSeriesScalerMeanVariance``. The model is passed to
    ``_check_not_fitted`` before fitting and to ``_check_params_predict``
    (for ``'predict'``) afterwards.
    """
    n_series, length, n_dims = 15, 10, 3
    random_gen = numpy.random.RandomState(0)
    raw_series = random_gen.randn(n_series, length, n_dims)
    scaler = TimeSeriesScalerMeanVariance()
    X = scaler.fit_transform(raw_series)

    model = KShape(n_clusters=3, verbose=True)
    _check_not_fitted(model)

    model.fit(X)
    _check_params_predict(model, X, ['predict'])
示例#3
0
    def plot_elbow(self, data):
        """Plot KShape inertia against the number of clusters (2 to 6).

        A separate ``KShape`` model (``n_init=5``, seeded with ``self.seed``)
        is fitted for every cluster count and its ``inertia_`` is recorded.

        :param data: time series dataset handed to ``KShape.fit``
            (described by the original author as multiple time series in an
            np.array)
        :return: None; the elbow plot is shown via ``plt.show()``
        """
        cluster_counts = range(2, 7)
        inertias = []
        for k in cluster_counts:
            model = KShape(
                n_clusters=k,
                n_init=5,
                verbose=True,
                random_state=self.seed,
            )
            model.fit(data)
            inertias.append(model.inertia_)

        plt.plot(cluster_counts, inertias, marker='o')
        plt.xlabel('Number of clusters')
        plt.ylabel('Distortion Line')
        plt.show()
示例#4
0
def run_single(X, train, params, workdir, out):
    """Fit KShape on ``train``, predict ``X`` and save the results.

    Parameters
    ----------
    X : array-like
        Data passed to ``KShape.predict``.
    train : array-like
        Data passed to ``KShape.fit``; also saved to ``train.npy``.
    params : dict
        Keyword arguments used to construct ``KShape``.
    workdir : str
        Directory that receives ``ks.pickle``, ``y_pred.npy`` and
        ``train.npy``.
    out : str
        Path of a file that is overwritten with ``'1'`` once everything has
        been saved.
    """
    ks = KShape(**params)

    ks.fit(train)

    print('**** Predicting ****')
    y_pred = ks.predict(X)

    ks_path = os.path.join(workdir, 'ks.pickle')
    # Context manager so the handle is flushed and closed deterministically.
    with open(ks_path, 'wb') as f:
        pickle.dump(ks, f)

    y_pred_path = os.path.join(workdir, 'y_pred.npy')
    np.save(y_pred_path, y_pred)

    train_path = os.path.join(workdir, 'train.npy')
    np.save(train_path, train)

    with open(out, 'w') as f:
        f.write('1')

    print('* Done! *')
示例#5
0
def run(data_path: str, params_path: str):
    """Load data and parameters, fit KShape on a random subset and save results.

    The pickled parameter dict must provide ``'workdir'`` and ``'kwargs'``;
    ``kwargs['train_percent']`` is removed from the dict and used to size the
    training subset, and the remaining entries are passed to ``KShape``.

    A file named ``out`` inside the work dir is written with ``'0'`` before
    fitting starts and overwritten with ``'1'`` after ``ks.pickle``,
    ``y_pred.npy`` and ``train.npy`` have been saved.

    Parameters
    ----------
    data_path : str
        Path of a file readable by ``np.load``.
    params_path : str
        Path of the pickled parameter dict.
    """
    X = np.load(data_path)

    # NOTE(security): pickle.load executes arbitrary code from the file;
    # only use parameter files from a trusted source.
    with open(params_path, 'rb') as f:
        params = pickle.load(f)
    workdir = params['workdir']

    out = os.path.join(workdir, 'out')
    with open(out, 'w') as f:
        f.write('0')

    print(f'Using work dir: {workdir}')

    print('** Fitting training data **')
    # train_percent is not a KShape argument, so it is popped before the
    # remaining kwargs are forwarded to the constructor.
    n_train = int((params['kwargs'].pop('train_percent') / 100) * X.shape[0])
    train = X[np.random.choice(X.shape[0], size=n_train, replace=False)]

    kwargs = params['kwargs']
    ks = KShape(**kwargs)

    ks.fit(train)

    print('**** Predicting ****')
    y_pred = ks.predict(X)

    ks_path = os.path.join(workdir, 'ks.pickle')
    # Context manager so the handle is flushed and closed deterministically.
    with open(ks_path, 'wb') as f:
        pickle.dump(ks, f)

    y_pred_path = os.path.join(workdir, 'y_pred.npy')
    np.save(y_pred_path, y_pred)

    train_path = os.path.join(workdir, 'train.npy')
    np.save(train_path, train)

    with open(out, 'w') as f:
        f.write('1')

    print('* Done! *')
示例#6
0
def test_serialize_kshape():
    """Run the not-fitted and predict checks for default and seeded KShape.

    The first model uses KShape's default initialisation. The second is
    given three series taken from the dataset as explicit initial centroids.
    Each model is passed to ``_check_not_fitted`` before fitting and to
    ``_check_params_predict`` (for ``'predict'``) afterwards.
    """
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    time_series = rng.randn(n, sz, d)
    X = TimeSeriesScalerMeanVariance().fit_transform(time_series)

    ks = KShape(n_clusters=3, verbose=True)

    _check_not_fitted(ks)

    ks.fit(X)

    _check_params_predict(ks, X, ['predict'])

    # Draw the seed indices from the local seeded generator so the test is
    # reproducible, without replacement so no two initial centroids are the
    # same series. Every index in [0, n) is eligible; the previous
    # ``randint(0, n - 1)`` could never select the last series.
    seed_ixs = rng.choice(X.shape[0], size=3, replace=False)
    seeds = numpy.array([X[i] for i in seed_ixs])

    ks_seeded = KShape(n_clusters=3, verbose=True, init=seeds)

    _check_not_fitted(ks_seeded)

    ks_seeded.fit(X)

    _check_params_predict(ks_seeded, X, ['predict'])
示例#7
0
class TimeSeriesKShapes(BaseClusterer):
    """Kshape algorithm wrapper tslearns implementation.

    Parameters
    ----------
    n_clusters: int, defaults = 8
        The number of clusters to form as well as the number of
        centroids to generate.
    init_algorithm: str or np.ndarray, defaults = 'random'
        Method for initializing cluster centers. Any of the following are valid:
        ['random']. Or a np.ndarray of shape (n_clusters, ts_size, d) and gives the
        initial centers.
    n_init: int, defaults = 10
        Number of times the algorithm will be run with different
        centroid seeds. The final result will be the best output of n_init
        consecutive runs in terms of inertia.
    max_iter: int, defaults = 300
        Maximum number of iterations of the algorithm for a single
        run.
    tol: float, defaults = 1e-4
        Relative tolerance with regards to Frobenius norm of the difference
        in the cluster centers of two consecutive iterations to declare
        convergence.
    verbose: bool, defaults = False
        Verbosity mode.
    random_state: int or np.random.RandomState instance or None, defaults = None
        Determines random number generation for centroid initialization.

    Attributes
    ----------
    cluster_centers_: np.ndarray or None
        Cluster centers copied from the fitted tslearn model; None before fit.
    labels_: np.ndarray (1d array of shape (n_instance,))
        Labels that is the index each time series belongs to.
    inertia_: float
        Inertia reported by the fitted tslearn model.
    n_iter_: int
        Number of iterations run.
    """

    _tags = {
        "capability:multivariate": True,
    }

    def __init__(
        self,
        n_clusters: int = 8,
        init_algorithm: Union[str, np.ndarray] = "random",
        n_init: int = 10,
        max_iter: int = 300,
        tol: float = 1e-4,
        verbose: bool = False,
        random_state: Union[int, RandomState, None] = None,
    ):
        _check_soft_dependencies("tslearn", severity="error", object=self)

        self.init_algorithm = init_algorithm
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

        # Fitted state, populated by ``_fit``.
        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0

        # Underlying tslearn estimator, created lazily on the first fit.
        self._tslearn_k_shapes = None

        super().__init__(n_clusters=n_clusters)

    def _fit(self, X: TimeSeriesInstances, y=None) -> None:
        """Fit time series clusterer to training data.

        Parameters
        ----------
        X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
            (n_instances, n_dimensions, series_length))
            Training time series instances to cluster.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        None
            The fitted state is stored on ``cluster_centers_``, ``labels_``,
            ``inertia_`` and ``n_iter_``.
        """
        from tslearn.clustering import KShape

        if self._tslearn_k_shapes is None:
            self._tslearn_k_shapes = KShape(
                # Use the configured cluster count rather than a fixed value.
                n_clusters=self.n_clusters,
                max_iter=self.max_iter,
                tol=self.tol,
                random_state=self.random_state,
                n_init=self.n_init,
                verbose=self.verbose,
                init=self.init_algorithm,
            )

        self._tslearn_k_shapes.fit(X)
        # Populate the public attribute declared in __init__; the private
        # alias is kept for code that still reads the old name.
        self.cluster_centers_ = self._tslearn_k_shapes.cluster_centers_
        self._cluster_centers = self.cluster_centers_
        self.labels_ = self._tslearn_k_shapes.labels_
        self.inertia_ = self._tslearn_k_shapes.inertia_
        self.n_iter_ = self._tslearn_k_shapes.n_iter_

    def _predict(self, X: TimeSeriesInstances, y=None) -> np.ndarray:
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
            (n_instances, n_dimensions, series_length))
            Time series instances to predict their cluster indexes.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        np.ndarray (1d array of shape (n_instances,))
            Index of the cluster each time series in X belongs to.
        """
        return self._tslearn_k_shapes.predict(X)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.


        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        params = {
            "n_clusters": 2,
            "init_algorithm": "random",
            "n_init": 1,
            "max_iter": 1,
            "tol": 1e-4,
            "verbose": False,
            "random_state": 1,
        }
        return params

    def _score(self, X, y=None):
        """Return the absolute value of the fitted inertia.

        ``X`` and ``y`` are not used; the score is taken from the
        ``inertia_`` stored by the last fit.
        """
        return np.abs(self.inertia_)
# Load the ECGFiveDays test split: column 0 holds the labels and the
# remaining columns hold the series values.
data_test = np.loadtxt(current_path + file +
                       "ECGFiveDays\\ECGFiveDays_TEST.tsv")
X_test = to_time_series_dataset(data_test[:, 1:])
# ``np.int`` was only an alias of the builtin ``int``; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
y_test = data_test[:, 0].astype(int)
# From here on ``file`` refers to the result directory.
file = "教師なし教科書\\13章-時系列クラスタリング\\3_ECGFiveDays_k_shape\\result\\"

# Prepare the data - Scale
X_train = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_train)
X_test = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_test)


# k-Shape Algorithm
# Train using k-Shape
ks = KShape(n_clusters=2, max_iter=100, n_init=100, verbose=0)
ks.fit(X_train)

# Make predictions on train set and calculate adjusted Rand index
preds = ks.predict(X_train)
ars = adjusted_rand_score(data_train[:, 0], preds)
print("Adjusted Rand Index:", ars)

# Make predictions on test set and calculate adjusted Rand index
preds_test = ks.predict(X_test)
ars = adjusted_rand_score(data_test[:, 0], preds_test)
print("Adjusted Rand Index on Test Set:", ars)

# Results are poor because the training set is small (train: 23, test: 861)
# Adjusted Rand Index: 0.668041237113402
# Adjusted Rand Index on Test Set: 0.012338817789874643