Example #1
    @classmethod
    def setUpClass(cls):
        N_steps = 10000
        N_traj = 20
        lag = 1
        T = np.linalg.matrix_power(
            np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]), lag)
        # sample discrete trajectories from the transition matrix T
        dtrajs = [generate(T, N_steps) for _ in range(N_traj)]
        p0 = np.zeros(3)
        p1 = np.zeros(3)
        trajs = []
        for dtraj in dtrajs:
            # one-hot encode the discrete trajectory
            traj = np.zeros((N_steps, T.shape[0]))
            traj[np.arange(len(dtraj)), dtraj] = 1.0
            trajs.append(traj)
            # accumulate empirical instantaneous and time-lagged state counts
            p0 += traj[:-lag, :].sum(axis=0)
            p1 += traj[lag:, :].sum(axis=0)
        estimator, vamp = estimate_vamp(trajs,
                                        lag=lag,
                                        scaling=None,
                                        dim=1.0,
                                        return_estimator=True)
        msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)
        cls.trajs = trajs
        cls.dtrajs = dtrajs
        cls.trajs_timeshifted = list(
            timeshifted_split(cls.trajs, lagtime=lag, chunksize=5000))
        cls.lag = lag
        cls.msm = msm
        cls.vamp = vamp
        cls.estimator = estimator
        cls.p0 = p0 / p0.sum()
        cls.p1 = p1 / p1.sum()
        cls.atol = np.finfo(np.float32).eps * 1000.0
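For reference, a minimal standalone sketch of consuming timeshifted_split (not part of the test class above; the array shape, lag, and chunksize are illustrative, and timeshifted_split is assumed to be imported as in the surrounding examples): each chunk is a pair of an instantaneous block X and a block Y shifted by lagtime frames, as also used in Example #3.

import numpy as np

# Hedged sketch: iterate over time-lagged chunks of a single trajectory.
data = np.random.randn(1000, 3)
for X, Y in timeshifted_split(data, lagtime=1, chunksize=100):
    assert len(X) == len(Y)  # instantaneous and lagged blocks stay aligned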
Example #2
    def test_koopman_estimator_partial_fit(self):
        from sktime.covariance.online_covariance import KoopmanEstimator
        est = KoopmanEstimator(lagtime=self.tau)
        data_lagged = timeshifted_split(self.data,
                                        lagtime=self.tau,
                                        n_splits=10)
        for traj in data_lagged:
            est.partial_fit(traj)
        m = est.fetch_model()

        np.testing.assert_allclose(m.u, self.weight_obj.u)
        np.testing.assert_allclose(m.u_const, self.weight_obj.u_const)
Example #3
    def test_fit_reset(self):
        chunk = 40
        lag = 100
        np.random.seed(0)
        data = np.random.randn(23000, 3)

        est = TICA(lagtime=lag, dim=1)
        for X, Y in timeshifted_split(data, lagtime=lag, chunksize=chunk):
            est.partial_fit((X, Y))
        model1 = est.fetch_model().copy()
        # ------- fit() on the full data resets the estimator; the statistics should match -------
        est.fit(data)
        model2 = est.fetch_model().copy()

        assert model1 != model2
        np.testing.assert_array_almost_equal(model1.mean_0, model2.mean_0)
        np.testing.assert_array_almost_equal(model1.cov_00, model2.cov_00)
        np.testing.assert_array_almost_equal(model1.cov_0t, model2.cov_0t)
Example #4
    def fit(self,
            data,
            lagtime=None,
            weights=None,
            n_splits=None,
            column_selection=None):
        """
         column_selection: ndarray(k, dtype=int) or None
         Indices of those columns that are to be computed. If None, all columns are computed.
        :param data: list of sequences (n elements)
        :param weights: list of weight arrays (n elements) or array (shape
        :param n_splits:
        :param column_selection:
        :return:
        """
        # TODO: consistent dtype
        data = ensure_timeseries_data(data)

        self._rc.clear()

        if n_splits is None:
            dlen = min(len(d) for d in data)
            n_splits = int(dlen // 100 if dlen >= 1e4 else 1)

        if lagtime is None:
            lagtime = self.lagtime
        else:
            self.lagtime = lagtime
        assert lagtime is not None

        lazy_weights = False
        wsplit = itertools.repeat(None)

        if weights is not None:
            if hasattr(weights, 'weights'):
                lazy_weights = True
            elif len(np.atleast_1d(weights)) != len(data[0]):
                raise ValueError(
                    "Weights have incompatible shape "
                    f"(#weights={len(weights) if weights is not None else None} != {len(data[0])}=#frames."
                )
            elif isinstance(weights, np.ndarray):
                wsplit = np.array_split(weights, n_splits)

        if self.is_lagged:
            for (x, y), w in zip(
                    timeshifted_split(data, lagtime=lagtime,
                                      n_splits=n_splits), wsplit):
                if lazy_weights:
                    w = weights.weights(x)
                # weights may be shorter than the actual data; trim to the chunk length
                if isinstance(w, np.ndarray):
                    w = w[:len(x)]
                self.partial_fit((x, y),
                                 weights=w,
                                 column_selection=column_selection)
        else:
            for x in data:
                self.partial_fit(x,
                                 weights=weights,
                                 column_selection=column_selection)

        return self
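A minimal usage sketch of the fit method above. The estimator class name (OnlineCovariance) and its constructor signature are assumptions; substitute the actual class this method belongs to.

import numpy as np

# Hedged usage sketch; assumes a lagged estimator exposing fit() as defined above
# and the fetch_model() accessor seen in the earlier examples.
trajs = [np.random.randn(5000, 3) for _ in range(4)]  # list of sequences
est = OnlineCovariance(lagtime=10)                     # assumed class name / signature
est.fit(trajs, n_splits=8)                             # each trajectory is chunked via timeshifted_split
model = est.fetch_model()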