Example #1
    def test_non_matching_length(self):
        n = 100
        data = [np.random.random(size=(n, 2)) for _ in range(3)]
        data = (data[:-1], data[1:])  # instantaneous and time-lagged lists of trajectories
        weights = [np.random.random(n) for _ in range(3)]
        weights[0] = weights[0][:-3]
        with self.assertRaises(ValueError):
            OnlineCovariance(compute_c0t=True).fit(data, weights=weights)

        with self.assertRaises(ValueError):
            OnlineCovariance(compute_c0t=True).fit(data, weights=weights[:10])
Example #2
    def test_XX_weighted_meanconst(self):
        est = OnlineCovariance(compute_c0t=False, bessel=False)
        cc = est.fit(self.data - self.mean_const, weights=self.data_weights).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_c_wobj_lag0)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_wobj_lag0)
        cc = est.fit(self.data - self.mean_const, weights=self.data_weights,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_wobj_lag0[:, self.cols_2])
Example #3
    def test_XX_with_mean(self):
        # many passes
        est = OnlineCovariance(compute_c0t=False, remove_data_mean=False, bessel=False)
        cc = est.fit(self.data).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_lag0)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_lag0)
        cc = est.fit(self.data, column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_lag0[:, self.cols_2])
Example #4
    def test_re_estimate_weight_types(self):
        # check that different weight types are allowed and that re-estimation works
        x = np.random.random((100, 2))
        c = OnlineCovariance(lagtime=1, compute_c0t=True)
        c.fit(x, weights=np.ones((len(x),))).fetch_model()
        c.fit(x, weights=np.ones((len(x),))).fetch_model()
        c.fit(x, weights=None).fetch_model()
        c.fit(x, weights=x[:, 0]).fetch_model()
Example #5
    def test_XX_weightobj_meanfree(self):
        # many passes
        est = OnlineCovariance(compute_c0t=False, remove_data_mean=True, bessel=False)
        cc = est.fit(self.data, weights=self.data_weights, n_splits=10).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_wobj_lag0)
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_wobj_lag0)
        cc = est.fit(self.data, column_selection=self.cols_2, weights=self.data_weights).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_wobj_lag0[:, self.cols_2])
Example #6
    def test_XY_sym_meanconst(self):
        est = OnlineCovariance(compute_c0t=True, reversible=True, bessel=False)
        cc = est.fit((self.X - self.mean_const, self.Y - self.mean_const)).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.m_c_sym)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_sym)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_sym)
        cc = est.fit((self.X - self.mean_const, self.Y - self.mean_const),
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_sym[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_sym[:, self.cols_2])
Example #7
    def test_XXXY_sym_withmean(self):
        # many passes
        est = OnlineCovariance(remove_data_mean=False, compute_c0t=True, reversible=True, bessel=False)
        cc = est.fit((self.X, self.Y)).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.m_sym)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_sym)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_sym)
        cc = est.fit((self.X, self.Y), column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_sym[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_sym[:, self.cols_2])
Example #8
    def test_XXXY_weightobj_sym_meanfree(self):
        # many passes
        est = OnlineCovariance(remove_data_mean=True, compute_c0t=True, reversible=True, bessel=False)
        cc = est.fit((self.X, self.Y), weights=self.data_weights_lagged).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.m_sym_wobj)
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_sym_wobj)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_sym_wobj)
        cc = est.fit((self.X, self.Y), weights=self.data_weights_lagged,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_sym_wobj[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_sym_wobj[:, self.cols_2])
Example #9
    def test_XX_meanconst(self):
        est = OnlineCovariance(lagtime=self.lag,
                               compute_c0t=False,
                               bessels_correction=False)
        cc = est.fit(self.data - self.mean_const).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_c_lag0)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_lag0)
        cc = est.fit(self.data - self.mean_const,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_lag0[:, self.cols_2])
Example #10
    def test_XXXY_meanfree(self):
        # many passes
        est = OnlineCovariance(remove_data_mean=True, compute_c0t=True, bessel=False)
        cc = est.fit((self.X, self.Y)).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx)
        np.testing.assert_allclose(cc.mean_t, self.my)
        np.testing.assert_allclose(cc.cov_00, self.Mxx0)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0)
        cc = est.fit((self.X, self.Y), column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx0[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0[:, self.cols_2])
Example #11
    def test_XY_meanconst(self):
        est = OnlineCovariance(lagtime=self.lag,
                               compute_c0t=True,
                               bessels_correction=False)
        cc = est.fit(self.Xc_lag0).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_c)
        np.testing.assert_allclose(cc.mean_t, self.my_c)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c)
        cc = est.fit(self.Xc_lag0, column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c[:, self.cols_2])
Example #12
    def __init__(self, lagtime, epsilon=1e-6, reversible=True, dim=0.95,
                 scaling='kinetic_map', ncov=5):
        # tica parameters
        self.epsilon = epsilon
        self.dim = dim
        self.scaling = scaling

        # online cov parameters
        self.reversible = reversible
        self._covar = OnlineCovariance(lagtime=lagtime, compute_c00=True, compute_c0t=True,
                                       compute_ctt=False, remove_data_mean=True,
                                       reversible=self.reversible, bessels_correction=False,
                                       ncov=ncov)
        super(TICA, self).__init__()
Example #13
    def test_XY_weighted_meanconst(self):
        est = OnlineCovariance(compute_c0t=True, bessel=False)
        cc = est.fit((self.X - self.mean_const, self.Y - self.mean_const),
                     weights=self.data_weights_lagged).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_c_wobj)
        np.testing.assert_allclose(cc.mean_t, self.my_c_wobj)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_wobj)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_wobj)
        cc = est.fit((self.X - self.mean_const, self.Y - self.mean_const),
                     weights=self.data_weights_lagged, column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_wobj[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_wobj[:, self.cols_2])
Example #14
    def test_XXXY_sym_meanfree(self):
        # many passes
        est = OnlineCovariance(lagtime=self.lag,
                               remove_data_mean=True,
                               compute_c0t=True,
                               reversible=True,
                               bessels_correction=False)
        cc = est.fit(self.data, lagtime=self.lag).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.m_sym)
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_sym)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_sym)
        cc = est.fit(self.data, column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_sym[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_sym[:, self.cols_2])
Example #15
    def test_XXXY_weightobj_withmean(self):
        # many passes
        est = OnlineCovariance(lagtime=self.lag,
                               remove_data_mean=False,
                               compute_c0t=True,
                               bessels_correction=False)
        cc = est.fit(self.data, weights=self.data_weights).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_wobj)
        np.testing.assert_allclose(cc.mean_t, self.my_wobj)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_wobj)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_wobj)
        cc = est.fit(self.data,
                     weights=self.data_weights,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_wobj[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_wobj[:, self.cols_2])
Example #16
    def test_XXXY_withmean(self):
        # many passes
        est = OnlineCovariance(lagtime=self.lag,
                               remove_data_mean=False,
                               compute_c0t=True,
                               bessels_correction=False)
        cc = est.fit(self.data, n_splits=1).fetch_model()
        assert not cc.bessels_correction
        np.testing.assert_allclose(cc.mean_0, self.mx)
        np.testing.assert_allclose(cc.mean_t, self.my)
        np.testing.assert_allclose(cc.cov_00, self.Mxx)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy)
        cc = est.fit(self.data, n_splits=1,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy[:, self.cols_2])
Example #17
    def test_XY_sym_weighted_meanconst(self):
        est = OnlineCovariance(lagtime=self.lag,
                               compute_c0t=True,
                               reversible=True,
                               bessels_correction=False)
        cc = est.fit(self.Xc_lag0, n_splits=1,
                     weights=self.data_weights).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.m_c_sym_wobj)
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_sym_wobj)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_sym_wobj)
        cc = est.fit(self.Xc_lag0,
                     weights=self.data_weights,
                     n_splits=1,
                     column_selection=self.cols_2).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx_c_sym_wobj[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy_c_sym_wobj[:, self.cols_2])
Example #18
    def test_weights_equal_to_zero(self):
        n = 1000
        data = [np.random.random(size=(n, 2)) for _ in range(5)]

        # create some artificial correlations
        data[0][:, 0] *= np.random.randint(n)

        weights = [np.ones(n, dtype=np.float32) for _ in range(5)]
        # effectively omit the first trajectory: zero weights everywhere except a short window.
        weights[0][:] = 0
        weights[0][800:850] = 1

        est = OnlineCovariance(compute_c0t=True)
        for x, w in zip(data, weights):
            est.partial_fit((x[:-3], x[3:]), w[:-3])
        cov = est.fetch_model()
        zeros = sum((sum(w == 0) for w in weights))
        assert np.all(cov.cov_00 < 1), cov.cov_00
        assert np.all(cov.cov_00 > 0), cov.cov_00
Example #19
    def test_weights_close_to_zero(self):
        n = 1000
        data = [np.random.random(size=(n, 2)) for _ in range(5)]

        # create some artificial correlations
        data[0][:, 0] *= np.random.randint(n)
        data = np.asarray(data)

        weights = [np.ones(n, dtype=np.float32) for _ in range(5)]
        # omit the first trajectory by setting a weight close to zero.
        weights[0][:] = 1E-44
        weights = np.asarray(weights)

        est = OnlineCovariance(compute_c0t=True)
        for data_traj, weights_traj in zip(data, weights):
            est.partial_fit((data_traj[:-3], data_traj[3:]), weights=weights_traj[:-3])
        cov = est.fetch_model()
        # cov = covariance_lagged(data, lag=3, weights=weights, chunksize=10)
        assert np.all(cov.cov_00 < 1)
Example #20
    def test_XXXY_weightobj_meanfree(self):
        # TODO: tests do not pass for n_splits > 1!
        # many passes
        est = OnlineCovariance(lagtime=self.lag,
                               remove_data_mean=True,
                               compute_c0t=True,
                               bessels_correction=False)
        cc = est.fit(self.data, weights=self.data_weights,
                     n_splits=1).fetch_model()
        np.testing.assert_allclose(cc.mean_0, self.mx_wobj)
        np.testing.assert_allclose(cc.mean_t, self.my_wobj)
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_wobj)
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_wobj)
        cc = est.fit(self.data,
                     weights=self.data_weights,
                     column_selection=self.cols_2,
                     n_splits=1).fetch_model()
        np.testing.assert_allclose(cc.cov_00, self.Mxx0_wobj[:, self.cols_2])
        np.testing.assert_allclose(cc.cov_0t, self.Mxy0_wobj[:, self.cols_2])
Example #21
    def __init__(self,
                 lagtime=1,
                 dim=None,
                 scaling=None,
                 right=False,
                 epsilon=1e-6,
                 ncov=float('inf')):
        r""" Variational approach for Markov processes (VAMP) [1]_.

          Parameters
          ----------
          dim : float or int, default=None
              Number of dimensions to keep:

              * if dim is not set (None), all available ranks are kept:
                  `n_components == min(n_samples, n_uncorrelated_features)`
              * if dim is an integer >= 1, this number specifies the number
                of dimensions to keep.
              * if dim is a float with ``0 < dim < 1``, select the number
                of dimensions such that the amount of kinetic variance
                that needs to be explained is greater than the percentage
                specified by dim.
          scaling : None or string
              Scaling to be applied to the VAMP order parameters upon transformation

              * None: no scaling will be applied, variance of the order parameters is 1
              * 'kinetic map' or 'km': order parameters are scaled by their singular values.
                Only the left singular functions induce a kinetic map with respect to the
                conventional forward propagator; the right singular functions induce
                a kinetic map with respect to the backward propagator.
          right : boolean
              Whether to compute the right singular functions.
              If `right==True`, `get_output()` will return the right singular
              functions. Otherwise, `get_output()` will return the left singular
              functions.
              Beware that only `frames[tau:, :]` of each trajectory returned
              by `get_output()` contain valid values of the right singular
              functions. Conversely, only `frames[0:-tau, :]` of each
              trajectory returned by `get_output()` contain valid values of
              the left singular functions. The remaining frames might be
              interpreted as an extrapolation.
          epsilon : float
              eigenvalue cutoff. Eigenvalues of :math:`C_{00}` and :math:`C_{11}`
              with norms <= epsilon will be cut off. The remaining number of
              eigenvalues together with the value of `dim` define the size of the output.
          ncov : int, default=infinity
              limit the memory usage of the algorithm from [3]_ to an amount that corresponds
              to ncov additional copies of each correlation matrix

          Notes
          -----
          VAMP is a method for dimensionality reduction of Markov processes.

          The Koopman operator :math:`\mathcal{K}` is an integral operator
          that describes conditional future expectation values. Let
          :math:`p(\mathbf{x},\,\mathbf{y})` be the conditional probability
          density of visiting an infinitesimal phase space volume around
          point :math:`\mathbf{y}` at time :math:`t+\tau` given that the phase
          space point :math:`\mathbf{x}` was visited at the earlier time
          :math:`t`. Then the action of the Koopman operator on a function
          :math:`f` can be written as follows:

          .. math::

              \mathcal{K}f=\int p(\mathbf{x},\,\mathbf{y})f(\mathbf{y})\,\mathrm{d}\mathbf{y}=\mathbb{E}\left[f(\mathbf{x}_{t+\tau})\mid\mathbf{x}_{t}=\mathbf{x}\right]

          The Koopman operator is defined without any reference to an
          equilibrium distribution. It is therefore well-defined even in
          situations where the dynamics is irreversible and/or non-stationary,
          such that no equilibrium distribution exists.

          If we approximate :math:`f` by a linear superposition of ansatz
          functions :math:`\boldsymbol{\chi}` of the conformational
          degrees of freedom (features), the operator :math:`\mathcal{K}`
          can be approximated by a (finite-dimensional) matrix :math:`\mathbf{K}`.

          The approximation is computed as follows: From the time-dependent
          input features :math:`\boldsymbol{\chi}(t)`, we compute the mean
          :math:`\boldsymbol{\mu}_{0}` (:math:`\boldsymbol{\mu}_{1}`) from
          all data excluding the last (first) :math:`\tau` steps of every
          trajectory as follows:

          .. math::

              \boldsymbol{\mu}_{0} := \frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\boldsymbol{\chi}(t)

              \boldsymbol{\mu}_{1} := \frac{1}{T-\tau}\sum_{t=\tau}^{T}\boldsymbol{\chi}(t)

          Next, we compute the instantaneous covariance matrices
          :math:`\mathbf{C}_{00}` and :math:`\mathbf{C}_{11}` and the
          time-lagged covariance matrix :math:`\mathbf{C}_{01}` as follows:

          .. math::

              \mathbf{C}_{00} := \frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]^{\top}

              \mathbf{C}_{11} := \frac{1}{T-\tau}\sum_{t=\tau}^{T}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]^{\top}

              \mathbf{C}_{01} := \frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t+\tau)-\boldsymbol{\mu}_{1}\right]^{\top}

          The Koopman matrix is then computed as follows:

          .. math::

              \mathbf{K}=\mathbf{C}_{00}^{-1}\mathbf{C}_{01}

          It can be shown [1]_ that the leading singular functions of the
          half-weighted Koopman matrix

          .. math::

              \bar{\mathbf{K}}:=\mathbf{C}_{00}^{-\frac{1}{2}}\mathbf{C}_{01}\mathbf{C}_{11}^{-\frac{1}{2}}

          encode the best reduced dynamical model for the time series.

          The singular functions can be computed by first performing the
          singular value decomposition

          .. math::

              \bar{\mathbf{K}}=\mathbf{U}^{\prime}\mathbf{S}\mathbf{V}^{\prime\top}

          and then mapping the input conformation to the left singular
          functions :math:`\boldsymbol{\psi}` and right singular
          functions :math:`\boldsymbol{\phi}` as follows:

          .. math::

              \boldsymbol{\psi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]

              \boldsymbol{\phi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]


          References
          ----------
          .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data.
              arXiv:1707.04659v1
          .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation.
              J. Chem. Theory Comput. doi:10.1021/acs.jctc.5b00553
          .. [3] Chan, T. F., Golub, G. H. and LeVeque, R. J. 1979. Updating formulae and a pairwise algorithm for
              computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University.
          """
        self.dim = dim
        self.scaling = scaling
        self.right = right
        self.epsilon = epsilon
        self.ncov = ncov
        self._covar = OnlineCovariance(lagtime=lagtime,
                                       compute_c00=True,
                                       compute_c0t=True,
                                       compute_ctt=True,
                                       remove_data_mean=True,
                                       reversible=False,
                                       bessels_correction=False,
                                       ncov=self.ncov)
        self.lagtime = lagtime
        super(VAMP, self).__init__()
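
The matrix pipeline described in the docstring above can be condensed into a few lines of plain NumPy. This is an illustrative sketch only, not the library's implementation: the helper names `vamp_singular_functions` and `sqrtm_psd` are hypothetical, and the regularization the estimator performs via `epsilon` (truncating small eigenvalues of C00 and C11) is omitted.

import numpy as np

def sqrtm_psd(C):
    # symmetric square root of a positive semi-definite matrix via eigendecomposition
    w, V = np.linalg.eigh(C)
    return V @ np.diag(np.sqrt(np.clip(w, 0.0, None))) @ V.T

def vamp_singular_functions(chi, tau):
    # chi: (T, n) array of features over time; tau: lag time in steps
    X, Y = chi[:-tau], chi[tau:]               # instantaneous / time-lagged views
    mu0, mu1 = X.mean(axis=0), Y.mean(axis=0)  # means over the first / last T - tau frames
    X0, Y0 = X - mu0, Y - mu1                  # mean-free features
    T_eff = len(X)
    C00 = X0.T @ X0 / T_eff                    # instantaneous covariances
    C11 = Y0.T @ Y0 / T_eff
    C01 = X0.T @ Y0 / T_eff                    # time-lagged covariance
    # half-weighted Koopman matrix C00^{-1/2} C01 C11^{-1/2} and its SVD
    C00_inv_sqrt = np.linalg.inv(sqrtm_psd(C00))
    C11_inv_sqrt = np.linalg.inv(sqrtm_psd(C11))
    U, S, Vt = np.linalg.svd(C00_inv_sqrt @ C01 @ C11_inv_sqrt)
    psi = X0 @ C00_inv_sqrt @ U                # left singular functions (valid on frames[:-tau])
    phi = Y0 @ C11_inv_sqrt @ Vt.T             # right singular functions (valid on frames[tau:])
    return psi, phi, S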
Example #22
import numpy as np

from sktime.covariance.online_covariance import OnlineCovariance

if __name__ == '__main__':
    data = np.random.normal(size=(500000, 10))

    ################################################################################################
    # compute covariance matrix C00
    ################################################################################################

    # configure estimator with estimator-global parameters
    estimator = OnlineCovariance(compute_c00=True, remove_data_mean=True)

    for batch in np.array_split(data, 100):
        # parameters relevant only to the current batch can be passed to fit or partial_fit
        estimator.partial_fit(batch, weights=None, column_selection=None)

    # fetch_model finalizes the partial estimation (i.e., extracts means and
    # covariance matrices from the running covariance) and returns the current model
    model = estimator.fetch_model()
    print(model.mean_0)

    # retrieves copy of current model
    model_copy = model.copy()
    assert np.all(
        model_copy.mean_0 == model.mean_0) and model_copy is not model

    ################################################################################################
    # compute covariance matrix C0t
    ################################################################################################
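
    # A minimal sketch of how the C0t workflow might continue, assuming the same
    # partial_fit pattern used for C00 above and the (instantaneous, time-lagged)
    # pair input seen in the test examples; the lagtime value is illustrative.
    lagtime = 10
    estimator = OnlineCovariance(lagtime=lagtime, compute_c0t=True, remove_data_mean=True)

    for batch in np.array_split(data, 100):
        # feed instantaneous and time-lagged views of each batch
        estimator.partial_fit((batch[:-lagtime], batch[lagtime:]))

    model = estimator.fetch_model()
    print(model.cov_0t)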