Exemplo n.º 1
0
    def setUpClass(cls):
        """Create the class-wide fixture shared by the tests.

        Under a fixed numpy seed (123) this generates a two-dimensional
        trajectory from a two-state hidden chain with Gaussian output,
        computes reference means and (time-lagged) covariance matrices by
        hand, and estimates a reversible and a non-reversible TICA object
        from the same data.
        """
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # transition matrix and length of the hidden trajectory
            cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
            cls.T = 40000
            # Gaussian output parameters, indexed by hidden state
            means = [np.array([-1, 1]), np.array([1, -1])]
            widths = [np.array([0.3, 2]), np.array([0.3, 2])]

            # observed trajectory, filled one frame at a time
            cls.X = np.zeros((cls.T, 2))
            hidden = msmgen.generate_traj(cls.P, cls.T)
            for frame in range(cls.T):
                state = hidden[frame]
                sigma, mu = widths[state], means[state]
                # two draws per frame, first coordinate first
                cls.X[frame, 0] = sigma[0] * np.random.randn() + mu[0]
                cls.X[frame, 1] = sigma[1] * np.random.randn() + mu[1]

            cls.lag = 10
            lag = cls.lag
            # number of (t, t + lag) frame pairs
            n_pairs = cls.T - lag

            # Reference means: `mref` averages over the leading and the
            # trailing window, `mref_nr` over the leading window only.
            head_sum = np.sum(cls.X[:-lag, :], axis=0)
            tail_sum = np.sum(cls.X[lag:, :], axis=0)
            mref = (head_sum + tail_sum) / float(2 * n_pairs)
            mref_nr = head_sum / float(n_pairs)
            cls.X_mf = cls.X - mref[None, :]
            cls.X_mf_nr = cls.X - mref_nr[None, :]

            # Windows of the mean-free data used for the reference matrices.
            mf_head = cls.X_mf[:-lag, :]
            mf_tail = cls.X_mf[lag:, :]
            nr_head = cls.X_mf_nr[:-lag, :]
            nr_tail = cls.X_mf_nr[lag:, :]

            # instantaneous reference matrices
            cls.cov_ref = (np.dot(mf_head.T, mf_head) +
                           np.dot(mf_tail.T, mf_tail)) / float(2 * n_pairs)
            cls.cov_ref_nr = np.dot(nr_head.T, nr_head) / float(n_pairs)
            # time-lagged reference matrices
            cls.cov_tau_ref = (np.dot(mf_head.T, mf_tail) +
                               np.dot(mf_tail.T, mf_head)) / float(2 * n_pairs)
            cls.cov_tau_ref_nr = np.dot(nr_head.T, nr_tail) / float(n_pairs)

            # Both estimators read from the same source and share all
            # settings except `reversible`.
            reader = api.source(cls.X, chunksize=0)
            shared = dict(data=reader, lag=cls.lag, dim=1, kinetic_map=False)
            # unscaled TICA
            cls.tica_obj = api.tica(**shared)
            # non-reversible TICA
            cls.tica_obj_nr = api.tica(reversible=False, **shared)
Exemplo n.º 2
0
 def test_fit_transform(self):
     """`fit_transform` must agree with the first output of `api.tica`."""
     samples = np.random.randn(100, 2)
     estimator = _internal_tica(1, 1)
     transformed = estimator.fit_transform(samples)
     reference = api.tica(data=samples, lag=1, dim=1).get_output()[0]
     np.testing.assert_array_almost_equal(transformed, reference)
Exemplo n.º 3
0
    def test_transformer_random_access_in_memory(self):
        """The jagged random-access strategy must follow the `in_memory` flag
        and disappear once the data producer is removed."""
        from pyerna.coordinates.data._base.transformer import StreamingTransformerRandomAccessStrategy
        from pyerna.coordinates.data.data_in_memory import DataInMemoryJaggedRandomAccessStrategy

        estimator = coor.tica(self._get_reader_instance(1))

        # freshly constructed: everything normal
        assert estimator.is_random_accessible
        assert isinstance(estimator._ra_jagged,
                          StreamingTransformerRandomAccessStrategy)

        # switch to memory, then back again and expect the fallback
        toggles = (
            (True, DataInMemoryJaggedRandomAccessStrategy),
            (False, StreamingTransformerRandomAccessStrategy),
        )
        for flag, expected_strategy in toggles:
            estimator.in_memory = flag
            assert estimator.is_random_accessible
            assert isinstance(estimator._ra_jagged, expected_strategy)

        # without a data source there is nothing to access
        estimator.data_producer = None
        assert not estimator.is_random_accessible
        assert estimator._ra_jagged is None
Exemplo n.º 4
0
 def test_duplicated_data_in_fit_transform(self):
     """Output for a twice-repeated trajectory must equal `fit_transform`
     called on the same pair of arrays."""
     samples = np.random.randn(100, 2)
     estimator = api.tica(data=DataInMemory([samples, samples]), lag=1, dim=1)
     from_output = estimator.get_output()
     from_fit = estimator.fit_transform([samples, samples])
     np.testing.assert_array_almost_equal(from_output, from_fit)
Exemplo n.º 5
0
    def test_in_memory(self):
        """Smoke test: requesting output after enabling `in_memory` must not raise."""
        samples = np.random.random((100, 10))
        estimator = api.tica(api.source(samples), lag=10, dim=1)

        estimator.in_memory = True
        estimator.get_output()
Exemplo n.º 6
0
    def test_with_pipeline_time_lagged(self):
        """Smoke test: a discretizer built from a feature reader and a TICA
        stage can be parametrized."""
        reader = api.source(self.trajfile, top=self.topfile)
        assert isinstance(reader, FeatureReader)

        transform = tica(dim=2, lag=1)
        discretizer(reader, transform, chunksize=10).parametrize()
    def test_covariances_and_eigenvalues(self):
        """For several lag times, re-estimating TICA on the projected output
        must give an identity covariance, zero mean and a diagonal time-lagged
        covariance holding the eigenvalues."""
        reader = FeatureReader(self.trajnames, self.temppdb)
        for tau in (1, 10, 100, 1000, 2000):
            trans = tica(lag=tau, dim=self.dim, kinetic_map=False)
            trans.estimate(reader)
            projected = trans.get_output()

            log.info('max. eigenvalue: %f' % np.max(trans.eigenvalues))
            self.assertTrue(np.all(trans.eigenvalues <= 1.0))

            # second estimation on the independent components
            check = tica(data=projected, lag=tau, dim=self.dim)
            np.testing.assert_allclose(np.eye(self.dim), check.cov, atol=1e-8)
            np.testing.assert_allclose(check.mean, 0.0, atol=1e-8)

            # expected time-lagged covariance: eigenvalues on the diagonal
            expected_cov_tau = np.zeros((self.dim, self.dim))
            expected_cov_tau[np.diag_indices(self.dim)] = trans.eigenvalues
            np.testing.assert_allclose(expected_cov_tau, check.cov_tau,
                                       atol=1e-8)
    def test_partial_fit(self):
        """Feeding the trajectories one by one through `partial_fit` must
        reproduce the result of estimating on the reader directly."""
        reader = FeatureReader(self.trajnames, self.temppdb, chunksize=10000)
        trajectories = reader.get_output()
        settings = dict(dim=self.dim, lag=1001)
        batch = api.tica(reader, **settings)
        incremental = api.tica(**settings)

        for trajectory in trajectories:
            incremental.partial_fit(trajectory)

        np.testing.assert_allclose(incremental.eigenvalues,
                                   batch.eigenvalues,
                                   atol=1e-3)
        # Only the first two eigenvectors are compared, because there are
        # only two metastable processes; abs() removes the sign ambiguity.
        leading_incremental = np.abs(incremental.eigenvectors[:2])
        leading_batch = np.abs(batch.eigenvectors[:2])
        np.testing.assert_allclose(leading_incremental,
                                   leading_batch,
                                   rtol=1e-3,
                                   atol=1e-3)
Exemplo n.º 9
0
    def test_singular_zeros(self):
        """Data with an all-zero column must still yield float64 eigenvectors
        and eigenvalues."""
        noise = np.random.randn(100, 2)
        zero_column = np.zeros((100, 1))
        samples = np.hstack((noise, zero_column))

        estimator = api.tica(data=samples, lag=1, dim=1)

        assert estimator.eigenvectors.dtype == np.float64
        assert estimator.eigenvalues.dtype == np.float64
Exemplo n.º 10
0
 def test_no_cluster(self):
     """Smoke test: pipelines without a clustering stage produce output."""
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     # pipeline consisting of the reader alone
     api.pipeline(reader_xtc)
     reader_xtc.get_output()
     # reader followed by a single transformer (tica, then pca)
     for transformer in (api.tica(), api.pca()):
         pipe = api.pipeline([reader_xtc, transformer])
         pipe._chain[-1].get_output()
Exemplo n.º 11
0
 def test(self):
     """Transforming the training data must give a float matrix of shape (100, 1)."""
     # seeded so the input is deterministic
     with numpy_random_seed(0):
         samples = np.random.randn(100, 10)
     estimator = api.tica(data=samples, lag=10, dim=1)
     projected = estimator._transform_array(samples)
     # type and shape of the projection
     assert types.is_float_matrix(projected)
     assert projected.shape[0] == 100
     assert projected.shape[1] == 1, projected.shape[1]
    def test_partial_fit(self):
        """For each output-scaling variant, incremental estimation through
        `partial_fit` must match estimation on the full reader, and the
        `_estimated` flag must be reset by every `partial_fit` call."""
        from pyerna.coordinates import source
        reader = source(self.trajnames, top=self.temppdb)
        trajectories = reader.get_output()

        variants = (
            {'kinetic_map': False},
            {'kinetic_map': True},
            {'kinetic_map': False, 'commute_map': True},
        )
        for variant in variants:
            settings = dict({'lag': 10, 'dim': self.dim}, **variant)

            incremental = tica(**settings)

            # first chunk: not estimated until a result is requested
            incremental.partial_fit(trajectories[0])
            assert not incremental._estimated
            # access eigenvectors to force diagonalization
            incremental.eigenvectors
            assert incremental._estimated

            # second chunk: new data invalidates the estimate again
            incremental.partial_fit(trajectories[1])
            assert not incremental._estimated
            incremental.eigenvalues
            assert incremental._estimated

            # remaining chunks
            for trajectory in trajectories[2:]:
                incremental.partial_fit(trajectory)

            # reference estimated on all data at once
            batch = tica(reader, **settings)

            for attribute in ('cov', 'cov_tau', 'eigenvalues'):
                np.testing.assert_allclose(getattr(incremental, attribute),
                                           getattr(batch, attribute),
                                           atol=1e-15)
Exemplo n.º 13
0
 def test_kinetic_map(self):
     """With `kinetic_map=True` the variance of each output column must be
     within 0.01 of the squared eigenvalue."""
     estimator = api.tica(data=self.X,
                          lag=self.lag,
                          dim=-1,
                          var_cutoff=1,
                          kinetic_map=True)
     projected = estimator.get_output()[0]
     observed = np.var(projected, axis=0)
     expected = estimator.eigenvalues**2
     largest_deviation = np.max(np.abs(observed - expected))
     assert largest_deviation < 0.01
Exemplo n.º 14
0
    def test_duplicated_data(self):
        """Data whose first column appears twice must still yield float64
        eigenvectors and eigenvalues."""
        base = np.random.randn(100, 2)
        # append a copy of column 0 as a third column
        repeated_column = base[:, 0, np.newaxis]
        samples = np.hstack((base, repeated_column))

        estimator = api.tica(data=DataInMemory(samples), lag=1, dim=1)

        assert estimator.eigenvectors.dtype == np.float64
        assert estimator.eigenvalues.dtype == np.float64
Exemplo n.º 15
0
 def test_chunksize(self):
     """A chunksize passed to the pipeline must be visible on the pipeline
     and on every element of its chain."""
     chunksize = 1001
     reader_xtc = api.source(self.traj_files, top=self.pdb_file)
     stages = [
         reader_xtc,
         api.tica(),
         api.cluster_mini_batch_kmeans(batch_size=0.3, k=3),
     ]
     pipe = api.pipeline(stages, chunksize=chunksize, run=False)
     assert pipe.chunksize == chunksize
     for element in pipe._chain:
         assert element.chunksize == chunksize
Exemplo n.º 16
0
 def test_discretizer(self):
     """Smoke test: discretizers with various stage combinations produce
     output from their last chain element without raising."""
     reader_gen = DataInMemory(data=self.generated_data)

     def last_stage_output(**stages):
         # build the discretizer and pull output from its final element
         return api.discretizer(reader_gen, **stages)._chain[-1].get_output()

     # check if exception safe
     last_stage_output()
     last_stage_output(transform=api.tica())
     last_stage_output(cluster=api.cluster_uniform_time())
     last_stage_output(transform=api.pca(),
                       cluster=api.cluster_regspace(dmin=10))
Exemplo n.º 17
0
    def test_dimension(self):
        """Check the reported output dimension for a fixed `dim` and for
        `var_cutoff`, and that invalid combinations raise ValueError."""
        assert types.is_int(self.tica_obj.dimension())
        # the fixture estimator was built with dim=1
        assert self.tica_obj.dimension() == 1

        # dimension chosen through the variance cutoff
        for cutoff, expected_dim in ((1.0, 2), (0.9, 1)):
            estimator = api.tica(data=self.X, lag=self.lag, dim=-1,
                                 var_cutoff=cutoff)
            assert estimator.dimension() == expected_dim

        # trying to set both dim and subspace_variance is forbidden
        with self.assertRaises(ValueError):
            api.tica(data=self.X, lag=self.lag, dim=1, var_cutoff=0.9)

        # these cutoff values are rejected
        for invalid_cutoff in (0, 1.1):
            with self.assertRaises(ValueError):
                api.tica(lag=self.lag, var_cutoff=invalid_cutoff)
Exemplo n.º 18
0
 def test_is_parametrized(self):
     """A pipeline created with `run=False` is not estimated until
     `parametrize()` is called."""
     # one stage of every transformer kind used in these tests
     stages = [
         api.source(self.traj_files, top=self.pdb_file),
         api.tica(),
         api.pca(),
         api.cluster_kmeans(k=50),
         api.cluster_regspace(dmin=50),
         api.cluster_uniform_time(k=20),
     ]
     pipe = api.pipeline(stages, run=False)
     self.assertFalse(
         pipe._is_estimated(),
         "If run=false, the pipeline should not be parametrized.")

     pipe.parametrize()
     self.assertTrue(
         pipe._is_estimated(),
         "If parametrized was called, the pipeline should be parametrized.")
Exemplo n.º 19
0
 def test_set_element(self):
     """Replacing a pipeline element must invalidate the pipeline and lead
     to different output after re-parametrization."""
     reader = api.source(self.traj_files, top=self.pdb_file)
     original_stage = api.pca()
     pipe = api.pipeline([reader, original_stage])
     self.assertTrue(pipe._is_estimated())
     original_output = original_stage.get_output()

     # swap the pca stage (index 1) for tica
     replacement_stage = api.tica(lag=self.generated_lag)
     pipe.set_element(1, replacement_stage)
     self.assertFalse(
         pipe._is_estimated(),
         "After replacing an element, the pipeline should not be parametrized."
     )

     pipe.parametrize()
     replacement_output = replacement_stage.get_output()
     # the outputs differ only if the replacement actually happened
     outputs_identical = np.array_equal(original_output[0],
                                        replacement_output[0])
     self.assertFalse(
         outputs_identical,
         "The output should not be the same when the method got replaced.")
    def test_covariances_and_eigenvalues(self):
        """Compare estimated covariance matrices against the analytical
        solution for several lag times and bound the eigenvalues by 1."""
        reader = FeatureReader(self.trajnames, self.temppdb, chunksize=10000)
        lag_times = (1, 11, 101, 1001, 2001)  # avoid cos(w*tau)==0
        for lag in lag_times:
            trans = api.tica(data=reader, dim=self.dim, lag=lag)
            log.info('number of trajectories reported by tica %d' %
                     trans.number_of_trajectories())
            log.info('tau = %d corresponds to a number of %f cycles' %
                     (lag, self.w * lag / (2.0 * np.pi)))

            # analytical solution for C_ij(lag) is
            # 0.5*A[i]*A[j]*cos(phi[i]-phi[j])*cos(w*lag)
            phase_difference = self.phi[:, np.newaxis] - self.phi
            ana_cov = 0.5 * self.A[:, np.newaxis] * self.A * np.cos(
                phase_difference)
            ana_cov_tau = ana_cov * np.cos(self.w * lag)

            cov_matches = np.allclose(ana_cov, trans.cov, atol=1.E-3)
            self.assertTrue(cov_matches)
            cov_tau_matches = np.allclose(ana_cov_tau, trans.cov_tau,
                                          atol=1.E-3)
            self.assertTrue(cov_tau_matches)

            log.info('max. eigenvalue: %f' % np.max(trans.eigenvalues))
            self.assertTrue(np.all(trans.eigenvalues <= 1.0))
Exemplo n.º 21
0
    def testChunksizeResultsTica(self):
        """Mean and covariance must not depend on the chunksize of the data source."""
        chunk = 40
        lag = 100
        np.random.seed(0)
        samples = np.random.randn(23000, 3)

        # first pass: data source with its default chunksize
        source_default = DataInMemory(samples)
        first = api.tica(data=source_default, lag=lag, dim=1)
        # copies, so the reference values are detached from the estimator
        reference_cov = first.cov.copy()
        reference_mean = first.mean.copy()

        # second pass: same data, explicit chunksize
        source_chunked = DataInMemory(samples)
        source_chunked.chunksize = chunk
        second = tica(data=source_chunked, lag=lag, dim=1)

        np.testing.assert_allclose(second.mean, reference_mean)
        np.testing.assert_allclose(second.cov, reference_cov)
Exemplo n.º 22
0
    def test_transfomer_random_access(self):
        """Every random-access view of the estimator must return the same
        values as the corresponding slices of `get_output()`, both with and
        without `in_memory`."""
        for in_memory in (True, False):
            for reader_index in range(2):
                reader = self._get_reader_instance(reader_index)

                estimator = coor.tica(reader, dim=3)
                estimator.in_memory = in_memory
                out = estimator.get_output()

                # linear random access
                linear = np.squeeze(estimator.ra_linear[0:2, 0])
                np.testing.assert_array_equal(linear, out[0][0:2, 0])

                # linear itraj random access
                itraj_linear = np.squeeze(estimator.ra_itraj_linear[0, :12, 0])
                np.testing.assert_array_equal(itraj_linear, out[0][:12, 0])

                # jagged random access
                jagged = estimator.ra_itraj_jagged[:, ::-3, 0]
                for traj_index, frames in enumerate(jagged):
                    np.testing.assert_array_equal(frames,
                                                  out[traj_index][::-3, 0])

                # cuboid random access
                cube = estimator.ra_itraj_cuboid[:, 0, 0]
                for traj_index in range(3):
                    np.testing.assert_array_equal(cube[traj_index],
                                                  out[traj_index][0, 0])
Exemplo n.º 23
0
 def test_with_skip(self):
     """Construct a TICA estimator on random data with `skip=1`.

     NOTE(review): no assertion is visible here, so this appears to be a
     smoke test that only checks construction does not raise -- confirm
     against the complete file.
     """
     # uniform random samples of shape (100, 10)
     data = np.random.random((100, 10))
     tica_obj = api.tica(data, lag=10, dim=1, skip=1)