def testChunksizeResultsTica(self):
    """Check that TICA's mean and covariance do not depend on chunk size.

    The same seeded random data set is fitted twice: once with the data
    source left at its default chunk size and once with ``chunksize`` set
    to 40. The resulting ``mu`` and ``cov`` must agree within the default
    tolerances of ``np.testing.assert_allclose``.
    """
    chunk = 40
    lag = 100
    np.random.seed(0)
    X = np.random.randn(23000, 3)

    def fit(chunksize=None):
        # Build a fresh data source and estimator for every run so that no
        # state is shared between the two fits.
        source = DataInMemory(X)
        if chunksize is not None:
            source.chunksize = chunksize
        estimator = TICA(lag=lag, output_dimension=1)
        estimator.data_producer = source
        estimator.parametrize()
        return estimator

    # Reference run with the data source's default chunk size.
    reference = fit()
    expected_cov = reference.cov.copy()
    expected_mean = reference.mu.copy()

    # Second run with an explicit, small chunk size.
    chunked = fit(chunksize=chunk)
    np.testing.assert_allclose(chunked.mu, expected_mean)
    np.testing.assert_allclose(chunked.cov, expected_cov)
def __init__(self, data, lag, units='frames'):
    """Fit a TICA model on ``data`` and store it in ``self.tic``.

    Parameters
    ----------
    data
        Either a ``Metric`` (projections are produced by
        ``_projectionGenerator`` and fed to the estimator incrementally via
        ``partial_fit``) or an object exposing ``dat`` and ``fstep`` (fitted
        in a single call).
    lag
        Lag time, expressed in ``units``.
    units : str
        Unit of ``lag``. Only ``'frames'`` is accepted when ``data`` is a
        ``Metric``.

    Raises
    ------
    RuntimeError
        If ``data`` is a ``Metric`` and ``units`` is not ``'frames'``, or if
        the converted lag is 0 frames on the in-memory path.
    """
    from pyemma.coordinates import tica

    self.data = data

    if not isinstance(data, Metric):
        # In-memory path: convert the lag to frames and fit in one call.
        lag_frames = unitconvert(units, 'frames', lag, data.fstep)
        if lag_frames == 0:
            raise RuntimeError(
                'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
            )
        self.tic = tica(data.dat.tolist(), lag=lag_frames)
        return

    # Delayed-projection path: the lag is used as given, so it must already
    # be expressed in frames.
    if units != 'frames':
        raise RuntimeError(
            'Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.'
        )

    from pyemma.coordinates.transform.tica import TICA

    self.tic = TICA(lag)
    bar = ProgressBar(len(data.simulations))
    for batch in _projectionGenerator(data, _getNcpus()):
        for result in batch:
            # Only the first element of each result is fed to the estimator
            # (presumably the projected coordinates; confirm against
            # _projectionGenerator).
            self.tic.partial_fit(result[0])
        # Advance once per batch by the number of results it contained.
        bar.progress(len(batch))
    bar.stop()
def test(self):
    """Smoke test: fit TICA on seeded random data and project that data.

    Nothing is asserted about the output; the test only verifies that
    parametrizing and mapping run without raising.
    """
    np.random.seed(0)
    tica = TICA(lag=50, output_dimension=1)
    data = np.random.randn(100, 10)

    tica.data_producer = DataInMemory(data)
    tica.parametrize()

    # The projection is computed but deliberately not inspected.
    tica.map(data)
def test_singular_zeros(self):
    """TICA on data with an all-zero column must stay real-valued.

    The input has two random columns plus one column of zeros. After
    fitting, both ``eigenvectors`` and ``eigenvalues`` must have dtype
    ``float64``.
    """
    tica = TICA(lag=1, output_dimension=1)

    # Two random columns followed by a single column of zeros.
    random_part = np.random.randn(100, 2)
    zero_column = np.zeros((100, 1))
    X = np.hstack((random_part, zero_column))

    tica.data_producer = DataInMemory(X)
    tica.parametrize()

    for fitted in (tica.eigenvectors, tica.eigenvalues):
        assert fitted.dtype == np.float64
def test_duplicated_data(self):
    """TICA on data with a duplicated column must stay real-valued.

    The input has two random columns plus a copy of the first one. After
    fitting, both ``eigenvectors`` and ``eigenvalues`` must have dtype
    ``float64``.
    """
    tica = TICA(lag=1, output_dimension=1)

    # Two random columns, then the first column appended once more.
    base = np.random.randn(100, 2)
    first_column = base[:, :1]  # keeps the 2-D (100, 1) shape
    X = np.hstack((base, first_column))

    tica.data_producer = DataInMemory(X)
    tica.parametrize()

    for fitted in (tica.eigenvectors, tica.eigenvalues):
        assert fitted.dtype == np.float64
def __init__(self, data, lag, units='frames'):
    """Fit a TICA model on ``data`` and store it in ``self.tic``.

    Parameters
    ----------
    data
        Either a ``Metric`` (each simulation is projected with
        ``data._projectSingle`` and fed to the estimator incrementally via
        ``partial_fit``) or an object exposing ``dat`` (fitted in a single
        call). In both cases ``data.fstep`` is used for unit conversion.
    lag
        Lag time, expressed in ``units``.
    units : str
        Unit of ``lag``; it is converted to frames before use.

    Raises
    ------
    RuntimeError
        If the lag converts to 0 frames.
    """
    from pyemma.coordinates import tica

    self.data = data

    # Convert once, up front, so that BOTH branches honour `units`.
    # Previously the in-memory branch handed the raw `lag` to tica() and
    # silently treated it as a frame count regardless of `units`.
    lag = unitconvert(units, 'frames', lag, data.fstep)
    if lag == 0:
        raise RuntimeError(
            'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
        )

    if isinstance(data, Metric):
        from pyemma.coordinates.transform.tica import TICA

        self.tic = TICA(lag)
        n_sims = len(data.simulations)
        p = ProgressBar(n_sims)
        for i in range(n_sims):
            d, _, _ = data._projectSingle(i)
            # Skip simulations with no projection or fewer frames than the
            # lag; the original comment marks this as a workaround for a
            # pyemma bug, to be removed eventually.
            unusable = d is None or d.shape[0] < lag
            if not unusable:
                self.tic.partial_fit(d)
            # Advance for every simulation, skipped or not, so the bar
            # reaches its total.
            p.progress()
        p.stop()
    else:
        self.tic = tica(data.dat.tolist(), lag=lag)