def test_add_element(self):
    # start with an empty pipeline without auto-parametrization
    p = api.pipeline([], run=False)

    # add a reader
    reader = api.source(self.traj_files, top=self.pdb_file)
    p.add_element(reader)
    p.parametrize()
    # get the result immediately
    out1 = reader.get_output()

    # add a kmeans clustering
    kmeans = api.cluster_kmeans(k=15)
    p.add_element(kmeans)
    p.parametrize()
    # get the result immediately
    kmeans1 = kmeans.get_output()
    # get the reader output again
    out2 = reader.get_output()

    p.add_element(api.cluster_kmeans(k=2))
    p.parametrize()
    # get the kmeans output again
    kmeans2 = kmeans.get_output()

    # check that add_element does not change the intermediate results
    np.testing.assert_array_equal(out1[0], out2[0])
    np.testing.assert_array_equal(out1[1], out2[1])
    np.testing.assert_array_equal(kmeans1[0], kmeans2[0])
    np.testing.assert_array_equal(kmeans1[1], kmeans2[1])
def test_np_reader_in_pipeline(self):
    with TemporaryDirectory() as td:
        file_name = os.path.join(td, "test.npy")
        data = np.random.random((100, 3))
        np.save(file_name, data)
        reader = api.source(file_name)
        p = api.pipeline(reader, run=False, stride=2, chunksize=5)
        p.parametrize()
def test_chunksize(self):
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)
    chunksize = 1001
    chain = [
        reader_xtc,
        api.tica(),
        api.cluster_mini_batch_kmeans(batch_size=0.3, k=3),
    ]
    p = api.pipeline(chain, chunksize=chunksize, run=False)
    assert p.chunksize == chunksize
    for e in p._chain:
        assert e.chunksize == chunksize
def test_no_transform(self):
    # cluster directly on the reader output, without an intermediate transform
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)
    api.pipeline([reader_xtc, api.cluster_kmeans(k=10)])._chain[-1].get_output()
    api.pipeline([reader_xtc, api.cluster_regspace(dmin=10)])._chain[-1].get_output()
    api.pipeline([reader_xtc, api.cluster_uniform_time()])._chain[-1].get_output()
def test_no_cluster(self):
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)

    # only a reader
    api.pipeline(reader_xtc)
    reader_xtc.get_output()

    # reader + pca / tica
    tica = api.tica()
    pca = api.pca()
    api.pipeline([reader_xtc, tica])._chain[-1].get_output()
    api.pipeline([reader_xtc, pca])._chain[-1].get_output()
def test_replace_data_source(self):
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)
    reader_gen = DataInMemory(data=self.generated_data)

    kmeans = api.cluster_kmeans(k=10)
    assert hasattr(kmeans, '_chunks')
    p = api.pipeline([reader_xtc, kmeans])
    out1 = kmeans.get_output()

    # replace the data source and re-parametrize
    p.set_element(0, reader_gen)
    assert hasattr(kmeans, '_chunks')
    p.parametrize()
    out2 = kmeans.get_output()

    self.assertFalse(
        np.array_equal(out1, out2),
        "The data source changed, so the resulting clusters should differ.")
def test_is_parametrized(self):
    # construct a pipeline with all available transformers
    p = api.pipeline([
        api.source(self.traj_files, top=self.pdb_file),
        api.tica(),
        api.pca(),
        api.cluster_kmeans(k=50),
        api.cluster_regspace(dmin=50),
        api.cluster_uniform_time(k=20),
    ], run=False)
    self.assertFalse(
        p._is_estimated(),
        "With run=False, the pipeline should not be parametrized.")
    p.parametrize()
    self.assertTrue(
        p._is_estimated(),
        "After parametrize() was called, the pipeline should be parametrized.")
def test_set_element(self):
    reader = api.source(self.traj_files, top=self.pdb_file)
    pca = api.pca()
    p = api.pipeline([reader, pca])
    self.assertTrue(p._is_estimated())
    pca_out = pca.get_output()

    tica = api.tica(lag=self.generated_lag)
    # replace pca with tica
    p.set_element(1, tica)
    self.assertFalse(
        p._is_estimated(),
        "After replacing an element, the pipeline should not be parametrized.")
    p.parametrize()
    tica_out = tica.get_output()

    # check that the replacement actually happened
    self.assertFalse(
        np.array_equal(pca_out[0], tica_out[0]),
        "The output should differ after the method was replaced.")