def test_add_element(self):
    """Adding elements to a live pipeline must not change earlier results."""
    # Build an empty pipeline with auto-parametrization disabled.
    pipe = api.pipeline([], run=False)

    # Stage 1: a trajectory reader.
    reader = api.source(self.traj_files, top=self.pdb_file)
    pipe.add_element(reader)
    pipe.parametrize()
    reader_out_before = reader.get_output()

    # Stage 2: a k-means clustering appended afterwards.
    kmeans = api.cluster_kmeans(k=15)
    pipe.add_element(kmeans)
    pipe.parametrize()
    kmeans_out_before = kmeans.get_output()
    reader_out_after = reader.get_output()

    # Stage 3: yet another clustering on top.
    pipe.add_element(api.cluster_kmeans(k=2))
    pipe.parametrize()
    kmeans_out_after = kmeans.get_output()

    # Intermediate outputs must be untouched by later add_element calls.
    for before, after in ((reader_out_before, reader_out_after),
                          (kmeans_out_before, kmeans_out_after)):
        np.testing.assert_array_equal(before[0], after[0])
        np.testing.assert_array_equal(before[1], after[1])
def test_save_dtrajs(self):
    """save_dtrajs must write discrete-trajectory files into the target dir.

    Fix: the original test listed the output directory but asserted
    nothing, so it could never fail even if no files were written.
    """
    reader = source(self.trajfiles, top=self.topfile)
    cluster = cluster_kmeans(k=2)
    d = Discretizer(reader, cluster=cluster)
    d.parametrize()
    d.save_dtrajs(output_dir=self.dest_dir)
    dtrajs = os.listdir(self.dest_dir)
    # At least one dtraj file per input trajectory is expected; assert
    # the directory is non-empty so a silent no-op write is caught.
    self.assertTrue(len(dtrajs) > 0,
                    "save_dtrajs wrote no files to %s" % self.dest_dir)
def test_no_transform(self):
    """Clustering directly on reader output (no transform stage) must work."""
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)

    # k-means straight on the raw coordinates
    kmeans_chain = api.pipeline([reader_xtc, api.cluster_kmeans(k=10)])._chain
    kmeans_chain[-1].get_output()

    # regular-space clustering straight on the raw coordinates
    regspace_chain = api.pipeline([reader_xtc, api.cluster_regspace(dmin=10)])._chain
    regspace_chain[-1].get_output()

    # uniform-time clustering straight on the raw coordinates
    uniform_chain = api.pipeline([reader_xtc, api.cluster_uniform_time()])._chain
    uniform_chain[-1].get_output()
def test_transformer_iterator_random_access(self):
    """Strided iteration must yield exactly the random-access frame counts,
    independent of the chunk size."""
    kmeans = coor.cluster_kmeans(self.data, k=2)
    kmeans.in_memory = True

    for cs in range(0, 5):  # includes chunksize=0 (whole-trajectory chunks)
        kmeans.chunksize = cs
        # frames seen per trajectory index; fixture provides 3 trajectories
        ref_stride = {0: 0, 1: 0, 2: 0}
        it = kmeans.iterator(stride=self.stride)
        # each item is (trajectory index, data chunk)
        for item in it:
            ref_stride[item[0]] += len(item[1])
        for key in list(ref_stride.keys()):
            expected = len(it.ra_indices_for_traj(key))
            assert ref_stride[key] == expected, \
                "Expected to get exactly %s elements of trajectory %s, but got %s for chunksize=%s" \
                % (expected, key, ref_stride[key], cs)
def test_replace_data_source(self):
    """Swapping the pipeline's data source must change the clustering result.

    Fix: removed a leftover debug ``print(reader_gen)`` statement that
    polluted test output.
    """
    reader_xtc = api.source(self.traj_files, top=self.pdb_file)
    reader_gen = DataInMemory(data=self.generated_data)

    kmeans = api.cluster_kmeans(k=10)
    assert hasattr(kmeans, '_chunks')
    p = api.pipeline([reader_xtc, kmeans])
    out1 = kmeans.get_output()

    # replace source with the in-memory generated data
    p.set_element(0, reader_gen)
    assert hasattr(kmeans, '_chunks')
    p.parametrize()
    out2 = kmeans.get_output()

    self.assertFalse(
        np.array_equal(out1, out2),
        "Data source changed, so should the resulting clusters.")
def test_is_parametrized(self):
    """_is_estimated must be False before parametrize() and True after."""
    # Assemble a pipeline touching every transformer/clustering type,
    # deferring estimation via run=False.
    stages = [
        api.source(self.traj_files, top=self.pdb_file),
        api.tica(),
        api.pca(),
        api.cluster_kmeans(k=50),
        api.cluster_regspace(dmin=50),
        api.cluster_uniform_time(k=20),
    ]
    pipe = api.pipeline(stages, run=False)

    self.assertFalse(
        pipe._is_estimated(),
        "If run=false, the pipeline should not be parametrized.")
    pipe.parametrize()
    self.assertTrue(
        pipe._is_estimated(),
        "If parametrized was called, the pipeline should be parametrized.")