示例#1
0
 def test_output_descriptor_duplicates(self):
     """Reference set must produce an empty descriptor-duplicates result."""
     self.defineConfig()
     reference_frame = tuner.helper.read_csv(
         "tests/structure_preparation_test/reference_set.csv"
     )
     smiles_column = reference_frame['smiles']
     fingerprints = tuner.run_fingerprint(smiles_column, 1)
     processed = tuner.output_processed_descriptors(fingerprints, reference_frame)
     duplicates = tuner.output_descriptor_duplicates(processed)
     duplicate_count = len(duplicates)
     self.assertEqual(duplicate_count, 0)
示例#2
0
 def test_output_descriptor_duplicates_ref_file_ecfp1(self):
     """Check the duplicate count for the duplicates reference file.

     Reads one SMILES string per line from
     ``self.referenceFilePathDuplicates``, runs the fingerprint /
     processed-descriptor pipeline with the ``fp=1`` configuration
     (described by the original author as a "fuzzier" fingerprint), and
     asserts that ``tuner.output_descriptor_duplicates`` returns 19 entries.
     """
     self.defineConfig(fp=1)
     # Only the read needs the file handle; release it before the
     # fingerprint computation instead of keeping it open throughout.
     with open(self.referenceFilePathDuplicates, "r") as h:
         smiles = [line.strip() for line in h]
     structure_data = pd.DataFrame(smiles, columns=["smiles"])

     ecfp = tuner.run_fingerprint(structure_data['smiles'], 1)
     df_processed_desc = tuner.output_processed_descriptors(ecfp, structure_data)
     structure_data_duplicates = tuner.output_descriptor_duplicates(df_processed_desc)
     self.assertEqual(len(structure_data_duplicates), 19)
示例#3
0
    def test_lsh_folding(self):
        """Compare LSH folding outputs against pickled reference results.

        Reads one SMILES string per line from
        ``self.referenceFilePathDuplicates``, fingerprints them, and checks
        that both ``LSHFolding.calc_highest_entropy_bits`` and
        ``LSHFolding.run_lsh_calculation`` reproduce the objects stored in
        ``unit_test/output/``.

        To regenerate the reference pickles after an intentional change,
        call ``.to_pickle(<same path>)`` on the freshly computed results.
        """
        self.defineConfig()
        # Only the read needs the file handle; release it before the
        # fingerprint and LSH computations.
        with open(self.referenceFilePathDuplicates, "r") as h:
            smiles = [line.strip() for line in h]
        structure_data = pd.DataFrame(smiles, columns=["smiles"])

        ecfp = tuner.run_fingerprint(structure_data['smiles'], 1)
        lsh_folding = tuner.LSHFolding()

        df_high_entropy_bits = lsh_folding.calc_highest_entropy_bits(ecfp)
        df_high_entropy_bits_ref = pd.read_pickle("unit_test/output/df_high_entropy_bits.pkl")

        df_folds = lsh_folding.run_lsh_calculation(ecfp)
        df_folds_ref = pd.read_pickle("unit_test/output/df_folds.pkl")

        # Assert each comparison separately so a failure names the result
        # that diverged instead of reporting a bare "False != True".
        self.assertTrue(
            df_high_entropy_bits.equals(df_high_entropy_bits_ref),
            "highest-entropy bits differ from the pickled reference",
        )
        self.assertTrue(
            df_folds.equals(df_folds_ref),
            "LSH folds differ from the pickled reference",
        )