def test_reordering_mols_produces_same_fprints(self):
    """Check that per-molecule fingerprints do not depend on processing order.

    Up to five molecules are fingerprinted (conformer 0 only), the list is
    shuffled, a fresh ``Fingerprinter`` is built with the same settings and
    the molecules are fingerprinted again.  The two ``{mol: fingerprint}``
    mappings must compare equal.
    """
    import os
    import random

    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    # glob gives no ordering guarantee; sorting pins which five files are used.
    rand_sdf_files = sorted(glob.glob(os.path.join(RAND_SDF_DIR, "*.sdf*")))
    # Without this guard two empty dicts would compare equal and the test
    # would pass without fingerprinting anything.
    self.assertTrue(rand_sdf_files, "no SDF files found in RAND_SDF_DIR")
    mols = list(map(mol_from_sdf, rand_sdf_files[:5]))
    level = 5

    fpr = fprinter.Fingerprinter(
        level=level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )
    fprints1 = {}
    for mol in mols:
        fpr.run(conf=0, mol=mol)
        fprints1[mol] = fpr.get_fingerprint_at_level(level)

    random.shuffle(mols)

    # A new fingerprinter is created for the second pass, as in the first.
    fpr = fprinter.Fingerprinter(
        level=level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )
    fprints2 = {}
    for mol in mols:
        fpr.run(conf=0, mol=mol)
        fprints2[mol] = fpr.get_fingerprint_at_level(level)

    self.assertEqual(fprints1, fprints2)
def test_remove_dupe_substructs_makes_same_substruts_diff_shells(self):
    """Compare shells produced with and without duplicate-substructure removal.

    The first conformer of the planar test molecule is fingerprinted twice,
    once with ``remove_duplicate_substructs=True`` and once with ``False``.
    The set of ``substruct`` values must be the same in both runs while the
    sets of shells themselves must differ.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    level = 2
    conf = mol.GetConformers()[0]

    def collect(remove_dupes):
        # Run one fingerprinter and return (shells, substructs) taken from
        # the shells stored at the fingerprinter's current level.
        printer = fprinter.Fingerprinter(
            level=level,
            bits=1024,
            stereo=True,
            radius_multiplier=1.718,
            remove_duplicate_substructs=remove_dupes,
        )
        printer.run(conf, mol)
        shells = set(printer.level_shells[printer.current_level])
        substructs = {shell.substruct for shell in shells}
        return shells, substructs

    shells_no_dupes, substructs_no_dupes = collect(True)
    shells_with_dupes, substructs_with_dupes = collect(False)

    self.assertEqual(substructs_no_dupes, substructs_with_dupes)
    self.assertNotEqual(shells_no_dupes, shells_with_dupes)
def test_reordering_conformers_produces_same_fprints(self):
    """Check that per-conformer fingerprints do not depend on processing order.

    One ``Fingerprinter`` is reused for two passes over the conformer ids of
    a single molecule: first in their original order, then shuffled.  The
    two ``{conf_id: fingerprint}`` mappings must compare equal.
    """
    import random

    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    sdf_paths = glob.glob(os.path.join(RAND_SDF_DIR, "*.sdf*"))
    mol = mol_from_sdf(sdf_paths[0])
    level = 5
    fpr = fprinter.Fingerprinter(
        level=level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )

    def fingerprint_all(conf_ids):
        # Fingerprint each conformer id in the given order with the shared
        # fingerprinter and return the results keyed by conformer id.
        by_id = {}
        for cid in conf_ids:
            fpr.run(cid, mol)
            by_id[cid] = fpr.get_fingerprint_at_level(level)
        return by_id

    original_order = [conformer.GetId() for conformer in mol.GetConformers()]
    fprints_original = fingerprint_all(original_order)

    shuffled_order = list(original_order)
    random.shuffle(shuffled_order)
    fprints_shuffled = fingerprint_all(shuffled_order)

    self.assertEqual(fprints_original, fprints_shuffled)
def test_quick(self):
    """Smoke test: run a level-5 fingerprinter on the planar test molecule.

    Nothing is asserted; the test only requires that ``run`` completes.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    planar_mol = mol_from_sdf(PLANAR_SDF_FILE)
    first_conf = planar_mol.GetConformers()[0]
    settings = {
        "level": 5,
        "bits": 1024,
        "stereo": True,
        "radius_multiplier": 1.718,
    }
    printer = fprinter.Fingerprinter(**settings)
    printer.run(first_conf, planar_mol)
def test_initial_identifiers_assigned_correctly(self):
    """Check the level-0 fingerprint indices of the planar test molecule.

    With ``bits=1024`` and ``stereo=True`` the set of indices of the level-0
    fingerprint must equal the hard-coded reference set below.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    planar_mol = mol_from_sdf(PLANAR_SDF_FILE)
    first_conf = planar_mol.GetConformers()[0]

    printer = fprinter.Fingerprinter(
        level=0,
        bits=1024,
        stereo=True,
        radius_multiplier=1.718,
    )
    printer.run(first_conf, planar_mol)
    observed = set(printer.get_fingerprint_at_level(0).indices)

    expected = {48, 124, 185, 484, 617, 674}
    self.assertEqual(observed, expected)
def test_stereoisomers_produce_equal_fingerprints_nonstereo(self):
    """Check that two enantiomer files fingerprint equally with stereo off.

    A single ``Fingerprinter`` built with ``stereo=False`` is run on
    conformer 0 of each molecule in turn; the resulting level-5 fingerprints
    must compare equal.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    level = 5
    enantiomers = (mol_from_sdf(ENANT1_SDF_FILE), mol_from_sdf(ENANT2_SDF_FILE))
    printer = fprinter.Fingerprinter(
        level=level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )

    fingerprints = []
    for enantiomer in enantiomers:
        printer.run(conf=0, mol=enantiomer)
        fingerprints.append(printer.get_fingerprint_at_level(level))

    first_fp, second_fp = fingerprints
    self.assertEqual(first_fp, second_fp)
def test_runs_without_exception_on_random_mols(self):
    """Smoke test: fingerprint ten randomly drawn SDF files.

    Ten indices are drawn with ``np.random.randint`` (so a file may be
    picked more than once).  For each pick the first conformer is
    fingerprinted at level 5; nothing is asserted beyond the calls
    completing.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    available = glob.glob(RAND_SDF_DIR + "/*.sdf*")
    picks = np.random.randint(len(available), size=10)
    chosen_files = [available[idx] for idx in picks]

    level = 5
    for path in chosen_files:
        mol = mol_from_sdf(path)
        first_conf = mol.GetConformers()[0]
        printer = fprinter.Fingerprinter(
            level=level,
            bits=1024,
            stereo=True,
            radius_multiplier=1.718,
        )
        printer.run(first_conf, mol)
        printer.get_fingerprint_at_level(level)
def generate_e3fp_fingerprint(sdf_file):
    """Generate a fingerprint for the first record of an SDF file.

    Args:
        sdf_file: Path of the SDF file to read.  It is also stored as the
            ``name`` of the returned fingerprint.

    Returns:
        The object returned by ``Fingerprinter.get_fingerprint_at_level()``,
        with its ``name`` attribute set to ``sdf_file``.

    Raises:
        ValueError: If the first record of the file could not be parsed
            into a molecule.
    """
    # Lazy %-style arguments: the message is only formatted if DEBUG is on.
    logging.debug("Generating fingerprint for file %s", sdf_file)

    # Load molecule from file using rdkit
    molecule = Chem.SDMolSupplier(sdf_file, sanitize=False)[0]
    if molecule is None:
        # The supplier hands back None for a record it cannot parse; fail
        # with a clear message instead of an AttributeError on None below.
        raise ValueError(f"Could not parse a molecule from {sdf_file!r}")

    # Calculate necessary properties (sanitization was skipped on load)
    molecule.UpdatePropertyCache(strict=False)

    # Set up fingerprinter and generate fingerprint for this molecule
    fingerprinter = fprinter.Fingerprinter(bits=1048576)
    fingerprinter.run(mol=molecule)
    result = fingerprinter.get_fingerprint_at_level()

    # Set name of the fingerprint to the sdf file name
    result.name = sdf_file
    return result
def test_repeated_runs_produce_same_results(self):
    """Check that five independent runs yield the same shell identifiers.

    Each run builds a fresh ``Fingerprinter`` and fingerprints the first
    conformer of the planar test molecule.  The sorted identifiers of the
    level-2 shells from the first run serve as the reference for the
    remaining four.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    level = 2
    conf = mol.GetConformers()[0]

    def sorted_identifiers():
        # One complete run with a new fingerprinter instance.
        printer = fprinter.Fingerprinter(
            level=level,
            stereo=True,
            radius_multiplier=1.718,
        )
        printer.run(conf, mol)
        return sorted(shell.identifier for shell in printer.level_shells[level])

    reference = sorted_identifiers()
    for _ in range(4):
        self.assertEqual(sorted_identifiers(), reference)
def test_fingerprint_is_transform_invariant(self):
    """Check that the fingerprint is unchanged when the coordinates are transformed.

    The original atom coordinates of the first conformer are read once.  In
    each of five rounds a random transform matrix is built and applied to
    those original coordinates, the result is written back into the
    conformer, and the conformer is fingerprinted.  Every fingerprint must
    equal the one from the first round.
    """
    from e3fp.conformer.util import mol_from_sdf
    from e3fp.fingerprint import fprinter
    from e3fp.fingerprint.array_ops import (
        make_transform_matrix,
        transform_array,
    )

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    level = 5
    conf = mol.GetConformers()[0]
    ref_fp = None
    atom_ids = [x.GetIdx() for x in mol.GetAtoms()]
    # Builtin ``float`` replaces ``np.float``, an alias for it that was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    coords = np.array(
        list(map(conf.GetAtomPosition, atom_ids)), dtype=float
    )

    for _ in range(5):
        rand_y = np.random.uniform(size=3)
        rand_trans = np.random.uniform(size=3) * 100
        # NOTE(review): the variable names suggest a translation matrix and
        # a rotation matrix; confirm against make_transform_matrix.
        trans_mat = make_transform_matrix(rand_trans)
        rot_mat = make_transform_matrix(np.zeros(3), rand_y)
        transform_mat = np.dot(trans_mat, rot_mat)
        new_coords = transform_array(transform_mat, coords)

        # Sanity check: the transform must actually have moved the atoms.
        with self.assertRaises(AssertionError):
            np.testing.assert_almost_equal(new_coords, coords)

        for atom_id, new_coord in zip(atom_ids, new_coords):
            conf.SetAtomPosition(atom_id, new_coord)

        # Sanity check: the conformer now holds the transformed coordinates.
        test_coords = np.array(
            list(map(conf.GetAtomPosition, atom_ids)), dtype=float
        )
        np.testing.assert_almost_equal(test_coords, new_coords)

        fpr = fprinter.Fingerprinter(
            level=level, stereo=True, radius_multiplier=1.718
        )
        fpr.run(conf, mol)
        fp = fpr.get_fingerprint_at_level(level)
        if ref_fp is None:
            ref_fp = fp
        else:
            self.assertEqual(fp, ref_fp)