Exemplo n.º 1
0
def benchmark_fprinting(smiles, sdf_file, name, fprint_params=None):
    """Time 2D (Morgan) versus 3D fingerprinting of the molecule in an SDF file.

    Args:
        smiles: Unused by this function; kept so existing callers still work.
        sdf_file: SDF file handed to ``mol_from_sdf``.
        name: Unused by this function; kept so existing callers still work.
        fprint_params (dict, optional): Options forwarded to
            ``fprints_from_mol``. Its ``'first'`` entry, when present, is
            also passed to ``mol_from_sdf`` as ``conf_num``. ``None`` (the
            default) means no options.

    Returns:
        tuple: ``(fprint_2d_time, fprint_3d_time, num_heavy, num_confs,
        num_rot)``; both times are elapsed seconds.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    params = {} if fprint_params is None else fprint_params

    mol = mol_from_sdf(sdf_file, conf_num=params.get('first', None))
    num_confs = mol.GetNumConformers()
    num_rot = AllChem.CalcNumRotatableBonds(mol)
    num_heavy = mol.GetNumHeavyAtoms()

    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 2, 1024)
    fprint_2d_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    fprints_from_mol(mol, fprint_params=params, save=False)
    fprint_3d_time = time.perf_counter() - start_time

    return (fprint_2d_time, fprint_3d_time, num_heavy, num_confs, num_rot)
Exemplo n.º 2
0
def native_tuples_from_mol(mol, fprint_params=None, save=False):
    """Fingerprint molecule and convert to native encoding.

    Args:
        mol: Molecule object; must answer ``HasProp("_Name")`` truthily.
        fprint_params (dict, optional): Options forwarded to
            ``fprints_from_mol``. ``None`` (the default) means no options.
        save (bool): Forwarded unchanged to ``fprints_from_mol``.

    Returns:
        list: One ``fprint_to_native_tuple`` result per fingerprint returned
        by ``fprints_from_mol``, in the same order.

    Raises:
        ValueError: If ``mol`` has no ``'_Name'`` property.
    """
    if not mol.HasProp("_Name"):
        # NOTE: the message mentions `name`, which is not a parameter of this
        # function; the text is kept unchanged for backward compatibility.
        raise ValueError(
            "mol must have a '_Name' property or `name` must be provided")

    # A fresh dict per call avoids sharing one mutable default between calls.
    params = {} if fprint_params is None else fprint_params

    fprints_list = fprints_from_mol(mol, fprint_params=params, save=save)
    return [fprint_to_native_tuple(fprint) for fprint in fprints_list]
Exemplo n.º 3
0
def get_e3fp(mol, bits, smiles=None):
    """
    Compute an E3FP fingerprint for an RDKit mol as a flat integer array.

    Side effect: the mol's "_Name" property is overwritten with `smiles`.
    Args:
        mol (rdkit.Chem.rdchem.Mol): RDKit mol object
        bits (int): Number of bits in fingerprint
        smiles (str, optional): Value stored as the mol's "_Name"; when None
            it is generated with Chem.MolToSmiles(mol)
    Returns:
        fp (np.array): the first fingerprint returned by fprints_from_mol,
            converted to a dense int array and flattened to 1-D
    """
    mol_name = Chem.MolToSmiles(mol) if smiles is None else smiles
    mol.SetProp("_Name", mol_name)

    all_fprints = fprints_from_mol(mol, fprint_params={"bits": bits})
    dense = all_fprints[0].to_vector().toarray()
    return dense.astype(int).reshape(-1)
Exemplo n.º 4
0
def gen_mol_blocks_from_confs(mols, num_confs, ref, ref_mol_block):
    """Embed conformers for each molecule and score them against a reference.

    Each molecule is copied, hydrogens are added, up to ``num_confs``
    conformers are embedded with a fixed random seed, and every conformer is
    fingerprinted with ``fprints_from_mol``. Each fingerprint is folded,
    converted to an RDKit fingerprint and compared with ``ref`` using
    ``DataStructs.FingerprintSimilarity``.

    Args:
        mols: Iterable of RDKit molecules, each carrying a ``'_Name'``
            property. The inputs are deep-copied, never modified.
        num_confs (int): Number of conformers requested per molecule; also
            passed to ``fprints_from_mol`` as ``'first'``.
        ref: Reference fingerprint accepted by
            ``DataStructs.FingerprintSimilarity`` (presumably built the same
            way as the per-conformer ones, i.e. ``fp.fold().to_rdkit()``).
        ref_mol_block: Mol block of the reference molecule; copied verbatim
            into every output row.

    Returns:
        pandas.DataFrame: One row per fingerprinted conformer with columns
        ``name`` (``"<_Name>_confnum_<i>"``), ``mol_blocks``, ``fps`` (the
        similarity to ``ref``) and ``ref_mol_block``.
    """
    # Identical for every molecule, so build it once.
    fprint_params = {
        'bits': 4096,
        'first': num_confs,
        'radius_multiplier': 1.5,
        'rdkit_invariants': True
    }

    names = []
    mol_blocks = []
    similarities = []
    for mol in copy.deepcopy(mols):
        mol = AllChem.AddHs(mol)
        AllChem.EmbedMultipleConfs(mol,
                                   numConfs=num_confs,
                                   ignoreSmoothingFailures=True,
                                   pruneRmsThresh=-1.0,
                                   maxAttempts=10 * num_confs,
                                   randomSeed=0xf00d)
        fprints = fprints_from_mol(mol, fprint_params=fprint_params)
        sub_name = mol.GetProp('_Name')
        # Iterate over the fingerprints actually produced rather than
        # assuming exactly `num_confs` of them; index i is used as the
        # conformer id, as in the original pairing.
        for i, fprint in enumerate(fprints):
            names.append(f'{sub_name}_confnum_{i}')
            mol_blocks.append(Chem.MolToMolBlock(mol, confId=i))
            similarities.append(
                DataStructs.FingerprintSimilarity(ref,
                                                  fprint.fold().to_rdkit()))

    # The reference block is repeated per row. The previous code rebound the
    # `ref_mol_block` parameter to a list and appended that list to itself,
    # so the column never contained the reference block.
    return pd.DataFrame({
        'name': names,
        'mol_blocks': mol_blocks,
        'fps': similarities,
        'ref_mol_block': [ref_mol_block] * len(names),
    })
def get_e3fp_tc(mol):
    """Return tanimoto() of the first two fingerprints generated for `mol`.

    Fingerprints come from fprints_from_mol using the module-level
    FPRINT_PARAMS; at least two fingerprints must be returned.
    """
    conformer_fps = fprints_from_mol(mol, fprint_params=FPRINT_PARAMS)
    first_fp = conformer_fps[0]
    second_fp = conformer_fps[1]
    return tanimoto(first_fp, second_fp)
Exemplo n.º 6
0
# Driver script: fingerprint a reference molecule from an SDF file, turn its
# folded fingerprint into a numpy array, and hand that array plus the
# reference mol block to csv_chunk_extractor.

# NOTE(review): `/` is true division, so chunksize is the float 104.8576,
# not an int. If csv_chunk_extractor needs an integer size, `//` may have
# been intended; confirm against its implementation.
chunksize = 1048576 / 10000
chunks = 10

# Load every molecule of the SDF file into a list, keeping explicit hydrogens.
suppl = [
    m for m in AllChem.SDMolSupplier(
        '/Users/tom/code_test_repository/arrow_testing/cdk2.sdf',
        removeHs=False)
]
# The first molecule in the file serves as the reference.
ref_mol = suppl[0]
ref_mol_block = Chem.MolToMolBlock(ref_mol)
fprint_params = {
    'bits': 4096,
    'radius_multiplier': 1.5,
    'rdkit_invariants': True
}
# One fingerprint per entry returned by fprints_from_mol, each folded and
# converted to an RDKit fingerprint object.
fprints = fprints_from_mol(ref_mol, fprint_params=fprint_params)
binfp = [fp.fold().to_rdkit() for fp in fprints]
# Output buffer for the conversion below; starts empty with int8 dtype.
arr = np.zeros((0, ), dtype=np.int8)

# Writes the first reference fingerprint into `arr` (presumably resizing it
# to the fingerprint length; see the RDKit documentation).
DataStructs.ConvertToNumpyArray(binfp[0], arr)

# ref_fprint = fprints_from_mol(ref_mol, fprint_params=fprint_params)
# ref = ref_fprint[0].fold().to_rdkit()
# ref_smiles = 'CCC1=CC(Cl)=C(OC)C(C(NC[C@H]2C[C@H](OC)CN2CC)=O)=C1O'
# Column names handed to csv_chunk_extractor.
include_columns = ['SMILES', 'Name']
table_list = csv_chunk_extractor(chunks, chunksize, include_columns, arr,
                                 ref_mol_block)
# ray is not initialised in this section; presumably csv_chunk_extractor or
# earlier code starts it. TODO confirm.
ray.shutdown()
print('finished alignment')
# table = Chem.MolFromMolBlock(mol_block_array[0])
# print(f' the mol from the array is not a mol true or false: {m_out_of_array is None}')