Exemplo n.º 1
0
def generate(smiles, verbose=False):
    """Compute a table of 17 descriptors for a sequence of SMILES strings.

    Each SMILES is parsed with ``Chem.MolFromSmiles`` and every function of
    ``Descriptors`` named in ``columnNames`` is evaluated on the result.

    Parameters
    ----------
    smiles : sequence of str
        SMILES strings; must support ``len()`` (used in the summary line).
    verbose : bool, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES (in input order) and one float column per
        descriptor. An empty input yields an empty frame that still carries
        all 17 columns.

    Notes
    -----
    NOTE(review): a SMILES that fails to parse presumably makes
    ``Chem.MolFromSmiles`` return ``None`` and the descriptor calls raise;
    that behaviour is left unchanged here -- confirm against callers.
    """
    # The column names double as the attribute names looked up on
    # ``Descriptors``, so the label and the computed value cannot drift apart.
    columnNames = [
        "MolWt", "MolLogP", "MolMR", "HeavyAtomCount", "NumHAcceptors",
        "NumHDonors", "NumHeteroatoms", "NumRotatableBonds",
        "NumValenceElectrons", "NumAromaticRings", "NumSaturatedRings",
        "NumAliphaticRings", "RingCount", "TPSA", "LabuteASA", "BalabanJ",
        "BertzCT"
    ]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        rows.append([getattr(Descriptors, name)(mol) for name in columnNames])

    # Build the matrix once instead of re-stacking per molecule. The explicit
    # reshape keeps the data 2-D for zero or one molecule, which the previous
    # incremental construction did not (it produced a 1-D array and the
    # DataFrame constructor rejected it).
    baseData = np.array(rows, dtype=float).reshape(-1, len(columnNames))
    descriptors = pd.DataFrame(data=baseData, columns=columnNames)

    print("Total descriptors generated: 17 x " + str(len(smiles)))
    return descriptors
Exemplo n.º 2
0
def get_descriptors(sml):
    """Return seven descriptors for one SMILES string.

    The values are, in order: MolLogP, MolMR, BalabanJ, NumHAcceptors,
    NumHDonors, NumValenceElectrons and TPSA, each computed by the function
    of the same name in ``Descriptors`` on the molecule parsed from ``sml``.

    Parameters
    ----------
    sml : str
        SMILES string handed to ``Chem.MolFromSmiles``.

    Returns
    -------
    list
        Seven values. If parsing or any descriptor call raises, a list of
        seven ``nan`` floats is returned instead (best-effort fallback).
    """
    try:
        m = Chem.MolFromSmiles(sml)
        return [
            Descriptors.MolLogP(m),
            Descriptors.MolMR(m),
            Descriptors.BalabanJ(m),
            Descriptors.NumHAcceptors(m),
            Descriptors.NumHDonors(m),
            Descriptors.NumValenceElectrons(m),
            Descriptors.TPSA(m),
        ]
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate. The built-in float replaces the
        # ``np.float`` alias, which newer NumPy releases no longer provide
        # (the fallback itself used to raise AttributeError there).
        return [float("nan")] * 7
Exemplo n.º 3
0
def compute_descriptors(mol, id_string):
    """
    Build one flat descriptor row for a molecule.

    The returned list starts with ``id_string``, continues with the value of
    every ``Descriptors`` function named below (evaluated on ``mol`` in the
    listed order) and ends with the items produced by
    ``genFP(mol, 3, 1024)`` -- presumably a Morgan fingerprint of radius 3
    and 1024 bits, judging by the argument names; confirm against ``genFP``.
    """
    # Names evaluated before the numbered surface-area families.
    leading_names = (
        # Property descriptors
        'MolWt', 'HeavyAtomMolWt', 'MolLogP', 'MolMR', 'TPSA',
        # Constitutional descriptor
        'FractionCSP3',
        # Atom counts
        'HeavyAtomCount', 'NHOHCount', 'NOCount', 'NumHAcceptors',
        'NumHDonors', 'NumHeteroatoms',
        # Bond counts
        'NumRotatableBonds',
        # Electronic descriptors
        'NumRadicalElectrons', 'NumValenceElectrons', 'MaxPartialCharge',
        'MinPartialCharge', 'MaxAbsPartialCharge', 'MinAbsPartialCharge',
        # Ring counts
        'NumAromaticRings', 'NumSaturatedRings', 'NumAliphaticRings',
        'NumAromaticCarbocycles', 'NumSaturatedCarbocycles',
        'NumAliphaticCarbocycles', 'NumAromaticHeterocycles',
        'NumSaturatedHeterocycles', 'NumAliphaticHeterocycles',
        # Functional-group counts
        'fr_Al_COO', 'fr_Al_OH', 'fr_Al_OH_noTert', 'fr_ArN', 'fr_Ar_COO',
        'fr_Ar_N', 'fr_Ar_NH', 'fr_Ar_OH', 'fr_COO', 'fr_COO2', 'fr_C_O',
        'fr_C_O_noCOO', 'fr_C_S', 'fr_HOCCN', 'fr_Imine', 'fr_NH0',
        'fr_NH1', 'fr_NH2', 'fr_N_O', 'fr_Ndealkylation1',
        'fr_Ndealkylation2', 'fr_Nhpyrrole', 'fr_SH', 'fr_aldehyde',
        'fr_alkyl_carbamate', 'fr_alkyl_halide', 'fr_allylic_oxid',
        'fr_amide', 'fr_amidine', 'fr_aniline', 'fr_aryl_methyl',
        'fr_azide', 'fr_azo', 'fr_barbitur', 'fr_benzene',
        'fr_benzodiazepine', 'fr_bicyclic', 'fr_diazo',
        'fr_dihydropyridine', 'fr_epoxide', 'fr_ester', 'fr_ether',
        'fr_furan', 'fr_guanido', 'fr_halogen', 'fr_hdrzine', 'fr_hdrzone',
        'fr_imidazole', 'fr_imide', 'fr_isocyan', 'fr_isothiocyan',
        'fr_ketone', 'fr_ketone_Topliss', 'fr_lactam', 'fr_lactone',
        'fr_methoxy', 'fr_morpholine', 'fr_nitrile', 'fr_nitro',
        'fr_nitro_arom', 'fr_nitro_arom_nonortho', 'fr_nitroso',
        'fr_oxazole', 'fr_oxime', 'fr_para_hydroxylation', 'fr_phenol',
        'fr_phenol_noOrthoHbond', 'fr_phos_acid', 'fr_phos_ester',
        'fr_piperdine', 'fr_piperzine', 'fr_priamide', 'fr_prisulfonamd',
        'fr_pyridine', 'fr_quatN', 'fr_sulfide', 'fr_sulfonamd',
        'fr_sulfone', 'fr_term_acetylene', 'fr_tetrazole', 'fr_thiazole',
        'fr_thiocyan', 'fr_thiophene', 'fr_unbrch_alkane', 'fr_urea',
        # MOE-type descriptors start with the Labute surface area
        'LabuteASA',
    )

    # Numbered MOE-type families: (name prefix, highest index). Each family
    # contributes prefix1 .. prefixN, in that order.
    numbered_families = (
        ('PEOE_VSA', 14),
        ('SMR_VSA', 10),
        ('SlogP_VSA', 12),
        ('EState_VSA', 11),
        ('VSA_EState', 10),
    )

    # Names evaluated after the numbered families.
    trailing_names = (
        # Topological descriptors
        'BalabanJ', 'BertzCT', 'HallKierAlpha', 'Ipc', 'Kappa1', 'Kappa2',
        'Kappa3',
        # Connectivity descriptors
        'Chi0', 'Chi1', 'Chi0n', 'Chi1n', 'Chi2n', 'Chi3n', 'Chi4n',
        'Chi0v', 'Chi1v', 'Chi2v', 'Chi3v', 'Chi4v',
        # Other properties
        'qed',
    )

    ordered_names = list(leading_names)
    for prefix, highest in numbered_families:
        for index in range(1, highest + 1):
            ordered_names.append(prefix + str(index))
    ordered_names.extend(trailing_names)

    row = [id_string]
    for name in ordered_names:
        descriptor_fn = getattr(Descriptors, name)
        row.append(descriptor_fn(mol))

    # Fingerprint entries go last.
    fp_radius = 3
    fp_size = 1024
    row.extend(genFP(mol, fp_radius, fp_size))

    return row
Exemplo n.º 4
0
 'fr_unbrch_alkane': (lambda x: des.fr_unbrch_alkane(x)),
 'fr_urea': (lambda x: des.fr_urea(x)),
 'MaxEStateIndex': (lambda x: des.MaxEStateIndex(x)),
 'MinEStateIndex': (lambda x: des.MinEStateIndex(x)),
 'MaxAbsEStateIndex': (lambda x: des.MaxAbsEStateIndex(x)),
 'MinAbsEStateIndex': (lambda x: des.MinAbsEStateIndex(x)),
 'NumValenceElectrons': (lambda x: des.NumValenceElectrons(x)),
 'NumRadicalElectrons': (lambda x: des.NumRadicalElectrons(x)),
 'MaxPartialCharge': (lambda x: des.MaxPartialCharge(x)),
 'MinPartialCharge': (lambda x: des.MinPartialCharge(x)),
 'MaxAbsPartialCharge': (lambda x: des.MaxAbsPartialCharge(x)),
 'MinAbsPartialCharge': (lambda x: des.MinAbsPartialCharge(x)),
 'FpDensityMorgan1': (lambda x: des.FpDensityMorgan1(x)),
 'FpDensityMorgan2': (lambda x: des.FpDensityMorgan2(x)),
 'FpDensityMorgan3': (lambda x: des.FpDensityMorgan3(x)),
 'BalabanJ': (lambda x: des.BalabanJ(x)),
 'BertzCT': (lambda x: des.BertzCT(x)),
 'Chi0': (lambda x: des.Chi0(x)),
 'Chi0n': (lambda x: des.Chi0n(x)),
 'Chi0v': (lambda x: des.Chi0v(x)),
 'Chi1': (lambda x: des.Chi1(x)),
 'Chi1n': (lambda x: des.Chi1n(x)),
 'Chi1v': (lambda x: des.Chi1v(x)),
 'Chi2n': (lambda x: des.Chi2n(x)),
 'Chi2v': (lambda x: des.Chi2v(x)),
 'Chi3n': (lambda x: des.Chi3n(x)),
 'Chi3v': (lambda x: des.Chi3v(x)),
 'Chi4n': (lambda x: des.Chi4n(x)),
 'Chi4v': (lambda x: des.Chi4v(x)),
 'HallKierAlpha': (lambda x: des.HallKierAlpha(x)),
 'Ipc': (lambda x: des.Ipc(x)),
Exemplo n.º 5
0
def rdkit_descriptors(smiles,
                      ndigits=6,
                      include_pc=True,
                      include_moe=False,
                      include_h_bond=False,
                      ch3_smiles=None,
                      barcode_seed=None,
                      vary_descriptors=None,
                      vary_significant=None):
    """
    Compute a dictionary of RDKit descriptors for one SMILES string.

    The molecule is parsed from ``smiles``, hydrogens are added and
    ``Chem.EmbedMolecule(mol, Chem.ETKDG())`` is called before any
    descriptor is evaluated.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.
    ndigits : int, optional, default=6
        Second argument of ``round`` for every rounded descriptor.
    include_pc : bool, optional, default=True
        Include partial charge descriptors (as defined in
        https://www.chemcomp.com/journal/descr.htm). These are calculated
        using Gasteiger charge assignments and VSA descriptors
        obtained from RDKit.
    include_moe : bool, optional, default=False
        Add the individual MOE-type VSA descriptors (``peoe-vsa*``,
        ``smr-vsa*``, ``slogP-vsa*``, ``estate-vsa*``, ``vsa-estate*``) to
        the result. They are computed either way because the partial charge
        descriptors are built from the ``peoe-vsa*`` values.
    include_h_bond : bool, optional, default=False
        Add ``hdonors`` / ``hacceptors`` counted on ``ch3_smiles``.
    ch3_smiles : str, optional
        SMILES used for the H-bond counts; required (asserted) when
        ``include_h_bond`` is true.
    barcode_seed, vary_descriptors, vary_significant : optional
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    dict
        Descriptor name -> value, inserted in the order computed.

    Raises
    ------
    AssertionError
        If ``include_h_bond`` is true and ``ch3_smiles`` is falsy.
    ValueError, ZeroDivisionError
        From the relative/fractional partial charge descriptors when the
        molecule has no positive or no negative Gasteiger charges, or when
        the summed PEOE VSA is zero.

    Notes
    -----
    ``'vsa-polar'`` holds the total polar VSA and ``'vsa-fpolar'`` the
    fractional polar VSA. Previously the fraction was stored under
    ``'vsa-polar'`` as well, silently overwriting the total.

    NOTE(review): the return value of ``Chem.EmbedMolecule`` is not checked;
    presumably the 3D shape descriptors fail if no conformer was generated
    -- confirm.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
    Chem.EmbedMolecule(mol, Chem.ETKDG())

    def rounded(value):
        # Single place that applies the caller's precision.
        return round(value, ndigits)

    descriptors = {}

    # Molecular weight, with and without hydrogens
    descriptors['molwt'] = rounded(Descriptors.ExactMolWt(mol))
    descriptors['molwt-hvy'] = rounded(Descriptors.HeavyAtomMolWt(mol))

    # Number of valence electrons (stored unrounded)
    descriptors['e-valence'] = Descriptors.NumValenceElectrons(mol)

    # Topological indices
    descriptors['balabanj'] = rounded(Descriptors.BalabanJ(mol))
    descriptors['bertzct'] = rounded(Descriptors.BertzCT(mol))
    descriptors['ipc'] = rounded(Descriptors.Ipc(mol))

    # Hall-Kier alpha (stored unrounded) and kappas
    descriptors['hk-alpha'] = Descriptors.HallKierAlpha(mol)
    for order in (1, 2, 3):
        kappa_fn = getattr(Descriptors, 'Kappa{}'.format(order))
        descriptors['hk-kappa{}'.format(order)] = rounded(kappa_fn(mol))

    # Chi values from Rev. Comput. Chem. 2:367-422 (1991); the dictionary
    # key is the lower-cased RDKit function name.
    for chi_name in ('Chi0', 'Chi1', 'Chi0n', 'Chi1n', 'Chi2n', 'Chi3n',
                     'Chi4n', 'Chi0v', 'Chi1v', 'Chi2v', 'Chi3v', 'Chi4v'):
        chi_fn = getattr(Descriptors, chi_name)
        descriptors[chi_name.lower()] = rounded(chi_fn(mol))

    # Wildman-Crippen LogP and MR values
    descriptors['logP'] = rounded(Descriptors.MolLogP(mol))
    descriptors['MR'] = rounded(Descriptors.MolMR(mol))

    # Rotatable bond and heavy atom counts (stored unrounded)
    descriptors['rbonds'] = Descriptors.NumRotatableBonds(mol)
    descriptors['nheavy'] = Descriptors.HeavyAtomCount(mol)

    # TPSA, J. Med. Chem. 43:3714-7, (2000)
    descriptors['tpsa'] = rounded(Descriptors.TPSA(mol))

    # Labute's Approximate Surface Area, J. Mol. Graph. Mod. 18:464-77 (2000)
    descriptors['labuteASA'] = rounded(Descriptors.LabuteASA(mol))

    # MOE-type descriptors: (dictionary key prefix, RDKit name prefix,
    # number of bins). Kept in a separate dict because they are only merged
    # into the result on request, while the charge block always needs them.
    moe = {}
    for key_prefix, fn_prefix, n_bins in (
            ('peoe-vsa', 'PEOE_VSA', 14),
            ('smr-vsa', 'SMR_VSA', 10),
            ('slogP-vsa', 'SlogP_VSA', 12),
            ('estate-vsa', 'EState_VSA', 11),
            ('vsa-estate', 'VSA_EState', 10)):
        for bin_index in range(1, n_bins + 1):
            vsa_fn = getattr(Descriptors, '{}{}'.format(fn_prefix, bin_index))
            moe['{}{}'.format(key_prefix, bin_index)] = rounded(vsa_fn(mol))

    if include_moe:
        descriptors.update(moe)

    # 3D shape descriptors: plane of best fit (Firth et al., JCIM
    # 52:2516-25), principal moments of inertia, normalized principal
    # moment ratios (Sauer and Schwarz, JCIM 43:987-1003 (2003)), radius of
    # gyration, inertial shape factor, eccentricity, asphericity and
    # spherocity index.
    for key, fn_name in (
            ('pbf', 'CalcPBF'),
            ('pmi1', 'CalcPMI1'),
            ('pmi2', 'CalcPMI2'),
            ('pmi3', 'CalcPMI3'),
            ('npr1', 'CalcNPR1'),
            ('npr2', 'CalcNPR2'),
            ('rg', 'CalcRadiusOfGyration'),
            ('isf', 'CalcInertialShapeFactor'),
            ('eccentricity', 'CalcEccentricity'),
            ('asphericity', 'CalcAsphericity'),
            ('spherocity', 'CalcSpherocityIndex')):
        shape_fn = getattr(rdMolDescriptors, fn_name)
        descriptors[key] = rounded(shape_fn(mol))

    # Charge descriptors
    if include_pc:
        Chem.ComputeGasteigerCharges(mol)
        charges = [
            float(atom.GetProp('_GasteigerCharge')) for atom in mol.GetAtoms()
        ]
        positive_charges = [c for c in charges if c > 0]
        negative_charges = [c for c in charges if c < 0]

        def peoe_area(first, stop):
            # Sum of the PEOE VSA bins first .. stop-1.
            return rounded(
                sum([moe['peoe-vsa{}'.format(val)]
                     for val in range(first, stop)]))

        # Total positive / negative charge
        descriptors['pc+'] = rounded(sum(positive_charges))
        descriptors['pc-'] = rounded(sum(negative_charges))

        # Relative positive / negative partial charge
        descriptors['rpc+'] = rounded(
            max(positive_charges) / sum(positive_charges))
        descriptors['rpc-'] = rounded(
            min(negative_charges) / sum(negative_charges))

        # Total positive (bins 8-14) and negative (bins 1-7) VSA
        descriptors['vsa+'] = peoe_area(8, 15)
        descriptors['vsa-'] = peoe_area(1, 8)

        total_vsa = rounded(descriptors['vsa+'] + descriptors['vsa-'])

        # Polar positive (bins 12-14), polar negative (bins 1-3) and
        # hydrophobic (bins 4-11) VSA
        descriptors['vsa-polar+'] = peoe_area(12, 15)
        descriptors['vsa-polar-'] = peoe_area(1, 4)
        descriptors['vsa-hyd'] = peoe_area(4, 12)

        # Total polar VSA
        descriptors['vsa-polar'] = rounded(
            descriptors['vsa-polar+'] + descriptors['vsa-polar-'])

        # Fractions of the total VSA. The polar fraction gets its own key so
        # it no longer clobbers the total stored in 'vsa-polar'.
        for fraction_key, total_key in (
                ('vsa-fpos', 'vsa+'),
                ('vsa-fneg', 'vsa-'),
                ('vsa-fppos', 'vsa-polar+'),
                ('vsa-fpneg', 'vsa-polar-'),
                ('vsa-fhyd', 'vsa-hyd'),
                ('vsa-fpolar', 'vsa-polar')):
            descriptors[fraction_key] = rounded(
                descriptors[total_key] / total_vsa)

    # If the number of H-bond donors and acceptors are desired then
    # an additional smiles needs to be provided where groups are CH3-
    # terminated
    if include_h_bond:
        assert (ch3_smiles)
        mol_ch3 = Chem.AddHs(Chem.MolFromSmiles(ch3_smiles))
        descriptors['hdonors'] = Descriptors.NumHDonors(mol_ch3)
        descriptors['hacceptors'] = Descriptors.NumHAcceptors(mol_ch3)

    return descriptors
Exemplo n.º 6
0
def getBalabanJ(mol):
    """Return the result of ``Descriptors.BalabanJ`` for ``mol``."""
    balaban_j = Descriptors.BalabanJ(mol)
    return balaban_j