示例#1
0
    def test1(self):
        """Compare computed Lipinski descriptors with the reference properties
        stored on each molecule read from ``self.inFileName``."""
        # Figure out which rotor definition is active: a count of 2 for the
        # probe molecule selects the NonStrict reference property.
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

        # Each entry: (descriptor callable, reference property name,
        # failure-message template).
        checks = (
            (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHDonors, 'NUM_HDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
             'bad num heteroatoms for mol %d (%s): %d != %d'),
            (Lipinski.NumRotatableBonds, rot_prop,
             'bad num rotors for mol %d (%s): %d != %d'),
            # test the underlying numrotatable bonds with both definitions
            (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
                mol, rdMolDescriptors.NumRotatableBondsOptions.NonStrict),
             NonStrict,
             'bad num rotors for mol %d (%s): %d != %d'),
            (lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
                mol, rdMolDescriptors.NumRotatableBondsOptions.Strict),
             Strict,
             'bad num rotors for mol %d (%s): %d != %d'),
        )

        # The index counts every record, including ones that failed to parse.
        for idx, m in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
            if not m:
                continue
            for descriptor, prop_name, template in checks:
                calc = descriptor(m)
                orig = int(m.GetProp(prop_name))
                assert calc == orig, template % (
                    idx, m.GetProp('SMILES'), calc, orig)
示例#2
0
def calculate_property(m):
    """Return ``(molecular weight, rotatable-bond count, logP)`` for molecule ``m``."""
    # The synthetic-accessibility score (-sascorer.calculateScore(m)) is
    # currently left out of the result tuple.
    return (
        Descriptors.MolWt(m),
        Lipinski.NumRotatableBonds(m),
        Descriptors.MolLogP(m),
    )
示例#3
0
 def veber_infraction(molecule: Chem.Mol) -> bool:
   """
   Report whether a molecule violates either limit checked by this filter.

   Returns True when the molecule has more than 10 rotatable bonds, or when
   its hydrogen-bond acceptors and donors together number more than 10.
   """
   # NOTE(review): the published Veber criterion is usually quoted as at most
   # 12 H-bond donors + acceptors; confirm that 10 is the intended limit here.
   rotatable_bonds = Lipinski.NumRotatableBonds(molecule)
   hydrogen_bonders = Lipinski.NumHAcceptors(molecule) + Lipinski.NumHDonors(molecule)
   return rotatable_bonds > 10 or hydrogen_bonders > 10
示例#4
0
def score_molecule(smiles):
    """Score a SMILES string against the rule-of-five style limits and compute QED.

    One point is awarded for each of logP, molecular weight, H-bond donor
    count, H-bond acceptor count and rotatable-bond count that is strictly
    below the corresponding ``LipinskiRuleOfFiveDecorator`` limit.

    Args:
        smiles: SMILES string of the molecule to score.

    Returns:
        Tuple ``(lipinski_score, qed)``. If anything fails (including a SMILES
        that cannot be parsed) the exception is logged and the result is
        ``(0, LipinskiRuleOfFiveDecorator.MAX_QED + 1)``.
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    # Out-of-range placeholder that is returned when scoring fails.
    qed = limits.MAX_QED + 1

    try:
        m = Chem.MolFromSmiles(smiles)
        if m is None:
            # MolFromSmiles signals a parse failure by returning None.
            raise ValueError('could not parse SMILES: %r' % (smiles,))

        # The acceptor count used to be compared against the donor limit.
        # Use a dedicated acceptor limit when the class provides one and fall
        # back to the donor limit otherwise, so existing setups keep working.
        max_h_acceptors = getattr(limits, 'MAX_H_ACCEPTORS', limits.MAX_H_DONORS)

        logp = Descriptors.MolLogP(m)
        lipinski_score += 1 if logp < limits.MAX_LOGP else 0

        wt = Descriptors.MolWt(m)
        lipinski_score += 1 if wt < limits.MAX_MOL_WT else 0

        hdonor = Lipinski.NumHDonors(m)
        lipinski_score += 1 if hdonor < limits.MAX_H_DONORS else 0

        hacceptor = Lipinski.NumHAcceptors(m)
        lipinski_score += 1 if hacceptor < max_h_acceptors else 0

        rotatable_bond = Lipinski.NumRotatableBonds(m)
        lipinski_score += 1 if rotatable_bond < limits.MAX_ROTATABLE_BONDS else 0

        qed = QED.qed(m)
    except Exception as ex:
        # Best effort: a molecule that cannot be scored gets a zero score.
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
示例#5
0
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  """Build one database row for a molecule.

  Arguments:
    mol: the molecule to process.
    typeConversions: indexable by type code; element ``[1]`` of each entry is
      the callable used to convert a property string to that type.
    globalProps: dict mapping property name to type code. It is updated in
      place with the code that successfully converted each property
      (properties not seen before start at code 2).
    nDone: number used to build the fallback name ``'Mol_%d'``.
    nameProp: molecule property holding the compound name.
    nameCol: name column; a property with this name (case-insensitive) is
      not copied into the property dict.
    redraw: when true, 2D coordinates are recomputed.
    keepHs: when true, ``Chem.SanitizeMol`` is called on the molecule.
    skipProps: when true, no properties are collected.
    addComputedProps: when true, donor/acceptor/rotatable-bond counts,
      molecular weight and logP are stored on the molecule as properties
      before the properties are collected.
    skipSmiles: when true, the SMILES column is left out of the row.
    uniqNames: when true, a molecule whose name is already in ``namesSeen``
      is skipped.
    namesSeen: set of names seen so far; the molecule's name is added to it.
      When it is None no name tracking is done.

  Returns:
    ``[name, smiles (unless skipSmiles), binary holder, property dict]``,
    or None when the molecule was skipped as a duplicate.

  Raises:
    ValueError: if ``mol`` is empty/None.
  """
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None
  if not nm:
    nm = 'Mol_%d'%nDone
  # namesSeen defaults to None; only track names when a container was given.
  if namesSeen is not None:
    if uniqNames and nm in namesSeen:
      logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
      return None
    namesSeen.add(nm)
  row = [nm]
  pD={}
  if not skipProps:
    if addComputedProps:
      nHD=Lipinski.NumHDonors(mol)
      mol.SetProp('DonorCount',str(nHD))
      nHA=Lipinski.NumHAcceptors(mol)
      mol.SetProp('AcceptorCount',str(nHA))
      nRot=Lipinski.NumRotatableBonds(mol)
      mol.SetProp('RotatableBondCount',str(nRot))
      MW=Descriptors.MolWt(mol)
      mol.SetProp('AMW',str(MW))
      logp=Crippen.MolLogP(mol)
      mol.SetProp('MolLogP',str(logp))

    for pn in list(mol.GetPropNames()):
      if pn.lower()==nameCol.lower(): continue
      pv = mol.GetProp(pn).strip()
      if pv.find('>')<0 and pv.find('<')<0:
        # Step down through the type codes until one converts the value;
        # code 0 is the last resort and is not test-converted here.
        colTyp = globalProps.get(pn,2)
        while colTyp>0:
          try:
            typeConversions[colTyp][1](pv)
          except Exception:
            colTyp-=1
          else:
            break
        globalProps[pn]=colTyp
        pD[pn]=typeConversions[colTyp][1](pv)
      else:
        # Values containing '<' or '>' are stored as the raw string.
        pD[pn]=pv
  if redraw:
    # Previously referenced an undefined name ``m`` here.
    AllChem.Compute2DCoords(mol)
  if not skipSmiles:
    row.append(Chem.MolToSmiles(mol,True))
  row.append(DbModule.binaryHolder(mol.ToBinary()))
  row.append(pD)
  return row
示例#6
0
def pct_rotatable_bonds(mol):
    """Return the fraction of the molecule's bonds that are rotatable.

    A molecule without bonds yields 0 instead of dividing by zero.
    """
    total_bonds = mol.GetNumBonds()
    if total_bonds <= 0:
        return 0
    return Lipinski.NumRotatableBonds(mol) / total_bonds
示例#7
0
def get_descriptors(mol, write=False):
    """Compute a fixed-order list of 20 descriptor values for ``mol``.

    ``write`` is accepted but not used by this function. Any value that is
    not equal to itself (i.e. NaN) is replaced by 0 in the returned list.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )

    values = []
    for compute in calculators:
        value = compute(mol)
        # NaN is the only value that compares unequal to itself.
        values.append(0 if value != value else value)
    return values
示例#8
0
def generate(smiles):
    """Build a descriptor table for an iterable of SMILES strings.

    For every SMILES the logP, molecular weight, rotatable-bond count and
    aromatic proportion (via ``getAromaticProportion``) are computed.

    Args:
        smiles: iterable of SMILES strings.

    Returns:
        pandas.DataFrame with one row per input and the float columns
        ``MolLogP``, ``MolWt``, ``NumRotatableBonds`` and
        ``AromaticProportion``. Empty input gives an empty frame with those
        columns.
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    moldata = [Chem.MolFromSmiles(elem) for elem in smiles]

    # Collect rows in a list and build the array once. Stacking row by row
    # was quadratic, and a single row stayed 1-D, which does not fit the
    # four-column frame.
    rows = []
    for mol in moldata:
        rows.append([
            Crippen.MolLogP(mol),
            Descriptors.MolWt(mol),
            Lipinski.NumRotatableBonds(mol),
            getAromaticProportion(mol),
        ])

    if rows:
        baseData = np.array(rows, dtype=float)
    else:
        baseData = np.empty((0, len(columnNames)))

    return pd.DataFrame(data=baseData, columns=columnNames)
示例#9
0
    def testMQN(self):
        """Sum the MQN vectors of every molecule in ``aromat_regress.txt`` and
        compare the totals with the expected values."""
        # The expected totals depend on which rotatable-bond definition is
        # active; a count of 2 for this probe molecule selects the first set.
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(probe) == 2:
            tgt = [
                42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470,
                62588, 19055, 4424, 309, 24061, 17820, 1, 9303, 24146, 16076,
                5560, 4262, 646, 746, 13725, 5430, 2629, 362, 24211, 15939,
                292, 41, 20, 1852, 5642, 31, 9, 1, 2, 3060, 1750
            ]
        else:
            tgt = [
                42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470,
                62588, 19055, 4424, 309, 24059, 17822, 1, 8314, 24146, 16076,
                5560, 4262, 646, 746, 13725, 5430, 2629, 362, 24211, 15939,
                292, 41, 20, 1852, 5642, 31, 9, 1, 2, 3060, 1750
            ]
        fn = os.path.join(os.path.dirname(__file__), 'test_data',
                          'aromat_regress.txt')
        ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
        # One accumulator slot per MQN component (42 in total).
        vs = np.zeros((42, ), np.int32)

        for m in ms:
            vs += rdMolDescriptors.MQNs_(m)
        self.assertEqual(list(vs), tgt)
示例#10
0
    def testMQNDetails(self):
        """Compare the MQN vector of each molecule in ``aromat_regress.txt``
        with the pickled reference data, printing any differing components."""
        data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

        # figure out which definition we are currently using: a count of 2
        # for the probe molecule selects the non-strict reference file.
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(probe) == 2:
            ref_path = os.path.join(data_dir, 'MQNs_non_strict_regress.pkl')
        else:
            ref_path = os.path.join(data_dir, 'MQNs_regress.pkl')

        with open(ref_path, 'rb') as ref_stream:
            refData = pickle.load(ref_stream)

        smiles_path = os.path.join(data_dir, 'aromat_regress.txt')
        molecules = list(Chem.SmilesMolSupplier(smiles_path, delimiter='\t'))
        for i, m in enumerate(molecules):
            mqns = rdMolDescriptors.MQNs_(m)
            expected = refData[i][1]
            if mqns != expected:
                # (position, computed, reference) for every differing entry
                mismatches = [(j, x, y)
                              for j, (x, y) in enumerate(zip(mqns, expected))
                              if x != y]
                print(i, Chem.MolToSmiles(m), mismatches)
            self.assertEqual(mqns, expected)
示例#11
0
def get_filter_values(mol):
    """
    Collect, for one molecule, the values that the filters operate on.

    Returns a dictionary holding the descriptor values plus the bookkeeping
    entries "is_good" (initially True) and "rejections" (initially empty).
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # every bond that is not rotatable is counted as rigid (assume mutual exclusion)
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    values["charge"] = rdmolops.GetFormalCharge(mol)  # trusting this charge calculation method

    carbons, charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbons
    values["num_charges"] = charges
    values["max_ring_size"] = largest_ring

    try:
        hetero_to_carbon = float(values["num_hetero_atoms"]) / float(carbons)
    except ZeroDivisionError:
        hetero_to_carbon = 100000000  # if there are zero carbons
    values["hc_ratio"] = hetero_to_carbon

    # how many BRICS bonds, related to complexity
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # default to true, but not yet observed
    # unique element symbols present in the molecule
    values["atoms"] = list(set(atom.GetSymbol() for atom in mol.GetAtoms()))
    values["num_chiral_centers"] = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # empty list to store the reasons for rejection

    return values
示例#12
0
    def testMQNDetails(self):
        """Compare the MQN vector of each molecule in ``aromat_regress.txt``
        with the pickled reference data, printing any differing components."""
        refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                               'MQNs_regress.pkl')
        refFile2 = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                                'MQNs_non_strict_regress.pkl')
        # figure out which definition we are currently using
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            refFile = refFile2

        # The pickle is read as text so that '\r\n' line endings can be
        # normalised before the bytes are handed to the unpickler. The
        # ``with`` block closes the file; no explicit close is needed.
        with open(refFile, 'r') as intf:
            buf = intf.read().replace('\r\n', '\n').encode('utf-8')
        refData = cPickle.loads(buf, encoding='bytes')

        fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                          'aromat_regress.txt')
        ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
        for i, m in enumerate(ms):
            mqns = rdMolDescriptors.MQNs_(m)
            if mqns != refData[i][1]:
                # (position, computed, reference) for every differing entry
                indices = [
                    (j, x, y)
                    for j, x, y in zip(range(len(mqns)), mqns, refData[i][1])
                    if x != y
                ]
                print(i, Chem.MolToSmiles(m), indices)
            self.assertEqual(mqns, refData[i][1])
示例#13
0
def CalculateRotationBondNumber(mol):
    """
    Count the rotatable bonds of a molecule
        Parameters:
            mol: rdkit molecule
        Returns:
            the value reported by LPK.NumRotatableBonds for mol
    """
    rotatable_bond_count = LPK.NumRotatableBonds(mol)
    return rotatable_bond_count
def mole_proper(mol):
    """Return a 6-tuple of properties for ``mol``.

    The order is: H-bond donors, H-bond acceptors, rotatable bonds,
    molecular weight, logP, TPSA.
    """
    return (
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    )
示例#15
0
 def calc_esol_descriptors(self, mol):
     """
     Compute molecular weight, logP, rotatable-bond count and aromatic
     proportion (via self.calc_ap) for a molecule
     :param mol: input molecule
     :return: self.Descriptor built from the mw, logp, rotors and ap values
     """
     return self.Descriptor(
         mw=Descriptors.MolWt(mol),
         logp=Crippen.MolLogP(mol),
         rotors=Lipinski.NumRotatableBonds(mol),
         ap=self.calc_ap(mol),
     )
示例#16
0
def auto_sampling(mult_factor, mol, log):
    """Derive the number of samples from the molecule's flexible features.

    Rotatable bonds, NH/OH groups and saturated rings each contribute three
    samples; the total is scaled by ``mult_factor``. A molecule with none of
    these features gets ``mult_factor`` samples. ``log`` is not used here.
    """
    # x3 each: C3 rotations, OH/NH rotations, boat/chair/envelope confs
    flexible_features = (Lipinski.NumRotatableBonds(mol)
                         + Lipinski.NHOHCount(mol)
                         + Lipinski.NumSaturatedRings(mol))
    per_feature_samples = 3 * flexible_features
    if per_feature_samples == 0:
        return mult_factor
    return mult_factor * per_feature_samples
 def descriptors(self, mol):
     """Return the 11 descriptor values for mol as a list.

     Order: aromatic fraction (self.arofrac), exact molecular weight,
     valence electrons, H-bond acceptors, H-bond donors, NO count,
     NHOH count, rotatable bonds, fraction of sp3 carbons, logP and the
     number of matches of the SMARTS '[^1]'.
     """
     return [
         self.arofrac(mol),
         Descriptors.ExactMolWt(mol, False),
         Descriptors.NumValenceElectrons(mol),
         Lipinski.NumHAcceptors(mol),
         Lipinski.NumHDonors(mol),
         Lipinski.NOCount(mol),
         Lipinski.NHOHCount(mol),
         Lipinski.NumRotatableBonds(mol),
         Lipinski.FractionCSP3(mol),
         Crippen.MolLogP(mol),
         # presumably counts sp-hybridised atoms ('^1' hybridisation query) - confirm
         len(mol.GetSubstructMatches(Chem.MolFromSmarts('[^1]'))),
     ]
示例#18
0
def auto_sampling(mult_factor,mol,args,log):
	"""Derive the number of samples from the molecule's flexible features.

	Rotatable bonds, NH/OH groups and saturated rings each contribute three
	samples; the total is scaled by mult_factor. For metal complexes with at
	least one metal index, mult_factor is first multiplied by three per
	index. The log argument is not used here.
	"""
	# this accounts for possible trans/cis isomers in metal complexes
	if args.metal_complex and len(args.metal_idx) > 0:
		mult_factor = mult_factor*3*len(args.metal_idx)
	# x3 each: C3 rotations, OH/NH rotations, boat/chair/envelope confs
	flexible_features = (Lipinski.NumRotatableBonds(mol)
		+ Lipinski.NHOHCount(mol)
		+ Lipinski.NumSaturatedRings(mol))
	per_feature_samples = 3*flexible_features
	if per_feature_samples == 0:
		return mult_factor
	return mult_factor*per_feature_samples
示例#19
0
    def getDiscriptor(self):
        """Compute descriptors for every structure in ``extChronicStrcture.csv``
        and write them to ``Descriptors.csv``.

        The working directory is changed to the hard-coded data folder, rows
        with a missing CAS or SMILES are dropped, and one output row per
        remaining structure is written (no index column).
        """
        from rdkit.Chem import Crippen
        from rdkit import Chem
        import pandas as pd
        from rdkit.Chem import Descriptors, Lipinski
        import os

        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')
        df = df[['CAS', 'canonical_smiles']]
        df = df.dropna(how='any')

        columns = [
            'CAS', 'weight', 'logP', 'RotatableBonds', 'HeavyAtomCounts',
            'AromProp', 'TPSA', 'HDonor', 'HAcceptors', 'FractionCSP3',
            'AromaticCarbocycles', 'AromaticHeterocycles'
        ]
        CAS = df['CAS']
        SMILES = df['canonical_smiles']

        # Compile the aromatic-atom query once instead of once per molecule.
        aromatic_atom = Chem.MolFromSmarts('[a]')

        # Rows are collected in a list and turned into a frame once;
        # concatenating a frame per molecule copies all earlier rows each time.
        rows = []
        for cas, smiles in zip(CAS, SMILES):
            mol = Chem.MolFromSmiles(smiles)
            wt = Descriptors.MolWt(mol)
            rot = Lipinski.NumRotatableBonds(mol)
            heavy = Lipinski.HeavyAtomCount(mol)
            logp = Crippen.MolLogP(mol)
            aromaticHeavyatoms = len(mol.GetSubstructMatches(aromatic_atom))
            numAtoms = mol.GetNumAtoms()
            # A molecule without atoms has no aromatic proportion to speak of.
            aromprop = float(aromaticHeavyatoms / numAtoms) if numAtoms else 0.0
            TPSA = Descriptors.TPSA(mol)
            HDonors = Descriptors.NumHDonors(mol)
            HAcceptors = Descriptors.NumHAcceptors(mol)

            FractionCSP3 = Descriptors.FractionCSP3(mol)
            AromaticCarbocycles = Descriptors.NumAromaticCarbocycles(mol)
            AromaticHeterocycles = Descriptors.NumAromaticHeterocycles(mol)

            print(HDonors, HAcceptors)
            rows.append([
                cas, wt, logp, rot, heavy, aromprop, TPSA, HDonors, HAcceptors,
                FractionCSP3, AromaticCarbocycles, AromaticHeterocycles
            ])
        resultDf = pd.DataFrame(rows, columns=columns)
        resultDf.to_csv('Descriptors.csv', index=False)
示例#20
0
def ProcessMol(session,
               mol,
               globalProps,
               nDone,
               nameProp='_Name',
               nameCol='compound_id',
               redraw=False,
               keepHs=False,
               skipProps=False,
               addComputedProps=False,
               skipSmiles=False):
    """Create a ``Compound`` for a molecule and add it to the session.

    Arguments:
        session: object whose ``add`` method receives the new compound.
        mol: the molecule to process.
        globalProps: container of property names; only molecule properties
            whose name is in it are copied onto the compound.
        nDone: number used to build the fallback name ``'Mol_%d'``.
        nameProp: molecule property holding the compound name.
        nameCol: compound attribute that receives the name; a property with
            this name (case-insensitive) is not copied.
        redraw: when true, 2D coordinates are recomputed.
        keepHs: when true, ``Chem.SanitizeMol`` is called on the molecule.
        skipProps: when true, no properties are copied or computed.
        addComputedProps: when true, donor/acceptor/rotatable-bond counts,
            molecular weight and logP are stored on the compound.
        skipSmiles: when true, the ``smiles`` attribute is not set.

    Returns:
        The new ``Compound`` instance.

    Raises:
        ValueError: if ``mol`` is empty/None.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # Previously referenced an undefined name ``m`` here.
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    setattr(cmpd, nameCol, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pn in globalProps:
                # Stored under the lower-cased property name.
                setattr(cmpd, pn.lower(), pv)
    return cmpd
    def run_filter(self, mol):
        """
        This runs a Mozziconacci filter. Mozziconacci filter is a filter for
        Drug-likeliness which filters molecules by the number of rotatable
        bonds, rings, oxygens, nitrogens and halogens.

        To pass the filter a molecule must be:
            # of Rotatable bonds: Max 15
            # of Rings: Max 6
            # of Oxygens: Min 1
            # of Nitrogens: Min 1
            # of Halogens: Max 7

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool: True if the mol passes the filter; False if it
            fails the filter
        """

        # maxMatches is capped just past each limit: enough to decide the
        # check without enumerating every match.
        halogen = Chem.MolFromSmarts("[*;#9,#17,#35,#53,#85]")
        number_of_halogens = len(mol.GetSubstructMatches(halogen,
                                                         maxMatches=8))
        if number_of_halogens > 7:
            return False

        oxygen = Chem.MolFromSmarts("[#8]")
        number_of_oxygens = len(mol.GetSubstructMatches(oxygen, maxMatches=2))
        if number_of_oxygens < 1:
            return False

        nitrogen = Chem.MolFromSmarts("[#7]")
        number_of_nitrogen = len(
            mol.GetSubstructMatches(nitrogen, maxMatches=2))
        if number_of_nitrogen < 1:
            return False

        num_rotatable_bonds = Lipinski.NumRotatableBonds(mol)
        if num_rotatable_bonds > 15:
            return False

        # Older RDKit releases return the ring count from GetSSSR, newer
        # ones return the rings themselves; accept both.
        sssr = Chem.rdmolops.GetSSSR(mol)
        ring_count = sssr if isinstance(sssr, int) else len(sssr)
        if ring_count > 6:
            return False

        # Passes everything
        return True
示例#22
0
def filters(mol,args):
	"""Return True when the molecule passes every structural filter.

	A molecule passes when its rotatable-bond count is below
	args.max_torsions, its molecular weight is below args.max_MolWt and all
	of its atom symbols are listed in possible_atoms.
	"""
	# First filter: number of rotatable bonds
	if not Lipinski.NumRotatableBonds(mol) < args.max_torsions:
		return False
	# Second filter: molecular weight
	if not Descriptors.MolWt(mol) < args.max_MolWt:
		return False
	# Third filter (salts, i.e. 2 separated components) is currently disabled.
	# Fourth filter: atoms outside the scope chosen in 'possible_atoms'
	return all(atom.GetSymbol() in possible_atoms for atom in mol.GetAtoms())
示例#23
0
def PhyChem(smiles):
    """Calculate 19 physicochemical descriptors for each molecule and scale
    the resulting matrix with ``Scaler().fit_transform``.

    A molecule whose descriptors cannot be computed has its SMILES printed
    and contributes a row of 19 zeros.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors,
            where m is the number of samples.
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            row = [
                desc.MolWt(mol),                     # molecular weight
                Crippen.MolLogP(mol),                # logP
                Lipinski.NumHAcceptors(mol),         # H-bond acceptors
                Lipinski.NumHDonors(mol),            # H-bond donors
                Lipinski.NumRotatableBonds(mol),     # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),      # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol), # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),        # hetero atoms
                Lipinski.HeavyAtomCount(mol),        # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),      # spiro atoms
                AllChem.CalcFractionCSP3(mol),       # fraction of sp3 carbons
                Lipinski.RingCount(mol),             # rings
                AllChem.CalcNumAliphaticRings(mol),  # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),   # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),  # saturated rings
                AllChem.CalcNumHeterocycles(mol),    # heterocycles
                MolSurf.TPSA(mol),                   # topological polar surface area
                desc.NumValenceElectrons(mol),       # valence electrons
                Crippen.MolMR(mol),                  # molar refractivity
            ]
        except Exception:
            print(smile)
            row = [0] * 19
        rows.append(row)
    return Scaler().fit_transform(np.array(rows))
示例#24
0
def properties(fnames, labels, is_active=False):
    """ Six structural property counts for each molecule in each given file:
    No. of Hydrogen Bond Acceptors/Donors, Rotatable Bonds, Aliphatic Rings,
    Aromatic Rings and Heterocycles.

    Arguments:
        fnames (list): the file paths of the molecule tables.
        labels (list): the label for each file in fnames.
        is_active (bool, optional): if True, only rows with SCORE > 0.5 (or,
            when there is no SCORE column, PCHEMBL_VALUE >= 6.5) are kept;
            if False the thresholds are SCORE > 0 and PCHEMBL_VALUE >= 0.
            (Default: False)

    Returns:
        df (DataFrame): a table with three columns; 'Set' is the label of
            the file the molecule belongs to, 'Property' is the name of one
            of the properties, 'Number' is the property value.
    """

    max_rows = int(1e5)
    records = []
    for i, fname in enumerate(fnames):
        table = pd.read_table(fname)
        if 'SCORE' in table.columns:
            threshold = 0.5 if is_active else 0
            table = table[table.SCORE > threshold]
        elif 'PCHEMBL_VALUE' in table.columns:
            threshold = 6.5 if is_active else 0
            table = table[table.PCHEMBL_VALUE >= threshold]
        table = table.drop_duplicates(subset='CANONICAL_SMILES')
        # Large sets are randomly down-sampled to keep the work bounded.
        if len(table) > max_rows:
            table = table.sample(max_rows)
        for smile in tqdm(table.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smile)
            label = labels[i]
            records.extend([
                [label, 'Hydrogen Bond\nAcceptor', Lipinski.NumHAcceptors(mol)],
                [label, 'Hydrogen\nBond Donor', Lipinski.NumHDonors(mol)],
                [label, 'Rotatable\nBond', Lipinski.NumRotatableBonds(mol)],
                [label, 'Aliphatic\nRing', AllChem.CalcNumAliphaticRings(mol)],
                [label, 'Aromatic\nRing', Lipinski.NumAromaticRings(mol)],
                [label, 'Heterocycle', AllChem.CalcNumHeterocycles(mol)],
            ])
    return pd.DataFrame(records, columns=['Set', 'Property', 'Number'])
示例#25
0
    def test1(self):
        """Compare computed Lipinski descriptors with the reference properties
        stored on each molecule read from ``self.inFileName``."""
        # Each entry: (descriptor callable, reference property name,
        # failure-message template).
        checks = (
            (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHDonors, 'NUM_HDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
             'bad num heteroatoms for mol %d (%s): %d != %d'),
            (Lipinski.NumRotatableBonds, 'NUM_ROTATABLEBONDS',
             'bad num rotors for mol %d (%s): %d != %d'),
        )

        # The index counts every record, including ones that failed to parse.
        for idx, m in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
            if not m:
                continue
            for descriptor, prop_name, template in checks:
                calc = descriptor(m)
                orig = int(m.GetProp(prop_name))
                assert calc == orig, template % (
                    idx, m.GetProp('SMILES'), calc, orig)
示例#26
0
def CalculateRotationBondNumber(mol):
    """
    #################################################################
    Count the rotatable bonds in a molecule

    ---->nrot

    The value is whatever LPK.NumRotatableBonds reports for mol.

    Usage:

        result=CalculateRotationBondNumber(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value.
    #################################################################
    """
    nrot = LPK.NumRotatableBonds(mol)
    return nrot
示例#27
0
def properties(mol):
    """
  Calculates the properties that are required to calculate the QED descriptor.

  Returns a list of eight values, in this order: MW, ALOGP, HBA, HBD, PSA,
  ROTB, AROM and ALERTS. Raises TypeError when mol is None.
  """
    if mol is None:
        raise TypeError('You need to provide a mol argument.')
    x = [0] * 8
    x[0] = rdmd._CalcMolWt(mol)  # MW
    x[1] = Crippen.MolLogP(mol)  # ALOGP
    # HBA: total matches over all acceptor patterns (no match adds zero)
    x[2] = sum(len(mol.GetSubstructMatches(hbaPattern)) for hbaPattern in Acceptors)
    x[3] = Lipinski.NumHDonors(mol)  # HBD
    x[4] = MolSurf.TPSA(mol)  # PSA
    x[5] = Lipinski.NumRotatableBonds(mol)  # ROTB
    # AROM: rings left after removing the aliphatic rings from a copy.
    # Older RDKit releases return the ring count from GetSSSR, newer ones
    # return the rings themselves; accept both.
    sssr = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings))
    x[6] = sssr if isinstance(sssr, int) else len(sssr)
    # ALERTS: number of structural-alert patterns present in the molecule
    x[7] = sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert))
    return x
示例#28
0
def calc_rdkit(mol):
    """Compute a fixed-order vector of 188 descriptor values for ``mol``.

    Every entry is the result of calling one descriptor function on ``mol``.
    The values are collected in a list, wrapped with ``np.array`` and then
    with ``pd.Series``. No index is passed to the Series, so a value is
    identified only by its position in the list below -- do not reorder,
    insert or remove entries without updating every consumer.

    Args:
        mol: molecule object handed unchanged to each descriptor function
            (presumably an RDKit ``Mol`` -- confirm against the callers).

    Returns:
        The ``pd.Series`` holding the 188 descriptor values in list order.
    """
    descriptors = pd.Series(
        np.array([
            # --- Crippen (2 values) ---
            Crippen.MolLogP(mol),
            Crippen.MolMR(mol),
            # --- Descriptors (12 values) ---
            Descriptors.FpDensityMorgan1(mol),
            Descriptors.FpDensityMorgan2(mol),
            Descriptors.FpDensityMorgan3(mol),
            Descriptors.FractionCSP3(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Descriptors.MaxAbsPartialCharge(mol),
            Descriptors.MaxPartialCharge(mol),
            Descriptors.MinAbsPartialCharge(mol),
            Descriptors.MinPartialCharge(mol),
            Descriptors.MolWt(mol),
            Descriptors.NumRadicalElectrons(mol),
            Descriptors.NumValenceElectrons(mol),
            # --- EState indices and EState_VSA bins (15 values) ---
            # NOTE: the VSA bins are listed as 1, 10, 11, 2, ..., 9, i.e. not
            # in numeric order.
            EState.EState.MaxAbsEStateIndex(mol),
            EState.EState.MaxEStateIndex(mol),
            EState.EState.MinAbsEStateIndex(mol),
            EState.EState.MinEStateIndex(mol),
            EState.EState_VSA.EState_VSA1(mol),
            EState.EState_VSA.EState_VSA10(mol),
            EState.EState_VSA.EState_VSA11(mol),
            EState.EState_VSA.EState_VSA2(mol),
            EState.EState_VSA.EState_VSA3(mol),
            EState.EState_VSA.EState_VSA4(mol),
            EState.EState_VSA.EState_VSA5(mol),
            EState.EState_VSA.EState_VSA6(mol),
            EState.EState_VSA.EState_VSA7(mol),
            EState.EState_VSA.EState_VSA8(mol),
            EState.EState_VSA.EState_VSA9(mol),
            # --- Fragments: fr_* functions (85 values) ---
            Fragments.fr_Al_COO(mol),
            Fragments.fr_Al_OH(mol),
            Fragments.fr_Al_OH_noTert(mol),
            Fragments.fr_aldehyde(mol),
            Fragments.fr_alkyl_carbamate(mol),
            Fragments.fr_alkyl_halide(mol),
            Fragments.fr_allylic_oxid(mol),
            Fragments.fr_amide(mol),
            Fragments.fr_amidine(mol),
            Fragments.fr_aniline(mol),
            Fragments.fr_Ar_COO(mol),
            Fragments.fr_Ar_N(mol),
            Fragments.fr_Ar_NH(mol),
            Fragments.fr_Ar_OH(mol),
            Fragments.fr_ArN(mol),
            Fragments.fr_aryl_methyl(mol),
            Fragments.fr_azide(mol),
            Fragments.fr_azo(mol),
            Fragments.fr_barbitur(mol),
            Fragments.fr_benzene(mol),
            Fragments.fr_benzodiazepine(mol),
            Fragments.fr_bicyclic(mol),
            Fragments.fr_C_O(mol),
            Fragments.fr_C_O_noCOO(mol),
            Fragments.fr_C_S(mol),
            Fragments.fr_COO(mol),
            Fragments.fr_COO2(mol),
            Fragments.fr_diazo(mol),
            Fragments.fr_dihydropyridine(mol),
            Fragments.fr_epoxide(mol),
            Fragments.fr_ester(mol),
            Fragments.fr_ether(mol),
            Fragments.fr_furan(mol),
            Fragments.fr_guanido(mol),
            Fragments.fr_halogen(mol),
            Fragments.fr_hdrzine(mol),
            Fragments.fr_hdrzone(mol),
            Fragments.fr_HOCCN(mol),
            Fragments.fr_imidazole(mol),
            Fragments.fr_imide(mol),
            Fragments.fr_Imine(mol),
            Fragments.fr_isocyan(mol),
            Fragments.fr_isothiocyan(mol),
            Fragments.fr_ketone(mol),
            Fragments.fr_ketone_Topliss(mol),
            Fragments.fr_lactam(mol),
            Fragments.fr_lactone(mol),
            Fragments.fr_methoxy(mol),
            Fragments.fr_morpholine(mol),
            Fragments.fr_N_O(mol),
            Fragments.fr_Ndealkylation1(mol),
            Fragments.fr_Ndealkylation2(mol),
            Fragments.fr_NH0(mol),
            Fragments.fr_NH1(mol),
            Fragments.fr_NH2(mol),
            Fragments.fr_Nhpyrrole(mol),
            Fragments.fr_nitrile(mol),
            Fragments.fr_nitro(mol),
            Fragments.fr_nitro_arom(mol),
            Fragments.fr_nitro_arom_nonortho(mol),
            Fragments.fr_nitroso(mol),
            Fragments.fr_oxazole(mol),
            Fragments.fr_oxime(mol),
            Fragments.fr_para_hydroxylation(mol),
            Fragments.fr_phenol(mol),
            Fragments.fr_phenol_noOrthoHbond(mol),
            Fragments.fr_phos_acid(mol),
            Fragments.fr_phos_ester(mol),
            Fragments.fr_piperdine(mol),
            Fragments.fr_piperzine(mol),
            Fragments.fr_priamide(mol),
            Fragments.fr_prisulfonamd(mol),
            Fragments.fr_pyridine(mol),
            Fragments.fr_quatN(mol),
            Fragments.fr_SH(mol),
            Fragments.fr_sulfide(mol),
            Fragments.fr_sulfonamd(mol),
            Fragments.fr_sulfone(mol),
            Fragments.fr_term_acetylene(mol),
            Fragments.fr_tetrazole(mol),
            Fragments.fr_thiazole(mol),
            Fragments.fr_thiocyan(mol),
            Fragments.fr_thiophene(mol),
            Fragments.fr_unbrch_alkane(mol),
            Fragments.fr_urea(mol),
            # --- GraphDescriptors (19 values) ---
            GraphDescriptors.BalabanJ(mol),
            GraphDescriptors.BertzCT(mol),
            GraphDescriptors.Chi0(mol),
            GraphDescriptors.Chi0n(mol),
            GraphDescriptors.Chi0v(mol),
            GraphDescriptors.Chi1(mol),
            GraphDescriptors.Chi1n(mol),
            GraphDescriptors.Chi1v(mol),
            GraphDescriptors.Chi2n(mol),
            GraphDescriptors.Chi2v(mol),
            GraphDescriptors.Chi3n(mol),
            GraphDescriptors.Chi3v(mol),
            GraphDescriptors.Chi4n(mol),
            GraphDescriptors.Chi4v(mol),
            GraphDescriptors.HallKierAlpha(mol),
            GraphDescriptors.Ipc(mol),
            GraphDescriptors.Kappa1(mol),
            GraphDescriptors.Kappa2(mol),
            GraphDescriptors.Kappa3(mol),
            # --- Lipinski (17 values) ---
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.RingCount(mol),
            # --- MolSurf (38 values) ---
            # NOTE: the numbered VSA bins are again listed as 1, 10, 11, ...,
            # 2, ..., 9 rather than in numeric order.
            MolSurf.LabuteASA(mol),
            MolSurf.PEOE_VSA1(mol),
            MolSurf.PEOE_VSA10(mol),
            MolSurf.PEOE_VSA11(mol),
            MolSurf.PEOE_VSA12(mol),
            MolSurf.PEOE_VSA13(mol),
            MolSurf.PEOE_VSA14(mol),
            MolSurf.PEOE_VSA2(mol),
            MolSurf.PEOE_VSA3(mol),
            MolSurf.PEOE_VSA4(mol),
            MolSurf.PEOE_VSA5(mol),
            MolSurf.PEOE_VSA6(mol),
            MolSurf.PEOE_VSA7(mol),
            MolSurf.PEOE_VSA8(mol),
            MolSurf.PEOE_VSA9(mol),
            MolSurf.SlogP_VSA1(mol),
            MolSurf.SlogP_VSA10(mol),
            MolSurf.SlogP_VSA11(mol),
            MolSurf.SlogP_VSA12(mol),
            MolSurf.SlogP_VSA2(mol),
            MolSurf.SlogP_VSA3(mol),
            MolSurf.SlogP_VSA4(mol),
            MolSurf.SlogP_VSA5(mol),
            MolSurf.SlogP_VSA6(mol),
            MolSurf.SlogP_VSA7(mol),
            MolSurf.SlogP_VSA8(mol),
            MolSurf.SlogP_VSA9(mol),
            MolSurf.SMR_VSA1(mol),
            MolSurf.SMR_VSA10(mol),
            MolSurf.SMR_VSA2(mol),
            MolSurf.SMR_VSA3(mol),
            MolSurf.SMR_VSA4(mol),
            MolSurf.SMR_VSA5(mol),
            MolSurf.SMR_VSA6(mol),
            MolSurf.SMR_VSA7(mol),
            MolSurf.SMR_VSA8(mol),
            MolSurf.SMR_VSA9(mol),
            MolSurf.TPSA(mol)
        ]))
    return descriptors
示例#29
0
    def extract(x, from_smiles):
        """Build the positional descriptor vector for one molecule.

        Args:
            x: a SMILES string when ``from_smiles`` is truthy, otherwise an
                already-built molecule object that is used as is.
            from_smiles: whether ``x`` has to be parsed with
                ``Chem.MolFromSmiles`` first.

        Returns:
            A list of 24 values (2D descriptors), or 29 values when the
            enclosing scope's ``include_3D`` is truthy (five 3D shape
            descriptors appended). An unparseable or atom-less molecule
            yields a list of zeros of the same length.
        """
        mol = Chem.MolFromSmiles(x) if from_smiles else x

        # Nothing to describe: return zeros with the length the success path
        # would have produced.
        if (mol is None) or (len(mol.GetAtoms()) == 0):
            return [0] * (29 if include_3D else 24)

        # 2D descriptors; the position in this list is the output contract.
        features = [
            Crippen.MolLogP(mol),  # logP
            Crippen.MolMR(mol),  # refractivity
            Descriptors.MolWt(mol),  # weight
            Descriptors.ExactMolWt(mol),  # exact weight
            Descriptors.HeavyAtomMolWt(mol),  # heavy-atom weight
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Descriptors.NumValenceElectrons(mol),
            rdMolDescriptors.CalcNumAmideBonds(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            rdMolDescriptors.CalcTPSA(mol),  # tpsa
        ]

        if not include_3D:
            return features

        # The shape descriptors are computed on a hydrogen-added copy that has
        # been embedded and then passed through the MMFF optimizer.
        mol_3D = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol_3D)
        AllChem.MMFFOptimizeMolecule(mol_3D)

        shape_features = [
            rdMolDescriptors.CalcEccentricity(mol_3D),
            rdMolDescriptors.CalcAsphericity(mol_3D),
            rdMolDescriptors.CalcSpherocityIndex(mol_3D),
            rdMolDescriptors.CalcInertialShapeFactor(mol_3D),
            rdMolDescriptors.CalcRadiusOfGyration(mol_3D),
        ]
        return features + shape_features
示例#30
0
    def decorate(self,
                 df: Union[cudf.DataFrame, pandas.DataFrame],
                 smile_cols: int = 0):
        """Add rule-of-five style property columns to ``df`` in place.

        For every row, the SMILES string at column position ``smile_cols`` is
        parsed with ``Chem.MolFromSmiles`` and six properties are computed.
        Each property is stored as a dict ``{'value': ..., 'level': ...}``:
        ``level`` is ``'info'`` when the value is strictly below the matching
        ``LipinskiRuleOfFiveDecorator.MAX_*`` limit and ``'error'`` otherwise.
        When the SMILES cannot be parsed, or a descriptor raises, the cell is
        ``{'value': '-', 'level': 'info'}`` (the exception is logged).

        Args:
            df: frame holding the SMILES strings; it is modified and returned.
            smile_cols: positional index of the SMILES column (used with
                ``df.iat``).

        Returns:
            ``df`` with the columns 'Molecular Weight', 'LogP',
            'H-Bond Donors', 'H-Bond Acceptors', 'Rotatable Bonds' and 'QED'
            set.
        """
        limits = LipinskiRuleOfFiveDecorator

        # The acceptor count used to be checked against MAX_H_DONORS, which
        # looks like a copy/paste slip. Use a dedicated acceptor limit when
        # the class defines one; otherwise keep the historical threshold so
        # results do not change for classes without it.
        max_h_acceptors = getattr(limits, 'MAX_H_ACCEPTORS',
                                  limits.MAX_H_DONORS)

        # (column, descriptor, limit, rounding digits or None for "as is"),
        # listed in the order the descriptors are evaluated per molecule.
        checks = (
            ('LogP', Descriptors.MolLogP, limits.MAX_LOGP, 2),
            ('Molecular Weight', Descriptors.MolWt, limits.MAX_MOL_WT, 2),
            ('H-Bond Donors', Lipinski.NumHDonors, limits.MAX_H_DONORS, None),
            ('H-Bond Acceptors', Lipinski.NumHAcceptors, max_h_acceptors,
             None),
            ('Rotatable Bonds', Lipinski.NumRotatableBonds,
             limits.MAX_ROTATABLE_BONDS, None),
            ('QED', QED.qed, limits.MAX_QED, 4),
        )

        def build_cell(mol, descriptor, limit, digits):
            """Return the value/level dict for one property of one molecule."""
            if mol is None:
                return {'value': '-', 'level': 'info'}
            try:
                raw = descriptor(mol)
                return {
                    'value': raw if digits is None else round(raw, digits),
                    'level': 'info' if raw < limit else 'error'
                }
            except Exception as ex:
                # Best effort: a single failing descriptor must not abort the
                # whole frame, so log it and fall back to the placeholder.
                logger.exception(ex)
                return {'value': '-', 'level': 'info'}

        cells = {column: [] for column, _, _, _ in checks}

        for row in range(df.shape[0]):
            m = Chem.MolFromSmiles(df.iat[row, smile_cols])
            for column, descriptor, limit, digits in checks:
                cells[column].append(build_cell(m, descriptor, limit, digits))

        # Columns are attached in the original order so the resulting column
        # layout of ``df`` is unchanged.
        for column in ('Molecular Weight', 'LogP', 'H-Bond Donors',
                       'H-Bond Acceptors', 'Rotatable Bonds', 'QED'):
            df[column] = cells[column]

        return df