示例#1
0
 def testIssue2183420(self):
   """Regression test for a problem with the H-bond acceptor definition.

   Every SMILES below is parsed and the value reported by
   ``Lipinski.NumHAcceptors`` is compared with the expected count.
   """
   expected_counts = (
     ('NC', 1),
     ('CNC', 1),
     ('CN(C)C', 1),
     ('NC(=O)', 1),
     ('NC(=O)C', 1),
     ('CNC(=O)', 1),
     ('CNC(=O)C', 1),
     ('O=CNC(=O)C', 2),
     ('O=C(C)NC(=O)C', 2),
   )
   for smiles, n_acceptors in expected_counts:
     mol = Chem.MolFromSmiles(smiles)
     self.assertTrue(Lipinski.NumHAcceptors(mol) == n_acceptors)
示例#2
0
def score_molecule(smiles):
    """Score a SMILES string against the rule-of-five style limits.

    One point is awarded for each descriptor (logP, molecular weight,
    H-bond donors, H-bond acceptors, rotatable bonds) that is strictly
    below the corresponding limit on ``LipinskiRuleOfFiveDecorator``.

    Parameters:
        smiles: SMILES string of the molecule to score.

    Returns:
        A ``(lipinski_score, qed)`` tuple. If anything fails while the
        descriptors are computed, the exception is logged, the score is
        reset to 0 and ``qed`` keeps its sentinel value ``MAX_QED + 1``.
    """
    limits = LipinskiRuleOfFiveDecorator
    lipinski_score = 0
    qed = limits.MAX_QED + 1

    try:
        m = Chem.MolFromSmiles(smiles)

        # The acceptor count used to be compared against MAX_H_DONORS. Use the
        # dedicated acceptor limit when the class defines one and fall back to
        # the previous behaviour otherwise.
        # NOTE(review): confirm LipinskiRuleOfFiveDecorator.MAX_H_ACCEPTORS exists.
        max_h_acceptors = getattr(limits, 'MAX_H_ACCEPTORS', limits.MAX_H_DONORS)

        checks = (
            (Descriptors.MolLogP, limits.MAX_LOGP),
            (Descriptors.MolWt, limits.MAX_MOL_WT),
            (Lipinski.NumHDonors, limits.MAX_H_DONORS),
            (Lipinski.NumHAcceptors, max_h_acceptors),
            (Lipinski.NumRotatableBonds, limits.MAX_ROTATABLE_BONDS),
        )
        for descriptor, limit in checks:
            if descriptor(m) < limit:
                lipinski_score += 1

        qed = QED.qed(m)
    except Exception as ex:
        # Best effort: any failure (e.g. an unparsable SMILES) yields score 0.
        lipinski_score = 0
        logger.exception(ex)

    return lipinski_score, qed
示例#3
0
def lipinski(smiles, verbose=False):
    """Compute four Lipinski descriptors for each SMILES string.

    Parameters:
        smiles: iterable of SMILES strings.
        verbose: unused; kept for backward compatibility.

    Returns:
        pandas.DataFrame with one row per input SMILES and the float
        columns ``MW``, ``LogP``, ``NumHDonors`` and ``NumHAcceptors``.
        An empty input yields an empty frame with those four columns.
    """
    column_names = ["MW", "LogP", "NumHDonors", "NumHAcceptors"]

    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        rows.append([
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHAcceptors(mol),
        ])

    # Force a 2-D (n, 4) array so that zero or one molecules still produce a
    # correctly shaped table; the previous vstack-based code handed a 1-D
    # array to DataFrame in those cases and raised a shape error.
    data = np.array(rows, dtype=float).reshape(-1, len(column_names))
    return pd.DataFrame(data=data, columns=column_names)
示例#4
0
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  """Turn a molecule into a row (list) ready for database insertion.

  Parameters:
    mol: the molecule to process; a falsy value raises ValueError.
    typeConversions: indexable by column-type code; element ``[code][1]`` is
      called with the property string to convert it.
    globalProps: dict mapping property name -> column-type code; updated in
      place with the type code that worked for each property.
    nDone: counter used to build a fallback name ``Mol_<nDone>``.
    nameProp: molecule property holding the compound name.
    nameCol: name of the id column; a property with this name
      (case-insensitive) is skipped.
    redraw: recompute 2D coordinates for the molecule.
    keepHs: if true, ``Chem.SanitizeMol`` is called on the molecule.
    skipProps: do not collect molecule properties.
    addComputedProps: add donor/acceptor/rotatable-bond counts, molecular
      weight and logP as molecule properties before collecting them.
    skipSmiles: do not add the SMILES to the row.
    uniqNames: if true, a molecule whose name is already in ``namesSeen``
      is skipped (``namesSeen`` must then be provided).
    namesSeen: optional set of names already processed; updated in place.

  Returns:
    ``[name, (smiles,) binary, propertyDict]`` or None for a skipped
    duplicate.

  Raises:
    ValueError: if ``mol`` is falsy.
  """
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None
  if not nm:
    nm = 'Mol_%d'%nDone
  if uniqNames and nm in namesSeen:
    logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
    return None
  # namesSeen defaults to None; only record the name when a set was supplied.
  if namesSeen is not None:
    namesSeen.add(nm)
  row = [nm]
  pD={}
  if not skipProps:
    if addComputedProps:
      nHD=Lipinski.NumHDonors(mol)
      mol.SetProp('DonorCount',str(nHD))
      nHA=Lipinski.NumHAcceptors(mol)
      mol.SetProp('AcceptorCount',str(nHA))
      nRot=Lipinski.NumRotatableBonds(mol)
      mol.SetProp('RotatableBondCount',str(nRot))
      MW=Descriptors.MolWt(mol)
      mol.SetProp('AMW',str(MW))
      logp=Crippen.MolLogP(mol)
      mol.SetProp('MolLogP',str(logp))

    for pn in list(mol.GetPropNames()):
      if pn.lower()==nameCol.lower(): continue
      pv = mol.GetProp(pn).strip()
      if pv.find('>')<0 and pv.find('<')<0:
        # Start from the last known type code for this property (default 2)
        # and step down until a conversion succeeds or code 0 is reached.
        colTyp = globalProps.get(pn,2)
        while colTyp>0:
          try:
            typeConversions[colTyp][1](pv)
          except Exception:
            colTyp-=1
          else:
            break
        globalProps[pn]=colTyp
        pD[pn]=typeConversions[colTyp][1](pv)
      else:
        # Values containing '<' or '>' are stored as raw strings.
        pD[pn]=pv
  if redraw:
    # Previously referenced an undefined name ``m``.
    AllChem.Compute2DCoords(mol)
  if not skipSmiles:
    row.append(Chem.MolToSmiles(mol,True))
  row.append(DbModule.binaryHolder(mol.ToBinary()))
  row.append(pD)
  return row
示例#5
0
def get_ro5_from_mol(mol):
    """
    Evaluate Lipinski's rule of five criteria for a molecule.

    Each criterion (exact molecular weight <= 500, logP <= 5, H-bond donors
    <= 5, H-bond acceptors <= 10) is reported as 1 when fulfilled and 0
    otherwise. The overall ``lipinski`` flag is 1 when at least three of the
    four criteria are fulfilled.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule.

    Returns
    -------
    pd.Series
        Flags indexed by ``mw``, ``logp``, ``hbd``, ``hba`` and ``lipinski``.
    """
    criteria = [
        ("mw", Descriptors.ExactMolWt(mol) <= 500),
        ("logp", Descriptors.MolLogP(mol) <= 5),
        ("hbd", Lipinski.NumHDonors(mol) <= 5),
        ("hba", Lipinski.NumHAcceptors(mol) <= 10),
    ]
    labels = [label for label, _ in criteria]
    flags = [1 if fulfilled else 0 for _, fulfilled in criteria]

    # The rule tolerates a single violation.
    flags.append(1 if sum(flags) >= 3 else 0)
    labels.append("lipinski")

    return pd.Series(flags, index=labels)
示例#6
0
def check_ligand(file_path):
    """Tell whether any molecule in an SD file passes the drug-likeness checks.

    A molecule passes when it fulfils all four rule components (H-bond
    donors <= 5, H-bond acceptors <= 10, exact mass <= 500, logP <= 5) and
    at least four of the five variant criteria (logP, molar refractivity,
    exact mass, heavy atom count and TPSA ranges).

    Returns False when ``file_path`` is not an existing file or when no
    readable molecule passes.
    """
    accepted = False
    if not os.path.isfile(file_path):
        return accepted

    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            continue

        log_p = Crippen.MolLogP(mol)
        exact_mass = Descriptors.ExactMolWt(mol)

        # components of rule
        component_checks = (
            Lipinski.NumHDonors(mol) <= 5,
            Lipinski.NumHAcceptors(mol) <= 10,
            exact_mass <= 500,
            log_p <= 5,
        )

        # variants
        variant_checks = (
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_mass <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        )

        # All four components and four or five of the variants are required.
        if all(component_checks) and sum(variant_checks) >= 4:
            accepted = True

    return accepted
示例#7
0
    def calc_lipinski(self, mol):
        """
        Run the Lipinski check on a molecule.

        Returns:     a tuple consisting of:
            - a boolean, True when all four limits below are met
            - a dictionary with the computed descriptor values

        Limits applied:
            - Hydrogen bond donors <= 5
            - Hydrogen bond acceptors <= 10
            - Molecular weight < 500 daltons
            - logP (rounded to 4 decimals) < 5
        """
        report = {
            'hydrogen_bond_donors': Lipi.NumHDonors(mol),
            'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
            'molecular_weight': Descriptors.MolWt(mol),
            'logp': round(Crippen.MolLogP(mol), 4),
        }

        passed = (report['hydrogen_bond_donors'] <= 5
                  and report['hydrogen_bond_acceptors'] <= 10
                  and report['molecular_weight'] < 500
                  and report['logp'] < 5)

        return (passed, report)
示例#8
0
def get_filter_values(mol):
    """
    Collect, for one molecule, every value the filters look at.

    Returns a dictionary with descriptor values, atom statistics, the list
    of distinct element symbols, plus the bookkeeping entries ``is_good``
    (initially True) and ``rejections`` (initially empty).
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # every bond that is not rotatable is counted as rigid
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    values["charge"] = rdmolops.GetFormalCharge(mol)

    n_carbons, n_charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = n_carbons
    values["num_charges"] = n_charges
    values["max_ring_size"] = largest_ring

    try:
        hc_ratio = float(values["num_hetero_atoms"]) / float(values["num_carbons"])
    except ZeroDivisionError:
        hc_ratio = 100000000  # sentinel used when the molecule has no carbons
    values["hc_ratio"] = hc_ratio

    # number of BRICS bonds, used as a complexity indicator
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    values["is_good"] = True  # nothing has rejected the molecule yet

    # distinct element symbols present in the molecule
    symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
    values["atoms"] = list(set(symbols))

    values["num_chiral_centers"] = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    values["rejections"] = []  # filled later with the reasons for rejection

    return values
示例#9
0
def get_descriptors(mol, write=False):
    """Return a list of 20 descriptor values for ``mol``.

    The descriptors are evaluated in the fixed order given below; any NaN
    result is replaced by 0. ``write`` is not used.
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )

    cleaned = []
    for calculate in calculators:
        value = calculate(mol)
        # NaN is the only value that differs from itself
        cleaned.append(0 if value != value else value)
    return cleaned
示例#10
0
 def veber_infraction(molecule: Chem.Mol) -> bool:
   """
   Report whether a molecule violates the Veber-style limits used here.

   Returns True when the molecule has more than 10 rotatable bonds, or when
   its H-bond acceptors and donors together exceed 10.
   """
   # NOTE(review): the published Veber rule is usually quoted with a limit of
   # 12 for donors + acceptors; this code uses 10 - confirm it is intended.
   n_rotatable = Lipinski.NumRotatableBonds(molecule)
   n_hbond_partners = Lipinski.NumHAcceptors(molecule) + Lipinski.NumHDonors(molecule)
   return n_rotatable > 10 or n_hbond_partners > 10
示例#11
0
    def test1(self):
        """Compare computed descriptors with the values stored in the SD file.

        For every readable molecule in ``self.inFileName`` each descriptor is
        recomputed and must equal the integer stored in the matching property.
        """
        # figure out which rotor version Lipinski.NumRotatableBonds is using
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        rot_prop = NonStrict if Lipinski.NumRotatableBonds(probe) == 2 else Strict

        rotor_options = rdMolDescriptors.NumRotatableBondsOptions

        def non_strict_rotors(mol):
            return rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.NonStrict)

        def strict_rotors(mol):
            return rdMolDescriptors.CalcNumRotatableBonds(mol, rotor_options.Strict)

        # (descriptor function, reference property, failure message template)
        checks = (
            (Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHDonors, 'NUM_HDONORS',
             'bad num h donors for mol %d (%s): %d != %d'),
            (Lipinski.NumHAcceptors, 'NUM_HACCEPTORS',
             'bad num h acceptors for mol %d (%s): %d != %d'),
            (Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS',
             'bad num heteroatoms for mol %d (%s): %d != %d'),
            (Lipinski.NumRotatableBonds, rot_prop,
             'bad num rotors for mol %d (%s): %d != %d'),
            # test the underlying numrotatable bonds
            (non_strict_rotors, NonStrict,
             'bad num rotors for mol %d (%s): %d != %d'),
            (strict_rotors, Strict,
             'bad num rotors for mol %d (%s): %d != %d'),
        )

        # idx counts every record, including the ones that failed to parse
        for idx, m in enumerate(Chem.SDMolSupplier(self.inFileName), 1):
            if not m:
                continue
            for compute, prop_name, template in checks:
                calc = compute(m)
                orig = int(m.GetProp(prop_name))
                assert calc == orig, template % (idx, m.GetProp('SMILES'), calc, orig)
示例#12
0
def CalculateHacceptorNumber(mol):
    """
    Count the hydrogen bond acceptors of a molecule.
        Parameters:
            mol: rdkit molecule
        Returns:
            the value reported by LPK.NumHAcceptors for ``mol``
    """
    n_acceptors = LPK.NumHAcceptors(mol)
    return n_acceptors
def lipinski(smile):
    """Return (H-bond donors, H-bond acceptors, molecular weight, logP) for a SMILES string."""
    # Parse the SMILES into an RDKit molecule
    molecule = Chem.MolFromSmiles(smile)

    weight = Descriptors.MolWt(molecule)
    log_p = Descriptors.MolLogP(molecule)
    n_donors = Lipinski.NumHDonors(molecule)
    n_acceptors = Lipinski.NumHAcceptors(molecule)

    return n_donors, n_acceptors, weight, log_p
def mole_proper(mol):
    """Return a 6-tuple of properties for ``mol``.

    The tuple holds, in order: H-bond donors, H-bond acceptors, rotatable
    bonds, molecular weight, Crippen logP and TPSA.
    """
    return (
        Lipinski.NumHDonors(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumRotatableBonds(mol),
        Descriptors.MolWt(mol),
        Crippen.MolLogP(mol),
        Descriptors.TPSA(mol),
    )
示例#15
0
def get_phys_fp(compound):
    """Build a four-element physicochemical fingerprint for a compound record.

    ``compound`` is indexed with the keys ``mol_weight``, ``dsstox_sid`` and
    ``smiles``. The result is ``[weight / 500, logp / 10, donors / 5,
    acceptors / 10]``; a falsy logP from ``get_logp`` is passed through
    unscaled.
    """
    weight_term = compound['mol_weight'] / 500

    logp_term = get_logp(compound['dsstox_sid'])
    if logp_term:
        logp_term = logp_term / 10

    mol = chm.MolFromSmiles(compound['smiles'])
    donor_term = lip.NumHDonors(mol) / 5
    acceptor_term = lip.NumHAcceptors(mol) / 10

    return [weight_term, logp_term, donor_term, acceptor_term]
示例#16
0
def CalculateNumHAcceptors(mol):
    """
    Calculate the number of hydrogen bond acceptors.
    --->nHA

    :param mol: molecule
    :type mol: rdkit.Chem.rdchem.Mol
    :return: the number of hydrogen bond acceptors
    :rtype: int

    """
    return Lipinski.NumHAcceptors(mol)
 def descriptors(self, mol):
     """Return an 11-element descriptor list for ``mol``.

     Order: aromatic fraction (``self.arofrac``), exact molecular weight,
     valence electrons, H-bond acceptors, H-bond donors, N/O count, NH/OH
     count, rotatable bonds, fraction of sp3 carbons, Crippen logP, and the
     number of matches of the SMARTS pattern ``[^1]``.
     """
     return [
         self.arofrac(mol),
         Descriptors.ExactMolWt(mol, False),
         Descriptors.NumValenceElectrons(mol),
         Lipinski.NumHAcceptors(mol),
         Lipinski.NumHDonors(mol),
         Lipinski.NOCount(mol),
         Lipinski.NHOHCount(mol),
         Lipinski.NumRotatableBonds(mol),
         Lipinski.FractionCSP3(mol),
         Crippen.MolLogP(mol),
         len(mol.GetSubstructMatches(Chem.MolFromSmarts('[^1]'))),
     ]
示例#18
0
def filter(mol, type = "frags"):
    """Decide whether a molecule passes a simple property filter.

    With ``type == "frags"`` the molecule needs at most 8 H-bond donors, at
    most 8 acceptors, at least one ring and a molecular weight of at most
    800. For any other ``type`` the limits are 5 donors, 5 acceptors and a
    weight of at most 500.

    Returns a bool.
    """
    n_donors = Lipinski.NumHDonors(mol)
    n_acceptors = Lipinski.NumHAcceptors(mol)
    n_rings = len(Chem.GetSymmSSSR(mol))
    weight = Chem.Descriptors.MolWt(mol)

    if type != "frags":
        return n_donors <= 5 and n_acceptors <= 5 and weight <= 500

    return n_donors <= 8 and n_acceptors <= 8 and n_rings >= 1 and weight <= 800
示例#19
0
def check_lipinski(mol):
    """Check ``mol.rdmol`` against donor, acceptor and logP limits.

    Returns ``(True, False)`` when H-bond donors <= 5, acceptors <= 5 and
    logP < 5, otherwise ``(False, False)``. When the check passes and the
    molecular weight is at least 450, a random ``terminal_fg`` functional
    group is joined to ``mol`` first.
    """
    # NOTE(review): the second element of the result is always False - confirm
    # against the callers that this is intended.
    functional_groups = load_functional_groups()

    n_donors = Lipinski.NumHDonors(mol.rdmol)
    n_acceptors = Lipinski.NumHAcceptors(mol.rdmol)
    partition = MolLogP(mol.rdmol)
    weight = MolWt(mol.rdmol)

    within_limits = n_donors <= 5 and n_acceptors <= 5 and partition < 5
    if not within_limits:
        return False, False

    if weight >= 450:
        mol.join(functional_groups['terminal_fg'].get_random())
    return True, False
示例#20
0
    def calcScore(self, m, smi):
        """Reset ``self.value`` to 0.0 and report whether ``m`` passes the filter.

        The molecule passes when its molecular weight lies within [100, 700],
        it has at most 5 H-bond donors and at most 10 H-bond acceptors.
        ``smi`` is not used.
        """
        self.value = 0.0

        weight = Descriptors.MolWt(m)
        if weight < 100 or weight > 700:
            return False

        n_donors = Lipinski.NumHDonors(m)
        n_acceptors = Lipinski.NumHAcceptors(m)

        return not (n_donors > 5 or n_acceptors > 10)
示例#21
0
def lipinski_trial(smiles):
    '''
    Check a SMILES string against Lipinski's rules.

    Returns a ``(passed, failed)`` tuple of message lists: one message per
    rule, placed in ``passed`` or ``failed`` depending on the outcome.
    Raises Exception when the SMILES string cannot be parsed.

    Lipinski's rules are:
    Hydrogen bond donors <= 5
    Hydrogen bond acceptors <= 10
    Molecular weight < 500 daltons
    logP < 5
    '''
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise Exception('%s is not a valid SMILES string' % smiles)

    num_hdonors = Lipinski.NumHDonors(mol)
    num_hacceptors = Lipinski.NumHAcceptors(mol)
    mol_weight = Descriptors.MolWt(mol)
    mol_logp = Crippen.MolLogP(mol)

    # (rule violated?, message when violated, message when satisfied, value)
    rules = (
        (num_hdonors > 5,
         'Over 5 H-bond donors, found %s',
         'Found %s H-bond donors',
         num_hdonors),
        (num_hacceptors > 10,
         'Over 10 H-bond acceptors, found %s',
         'Found %s H-bond acceptors',
         num_hacceptors),
        (mol_weight >= 500,
         'Molecular weight over 500, calculated %s',
         'Molecular weight: %s',
         mol_weight),
        (mol_logp >= 5,
         'Log partition coefficient over 5, calculated %s',
         'Log partition coefficient: %s',
         mol_logp),
    )

    passed, failed = [], []
    for violated, fail_template, pass_template, value in rules:
        if violated:
            failed.append(fail_template % value)
        else:
            passed.append(pass_template % value)

    return passed, failed
    def run_filter(self, mol):
        """
        Apply the Strict Lipinski filter: the molecule must satisfy every
        one of the limits below.

        Limits:
            MW (exact): Max 500 dalton
            Number of H donors: Max 5
            Number of H acceptors: Max 10
            logP Max +5.0

        If you use the Lipinski Filter please cite: C.A. Lipinski et al.
        Experimental and computational approaches to estimate solubility and
        permeability in drug discovery and development settings Advanced Drug
        Delivery Reviews, 46 (2001), pp. 3-26

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool bool: True if the mol passes the filter; False if it
          fails the filter
        """

        # The checks run in this order and stop at the first violation, so
        # later descriptors are only computed when the earlier ones pass.
        violates_any = (
            Descriptors.ExactMolWt(mol) > 500
            or Lipinski.NumHDonors(mol) > 5
            or Lipinski.NumHAcceptors(mol) > 10
            or Crippen.MolLogP(mol) > 5
        )
        return not violates_any
示例#23
0
def ProcessMol(session,
               mol,
               globalProps,
               nDone,
               nameProp='_Name',
               nameCol='compound_id',
               redraw=False,
               keepHs=False,
               skipProps=False,
               addComputedProps=False,
               skipSmiles=False):
    """Create a ``Compound`` for a molecule and add it to the session.

    Parameters:
        session: object whose ``add`` method receives the new compound.
        mol: the molecule to process; a falsy value raises ValueError.
        globalProps: container of property names; only molecule properties
            found in it are copied onto the compound (as lower-cased
            attributes).
        nDone: counter used to build a fallback name ``Mol_<nDone>``.
        nameProp: molecule property holding the compound name.
        nameCol: compound attribute that receives the name; a property with
            this name (case-insensitive) is skipped.
        redraw: recompute 2D coordinates for the molecule.
        keepHs: if true, ``Chem.SanitizeMol`` is called on the molecule.
        skipProps: do not copy any properties.
        addComputedProps: store donor/acceptor/rotatable-bond counts,
            molecular weight and logP on the compound.
        skipSmiles: do not store the SMILES.

    Returns:
        The populated ``Compound`` instance.

    Raises:
        ValueError: if ``mol`` is falsy.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # Previously referenced an undefined name ``m``.
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()
    setattr(cmpd, nameCol, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameCol.lower():
                continue
            pv = mol.GetProp(pn).strip()
            if pn in globalProps:
                setattr(cmpd, pn.lower(), pv)
    return cmpd
示例#24
0
def in_Ro5(mol):
    """
    Test whether a molecule is in Lipinski "Rule of 5" space, meaning
    - 5 or fewer H bond donors
    - 10 or fewer H bond acceptors
    - MW <= 500 Da
    - logP < 5

    Returns True only when all four conditions hold.
    """
    within_limits = (
        Lipinski.NumHDonors(mol) <= 5,
        Lipinski.NumHAcceptors(mol) <= 10,
        Descriptors.MolWt(mol) <= 500,
        Descriptors.MolLogP(mol) < 5,
    )
    return all(within_limits)
    def run_filter(self, mol):
        """
        Apply the Lenient Lipinski filter: the molecule may violate at most
        one of the limits below.

        Limits:
            MW (exact): Max 500 dalton
            Number of H donors: Max 5
            Number of H acceptors: Max 10
            logP Max +5.0

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool bool: True if the mol passes the filter; False if it
            fails the filter
        """

        # One flag per limit; True marks a violation.
        violations = (
            Descriptors.ExactMolWt(mol) > 500,
            Lipinski.NumHDonors(mol) > 5,
            Lipinski.NumHAcceptors(mol) > 10,
            Crippen.MolLogP(mol) > 5,
        )

        # Two or more violations fail the filter.
        return sum(violations) < 2
示例#26
0
def CalculateHacceptorNumber(mol):
    """
    #################################################################
    Count the hydrogen bond acceptors in a molecule.

    ---->naccr

    Usage:

        result=CalculateHacceptorNumber(mol)

        Input: mol is a molecule object.

        Output: result is the value reported by LPK.NumHAcceptors.
    #################################################################
    """
    n_acceptors = LPK.NumHAcceptors(mol)
    return n_acceptors
示例#27
0
    def calculate_properties_from_mol(self):
        """
        Compute basic molecular properties with RDKit and store them on the instance.

        The molecule is built from ``self.smiles`` and named after
        ``self.sequence``. Nothing is returned; the following attributes are
        set: ``num_hdonors``, ``num_hacceptors``, ``mol_weight`` and
        ``mol_logp`` (Crippen logP).
        """

        # Build the molecule and tag it with the sequence
        mol = Chem.MolFromSmiles(self.smiles)
        mol.SetProp("_Name", self.sequence)

        # Attribute name -> descriptor function, evaluated in this order
        descriptor_table = (
            ("num_hdonors", Lipinski.NumHDonors),
            ("num_hacceptors", Lipinski.NumHAcceptors),
            ("mol_weight", Descriptors.MolWt),
            ("mol_logp", Crippen.MolLogP),
        )
        for attribute, compute in descriptor_table:
            setattr(self, attribute, compute(mol))
示例#28
0
def PhyChem(smiles):
    """ Calculate 19 physicochemical descriptors for each molecule and
    rescale them with ``Scaler().fit_transform``.

    A molecule for which any descriptor raises is printed and contributes a
    row of 19 zeros.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors.
            m is the No. of samples
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            row = [
                desc.MolWt(mol),                       # MW
                Crippen.MolLogP(mol),                  # LOGP
                Lipinski.NumHAcceptors(mol),           # HBA
                Lipinski.NumHDonors(mol),              # HBD
                Lipinski.NumRotatableBonds(mol),       # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),        # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),   # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),          # hetero atoms
                Lipinski.HeavyAtomCount(mol),          # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),        # spiro atoms
                AllChem.CalcFractionCSP3(mol),         # fraction of sp3 carbons
                Lipinski.RingCount(mol),               # rings
                AllChem.CalcNumAliphaticRings(mol),    # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),     # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),    # saturated rings
                AllChem.CalcNumHeterocycles(mol),      # heterocycles
                MolSurf.TPSA(mol),                     # TPSA
                desc.NumValenceElectrons(mol),         # valence electrons
                Crippen.MolMR(mol),                    # molar refractivity
            ]
        except Exception:
            print(smile)
            row = [0] * 19
        rows.append(row)

    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
示例#29
0
def CalculateNumHAcceptors(mol):
    """
    #################################################################
    Calculate the number of hydrogen bond acceptors

    ---->nHA

    Usage:

        result = CalculateNumHAcceptors(mol)

        Input: mol is a molecule object

        Output: result is the value reported by Lipinski.NumHAcceptors
    #################################################################
    """
    return Lipinski.NumHAcceptors(mol)
示例#30
0
def properties(fnames, labels, is_active=False):
    """ Compute six structural counts for every molecule in each given file.

    The counts are the numbers of hydrogen bond acceptors, hydrogen bond
    donors, rotatable bonds, aliphatic rings, aromatic rings and
    heterocycles.

    Each file is read with ``pd.read_table``. If it has a ``SCORE`` column,
    rows with SCORE > 0.5 (``is_active``) or SCORE > 0 are kept; otherwise,
    if it has a ``PCHEMBL_VALUE`` column, rows with a value >= 6.5
    (``is_active``) or >= 0 are kept. Duplicated ``CANONICAL_SMILES`` are
    dropped and files with more than 1e5 remaining rows are randomly sampled
    down to 1e5.

    Arguments:
        fnames (list): the file path of molecules.
        labels (list): the label for each file in the fnames.
        is_active (bool, optional): apply the stricter activity thresholds
            described above. (Default: False)

    Returns:
        df (DataFrame): the table contains three columns; 'Set' is the label
            of fname the molecule belongs to, 'Property' is the name of one
            of the six properties, 'Number' is the property value.
    """

    def _structural_counts(mol):
        # Yields (property label, value) pairs one at a time, in table order.
        yield 'Hydrogen Bond\nAcceptor', Lipinski.NumHAcceptors(mol)
        yield 'Hydrogen\nBond Donor', Lipinski.NumHDonors(mol)
        yield 'Rotatable\nBond', Lipinski.NumRotatableBonds(mol)
        yield 'Aliphatic\nRing', AllChem.CalcNumAliphaticRings(mol)
        yield 'Aromatic\nRing', Lipinski.NumAromaticRings(mol)
        yield 'Heterocycle', AllChem.CalcNumHeterocycles(mol)

    max_rows = int(1e5)
    records = []
    for i, fname in enumerate(fnames):
        table = pd.read_table(fname)
        if 'SCORE' in table.columns:
            table = table[table.SCORE > (0.5 if is_active else 0)]
        elif 'PCHEMBL_VALUE' in table.columns:
            table = table[table.PCHEMBL_VALUE >= (6.5 if is_active else 0)]
        table = table.drop_duplicates(subset='CANONICAL_SMILES')
        if len(table) > max_rows:
            table = table.sample(max_rows)
        for smile in tqdm(table.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smile)
            for prop_name, count in _structural_counts(mol):
                records.append([labels[i], prop_name, count])

    return pd.DataFrame(records, columns=['Set', 'Property', 'Number'])