Exemplo n.º 1
0
 def testDetails(self):
     """Run the detail comparison on the file named by ``self.detailName``.

     The file is opened in binary mode and the stream is handed to
     ``self._doDetailFile``.
     """
     Crippen._Init()
     # Developer switch (was a literal ``if 0:``): set to True to also pass
     # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
     regenerate = False
     with open(self.detailName, 'rb') as inF:
         if regenerate:
             # The context manager closes the dump file; previously the
             # handle was left open when this branch was enabled.
             with open('tmp.pkl', 'wb+') as outF:
                 self._writeDetailFile(inF, outF)
         self._doDetailFile(inF)
Exemplo n.º 2
0
 def testIssue80(self):
     """Regression test: MolLogP must be unchanged by a Lipinski.NHOHCount call."""
     from rdkit.Chem import Lipinski
     m = Chem.MolFromSmiles('CCOC')
     ref = Crippen.MolLogP(m)
     # Return value intentionally ignored; the call is made only to see
     # whether it perturbs the next logP calculation on the same molecule.
     Lipinski.NHOHCount(m)
     probe = Crippen.MolLogP(m)
     # assertEqual replaces the failUnless alias (removed in Python 3.12)
     # and reports both values when the comparison fails.
     self.assertEqual(probe, ref)
Exemplo n.º 3
0
def canonicalize(smi_list, showprogress=False):
    """Canonicalise, de-duplicate and filter a collection of SMILES strings.

    Each input is parsed with ``MolFromSmiles``; entries that fail to parse
    (``None``) are dropped and the rest are rewritten with ``MolToSmiles``.
    Duplicates are removed, then only structures with more than 17 heavy
    atoms and a Crippen logP of at most 5 are kept.

    :param smi_list: iterable of SMILES strings.
    :param showprogress: when true, print status lines and wrap both loops
        in ``tqdm``.
    :returns: list of SMILES strings that passed both filters, in
        first-seen order.
    """
    def _progress(items):
        # tqdm is only touched when progress output was requested.
        return tqdm(items) if showprogress else items

    if showprogress:
        print('Canonicalising mols')
    canonical = []
    for smi in _progress(smi_list):
        mol = MolFromSmiles(smi)
        if mol is not None:
            canonical.append(MolToSmiles(mol))

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result no longer depends on set iteration order.
    unique = list(dict.fromkeys(canonical))

    if showprogress:
        print('Size of unfiltered final library: {}'.format(len(unique)))
        print('Filtering by n_heavy and logP:')
    final_list = []
    for smi in _progress(unique):
        mol = MolFromSmiles(smi)
        # logP is only computed for molecules that pass the size check.
        if mol.GetNumHeavyAtoms() > 17 and Crippen.MolLogP(mol) <= 5:
            final_list.append(smi)
    return final_list
Exemplo n.º 4
0
def check_ligand(file_path):
    """Return True if any molecule in an SD file passes the rule checks.

    A molecule passes when all four "component" criteria hold
    (H-bond donors <= 5, H-bond acceptors <= 10, exact mass <= 500,
    Crippen logP <= 5) and at least four of the five "variant" criteria
    hold (-0.4 <= logP <= 5.6, 40 <= molar refractivity <= 130,
    180 <= exact mass <= 500, 20 <= heavy atoms <= 70, TPSA <= 140).

    :param file_path: path to an SD file read with ``Chem.SDMolSupplier``.
    :returns: ``True`` as soon as one molecule passes; ``False`` when the
        path is not a file or no molecule passes. Unreadable records
        (``None``) are skipped.
    """
    if not os.path.isfile(file_path):
        return False

    for mol in Chem.SDMolSupplier(file_path):
        if mol is None:
            continue

        # Shared by both rule sets, so compute them once per molecule.
        log_p = Crippen.MolLogP(mol)
        exact_mass = Descriptors.ExactMolWt(mol)

        # components of rule
        components_rank = sum((
            Lipinski.NumHDonors(mol) <= 5,
            Lipinski.NumHAcceptors(mol) <= 10,
            exact_mass <= 500,
            log_p <= 5,
        ))

        # variants
        variants_rank = sum((
            -0.4 <= log_p <= 5.6,
            40 <= Crippen.MolMR(mol) <= 130,
            180 <= exact_mass <= 500,
            20 <= Lipinski.HeavyAtomCount(mol) <= 70,
            MolSurf.TPSA(mol) <= 140,
        ))

        if components_rank == 4 and variants_rank in (4, 5):
            # One passing molecule decides the result; no need to read on.
            return True
    return False
Exemplo n.º 5
0
def pySlogP_VSA_(mol, bins=None, force=1):
  """ *Internal Use Only*

  Accumulate per-atom volume contributions (from ``_LabuteHelper``) into
  bins selected by each atom's first Crippen contribution value.

  With ``force`` false, a value cached on ``mol._slogpVSA`` is returned
  as-is, but only when every entry of it is truthy. The result is stored
  back on ``mol._slogpVSA``. ``bins`` defaults to the module-level
  ``logpBins``.
  """
  if not force and hasattr(mol, '_slogpVSA'):
    cached = mol._slogpVSA
    if cached.all():
      return cached

  edges = logpBins if bins is None else bins
  Crippen._Init()
  atomProps = Crippen._GetAtomContribs(mol, force=force)
  atomVols = _LabuteHelper(mol)

  totals = numpy.zeros(len(edges) + 1, 'd')
  for idx, contrib in enumerate(atomProps):
    # The volume list is read one position ahead of the atom index.
    vol = atomVols[idx + 1]
    if contrib is None:
      continue
    totals[bisect.bisect_right(edges, contrib[0])] += vol

  mol._slogpVSA = totals
  return totals
Exemplo n.º 6
0
def pySlogP_VSA_(mol,bins=None,force=1):
  """ *Internal Use Only*

  Bin the per-atom volumes returned by ``_LabuteHelper`` according to the
  first element of each atom's Crippen contribution.

  A result cached on ``mol._slogpVSA`` is reused only when ``force`` is
  false and all of its entries are truthy; the fresh result is written
  back to that attribute before being returned.
  """
  if not force and hasattr(mol, '_slogpVSA'):
    previous = mol._slogpVSA
    if previous.all():
      return previous

  if bins is None:
    bins = logpBins
  Crippen._Init()
  contribs = Crippen._GetAtomContribs(mol, force=force)
  volumes = _LabuteHelper(mol)

  result = numpy.zeros(len(bins) + 1, 'd')
  for atomIdx, entry in enumerate(contribs):
    # Volumes are indexed one past the atom index.
    atomVol = volumes[atomIdx + 1]
    if entry is not None:
      slot = bisect.bisect_right(bins, entry[0])
      result[slot] += atomVol

  mol._slogpVSA = result
  return result
Exemplo n.º 7
0
 def testDetails(self):
   """Run the detail comparison on the file named by ``self.detailName``.

   The file is opened in binary mode and the stream is handed to
   ``self._doDetailFile``.
   """
   Crippen._Init()
   # Developer switch (was a literal ``if 0:``): set to True to also pass
   # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
   regenerate = False
   with open(self.detailName, 'rb') as inF:
     if regenerate:
       # Closed by the context manager; the handle used to be leaked.
       with open('tmp.pkl', 'wb+') as outF:
         self._writeDetailFile(inF, outF)
     self._doDetailFile(inF)
Exemplo n.º 8
0
 def testDetails2(self):
     """Run the detail comparison on the file named by ``self.detailName2``.

     The file is opened in binary mode and the stream is handed to
     ``self._doDetailFile``.
     """
     Crippen._Init()
     # Developer switch (was a literal ``if 0:``): set to True to also pass
     # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
     regenerate = False
     # Context managers guarantee the reference file (and the optional dump
     # file) are closed even when the comparison fails.
     with open(self.detailName2, 'rb') as inF:
         if regenerate:
             with open('tmp.pkl', 'wb+') as outF:
                 self._writeDetailFile(inF, outF)
         self._doDetailFile(inF)
Exemplo n.º 9
0
 def testDetails2(self):
   """Run the detail comparison on the file named by ``self.detailName2``.

   The file is opened in binary mode and the stream is handed to
   ``self._doDetailFile``.
   """
   Crippen._Init()
   # Developer switch (was a literal ``if 0:``): set to True to also pass
   # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
   regenerate = False
   # Context managers guarantee the reference file (and the optional dump
   # file) are closed even when the comparison fails.
   with open(self.detailName2, 'rb') as inF:
     if regenerate:
       with open('tmp.pkl', 'wb+') as outF:
         self._writeDetailFile(inF, outF)
     self._doDetailFile(inF)
Exemplo n.º 10
0
 def testDetails2(self):
   """Run the detail comparison on ``self.detailName2`` with normalised newlines.

   The file is read in text mode, '\\r\\n' sequences are replaced by '\\n',
   and the UTF-8 encoded result is served to ``self._doDetailFile`` from an
   in-memory byte stream.
   """
   Crippen._Init()
   # The with-block closes the file; no explicit close() is needed.
   with open(self.detailName2,'r') as inTF:
     buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
   # Developer switch (was a literal ``if 0:``): set to True to also pass
   # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
   regenerate = False
   with io.BytesIO(buf) as inF:
     if regenerate:
       with open('tmp.pkl','wb+') as outF:
         self._writeDetailFile(inF,outF)
     self._doDetailFile(inF)
Exemplo n.º 11
0
 def testDetails2(self):
     """Run the detail comparison on ``self.detailName2`` with normalised newlines.

     The file is read in text mode, '\\r\\n' sequences are replaced by '\\n',
     and the UTF-8 encoded result is served to ``self._doDetailFile`` from
     an in-memory byte stream.
     """
     Crippen._Init()
     # The with-block closes the file; no explicit close() is needed.
     with open(self.detailName2, 'r') as inTF:
         buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
     # Developer switch (was a literal ``if 0:``): set to True to also pass
     # the stream through ``self._writeDetailFile`` into 'tmp.pkl'.
     regenerate = False
     with io.BytesIO(buf) as inF:
         if regenerate:
             with open('tmp.pkl', 'wb+') as outF:
                 self._writeDetailFile(inF, outF)
         self._doDetailFile(inF)
Exemplo n.º 12
0
    def _doDetailFile(self, inF, nFailsAllowed=1):
        """Compare computed Crippen atom contributions with pickled references.

        Reads ``(smiles, refContribs)`` records from ``inF`` with
        ``cPickle.load`` until EOF. For each parseable SMILES the molecule
        gets explicit hydrogens, its contributions are computed with
        ``Crippen._GetAtomContribs``, and the sorted values are compared to
        the sorted reference values using ``feq``.

        :param inF: binary stream of pickled records.
        :param nFailsAllowed: after every molecule the running number of
            mismatching molecules must be strictly below this value.
        """
        # NOTE(review): unpickling executes code embedded in the stream; only
        # use this with trusted reference files.
        verbose = 0
        nFails = 0
        while True:
            if verbose:
                print('---------------')
            try:
                smi, refContribs = cPickle.load(inF)
            except EOFError:
                break

            refContribs = [x[0] for x in refContribs]
            refOrder = numpy.argsort(refContribs)
            try:
                mol = Chem.MolFromSmiles(smi)
            except Exception:
                # Narrowed from a bare except so Ctrl-C still interrupts.
                import traceback
                traceback.print_exc()
                mol = None
            if not mol:
                # Single %-formatted argument prints identically under
                # Python 2 and Python 3.
                print('Problems with SMILES: %s' % (smi,))
                continue

            mol = Chem.AddHs(mol, 1)
            smi2 = Chem.MolToSmiles(mol)
            contribs = [x[0] for x in Crippen._GetAtomContribs(mol)]
            #
            #  we're comparing to the old results using the oelib code.
            #  Since we have some disagreements with them as to what is
            #  aromatic and what isn't, we may have different numbers of
            #  Hs. For the sake of comparison, just pop those off our
            #  new results.
            #
            del contribs[len(refContribs):]
            order = numpy.argsort(contribs)

            for i in range(len(refContribs)):
                refL = refContribs[refOrder[i]]
                val = contribs[order[i]]
                if not feq(refL, val):
                    print('%s (%s): %d %6.5f != %6.5f' % (
                        smi, smi2, order[i], refL, val))
                    # Recomputed with force=1; the result is not used here.
                    Crippen._GetAtomContribs(mol, force=1)
                    print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')
                    nFails += 1
                    break
            # assertTrue replaces the failUnless alias removed in Python 3.12.
            self.assertTrue(nFails < nFailsAllowed)
Exemplo n.º 13
0
    def run_filter(self, mol):
        """
        Apply a Ghose-style drug-likeness filter to a molecule.

        The molecule is copied and protonated first because hydrogens count
        towards the atom total in this implementation.

        To pass the filter a molecule must have:
            exact molecular weight between 160 and 480
            number of atoms (hydrogens included) between 20 and 70
            molar refractivity between 40 and 130
            logP between -0.4 and +5.6

        Inputs:
        :param rdkit.Chem.rdchem.Mol object mol: An rdkit mol object to be
            tested if it passes the filters

        Returns:
        :returns: bool: True if the mol passes every check; False as soon
            as one check fails
        """
        # Work on a protonated copy so other filters see the caller's
        # molecule unchanged.
        protonated = Chem.AddHs(copy.deepcopy(mol))

        def outside(value, low, high):
            # Same comparison shape as the per-property range checks.
            return (value < low) or (value > high)

        # Checks run in this order and stop at the first failure.
        if outside(Descriptors.ExactMolWt(protonated), 160, 480):
            return False
        if outside(protonated.GetNumAtoms(), 20, 70):
            return False
        if outside(Crippen.MolMR(protonated), 40, 130):
            return False
        if outside(Crippen.MolLogP(protonated), -0.4, 5.6):
            return False
        return True
Exemplo n.º 14
0
def logP(smile, train_smiles):
    """Return a Crippen logP score for a SMILES string, clipped to [0, 1].

    The raw ``Crippen.MolLogP`` value is passed through ``remap`` with the
    bounds below and the remapped value is clipped to the unit interval.

    :param smile: SMILES string of the molecule to score.
    :param train_smiles: accepted but not used by this function.
    :returns: the clipped score.
    """
    low_logp = -2.10799552492
    high_logp = 2.71567964162
    logp = Crippen.MolLogP(Chem.MolFromSmiles(smile))
    val = remap(logp, low_logp, high_logp)
    # Clip the remapped score. Clipping the raw logP (as before) threw the
    # remap result away.
    val = np.clip(val, 0.0, 1.0)
    return val
Exemplo n.º 15
0
def logP(smile, train_smiles=None):
    """Return a Crippen logP score for a SMILES string, clipped to [0, 1].

    The raw logP is passed through ``remap`` with fixed lower/upper bounds
    before clipping. ``train_smiles`` is accepted but not used.
    """
    lower, upper = -2.12178879609, 6.0429063424
    raw = Crippen.MolLogP(Chem.MolFromSmiles(smile))
    return np.clip(remap(raw, lower, upper), 0.0, 1.0)
Exemplo n.º 16
0
def CalculateMolLogP2(mol: Chem.Mol) -> float:
    """Calculate MolLogP^2.

    Squares the value returned by ``Crippen._pyMolLogP`` and rounds the
    result to three decimal places.

    From Wildman and G. M. Crippen JCICS _39_ 868-873 (1999).
    """
    log_p = Crippen._pyMolLogP(mol)
    squared = log_p * log_p
    return round(squared, 3)
Exemplo n.º 17
0
  def _testLogPLong2(self):
      """Check Crippen MolLogP (includeHs=1) against column 33 of
      'PP_descrs_regress.2.csv' via the class's ``__testDesc`` helper.
      """
      self.__testDesc('PP_descrs_regress.2.csv', 33,
                      lambda x: Crippen.MolLogP(x, includeHs=1))
Exemplo n.º 18
0
def generate(smiles):
    """Build a descriptor table for an iterable of SMILES strings.

    For every SMILES the molecule is parsed with ``Chem.MolFromSmiles`` and
    four values are computed: Crippen logP, molecular weight, number of
    rotatable bonds and the value of ``getAromaticProportion``.

    :param smiles: iterable of SMILES strings.
    :returns: ``pandas.DataFrame`` with one row per input and the columns
        MolLogP, MolWt, NumRotatableBonds, AromaticProportion. Empty input
        gives an empty frame with those columns.
    """
    columnNames = ["MolLogP", "MolWt", "NumRotatableBonds", "AromaticProportion"]

    # All molecules are parsed before any descriptor is computed.
    moldata = [Chem.MolFromSmiles(elem) for elem in smiles]

    rows = [
        [
            Crippen.MolLogP(mol),
            Descriptors.MolWt(mol),
            Lipinski.NumRotatableBonds(mol),
            getAromaticProportion(mol),
        ]
        for mol in moldata
    ]

    # Collecting rows and converting once keeps the array two-dimensional
    # for a single molecule (the old first-row special case produced a 1-D
    # array) and avoids re-copying the table on every vstack.
    if rows:
        baseData = np.array(rows)
    else:
        baseData = np.empty((0, len(columnNames)))

    return pd.DataFrame(data=baseData, columns=columnNames)
Exemplo n.º 19
0
def properties_mw_logp(filepaths, row_start=2000, row_stop=2355):
    """Collect (MW, logP, label) rows from CSV files into a DataFrame.

    Files whose path does not contain "generated" are expected to carry the
    molecular weight in column 2 and logP in column 3. For the other files
    column 0 is parsed as SMILES and both properties are computed. The label
    is the position of the file in ``filepaths``.

    :param filepaths: sequence of CSV file paths.
    :param row_start: first collected row to keep (default 2000, the
        previously hard-coded value).
    :param row_stop: end of the kept slice, exclusive (default 2355, the
        previously hard-coded value); ``None`` keeps everything to the end.
    :returns: ``pandas.DataFrame`` with columns MW, logP, Label.
    """
    properties = []

    for label, fname in enumerate(filepaths):
        with open(fname, 'r') as f:
            reader = csv.reader(f)

            if "generated" not in fname:
                for row in reader:
                    try:
                        properties.append([float(row[2]), float(row[3]), label])
                    except (ValueError, IndexError):
                        # Header, short or non-numeric rows are skipped; the
                        # blank line is the existing output for that case.
                        print("")
            else:
                for row in reader:
                    try:
                        mol = Chem.MolFromSmiles(row[0])
                        x, y = desc.MolWt(mol), Crippen.MolLogP(mol)
                        properties.append([x, y, label])
                    except Exception:
                        # An empty row has no column 0; guard so the handler
                        # cannot raise IndexError itself.
                        print("Non-Canonical SMILES: " + (row[0] if row else ""))

    df = pd.DataFrame(properties[row_start:row_stop], columns=['MW', 'logP', 'Label'])
    return df
Exemplo n.º 20
0
  def testRepeat(self):
    """Check that repeated MolLogP / MolMR calls still match the reference data.

    Each descriptor is computed twice on the same molecule and the second
    result is compared (via ``feq``) with the values loaded by
    ``self._readData``.
    """
    self._readData()
    for i, smi in enumerate(self.smis):
      mol = Chem.MolFromSmiles(smi)

      clog = self.clogs[i]
      # First call on purpose: the value checked is the repeated one.
      Crippen.MolLogP(mol)
      tmp = Crippen.MolLogP(mol)
      # assertTrue replaces the failUnless alias removed in Python 3.12;
      # the message previously contained a garbled "for".
      self.assertTrue(feq(clog,tmp),'bad logp for %s: %4.4f != %4.4f'%(smi,clog,tmp))

      mr = self.mrs[i]
      Crippen.MolMR(mol)
      tmp = Crippen.MolMR(mol)
      self.assertTrue(feq(mr,tmp),'bad MR for %s: %4.4f != %4.4f'%(smi,mr,tmp))
Exemplo n.º 21
0
def get_properties(mols):
    """Return a list of ``(molecular weight, Crippen logP)`` tuples.

    One tuple is produced per molecule in ``mols``, in input order; the
    iteration is wrapped in ``tqdm``.
    """
    return [(Descriptors.MolWt(mol), Crippen.MolLogP(mol)) for mol in tqdm(mols)]
Exemplo n.º 22
0
def get_filter_values(mol):
    """
    Compute the values used for filtering a molecule.

    Returns a dictionary of descriptors plus three bookkeeping entries:
    ``is_good`` (initialised to True), ``atoms`` (the distinct element
    symbols) and ``rejections`` (an empty list).
    """

    assert isinstance(mol, Chem.Mol)

    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": lip.NumRotatableBonds(mol),
    }
    # Every bond that is not rotatable is counted as rigid.
    values["rigid_bonds"] = mol.GetNumBonds() - values["rot_bonds"]
    values["num_rings"] = lip.RingCount(mol)
    values["num_hetero_atoms"] = lip.NumHeteroatoms(mol)
    values["charge"] = rdmolops.GetFormalCharge(mol)

    carbons, charges, biggest_ring = get_atom_props(mol)
    values["num_carbons"] = carbons
    values["num_charges"] = charges
    values["max_ring_size"] = biggest_ring

    try:
        ratio = float(values["num_hetero_atoms"]) / float(values["num_carbons"])
    except ZeroDivisionError:
        # No carbons at all: use a very large stand-in ratio.
        ratio = 100000000
    values["hc_ratio"] = ratio

    # Number of BRICS bonds.
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    # Starts out True.
    values["is_good"] = True
    # Distinct element symbols present in the molecule.
    values["atoms"] = list(set([atom.GetSymbol() for atom in mol.GetAtoms()]))
    values["num_chiral_centers"] = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    # Starts empty.
    values["rejections"] = []

    return values
Exemplo n.º 23
0
def LogP(smile):
    """Return the Crippen logP of a SMILES string, or the string 'NaN'.

    The input is converted with ``str`` first. Any error raised while
    parsing or scoring yields ``'NaN'`` instead of an exception.
    """
    smile = str(smile)
    try:
        m = Chem.MolFromSmiles(smile)
        return Crippen.MolLogP(m)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # are no longer turned into 'NaN'.
        return 'NaN'
Exemplo n.º 24
0
def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  """Turn a molecule into a row: [name, (smiles), binary holder, property dict].

  Arguments:
    - mol: the molecule; a falsy value raises ValueError.
    - typeConversions: indexable by column-type code; element [1] of each
      entry is called to convert a property string.
    - globalProps: dict mapping property name -> column-type code; updated
      in place with the type that converted successfully.
    - nDone: used to build the fallback name 'Mol_<nDone>'.
    - nameProp: property holding the molecule name.
    - nameCol: property name (case-insensitive) left out of the dict.
    - redraw: if true, 2D coordinates are computed for the molecule.
    - keepHs: if true, the molecule is sanitized first.
    - skipProps: if true, the property dict is left empty.
    - addComputedProps: if true, donor/acceptor/rotatable-bond counts,
      molecular weight and logP are stored as properties first.
    - skipSmiles: if true, the SMILES column is omitted from the row.
    - uniqNames / namesSeen: when namesSeen is provided, each name is added
      to it; if uniqNames is also true, an already-seen name is logged and
      None is returned.

  Returns the row list, or None for a skipped duplicate.
  """
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None
  if not nm:
    nm = 'Mol_%d'%nDone
  # namesSeen defaults to None; only consult and update it when the caller
  # supplied a container (previously the default crashed on .add()).
  if namesSeen is not None:
    if uniqNames and nm in namesSeen:
      logger.error('duplicate compound id (%s) encountered. second instance skipped.'%nm)
      return None
    namesSeen.add(nm)
  row = [nm]
  pD = {}
  if not skipProps:
    if addComputedProps:
      nHD = Lipinski.NumHDonors(mol)
      mol.SetProp('DonorCount',str(nHD))
      nHA = Lipinski.NumHAcceptors(mol)
      mol.SetProp('AcceptorCount',str(nHA))
      nRot = Lipinski.NumRotatableBonds(mol)
      mol.SetProp('RotatableBondCount',str(nRot))
      MW = Descriptors.MolWt(mol)
      mol.SetProp('AMW',str(MW))
      logp = Crippen.MolLogP(mol)
      mol.SetProp('MolLogP',str(logp))

    for pn in list(mol.GetPropNames()):
      if pn.lower() == nameCol.lower():
        continue
      pv = mol.GetProp(pn).strip()
      if '>' in pv or '<' in pv:
        # Values containing '<' or '>' are stored as plain text.
        pD[pn] = pv
        continue
      # Start from the type recorded so far (default 2) and step down
      # until a conversion succeeds or type 0 is reached.
      colTyp = globalProps.get(pn,2)
      while colTyp > 0:
        try:
          typeConversions[colTyp][1](pv)
        except Exception:
          colTyp -= 1
        else:
          break
      globalProps[pn] = colTyp
      pD[pn] = typeConversions[colTyp][1](pv)
  if redraw:
    # Was Compute2DCoords(m): 'm' is not defined in this function.
    AllChem.Compute2DCoords(mol)
  if not skipSmiles:
    row.append(Chem.MolToSmiles(mol,True))
  row.append(DbModule.binaryHolder(mol.ToBinary()))
  row.append(pD)
  return row
Exemplo n.º 25
0
    def calc_lipinski(self, mol):
        """
        Evaluate Lipinski's rules for a molecule.

        Returns:     a tuple consisting of:
            - a boolean indicating whether the molecule passed every check
            - a dictionary with the four values that were checked.

        NOTE:   the checks applied are:
            - Hydrogen bond donors <= 5
            - Hydrogen bond acceptors <= 10
            - Molecular weight < 500 daltons
            - logP < 5 (after rounding to 4 decimal places)
        """
        report = {
            'hydrogen_bond_donors': Lipi.NumHDonors(mol),
            'hydrogen_bond_acceptors': Lipi.NumHAcceptors(mol),
            'molecular_weight': Descriptors.MolWt(mol),
            'logp': round(Crippen.MolLogP(mol), 4),
        }
        passed = (report['hydrogen_bond_donors'] <= 5
                  and report['hydrogen_bond_acceptors'] <= 10
                  and report['molecular_weight'] < 500
                  and report['logp'] < 5)
        return (passed, report)
Exemplo n.º 26
0
    def water_octanol_partition_coefficient_scores(mols, norm=False):
        """Return an array of Crippen logP scores, one per entry in ``mols``.

        ``None`` molecules, and molecules for which the guarded calculation
        yields ``None``, score -3. With ``norm`` true the scores are remapped
        with fixed bounds and clipped to [0, 1].
        """
        raw = []
        for mol in mols:
            if mol is None:
                raw.append(None)
            else:
                raw.append(MolecularMetrics._avoid_sanitization_error(lambda: Crippen.MolLogP(mol)))

        scores = np.array([-3 if value is None else value for value in raw])
        if norm:
            scores = np.clip(MolecularMetrics.remap(scores, -2.12178879609, 6.0429063424), 0.0, 1.0)
        return scores
Exemplo n.º 27
0
def properties(mol):
    """
  Compute the property set needed for the QED descriptor.

  Hydrogens are removed first; the values are returned as a
  ``QEDproperties`` instance. Raises ValueError when ``mol`` is None.
  """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')
    mol = Chem.RemoveHs(mol)

    molWeight = rdmd._CalcMolWt(mol)
    aLogP = Crippen.MolLogP(mol)

    # Total number of matches over all acceptor patterns.
    acceptorCount = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            acceptorCount += len(mol.GetSubstructMatches(pattern))

    donorCount = rdmd.CalcNumHBD(mol)
    polarSurface = MolSurf.TPSA(mol)
    rotatable = rdmd.CalcNumRotatableBonds(mol,
                                           rdmd.NumRotatableBondsOptions.Strict)

    # Ring perception on a copy with the aliphatic-ring pattern deleted.
    # The replacement
    # AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression below tends to count more rings
    # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1  uses 2, should be 0 ?
    stripped = Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)
    aromatic = Chem.GetSSSR(stripped)

    alertCount = sum(1 for alert in StructuralAlerts
                     if mol.HasSubstructMatch(alert))

    qedProperties = QEDproperties(
        MW=molWeight,
        ALOGP=aLogP,
        HBA=acceptorCount,
        HBD=donorCount,
        PSA=polarSurface,
        ROTB=rotatable,
        AROM=aromatic,
        ALERTS=alertCount,
    )
    return qedProperties
Exemplo n.º 28
0
  def _doDetailFile(self,inF,nFailsAllowed=1):
    """Compare computed Crippen atom contributions with pickled references.

    Reads ``(smiles, refContribs)`` records from ``inF`` with
    ``cPickle.load`` until EOF. For each parseable SMILES the molecule gets
    explicit hydrogens, its contributions are computed with
    ``Crippen._GetAtomContribs``, and the sorted values are compared to the
    sorted reference values using ``feq``. After the whole file has been
    read, the number of mismatching molecules must be strictly below
    ``nFailsAllowed``.
    """
    # NOTE(review): unpickling executes code embedded in the stream; only
    # use this with trusted reference files.
    verbose = 0
    nFails = 0
    while True:
      if verbose:
        print('---------------')
      try:
        smi,refContribs = cPickle.load(inF)
      except EOFError:
        break

      refContribs = [x[0] for x in refContribs]
      refOrder = numpy.argsort(refContribs)
      try:
        mol = Chem.MolFromSmiles(smi)
      except Exception:
        # Narrowed from a bare except so Ctrl-C still interrupts the run.
        import traceback
        traceback.print_exc()
        mol = None
      if not mol:
        print('Problems with SMILES:',smi)
        continue

      mol = Chem.AddHs(mol,1)
      smi2 = Chem.MolToSmiles(mol)
      contribs = [x[0] for x in Crippen._GetAtomContribs(mol)]
      #
      #  we're comparing to the old results using the oelib code.
      #  Since we have some disagreements with them as to what is
      #  aromatic and what isn't, we may have different numbers of
      #  Hs. For the sake of comparison, just pop those off our
      #  new results.
      #
      del contribs[len(refContribs):]
      order = numpy.argsort(contribs)

      for i in range(len(refContribs)):
        refL = refContribs[refOrder[i]]
        val = contribs[order[i]]
        if not feq(refL,val):
          print('%s (%s): %d %6.5f != %6.5f'%(smi,smi2,order[i],refL,val))
          # Recomputed with force=1; the result is not used here.
          Crippen._GetAtomContribs(mol,force=1)
          print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')
          nFails += 1
          break
    self.assertTrue(nFails<nFailsAllowed)
Exemplo n.º 29
0
def logP(mol, train_smiles=None):
    """Return the Crippen logP of ``mol``.

    When the module-level ``NORMALIZE`` flag is truthy the value is passed
    through ``remap`` with fixed bounds and clipped to [0, 1]; otherwise
    the raw logP is returned. ``train_smiles`` is accepted but not used.
    """
    score = Crippen.MolLogP(mol)
    if not NORMALIZE:
        return score
    rescaled = remap(score, -2.12178879609, 6.0429063424)
    return np.clip(rescaled, 0.0, 1.0)
Exemplo n.º 30
0
def _rdkit_eval(entry: dict) -> dict:
    """Add RDKit-derived properties to ``entry`` and return it.

    The molecule is parsed from ``entry['smiles']``; the keys 'logP', 'QED'
    and 'SA_score' are then written into the same dictionary, in that
    order.
    """
    molecule = Chem.MolFromSmiles(entry['smiles'])
    log_p = Crippen.MolLogP(molecule)
    entry['logP'] = log_p
    qed_value = QED.qed(molecule)
    entry['QED'] = qed_value
    sa_value = calculateScore(molecule)
    entry['SA_score'] = sa_value
    return entry
Exemplo n.º 31
0
def PhyChem(smiles):
    """ Calculate 19 physicochemical descriptors for each molecule and pass
    the resulting matrix through ``Scaler().fit_transform``.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled descriptors, where m is
            the number of samples. A molecule whose descriptor calculation
            raises is printed and contributes a row of zeros.
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            # Descriptor functions in output-column order.
            calculators = (
                desc.MolWt,
                Crippen.MolLogP,
                Lipinski.NumHAcceptors,
                Lipinski.NumHDonors,
                Lipinski.NumRotatableBonds,
                AllChem.CalcNumAmideBonds,
                AllChem.CalcNumBridgeheadAtoms,
                Lipinski.NumHeteroatoms,
                Lipinski.HeavyAtomCount,
                AllChem.CalcNumSpiroAtoms,
                AllChem.CalcFractionCSP3,
                Lipinski.RingCount,
                AllChem.CalcNumAliphaticRings,
                AllChem.CalcNumAromaticRings,
                AllChem.CalcNumSaturatedRings,
                AllChem.CalcNumHeterocycles,
                MolSurf.TPSA,
                desc.NumValenceElectrons,
                Crippen.MolMR,
            )
            row = [calc(mol) for calc in calculators]
        except Exception:
            print(smile)
            row = [0] * 19
        rows.append(row)
    return Scaler().fit_transform(np.array(rows))
Exemplo n.º 32
0
    def testLipinskiLong(self):
        """ Lipinski parameter

        Runs the class's ``__testDesc`` helper for a fixed list of
        (file, column, descriptor function) combinations. Skipped unless
        the module-level ``doLong`` flag is set.
        """
        if not doLong:
            raise unittest.SkipTest('long test')

        def logp_with_hs(x):
            return Crippen.MolLogP(x, includeHs=1)

        checks = (
            ('PP_descrs_regress.csv', 30, Lipinski.NumHDonors),
            ('PP_descrs_regress.csv', 31, Lipinski.NumHeteroatoms),
            ('PP_descrs_regress.csv', 32, Lipinski.NumRotatableBonds),
            ('PP_descrs_regress.csv', 33, logp_with_hs),
            ('Block_regress.Lip.csv', 1, Lipinski.NumHAcceptors),
            ('Block_regress.Lip.csv', 2, Lipinski.NumHDonors),
            ('Block_regress.Lip.csv', 3, Lipinski.NumHeteroatoms),
            ('Block_regress.Lip.csv', 4, Lipinski.NumRotatableBonds),
            ('PP_descrs_regress.2.csv', 33, logp_with_hs),
        )
        for fName, col, func in checks:
            self.__testDesc(fName, col, func)
def mole_proper(mol):
    """Return six descriptors of ``mol`` as a tuple.

    Order: H-bond donors, H-bond acceptors, rotatable bonds, molecular
    weight, Crippen logP, TPSA.
    """
    donors = Lipinski.NumHDonors(mol)
    acceptors = Lipinski.NumHAcceptors(mol)
    rotatable = Lipinski.NumRotatableBonds(mol)
    weight = Descriptors.MolWt(mol)
    log_p = Crippen.MolLogP(mol)
    tpsa = Descriptors.TPSA(mol)
    return (donors, acceptors, rotatable, weight, log_p, tpsa)
Exemplo n.º 34
0
def evaluate_chem_mol(mol):
    """Return four flags describing ``mol``.

    On success the list is ``[True, 320 < MW < 420, 2 < logP < 3,
    40 < TPSA < 60]``, using the exact molecular weight, Crippen logP and
    TPSA. If any step raises, all four flags are False.
    """
    try:
        # Result unused; an exception here sends us to the all-False path.
        Chem.GetSSSR(mol)
        clogp = Crippen.MolLogP(mol)
        mw = MolDescriptors.CalcExactMolWt(mol)
        tpsa = Descriptors.TPSA(mol)
        ret_val = [True, 320 < mw < 420, 2 < clogp < 3, 40 < tpsa < 60]
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # propagate instead of being reported as an invalid molecule.
        ret_val = [False] * 4

    return ret_val
Exemplo n.º 35
0
def log_partition_coefficient(smiles):
    '''
    Returns the octanol-water partition coefficient (Crippen logP) given a
    molecule SMILES string.

    Raises SmilesError when the SMILES cannot be turned into a molecule,
    whether the parser raises or returns None.
    '''
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        mol = None

    # The parser can return None for unparsable input instead of raising,
    # so the None case has to be checked explicitly for the error below to
    # be reachable.
    if mol is None:
        raise SmilesError('%s returns a None molecule' % smiles)

    return Crippen.MolLogP(mol)
Exemplo n.º 36
0
def pyPEOE_VSA_(mol, bins=None, force=1):
  """ *Internal Use Only*

  Accumulate per-atom volume contributions (from ``_LabuteHelper``) into
  bins selected by each atom's Gasteiger charge.

  With ``force`` false, a value cached on ``mol._peoeVSA`` is returned
  as-is, but only when every entry of it is truthy. The result is stored
  back on ``mol._peoeVSA``. ``bins`` defaults to the module-level
  ``chgBins``.
  """
  if not force and hasattr(mol, '_peoeVSA'):
    cached = mol._peoeVSA
    if cached.all():
      return cached

  edges = chgBins if bins is None else bins
  Crippen._Init()
  rdPartialCharges.ComputeGasteigerCharges(mol)

  charges = []
  for at in mol.GetAtoms():
    rawCharge = at.GetProp('_GasteigerCharge')
    try:
      charge = float(rawCharge)
    except ValueError:
      # Charge strings that do not parse as floats count as zero.
      charge = 0.0
    charges.append(charge)
  atomVols = _LabuteHelper(mol)

  totals = numpy.zeros(len(edges) + 1, 'd')
  for idx, charge in enumerate(charges):
    # The volume list is read one position ahead of the atom index.
    vol = atomVols[idx + 1]
    if charge is None:
      continue
    totals[bisect.bisect_right(edges, charge)] += vol

  mol._peoeVSA = totals
  return totals
Exemplo n.º 37
0
 def _writeDetailFile(self, inF, outF):
   """Re-compute atom contributions for every record in ``inF``.

   Reads pickled ``(smiles, refContribs)`` pairs until EOF. For each SMILES
   that parses, hydrogens are added and ``(smiles, contribs)`` is pickled
   to ``outF``; otherwise a message is printed.
   """
   while True:
     try:
       record = pickle.load(inF)
     except EOFError:
       return
     smi, refContribs = record

     mol = Chem.MolFromSmiles(smi)
     if not mol:
       print('Problems with SMILES:', smi)
       continue
     mol = Chem.AddHs(mol, 1)
     # The SMILES of the protonated molecule is generated but not used.
     Chem.MolToSmiles(mol)
     pickle.dump((smi, Crippen._GetAtomContribs(mol)), outF)
Exemplo n.º 38
0
def CalculateMolMR(mol):
    """
    #################################################################
    Calculation of molecular refraction value based on Crippen method

    ---->MR

    Usage:

        result=CalculateMolMR(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value, rounded to 3 decimal places.
    #################################################################
    """
    refractivity = Crippen._pyMolMR(mol)
    return round(refractivity, 3)
Exemplo n.º 39
0
def runIt(inFileName, outFileName, smiCol=0, maxMols=-1, delim=','):
  """Pickle Crippen atom contributions for the SMILES in a gzipped text file.

  Lines starting with '#' are skipped. Every other line is split on
  ``delim`` and column ``smiCol`` is parsed as SMILES. For each molecule
  that parses, ``(smiles, contribs)`` is pickled to ``outFileName``.

  Arguments:
    - inFileName: path of the gzip-compressed input file.
    - outFileName: path of the pickle file to (over)write.
    - smiCol: index of the SMILES column.
    - maxMols: if positive, stop after this many non-comment lines
      (lines that fail to parse count as well).
    - delim: column delimiter.
  """
  nDone = 0
  # Both files are closed by the context managers, including on errors.
  with gzip.open(inFileName, 'rb') as inF, open(outFileName, 'wb+') as outF:
    for line in inF:
      # gzip yields bytes on Python 3; normalise to the native str type so
      # the '#' test and the split on ``delim`` work on every version.
      if not isinstance(line, str):
        line = line.decode('utf-8')
      if line.startswith('#'):
        continue
      splitL = line.strip().split(delim)
      smi = splitL[smiCol].strip()
      print(smi)
      mol = Chem.MolFromSmiles(smi)
      if mol:
        contribs = Crippen._GetAtomContribs(mol)
        cPickle.dump((smi, contribs), outF)
      nDone += 1
      if maxMols > 0 and nDone >= maxMols:
        break
Exemplo n.º 40
0
def CalculateMolLogP2(mol):
    """
    #################################################################
    Calculation of LogP^2 value based on Crippen method

    ---->LogP2

    Usage:

        result=CalculateMolLogP2(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value, rounded to 3 decimal places.
    #################################################################
    """
    log_p = Crippen._pyMolLogP(mol)
    squared = log_p**2
    return round(squared, 3)
Exemplo n.º 41
0
 def _writeDetailFile(self,inF,outF):
   """Re-compute atom contributions for every record in ``inF``.

   Reads pickled ``(smiles, refContribs)`` pairs until EOF. For each SMILES
   that parses, hydrogens are added and ``(smiles, contribs)`` is pickled
   to ``outF``; otherwise a message is printed.
   """
   # NOTE(review): unpickling executes code embedded in the stream; only use
   # this with trusted reference files.
   while True:
     try:
       smi,refContribs = cPickle.load(inF)
     except EOFError:
       break

     try:
       mol = Chem.MolFromSmiles(smi)
     except Exception:
       # Narrowed from a bare except so Ctrl-C still interrupts the run.
       import traceback
       traceback.print_exc()
       mol = None
     if not mol:
       # Single %-formatted argument: valid syntax on Python 3 and prints
       # the same text on Python 2.
       print('Problems with SMILES: %s' % (smi,))
       continue

     mol = Chem.AddHs(mol,1)
     # The SMILES of the protonated molecule is generated but not used.
     Chem.MolToSmiles(mol)
     contribs = Crippen._GetAtomContribs(mol)
     cPickle.dump((smi,contribs),outF)
Exemplo n.º 42
0
from __future__ import print_function
from rdkit import RDConfig
import gzip
import os.path
from rdkit.six.moves import cPickle
from rdkit import Chem
from rdkit.Chem import Crippen
Crippen._Init()


def runIt(inFileName, outFileName, smiCol=0, maxMols=-1, delim=','):
  """Pickle Crippen atom contributions for the SMILES in a gzipped text file.

  Lines starting with '#' are skipped. Every other line is split on
  ``delim`` and column ``smiCol`` is parsed as SMILES. For each molecule
  that parses, ``(smiles, contribs)`` is pickled to ``outFileName``.

  Arguments:
    - inFileName: path of the gzip-compressed input file.
    - outFileName: path of the pickle file to (over)write.
    - smiCol: index of the SMILES column.
    - maxMols: if positive, stop after this many non-comment lines
      (lines that fail to parse count as well).
    - delim: column delimiter.
  """
  nDone = 0
  # Both files are closed by the context managers, including on errors.
  with gzip.open(inFileName, 'rb') as inF, open(outFileName, 'wb+') as outF:
    for line in inF:
      # gzip yields bytes on Python 3; normalise to the native str type so
      # the '#' test and the split on ``delim`` work on every version.
      if not isinstance(line, str):
        line = line.decode('utf-8')
      if line.startswith('#'):
        continue
      splitL = line.strip().split(delim)
      smi = splitL[smiCol].strip()
      print(smi)
      mol = Chem.MolFromSmiles(smi)
      if mol:
        contribs = Crippen._GetAtomContribs(mol)
        cPickle.dump((smi, contribs), outF)
      nDone += 1
      if maxMols > 0 and nDone >= maxMols:
        break


if __name__ == '__main__':