Пример #1
0
 def _validate(self, vals, tol=1e-2, show=False):
   """ Check the atom types assigned to a series of SMILES against expectations

   Arguments:
     - vals: iterable of (smiles, expected) pairs, where expected has one
       entry per result returned by AtomTypes.TypeAtoms
     - tol: not used by this check
     - show: when true, print the raw typing result for every molecule

   Fails the test on a length mismatch, on any entry carrying more than one
   type, or on the first type that differs from the expected one.
   """
   for smiles, expected in vals:
     typed = AtomTypes.TypeAtoms(Chem.MolFromSmiles(smiles))
     if show:  # pragma: nocover
       print(typed)
     self.assertEqual(len(expected), len(typed), 'bad type len for smiles: %s' % (smiles))
     # no entry may carry more than one matched type
     match_counts = [len(entry) for entry in typed]
     self.assertEqual(max(match_counts), 1,
                      'atom matched multiple types for smiles: %s' % (smiles))
     observed = [entry[0] for entry in typed]
     for want, got in zip(expected, observed):
       self.assertEqual(want, got, 'bad type for SMILES: %s' % (smiles))
Пример #2
0
 def _validate(self,vals,tol=1e-2,show=0):
   """ Check the atom types assigned to a series of SMILES against expectations

   Arguments:
     - vals: iterable of (smiles, expected) pairs, where expected has one
       entry per result returned by AtomTypes.TypeAtoms
     - tol: not used by this check
     - show: when true, print the raw typing result for every molecule

   Raises AssertionError on a length mismatch, on any entry carrying more
   than one type, or on the first type that differs from the expected one.
   """
   for smi,ans in vals:
     mol = Chem.MolFromSmiles(smi)
     types = AtomTypes.TypeAtoms(mol)
     if show:
       # single-argument call form: same output on Python 2 and Python 3
       print(types)
     assert len(ans)==len(types),'bad type len for smiles: %s'%(smi)
     # no entry may carry more than one matched type
     lens = [len(x) for x in types]
     assert max(lens)==1,'atom matched multiple types for smiles: %s'%(smi)
     types = [x[0] for x in types]
     for a,b in zip(ans,types):
       assert a==b,'bad type for SMILES: %s'%(smi)
Пример #3
0
def _exampleCode():
  """ Demonstrate atom typing, E-state indices and E-state fingerprints

  For a handful of hard-coded SMILES this prints: the input SMILES next to the
  one regenerated by Chem.MolToSmiles, the typing result for every atom, each
  pattern with a non-zero count together with its count and sum, and finally
  the values returned by EStateIndices, numbered from 1.
  """
  from rdkit import Chem
  examples = ('CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1')
  for smiles in examples:
    mol = Chem.MolFromSmiles(smiles)
    print(smiles, Chem.MolToSmiles(mol))

    atom_types = AtomTypes.TypeAtoms(mol)
    for idx in range(mol.GetNumAtoms()):
      symbol = mol.GetAtomWithIdx(idx).GetSymbol()
      print('%d %4s: %s' % (idx + 1, symbol, str(atom_types[idx])))

    indices = EStateIndices(mol)
    counts, sums = FingerprintMol(mol)
    for idx, (label, _) in enumerate(AtomTypes.esPatterns):
      # only patterns that actually matched are reported
      if not counts[idx]:
        continue
      print('%6s, % 2d, % 5.4f' % (label, counts[idx], sums[idx]))

    for position, value in enumerate(indices, 1):
      print('% 2d, % 5.4f' % (position, value))
    print('--------')
def finger_print(chunk):
    """
    Build a table of e-state fingerprint sums for every molecule listed in chunk

    Input:
    chunk; pandas DataFrame with a "name" and a "smiles" column

    Output:
    pandas DataFrame with the columns 'name' and 'smiles' followed by one
    column per entry of AtomTypes.esPatterns, filled with the sums returned
    by Fingerprinter.FingerprintMol. Rows that cannot be fingerprinted are
    reported on stdout (row index and SMILES) and left out of the result.
    """
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()

    # the pattern table does not change per row, so derive the columns once
    pattern_names = [patt_name for patt_name, _ in AtomTypes.esPatterns]
    columns = ['name', 'smiles'] + pattern_names

    records = []
    for row_index, row in chunk.iterrows():
        name = row["name"]
        smiles = row["smiles"]

        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles yields None when the SMILES cannot be parsed
            print(row_index, smiles)
            continue

        try:
            _, sums = Fingerprinter.FingerprintMol(mol)
        except AttributeError:
            # report the offending row; the previous handler referenced
            # undefined names and raised NameError instead
            print(row_index, smiles)
            continue

        record = {'name': name, 'smiles': smiles}
        record.update(zip(pattern_names, sums))
        records.append(record)

    # Build the frame in one go: DataFrame.append was removed in pandas 2.0
    # and copied the whole frame on every call.
    return pd.DataFrame(records, columns=columns)
Пример #5
0
def finger_print(mol, name, e_opt):
    """
    Create a dictionary with the e-state fingerprint for the molecule in mol (rdkit mol)

    Input:
    mol; rdkit mol object
    name; structure name
    e_opt; energy gap (target)

    Output:
    dict with the keys 'name' and 'E_opt' plus one key per entry of
    AtomTypes.esPatterns, mapped to the matching sum returned by
    Fingerprinter.FingerprintMol
    """
    # make sure the pattern table exists before anything consumes it
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()

    # only the per-pattern sums are used; the counts are discarded
    _, sums = Fingerprinter.FingerprintMol(mol)

    # patt_name (not name) so the `name` argument is never overwritten,
    # which a list comprehension would do on Python 2
    pattern_names = [patt_name for patt_name, _ in AtomTypes.esPatterns]

    data = {'name': name, 'E_opt': e_opt}
    data.update(zip(pattern_names, sums))
    return data
Пример #6
0
  nPatts = len(AtomTypes.esPatterns)
  counts = numpy.zeros(nPatts,numpy.int)
  sums = numpy.zeros(nPatts,numpy.float)

  for i,(name,pattern) in enumerate(AtomTypes.esPatterns):
    matches = mol.GetSubstructMatches(pattern,uniquify=1)
    counts[i] = len(matches)
    for match in matches:
      sums[i] += esIndices[match[0]]
  return counts,sums


if __name__ == '__main__':
  # Demo run: for a few SMILES print the atom typing, the non-zero
  # fingerprint entries (name, count, sum) and the E-state indices.
  # Every print uses a single pre-formatted string so the output is the
  # same on Python 2 and Python 3.
  from rdkit import Chem
  smis = ['CC','CCC','c1[nH]cnc1CC(N)C(O)=O','NCCc1ccc(O)c(O)c1']
  for smi in smis:
    m = Chem.MolFromSmiles(smi)
    print('%s %s'%(smi,Chem.MolToSmiles(m)))
    types = AtomTypes.TypeAtoms(m)
    for i in range(m.GetNumAtoms()):
      print('%d %4s: %s'%(i+1,m.GetAtomWithIdx(i).GetSymbol(),str(types[i])))
    es = EStateIndices(m)
    counts,sums = FingerprintMol(m)
    for i,(name,_) in enumerate(AtomTypes.esPatterns):
      # only patterns that actually matched are reported
      if counts[i]:
        print('%6s, % 2d, % 5.4f'%(name,counts[i],sums[i]))
    for i,val in enumerate(es):
      print('% 2d, % 5.4f'%(i+1,val))
    print('--------')