Пример #1
0
  def testAtomMatch(self):
    fdefBlock = \
"""
DefineFeature HAcceptor1 [#7,#8]
    Family HBondAcceptor
    Weights 1.0
EndFeature
DefineFeature Arom1 a1aaaaa1
    Family Aromatic
    Weights 1.0,1.0,1.0,1.0,1.0,1.0
EndFeature
"""
    cfac = ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock)
    self.failUnless(cfac.GetNumFeatureDefs() == 2)
    mol = Chem.MolFromSmiles('n1ccccc1')
    feats = cfac.GetFeaturesForMol(mol)
    self.failUnless(len(feats)==2)
    m = ChemicalFeatures.GetAtomMatch(feats)
    self.failIf(m)

    mol = Chem.MolFromSmiles('c1ccccc1N')
    feats = cfac.GetFeaturesForMol(mol)
    self.failUnless(len(feats)==2)
    m = ChemicalFeatures.GetAtomMatch(feats)
    self.failUnless(len(m)==2)
Пример #2
0
  def testBasic(self):
    """Exercise the FreeChemicalFeature accessors and its constructor forms.

    Covers the default constructor plus setters, positional construction
    with and without an id, and fully keyword-based construction.
    """
    family = "HBondDonor"
    ftype = "HBondDonor1"

    # default-constructed feature, populated through the setters
    feat = ChemicalFeatures.FreeChemicalFeature()
    feat.SetId(123)
    self.assertTrue(feat.GetId() == 123)
    feat.SetFamily(family)
    self.assertTrue(feat.GetFamily() == family)
    feat.SetPos(geom.Point3D(1.0, 2.0, 3.0))
    self.assertTrue(ptFeq(feat.GetPos(), geom.Point3D(1.0, 2.0, 3.0)))
    feat.SetType(ftype)
    self.assertTrue(feat.GetType() == ftype)

    # positional construction without an id: the id must read back as -1
    feat = ChemicalFeatures.FreeChemicalFeature(family, ftype, geom.Point3D(1.0, 2.0, 3.0))
    self.assertTrue(feat.GetId() == -1)
    self.assertTrue(feat.GetFamily() == family)
    self.assertTrue(feat.GetType() == ftype)

    # positional construction with the id given by keyword
    feat = ChemicalFeatures.FreeChemicalFeature(family, ftype, geom.Point3D(1.0, 2.0, 3.0),
                                                id=123)
    self.assertTrue(feat.GetId() == 123)
    self.assertTrue(feat.GetFamily() == family)
    self.assertTrue(feat.GetType() == ftype)
    self.assertTrue(ptFeq(feat.GetPos(), geom.Point3D(1.0, 2.0, 3.0)))

    # everything passed by keyword, in an order different from the positional one
    feat = ChemicalFeatures.FreeChemicalFeature(id=123, type=ftype, family=family,
                                                loc=geom.Point3D(1.0, 2.0, 3.0))
    self.assertTrue(feat.GetId() == 123)
    self.assertTrue(feat.GetFamily() == family)
    self.assertTrue(feat.GetType() == ftype)
    self.assertTrue(ptFeq(feat.GetPos(), geom.Point3D(1.0, 2.0, 3.0)))
Пример #3
0
    def setUp(self):
        """Build the fixtures shared by the tests.

        Sets the test-data directory, a feature factory created from an
        inline fdef block (acceptor, donor, aromatic ring), three free
        features, and a pharmacophore over those features with explicit
        lower/upper distance bounds for every pair.
        """
        self.dataDir = os.path.join(RDConfig.RDCodeDir, 'Chem/Pharm3D/test_data')
        self.fdefBlock = """
                   DefineFeature HAcceptor1 [N,O;H0]
                      Family HBondAcceptor
                      Weights 1.0
                   EndFeature
                   DefineFeature HDonor1 [N,O;!H0]
                      Family HBondDonor
                      Weights 1.0
                   EndFeature
                   DefineFeature Aromatic1 c1ccccc1
                      Family Aromatic
                      Weights 1.,1.,1.,1.,1.,1.
                   EndFeature\n"""
        self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock)

        # (family, type, coordinates) for each pharmacophore point
        featureSpecs = (
            ('HBondAcceptor', 'HAcceptor1', (0.0, 0.0, 0.0)),
            ('HBondDonor', 'HDonor1', (2.65, 0.0, 0.0)),
            ('Aromatic', 'Aromatic1', (5.12, 0.908, 0.0)),
        )
        self.feats = [
            ChemicalFeatures.FreeChemicalFeature(family, featType, Geometry.Point3D(*xyz))
            for family, featType, xyz in featureSpecs
        ]
        self.pcophore = Pharmacophore.Pharmacophore(self.feats)

        # (i, j, lower, upper) distance window for each feature pair
        pairBounds = (
            (0, 1, 2.0, 3.3),
            (0, 2, 5.0, 5.4),
            (1, 2, 2.6, 3.0),
        )
        for i, j, lower, upper in pairBounds:
            self.pcophore.setLowerBound(i, j, lower)
            self.pcophore.setUpperBound(i, j, upper)
Пример #4
0
    def setUp(self):
        """Build the fixtures shared by the tests.

        Creates a feature factory from an inline fdef block (acceptor,
        donor, aromatic ring), three free features, and a pharmacophore
        over those features.
        """
        self.fdefBlock = """DefineFeature HAcceptor1 [N,O;H0]
                      Family HBondAcceptor
                      Weights 1.0
                   EndFeature
                   DefineFeature HDonor1 [N,O;!H0]
                      Family HBondDonor
                      Weights 1.0
                   EndFeature
                   DefineFeature Aromatic1 c1ccccc1
                      Family Aromatic
                      Weights 1.0,1.0,1.0,1.0,1.0,1.0
                   EndFeature\n"""
        self.featFactory = ChemicalFeatures.BuildFeatureFactoryFromString(self.fdefBlock)

        # (family, type, coordinates) for each pharmacophore point
        featureSpecs = (
            ('HBondAcceptor', 'HAcceptor1', (0.0, 0.0, 0.0)),
            ('HBondDonor', 'HDonor1', (2.65, 0.0, 0.0)),
            ('Aromatic', 'Aromatic1', (5.12, 0.908, 0.0)),
        )
        self.feats = [
            ChemicalFeatures.FreeChemicalFeature(family, featType, Geometry.Point3D(*xyz))
            for family, featType, xyz in featureSpecs
        ]
        self.pcophore = Pharmacophore.Pharmacophore(self.feats)
Пример #5
0
  def test4Search(self):
    """Screen the pickled CDK2 molecules against a three-point pharmacophore.

    Every record of 'cdk2-syn-clip100.pkl.gz' is unpickled, its bounds
    matrix is recomputed and triangle-smoothed, and the molecule is
    matched against an Acceptor/Donor/Aromatic pharmacophore with 3D and
    2D distance limits.  The test pins the number of records read, the
    number of feature-level matches and the number of full hits.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(self.dataDir,
                                                        'BaseFeatures.fdef'))

    activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Donor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Aromatic',
                                            Geometry.Point3D(0.0, 0.0, 0.0))]
    pcophore= Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0,1,2.251)
    pcophore.setUpperBound(0,1,2.451)
    pcophore.setUpperBound2D(0,1,3)

    pcophore.setLowerBound(0,2,4.970)
    pcophore.setUpperBound(0,2,5.170)
    pcophore.setUpperBound2D(0,2,6)

    pcophore.setLowerBound(1,2,2.681)
    pcophore.setUpperBound(1,2,2.881)
    pcophore.setUpperBound2D(1,2,6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # the context manager releases the gzip handle even if an RDKit call
    # or an assertion raises part-way through the loop
    with gzip.open(os.path.join(self.dataDir,'cdk2-syn-clip100.pkl.gz'),'rb') as inF:
      while True:
        try:
          name,molPkl,boundsMat = cPickle.load(inF, encoding='latin1')
          if PY3:
            molPkl = bytes(molPkl, encoding='latin1')
        except Exception:
          # any read failure (normally end-of-stream) ends the loop; the
          # nDone assertion below catches a premature stop.  Unlike a bare
          # except this lets KeyboardInterrupt/SystemExit propagate.
          break

        nDone += 1

        mol = Chem.Mol(molPkl)
        # the pickled bounds matrix is discarded in favour of a fresh one
        boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        DG.DoTriangleSmoothing(boundsMat)

        canMatch,matches = EmbedLib.MatchPharmacophoreToMol(mol,featFactory,
                                                            pcophore)
        if canMatch:
          nMatches+=1
          r = EmbedLib.MatchPharmacophore(matches,boundsMat,pcophore,
                                          useDownsampling=True,use2DLimits=True,
                                          mol=mol)
          failed,bm,match,details = r
          if not failed:
            nHits+=1

    self.assertEqual(nDone,100)
    self.assertEqual(nMatches,93)
    self.assertEqual(nHits,67)
Пример #6
0
 def setUp(self):
     """Create the signature factory used by the tests.

     The factory is built on the Pharm2D test copy of 'BaseFeatures.fdef',
     is configured with minPointCount=2 and maxPointCount=3, gets three
     distance bins and is then initialised.
     """
     fdefPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                             'BaseFeatures.fdef')
     sigFactory = self.factory = SigFactory.SigFactory(
         ChemicalFeatures.BuildFeatureFactory(fdefPath), minPointCount=2, maxPointCount=3)
     sigFactory.SetBins([(0, 2), (2, 5), (5, 8)])
     sigFactory.Init()
Пример #7
0
def _getFeatureFamily(mol):
    """Return, per atom, the feature families the atom takes part in.

    Hydrogens are added to ``mol``, a 3D embedding is attempted (with a
    random-coordinate retry if the first attempt fails) and, when the
    embedding succeeded, the geometry is UFF-optimised.  Features are then
    perceived with the stock 'BaseFeatures.fdef' definitions.

    Returns a list with one entry per atom of the hydrogen-added molecule.
    Each entry is a list that starts with ``len(feats)`` empty-string
    placeholders, followed by the distinct family names of the features
    that contain the atom.

    ``mol`` must carry a "_Name" property; it is read for the debug log.
    """
    FEATURE_DEF_FILE = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    feat_factory = ChemicalFeatures.BuildFeatureFactory(FEATURE_DEF_FILE)
    hmol = rdkit.Chem.AddHs(mol)
    # A single embedding attempt is made here; the original ran an extra
    # embedding first whose result was immediately replaced by this one.
    rc = rdkit.Chem.AllChem.EmbedMolecule(hmol)
    logging.debug("Getting features for mol %s", mol.GetProp("_Name"))
    if rc < 0:
        # fall back to random starting coordinates
        rc = rdkit.Chem.AllChem.EmbedMolecule(hmol, useRandomCoords=True)
    if rc == 0:
        try:
            # a non-zero return triggers a second run with a larger budget
            if rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol) != 0:
                rdkit.Chem.AllChem.UFFOptimizeMolecule(hmol, maxIters=1000)
        except ValueError:
            # best effort: features are still computed below
            logging.error("Problem with 3D version of molecule %s",
                          hmol.GetProp("_Name"))
    feats = feat_factory.GetFeaturesForMol(hmol)
    atomFeatures = [[""] * len(feats) for _ in range(hmol.GetNumAtoms())]
    for feature in feats:
        family = feature.GetFamily()
        for atomId in feature.GetAtomIds():
            if family not in atomFeatures[atomId]:
                atomFeatures[atomId].append(family)
    return atomFeatures
Пример #8
0
  def testIncludeOnly(self):
    """Check the includeOnly family filter of the feature factory.

    Uses the two-definition 'featDef.txt' test file and the molecule
    COCN, and verifies feature counts, indexed access (including the
    IndexError raised past the end) and list retrieval for an acceptor
    family, a donor family and an unknown family name.
    """
    cfac = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures', 'test_data',
                   'featDef.txt'))
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    mol = Chem.MolFromSmiles("COCN")
    rdDistGeom.EmbedMolecule(mol)

    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondAcceptor"), 2)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondDonor"), 1)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="Bogus"), 0)

    # indices are relative to the filtered family, so one past the end must fail
    with self.assertRaises(IndexError):
      cfac.GetMolFeature(mol, 1, includeOnly="HBondDonor")
    with self.assertRaises(IndexError):
      cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor")
    f = cfac.GetMolFeature(mol, 0, includeOnly="HBondDonor")
    self.assertEqual(f.GetFamily(), 'HBondDonor')

    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondAcceptor")
    self.assertEqual(len(feats), 2)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondDonor")
    self.assertEqual(len(feats), 1)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="Bogus")
    self.assertEqual(len(feats), 0)
Пример #9
0
def ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=False,
                    index=0,soFar=None):
  """ Enumerates the list of atom mappings a molecule
  has to a particular pharmacophore.
  We do check distance bounds here.

  This is a recursive generator.  ``matches`` holds one sequence of
  candidate entries per pharmacophore point; ``index`` is the point
  currently being assigned and ``soFar`` the entries chosen for the
  points before it.  Partial assignments that fail ``_checkMatch`` are
  pruned before recursing.

  Yields 2-tuples ``(chosenEntries, atomMatch)``.  When ``index`` is
  already past the last point, ``(soFar, [])`` is yielded.

  ``soFar`` defaults to a fresh empty list on every call, so a caller
  that mutates a yielded list cannot affect later enumerations.
  """
  if soFar is None:
    soFar = []
  nMatches = len(matches)
  if index>=nMatches:
    yield soFar,[]
  elif index==nMatches-1:
    # last point: every surviving candidate completes a mapping
    for entry in matches[index]:
      nextStep = soFar+[entry]
      if index != 0:
        atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
      else:
        # single-point pharmacophore: there are no pairwise bounds to check
        atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
      if atomMatch:
        yield nextStep,atomMatch
  else:
    for entry in matches[index]:
      nextStep = soFar+[entry]
      if index != 0:
        atomMatch = _checkMatch(nextStep,mol,bounds,pcophore,use2DLimits)
        if not atomMatch:
          continue
      for val in ConstrainedEnum(matches,mol,pcophore,bounds,use2DLimits=use2DLimits,
                                 index=index+1,soFar=nextStep):
        yield val
Пример #10
0
    def testPickle(self):
        """Check pickling of FreeChemicalFeature, including stored pickles.

        A feature is round-tripped through dumps/loads in memory, then two
        pickle files from the test-data directory are loaded: 'feat.pkl'
        (format 1.0, no id in the byte stream) and 'featv2.pkl' (id
        included).  The files are trusted test fixtures.
        """
        ffeat = ChemicalFeatures.FreeChemicalFeature(
            "HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0), 123)
        pkl = cPickle.dumps(ffeat)
        ffeat2 = cPickle.loads(pkl, encoding='bytes')
        self.assertTrue(ffeat2.GetId() == ffeat.GetId())
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

        # Check that the old pickled versions have not been broken
        with open(
                os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/feat.pkl'),
                'rb') as inF:
            ffeat2 = cPickle.load(inF, encoding='bytes')
        # this version (1.0) does not have an id in the byte stream
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

        # Test the new version also has the id and works as expected.
        # To regenerate (overwrite) the stored file, dump ``ffeat`` with
        # cPickle.dump into
        # RDConfig.RDBaseDir/Code/ChemicalFeatures/Wrap/testData/featv2.pkl
        # opened in 'wb' mode.
        with open(
                os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),
                'rb') as inF:
            ffeat2 = cPickle.load(inF, encoding='bytes')
        self.assertTrue(ffeat2.GetId() == ffeat.GetId())
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
Пример #11
0
    def initFromLines(self, lines):
        """Initialise the features and radii from an iterable of text lines.

        Everything after a '#' on a line is ignored, as are lines that are
        empty once stripped.  Each remaining line must hold at least five
        whitespace-separated fields: a name followed by x, y, z and a
        radius.  The name is used as both family and type of the created
        FreeChemicalFeature.

        On a malformed line an error is logged and the method returns
        without calling ``_initializeFeats``.  Line numbers in the log
        messages are zero-based.
        """
        import re
        # raw string: the original '[\ \t]+' relied on an invalid escape sequence
        spaces = re.compile(r'[ \t]+')

        feats = []
        rads = []
        for lineNum, line in enumerate(lines):
            txt = line.split('#')[0].strip()
            if txt:
                splitL = spaces.split(txt)
                if len(splitL) < 5:
                    logger.error(
                        'Input line %d only contains %d fields, 5 are required. Read failed.'
                        % (lineNum, len(splitL)))
                    return
                fName = splitL[0]
                try:
                    xP = float(splitL[1])
                    yP = float(splitL[2])
                    zP = float(splitL[3])
                    rad = float(splitL[4])
                except ValueError:
                    logger.error(
                        'Error parsing a number of line %d. Read failed.' %
                        (lineNum))
                    return
                feats.append(
                    ChemicalFeatures.FreeChemicalFeature(
                        fName, fName, Geometry.Point3D(xP, yP, zP)))
                rads.append(rad)
        self._initializeFeats(feats, rads)
Пример #12
0
def processArgs(args, parser):
    """Print feature information for the SMILES listed in a file.

    A feature factory is built from ``args.fdefFilename``; a failure is
    reported through ``parser.error``.  Lines of ``args.smilesFilename``
    are then read until line ``args.maxLines + 1`` is reached.  The first
    field of each line is parsed as SMILES; unparsable entries are logged
    and skipped.

    With ``args.reverseIt`` set, each feature is printed with its atom
    ids; otherwise each atom is printed with the output of
    ``GetAtomFeatInfo`` for it.
    """
    try:
        factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
    except Exception:
        parser.error(
            "Could not parse Fdef file {0.fdefFilename}.".format(args))

    with open(args.smilesFilename) as smilesFile:
        for lineNo, rawLine in enumerate(smilesFile, 1):
            if lineNo == args.maxLines + 1:
                break
            smi = splitExpr.split(rawLine.strip())[0].strip()
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                logger.warning("Could not process smiles '%s' on line %d." %
                               (smi, lineNo))
                continue

            print('Mol-%d\t%s' % (lineNo, smi))
            if not args.reverseIt:
                # atom-centred view: one line per atom, 1-based atom number
                for atomIdx, names in enumerate(GetAtomFeatInfo(factory, mol)):
                    symbol = mol.GetAtomWithIdx(atomIdx).GetSymbol()
                    print('\t% 2s(%d)' % (symbol, atomIdx + 1), end='')
                    if names:
                        print('\t', ', '.join(names))
                    else:
                        print()
                continue

            # feature-centred view: one line per feature with its atom ids
            for feat in factory.GetFeaturesForMol(mol):
                label = '\t%s-%s: ' % (feat.GetFamily(), feat.GetType())
                print(label, end='')
                print(', '.join(str(atomId) for atomId in feat.GetAtomIds()))
Пример #13
0
  def get_hydrogen_bonding(self):
    """Gets hydrogen bonding character for all atoms.

    Returns:
      A defaultdict mapping RDKit Atom indices to a HydrogenBonding object.
      Looking up an index that was not flagged yields
      HydrogenBonding(False, False).

    Raises:
      TypeError: if an acceptor or donor feature does not map to exactly
        one atom index.
    """
    self.check_indices()
    hbond_by_atom = collections.defaultdict(lambda: HydrogenBonding(False, False))
    factory = ChemicalFeatures.BuildFeatureFactoryFromString(_HBOND_FEATURE_DEF)
    for feature in factory.GetFeaturesForMol(self.mol):
      kind = feature.GetFamily().lower()
      if kind not in ('acceptor', 'donor'):
        continue
      atom_ids = feature.GetAtomIds()
      if len(atom_ids) != 1:
        raise TypeError('More than one atom index for %s.' % kind)
      atom_idx = atom_ids[0]
      # the family name doubles as the namedtuple field to switch on
      # pylint:disable=protected-access
      hbond_by_atom[atom_idx] = hbond_by_atom[atom_idx]._replace(**{kind: True})
      # pylint:enable=protected-access
    return hbond_by_atom
Пример #14
0
    def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
        """ Align a list of molecules to a given pharmacophore.

        For every molecule the pharmacophore is matched at feature level,
        then against the molecule's bounds matrix, and finally embedded
        (10 embeddings requested).  The embedding with the smallest SSD is
        stored in ``self.molecules`` with score ``1 / SSD``.  Molecules
        that cannot be matched, or for which no SSD is available, are
        stored with a score of 0.0.  Molecules whose embedding raises are
        skipped without being stored.

        Each molecule must carry a "_Name" property.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align.

        """
        self.n_molecules += len(molecules)

        rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
        apply_radii_to_bounds(radii, rdkit_pharmacophore)

        fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

        MolScore = namedtuple("MolScore", ["score", "id", "mol"])

        def record_no_fit(unmatched):
            # Store the untouched molecule with a zero score.
            self.molecules.append(
                MolScore(0.0, unmatched.GetProp("_Name"), unmatched))

        for mol in tqdm(molecules):

            bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
                mol, featFactory, rdkit_pharmacophore)
            if not can_match:
                record_no_fit(mol)
                continue

            failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
                all_matches,
                bounds_matrix,
                rdkit_pharmacophore,
                useDownsampling=True)
            if failed:
                record_no_fit(mol)
                continue
            atom_match = [list(x.GetAtomIds()) for x in matched_mols]

            try:
                mol_H = Chem.AddHs(mol)
                _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                    mol_H, atom_match, rdkit_pharmacophore, count=10)
            except Exception:
                # Best effort: a molecule that cannot be embedded is skipped.
                # ``Exception`` (rather than a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                continue

            SSDs = transform_embeddings(rdkit_pharmacophore, embeddings,
                                        atom_match)
            if len(SSDs) == 0:
                record_no_fit(mol)
                continue
            best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]

            score = 1 / SSDs[best_fit_index]
            matched_mol = MolScore(score, mol.GetProp("_Name"),
                                   embeddings[best_fit_index])
            self.molecules.append(matched_mol)
Пример #15
0
 def testGithub2603(self):
   """Features must stay usable after the factory reference is dropped."""
   fdefPath = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
   factory = ChemicalFeatures.BuildFeatureFactory(fdefPath)
   mol = Chem.MolFromSmiles('OCc1ccccc1CN')
   molFeats = factory.GetFeaturesForMol(mol)
   self.assertEqual(molFeats[0].GetFamily(), 'Donor')
   # release the only reference to the factory, then query the feature again
   factory = None
   self.assertEqual(molFeats[0].GetFamily(), 'Donor')
Пример #16
0
def alchemy_nodes(mol):
    """Featurization for all atoms in a molecule. The atom indices
    will be preserved.

    Per atom, the feature vector is the concatenation of: a one-hot over
    the symbols H, C, N, O, F, S, Cl; the atomic number; acceptor flag;
    donor flag; the aromaticity feature; a one-hot over SP/SP2/SP3
    hybridization; and the total hydrogen count.  The molecule must have
    exactly one conformer.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule object

    Returns
    -------
    atom_feats_dict : dict
        Dictionary for atom features, with 'n_feat' holding the stacked
        float32 feature vectors and 'node_type' the int64 atomic numbers.
    """
    atom_feats_dict = defaultdict(list)
    donor_flags = defaultdict(int)
    acceptor_flags = defaultdict(int)

    feature_factory = ChemicalFeatures.BuildFeatureFactory(
        osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    features = feature_factory.GetFeaturesForMol(mol)
    assert len(mol.GetConformers()) == 1

    # mark every atom taking part in a donor / acceptor feature
    for feature in features:
        family = feature.GetFamily()
        if family == 'Donor':
            flags = donor_flags
        elif family == 'Acceptor':
            flags = acceptor_flags
        else:
            continue
        for atom_id in feature.GetAtomIds():
            flags[atom_id] = 1

    symbols = ['H', 'C', 'N', 'O', 'F', 'S', 'Cl']
    hybridizations = [Chem.rdchem.HybridizationType.SP,
                      Chem.rdchem.HybridizationType.SP2,
                      Chem.rdchem.HybridizationType.SP3]
    for idx in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(idx)
        atomic_num = atom.GetAtomicNum()
        hydrogen_count = atom.GetTotalNumHs()
        atom_feats_dict['node_type'].append(atomic_num)

        row = []
        row.extend(atom_type_one_hot(atom, symbols))
        row.append(atomic_num)
        row.append(acceptor_flags[idx])
        row.append(donor_flags[idx])
        row.extend(atom_is_aromatic(atom))
        row.extend(atom_hybridization_one_hot(atom, hybridizations))
        row.append(hydrogen_count)
        atom_feats_dict['n_feat'].append(F.tensor(np.asarray(row, dtype=np.float32)))

    atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
    atom_feats_dict['node_type'] = F.tensor(
        np.asarray(atom_feats_dict['node_type'], dtype=np.int64))

    return atom_feats_dict
Пример #17
0
def str2molgraph(rawstr, length):
    """Build a networkx graph with one node per token of a tokenised SMILES.

    ``rawstr`` is a tuple of tokens, e.g.
    ('<RX_6>', 'N', 'c', '1', 'n', 'c', '2', '[', 'n', 'H', ']', 'c', '(',
    'C', 'C', 'C', 'c', '3', 'c', 's', 'c', '(', 'C', '(', '=', 'O', ')',
    'O', ')', 'c', '3', ')', 'c', 'c', '2', 'c', '(', '=', 'O', ')', '[',
    'n', 'H', ']', '1').  The first ``length`` tokens are joined into the
    SMILES that is parsed.

    Every token position becomes a node.  Positions for which
    ``need_emb(token, EMB_ATOMS)`` is true are paired, in order, with the
    atoms of the parsed molecule and carry atom attributes; donor and
    acceptor flags come from the stock 'BaseFeatures.fdef' definitions.
    Bonds between paired atoms become edges with a ``b_type`` attribute.
    """
    mol = Chem.MolFromSmiles(''.join(rawstr[:length]))

    graph = nx.Graph()
    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feats = factory.GetFeaturesForMol(mol)

    # Nodes; token_pos_of_atom maps an RDKit atom index to its token position
    token_pos_of_atom = {}
    for pos, token in enumerate(rawstr):
        if not need_emb(token, EMB_ATOMS):
            graph.add_node(pos)
            continue
        # atoms are consumed in order, so the next index is the map size
        atom_idx = len(token_pos_of_atom)
        token_pos_of_atom[atom_idx] = pos
        atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_node(pos,
                       a_type=atom.GetSymbol(),
                       a_num=atom.GetAtomicNum(),
                       acceptor=0,
                       donor=0,
                       aromatic=atom.GetIsAromatic(),
                       hybridization=atom.GetHybridization(),
                       num_h=atom.GetTotalNumHs())

    # Donor and Acceptor properties
    attr_of_family = {'Donor': 'donor', 'Acceptor': 'acceptor'}
    for feat in feats:
        attr = attr_of_family.get(feat.GetFamily())
        if attr is None:
            continue
        for member in feat.GetAtomIds():
            if member in token_pos_of_atom:
                graph.nodes[token_pos_of_atom[member]][attr] = 1

    # Edges
    n_atoms = mol.GetNumAtoms()
    for a in range(n_atoms):
        for b in range(n_atoms):
            bond = mol.GetBondBetweenAtoms(a, b)
            if bond is None:
                continue
            if a in token_pos_of_atom and b in token_pos_of_atom:
                graph.add_edge(token_pos_of_atom[a],
                               token_pos_of_atom[b],
                               b_type=bond.GetBondType())

    return graph
Пример #18
0
    def __config_feature_factory(self):
        """
        Build the rdkit 'feature factory' from the 'BaseFeatures.fdef'
            definitions in the RDKit data directory and keep it on
            the instance.
        """
        self.__feat_factory = ChemicalFeatures.BuildFeatureFactory(
            os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
Пример #19
0
  def testParseErrorHandling(self):
    fdefBlock = \
"""DefineFeature HDonor1 [N,O;!HQ]
    Family HBondDonor
    Weights 1.0
EndFeature
"""
    self.failUnlessRaises(ValueError,
                          lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock))
    fdefBlock = \
"""DefineFeature HDonor1 [N,O;!H0]
    Family HBondDonor
    Weights 1.0
"""
    self.failUnlessRaises(ValueError,
                          lambda: ChemicalFeatures.BuildFeatureFactoryFromString(fdefBlock))

    self.failUnlessRaises(IOError, lambda: ChemicalFeatures.BuildFeatureFactory('noSuchFile.txt'))
Пример #20
0
 def get_factory(self):
     """
     Return the Ph4 feature factory, building it on first use from
     'data/RDKitPh4.fdef' located next to this module
     :return: the cached feature factory
     """
     if self.factory is not None:
         return self.factory
     fdef_path = os.path.join(os.path.dirname(__file__), "data", "RDKitPh4.fdef")
     self.factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
     return self.factory
Пример #21
0
  def test4Github252(self):
    """A pharmacophore can only be built from features once the molecule has coordinates."""
    factory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    toluene = Chem.MolFromSmiles('Cc1ccccc1')
    molFeats = factory.GetFeaturesForMol(toluene)

    # no coordinates have been computed yet: construction must fail
    with self.assertRaises(RuntimeError):
      Pharmacophore.Pharmacophore(molFeats)

    # with 2D coordinates in place the same features are accepted
    AllChem.Compute2DCoords(toluene)
    Pharmacophore.Pharmacophore(molFeats)
Пример #22
0
 def _initializeFeats(self, feats):
   self._feats = []
   for feat in feats:
     if isinstance(feat, ChemicalFeatures.MolChemicalFeature):
       pos = feat.GetPos()
       newFeat = ChemicalFeatures.FreeChemicalFeature(feat.GetFamily(), feat.GetType(),
                                                      Geometry.Point3D(pos[0], pos[1], pos[2]))
       self._feats.append(newFeat)
     else:
       self._feats.append(feat)
Пример #23
0
def GetAllPharmacophoreMatches(matches,
                               bounds,
                               pcophore,
                               useDownsampling=0,
                               progressCallback=None,
                               use2DLimits=False,
                               mol=None,
                               verbose=False):
    """Return every feature combination that is compatible with the pharmacophore.

    Each combination produced by ``CombiEnum(matches)`` is kept when it
    yields an atom match, passes the optional 2D bounds check (only run
    when ``use2DLimits`` is set and ``mol`` is given), passes the coarse
    screen against ``bounds``, and its updated (optionally downsampled)
    copy of the bounds matrix can be triangle-smoothed.

    ``bounds`` itself is not modified; each candidate works on a copy.
    ``progressCallback``, when given, is called with the running count of
    candidates that reached the smoothing step.  ``verbose`` prints
    diagnostics, including the bounds matrix at each stage.

    Returns the list of accepted combinations.
    """

    def _dumpMatrix(title, mat):
        # Print a titled, row-wise dump of a bounds matrix (verbose mode only).
        if verbose:
            print(title)
            for row in mat:
                print(' ', ' '.join(['% 4.2f' % x for x in row]))

    res = []
    nDone = 0
    for match in CombiEnum(matches):
        atomMatch = ChemicalFeatures.GetAtomMatch(match)
        if atomMatch and use2DLimits and mol:
            pass2D = Check2DBounds(atomMatch, mol, pcophore)
            if verbose:
                print('..', atomMatch)
                print('  ..Pass2d:', pass2D)
        else:
            pass2D = True
        if not (atomMatch and pass2D and
                CoarseScreenPharmacophore(atomMatch, bounds, pcophore, verbose=verbose)):
            continue
        if verbose:
            print('  ..CoarseScreen: Pass')

        bm = bounds.copy()
        _dumpMatrix('pre update:', bm)
        bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore)
        _dumpMatrix('pre downsample:', bm)

        if useDownsampling:
            # flatten the per-feature atom index groups
            indices = [idx for entry in atomMatch for idx in entry]
            bm = DownsampleBoundsMatrix(bm, indices)
        _dumpMatrix('post downsample:', bm)

        if DG.DoTriangleSmoothing(bm):
            res.append(match)
        elif verbose:
            print('cannot smooth')
        nDone += 1
        if progressCallback:
            progressCallback(nDone)
    return res
  def get_instance(cls):
    """Return the shared feature factory stored on ``cls._instance``.

    The factory is built from RDKit's 'BaseFeatures.fdef' the first time
    it is requested and reused afterwards.

    Raises:
      ValueError: if RDKit cannot be imported.
    """
    try:
      from rdkit import RDConfig
      from rdkit.Chem import ChemicalFeatures
    except ModuleNotFoundError:
      raise ValueError("This class requires RDKit to be installed.")

    if cls._instance:
      return cls._instance
    cls._instance = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    return cls._instance
    def __init__(self, atms: typing.List[str]):
        """Set up the lookup tables and the feature factory.

        ``atms`` lists the atom labels; each label is mapped to its
        position in the list.  Hybridization types SP, SP2 and SP3 are
        mapped to 0, 1 and 2.  The feature factory is built from
        'BaseFeatures.fdef' under ``RDDataDir``.
        """
        self.atms_to_idx = {atm: position for position, atm in enumerate(atms)}
        self.number_atom_options = len(self.atms_to_idx)

        hybridizations = (Chem.rdchem.HybridizationType.SP,
                          Chem.rdchem.HybridizationType.SP2,
                          Chem.rdchem.HybridizationType.SP3)
        self.hyb_mapping = {hyb: code for code, hyb in enumerate(hybridizations)}
        self.number_hyb_options = len(self.hyb_mapping)

        self.fdef_name = os.path.join(RDDataDir, 'BaseFeatures.fdef')
        self.feats_factory = ChemicalFeatures.BuildFeatureFactory(self.fdef_name)
Пример #26
0
def numpy_pp_fps(mols):
    """ Calculate 2D pharmacophore fingerprints (described as Gobbi and Poppinger) and return them as
    numpy.ndarrays. Falsy entries of ``mols`` are skipped.

    :param mols: {list} list of molecules (RDKit mols)
    :return: numpy array containing row-wise fingerprints for every molecule
    """
    # NOTE(review): BuildFeatureFactory is called without a feature definition file here,
    # while every other call site passes one - confirm this is the intended factory.
    signature_factory = SigFactory(ChemicalFeatures.BuildFeatureFactory(),
                                   useCounts=False, minPointCount=2, maxPointCount=3)
    signature_factory.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
    signature_factory.Init()

    fingerprints = []
    for molecule in mols:
        if molecule:
            fingerprints.append(Generate.Gen2DFingerprint(molecule, signature_factory))
    return _rdk2numpy(fingerprints)
Пример #27
0
def rdkit_featuredefinition() -> ChemicalFeatures.MolChemicalFeatureFactory:
    """ Build a feature factory from RDKit's 'BaseFeatures.fdef'.

        Returns
        -------
        rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.

    """
    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
Пример #28
0
    def __call__(self, mol):
        """Featurizes the input molecule.

        Each atom's feature list is the output of ``self._featurizer``
        extended with a donor flag, an acceptor flag and six counts of the
        rings of size 3 to 8 that contain the atom.  Gasteiger charges are
        computed on ``mol`` before featurization.

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.Mol
            RDKit molecule instance.

        Returns
        -------
        dict
            Mapping atom_data_field as specified in the input argument to the atom
            features, which is a float32 tensor of shape (N, M), N is the number of
            atoms and M is the feature size.
        """
        AllChem.ComputeGasteigerCharges(mol)
        atom_total = mol.GetNumAtoms()

        # Donor / acceptor membership from the stock feature definitions
        factory = ChemicalFeatures.BuildFeatureFactory(
            osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
        is_donor, is_acceptor = self.get_donor_acceptor_info(
            factory.GetFeaturesForMol(mol))

        # Symmetrized smallest set of smallest rings, following the practice from
        # Chainer Chemistry (https://github.com/chainer/
        # chainer-chemistry/blob/da2507b38f903a8ee333e487d422ba6dcec49b05/chainer_chemistry/
        # dataset/preprocessors/weavenet_preprocessor.py)
        rings = Chem.GetSymmSSSR(mol)

        rows = []
        for idx in range(atom_total):
            # Features computed directly from the RDKit atom instance (a list)
            row = self._featurizer(mol.GetAtomWithIdx(idx))
            # Donor/acceptor indicator
            row.append(float(is_donor[idx]))
            row.append(float(is_acceptor[idx]))
            # One counter per ring size from 3 to 8
            ring_counts = [0] * 6
            for ring in rings:
                size = len(ring)
                if 3 <= size <= 8 and idx in ring:
                    ring_counts[size - 3] += 1
            row.extend(ring_counts)
            rows.append(row)
        feature_matrix = np.stack(rows)

        return {
            self._atom_data_field:
            F.zerocopy_from_numpy(feature_matrix.astype(np.float32))
        }
Пример #29
0
def extract_features(mol):
    """Return the pharmacophore features of ``mol`` as comma-separated strings.

    Features are perceived with './LigityFeatures.fdef' (resolved against
    the current working directory).  Only features whose family is in
    ``pharmacophores`` are kept; each one is rendered as
    'id,family,x,y,z'.
    """
    feature_factory = ChemicalFeatures.BuildFeatureFactory('./LigityFeatures.fdef')
    records = []
    for feat in feature_factory.GetFeaturesForMol(mol):
        family = feat.GetFamily()
        if family not in pharmacophores:
            continue
        feat_id = feat.GetId()
        x, y, z = list(feat.GetPos())
        records.append(','.join([str(feat_id), family, str(x), str(y), str(z)]))
    return records
def construct_hydrogen_bonding(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Return a (num_max_atoms, 2) float32 array of donor/acceptor flags.

    Column 0 is set to 1.0 for the first atom of every 'Donor' feature and
    column 1 for the first atom of every 'Acceptor' feature, using the
    stock 'BaseFeatures.fdef' definitions.  All other entries stay 0.
    """
    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    features = factory.GetFeaturesForMol(mol)
    column_of_family = {'Donor': 0, 'Acceptor': 1}
    hydrogen_bonding_vec = numpy.zeros((num_max_atoms, 2), dtype=numpy.float32)
    for feature in features:
        column = column_of_family.get(feature.GetFamily())
        if column is None:
            continue
        # only the first atom of the feature is flagged
        hydrogen_bonding_vec[feature.GetAtomIds()[0], column] = 1.0
    return hydrogen_bonding_vec