def Init(self):
    """ Initializes internal parameters. This **must** be called after
      making any changes to the signature parameters

    Recomputes, from the current settings:

      - self._scaffolds: the scaffolds for each point count, stored at the
        index given by the number of distances for that point count
      - self._starts: maps each point count to the first bit index used by
        pharmacophores with that many points
      - self._nFeats: the number of feature families not listed in skipFeats
      - self._sigSize: the total number of bits
      - self.sigKlass: the vector class selected from useCounts and the
        signature size

    """
    runningTotal = 0
    self._scaffolds = [0] * (len(Utils.nPointDistDict[self.maxPointCount + 1]))
    self._starts = {}

    # count the feature families that are actually in use
    families = self.featFactory.GetFeatureFamilies()
    if self.skipFeats:
        self._nFeats = sum(1 for family in families if family not in self.skipFeats)
    else:
        self._nFeats = len(families)

    for nPts in range(self.minPointCount, self.maxPointCount + 1):
        # bits for nPts-point pharmacophores begin where the previous
        # point count stopped
        self._starts[nPts] = runningTotal
        distCount = len(Utils.nPointDistDict[nPts])
        scaffolds = Utils.GetPossibleScaffolds(nPts, self._bins,
                                               useTriangleInequality=self.trianglePruneBins)
        self._scaffolds[distCount] = scaffolds
        # one bit per (feature combination, scaffold) pair
        runningTotal += Utils.NumCombinations(self._nFeats, nPts) * len(scaffolds)

    self._sigSize = runningTotal
    if self.useCounts:
        # count vectors: the index type depends on how many bits are needed
        self.sigKlass = IntSparseIntVect if self._sigSize < 2**31 else LongSparseIntVect
    else:
        self.sigKlass = SparseBitVect
def testUniquifyCombinations(self):
    """Check that Utils.UniquifyCombinations collapses equivalent combinations."""
    # Last equivalent combination is returned
    reorderedPair = [[1, 2, 3], [3, 2, 1]]
    uniquified = Utils.UniquifyCombinations(reorderedPair)
    self.assertEqual(uniquified, [(3, 2, 1)])

    # Last equivalent combination is returned; the output order is not
    # relied upon here, so both sides are sorted before comparing
    withRepeats = [[1], [1], [2]]
    observed = sorted(Utils.UniquifyCombinations(withRepeats))
    expected = sorted([(1, ), (2, )])
    self.assertEqual(observed, expected)
def testOrderTriangle(self):
    """Additional Utils.OrderTriangle checks to complement the doctests."""
    # the feature and distance sequences must both have three entries
    with self.assertRaises(ValueError):
        Utils.OrderTriangle([0, 2], [1, 2, 3])
    with self.assertRaises(ValueError):
        Utils.OrderTriangle([0, 2, 4], [1, 2])

    canonical = ([1, 3, 1], [4, 3, 2])
    self.assertEqual(Utils.OrderTriangle([1, 3, 1], [2, 3, 4]), canonical)
    self.assertEqual(Utils.OrderTriangle([1, 3, 1], [4, 3, 2]), canonical)

    # If all the features are the same, we want the distances in reverse order
    for dist in itertools.permutations([1, 2, 3]):
        ordered = Utils.OrderTriangle([1, 1, 1], dist)
        self.assertEqual(ordered, ([1, 1, 1], [3, 2, 1]))
def testGetPossiblePharmacophores(self):
    """Check how many scaffolds Utils.GetPossibleScaffolds produces.

    Each probe is (first argument passed to GetPossibleScaffolds, expected
    length of the result); a request with 1 as the first argument must
    compare equal to 0.
    """
    distBins = [(1, 2), (2, 3), (5, 6)]
    probes = [
        (2, 3),
        (3, 24),
    ]
    for tpl in probes:
        nRequested, expected = tpl
        cnt = len(Utils.GetPossibleScaffolds(nRequested, distBins))
        assert cnt == expected, f'bad pharmacophore count {cnt} for probe {str(tpl)}'

    self.assertEqual(Utils.GetPossibleScaffolds(1, distBins), 0)
def testCounts(self):
    """Check Utils.CountUpTo(3, 3, combo) against known results."""
    # (combo passed as the third argument, expected return value)
    expectedCounts = (
        ((0, 1, 2), 4),
        ((0, 0, 0), 0),
        ((2, 2, 2), 9),
        ((1, 1, 2), 7),
        ((1, 2, 2), 8),
    )
    for combo, tgt in expectedCounts:
        res = Utils.CountUpTo(3, 3, combo)
        assert res == tgt, f'Bad res ({res}) for combo {str((combo, tgt))}'
def testGetTriangles(self):
    """Check the triangles reported by Utils.GetTriangles.

    Each probe is (argument to GetTriangles, expected number of triangles,
    expected triangles).
    """
    probes = [
        (2, 0, []),
        (3, 1, ((0, 1, 2), )),
        (4, 2, ((0, 1, 3), (1, 2, 4))),
        (5, 3, ((0, 1, 4), (1, 2, 5), (2, 3, 6))),
    ]
    for tpl in probes:
        nPoints, expectedLen, expectedTris = tpl
        r = Utils.GetTriangles(nPoints)
        # check the length first so a size mismatch gets its own message
        assert len(r) == expectedLen, f'bad triangle length {len(r)} for probe {str(tpl)}'
        assert r == expectedTris, f'bad triangle list {str(r)} for probe {str(tpl)}'
def testGetPossiblePharmacophores(self):
    """Check how many scaffolds Utils.GetPossibleScaffolds produces.

    Each probe is (first argument passed to GetPossibleScaffolds, expected
    length of the result).
    """
    distBins = [(1, 2), (2, 3), (5, 6)]
    probes = [
        (2, 3),
        (3, 24),
    ]
    for probe in probes:
        nRequested, expected = probe
        found = len(Utils.GetPossibleScaffolds(nRequested, distBins))
        assert found == expected, 'bad pharmacophore count %d for probe %s' % (found, str(probe))
def testLimitPharmacophores(self):
    """Check Utils.ScaffoldPasses for several scaffolds over three bins.

    Each probe is (scaffold of bin indices, expected result).
    """
    distBins = [(1, 2), (2, 3), (5, 6)]
    probes = [
        ((0, 0, 0), 1),
        ((0, 0, 1), 1),
        ((0, 0, 2), 0),
        ((0, 1, 2), 1),
        ((1, 1, 2), 1),
    ]
    for probe in probes:
        scaffold, expected = probe
        passed = Utils.ScaffoldPasses(scaffold, distBins)
        assert passed == expected, 'bad result %d for probe %s' % (passed, str(probe))
def testGetTriangles(self):
    """Check the triangles reported by Utils.GetTriangles.

    Each probe is (argument to GetTriangles, expected number of triangles,
    expected triangles).
    """
    probes = [
        (2, 0, []),
        (3, 1, ((0, 1, 2), )),
        (4, 2, ((0, 1, 3), (1, 2, 4))),
        (5, 3, ((0, 1, 4), (1, 2, 5), (2, 3, 6))),
    ]
    for probe in probes:
        nPoints, expectedLen, expectedTris = probe
        found = Utils.GetTriangles(nPoints)
        # check the length first so a size mismatch gets its own message
        assert len(found) == expectedLen, 'bad triangle length %d for probe %s' % (len(found),
                                                                                  str(probe))
        assert found == expectedTris, 'bad triangle list %s for probe %s' % (str(found),
                                                                             str(probe))
def GetBitInfo(self, idx):
    """ returns information about the given bit

     **Arguments**

       - idx: the bit index to be considered

     **Returns**

       a 3-tuple:

         1) the number of points in the pharmacophore

         2) the proto-pharmacophore (tuple of pattern indices)

         3) the scaffold (tuple of distance indices)

     **Raises**

       IndexError if idx is negative or is not smaller than the signature
       size

    """
    # Negative indices must be rejected explicitly: they would otherwise
    # flow into the floor-division/modulo below and then into Python's
    # negative indexing, silently describing an unrelated bit.
    if idx < 0 or idx >= self._sigSize:
        raise IndexError('bad index (%d) queried. %d is the max' % (idx, self._sigSize))

    # first figure out how many points are in the p'cophore
    nPts = self.minPointCount
    while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx:
        nPts += 1

    # how far are we in from the start point?
    offsetFromStart = idx - self._starts[nPts]
    if _verbose:
        print('\t %d Points, %d offset' % (nPts, offsetFromStart))

    # lookup the number of scaffolds
    nDists = len(Utils.nPointDistDict[nPts])
    scaffolds = self._scaffolds[nDists]
    nScaffolds = len(scaffolds)

    # bits are laid out proto-pharmacophore by proto-pharmacophore, with one
    # bit per scaffold inside each of them
    protoIdx, scaffoldIdx = divmod(offsetFromStart, nScaffolds)

    # figure out to which proto-pharmacophore we belong:
    indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
    combo = tuple(indexCombos[protoIdx])
    if _verbose:
        print('\t combo: %s' % (str(combo)))

    # and which scaffold:
    scaffold = scaffolds[scaffoldIdx]
    if _verbose:
        print('\t scaffold: %s' % (str(scaffold)))
    return nPts, combo, scaffold
def testDistTriangleInequality(self):
    """Check Utils.BinsTriangleInequality on triples of distance bins.

    Each probe is (three indices into the bin list, expected result).
    """
    distBins = [(1, 2), (2, 3), (5, 6)]
    probes = [
        ((0, 0, 0), 1),
        ((0, 0, 1), 1),
        ((0, 0, 2), 0),
        ((0, 1, 2), 1),
        ((1, 1, 2), 1),
    ]
    for tpl in probes:
        (first, second, third), expected = tpl
        # translate the bin indices into the actual bins
        r = Utils.BinsTriangleInequality(distBins[first], distBins[second], distBins[third])
        assert r == expected, f'bad result {r} for probe {str(tpl)}'
def GetAtomsMatchingBit(sigFactory, bitIdx, mol, dMat=None, justOne=0, matchingAtoms=None):
    """ Returns a list of lists of atom indices for a bit

      **Arguments**

        - sigFactory: a SigFactory

        - bitIdx: the bit to be queried

        - mol: the molecule to be examined

        - dMat: (optional) the distance matrix of the molecule

        - justOne: (optional) if this is nonzero, only the first match
          will be returned.

        - matchingAtoms: (optional) if this is not None, it should
          contain a sequence of sequences with the indices of atoms in
          the molecule which match each of the patterns used by the
          signature.

      **Returns**

        a list of tuples with the matching atoms; the list is empty if any
        feature of the bit has no match in the molecule

    """
    assert sigFactory.shortestPathsOnly, 'not implemented for non-shortest path signatures'
    nPts, featCombo, scaffold = sigFactory.GetBitInfo(bitIdx)
    if _verbose:
        print('info:', nPts)
        print('\t', featCombo)
        print('\t', scaffold)

    if matchingAtoms is None:
        matchingAtoms = sigFactory.GetMolFeats(mol)

    # find the atoms that match each features
    choices = []
    for featIdx in featCombo:
        tmp = matchingAtoms[featIdx]
        if tmp:
            choices.append(tmp)
        else:
            # one of the patterns didn't find a match, we
            #  can return now
            if _verbose:
                print('no match found for feature:', featIdx)
            return []

    if _verbose:
        print('choices:')
        print(choices)

    if dMat is None:
        dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

    distsToCheck = Utils.nPointDistDict[nPts]

    protoPharmacophores = Utils.GetAllCombinations(choices, noDups=1)

    res = []
    for protoPharm in protoPharmacophores:
        if _verbose:
            print('protoPharm:', protoPharm)
        for i, (a1, a2) in enumerate(distsToCheck):
            dLow, dHigh = sigFactory.GetBins()[scaffold[i]]
            #
            # FIX: this is making all kinds of assumptions about
            #  things being single-atom matches (or at least that
            #  only the first atom matters
            #
            idx1, idx2 = protoPharm[a1][0], protoPharm[a2][0]
            dist = dMat[idx1, idx2]
            if _verbose:
                print('\t dist: %d->%d = %d (%d,%d)' % (idx1, idx2, dist, dLow, dHigh))
            # the distance has to fall inside the half-open bin [dLow, dHigh)
            if dist < dLow or dist >= dHigh:
                break
        else:
            # every distance landed in its bin: we found it
            if _verbose:
                print('Found one')
            protoPharm.sort()
            protoPharm = tuple(protoPharm)
            if protoPharm not in res:
                res.append(protoPharm)
                if justOne:
                    break
    return res
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None):
    """ generates a 2D fingerprint for a molecule using the
     parameters in _sig_

     **Arguments**

       - mol: the molecule for which the signature should be generated

       - sigFactory : the SigFactory object with signature parameters
                NOTE: no preprocessing is carried out for _sigFactory_.
                      It *must* be pre-initialized.

       - perms: (optional) a sequence of permutation indices limiting which
         pharmacophore combinations are allowed

       - dMat: (optional) the distance matrix to be used

     **Returns**

       the signature obtained from sigFactory.GetSignature(), after it has
       been handed to _ShortestPathsMatch for every match

     **Raises**

       ValueError if sigFactory is not a SigFactory.SigFactory

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')

    familyNames = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', familyNames)
    familyCount = len(familyNames)

    lowestCount = sigFactory.minPointCount
    highestCount = sigFactory.maxPointCount
    if highestCount > 3:
        logger.warning(
            ' Pharmacophores with more than 3 points are not currently supported.\nSetting maxCount to 3.'
        )
        highestCount = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        from rdkit import Chem
        dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for pointCount in range(lowestCount, highestCount + 1):
            perms.extend(Utils.GetIndexCombinations(familyCount, pointCount))

    # generate the matches:
    featMatches = sigFactory.GetMolFeats(mol)
    if _verbose:
        print('  featMatches:', featMatches)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # the permutation is a combination of feature indices
        #   defining the feature set for a proto-pharmacophore;
        #   consecutive equal indices share a class id
        classId = 0
        featClasses = [classId]
        for pos in range(1, len(perm)):
            if perm[pos] != perm[pos - 1]:
                classId += 1
            featClasses.append(classId)

        # Get a set of matches at each index of
        #  the proto-pharmacophore.
        matchPerms = [featMatches[featIdx] for featIdx in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print('    matchPerms: %s' % (str(matchPerms)))

        # Get all unique combinations of those possible matches:
        matchesToMap = Utils.GetUniqueCombinations(matchPerms, featClasses)
        # keep only the second item of every pair, updating the list in place
        for pos, combo in enumerate(matchesToMap):
            matchesToMap[pos] = [pair[1] for pair in combo]
        if _verbose:
            print('    mtM:', matchesToMap)

        for match in matchesToMap:
            # only shortest-path signatures are handled here
            if not sigFactory.shortestPathsOnly:
                continue
            _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
    return sig
def GetBitIdx(self, featIndices, dists, sortIndices=True):
    """ returns the index for a pharmacophore described using a set of
      feature indices and distances

    **Arguments**

      - featIndices: a sequence of feature indices

      - dists: a sequence of distance between the features, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - sortIndices : sort the indices

    **Returns**

      the integer bit index

    **Raises**

      - NotImplementedError if more than 3 feature indices are given

      - IndexError if the number of points is outside
        [minPointCount, maxPointCount], if any feature index is negative or
        not smaller than the number of features, or if no distance bin
        matches dists

    """
    nPoints = len(featIndices)
    if nPoints > 3:
        raise NotImplementedError('>3 points not supported')
    if nPoints < self.minPointCount:
        raise IndexError('bad number of points')
    if nPoints > self.maxPointCount:
        raise IndexError('bad number of points')

    # this is the start of the nPoint-point pharmacophores
    startIdx = self._starts[nPoints]

    #
    # now we need to map the pattern indices to an offset from startIdx
    #
    if sortIndices:
        featIndices = sorted(featIndices)

    # Validate every index, not just the first one: when sortIndices is
    # False the smallest index is not guaranteed to come first.
    if nPoints == 0 or min(featIndices) < 0:
        raise IndexError('bad feature index')
    if max(featIndices) >= self._nFeats:
        raise IndexError('bad feature index')

    if nPoints == 3:
        featIndices, dists = Utils.OrderTriangle(featIndices, dists)

    offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
    if _verbose:
        print('offset for feature %s: %d' % (str(featIndices), offset))

    # each feature combination owns one bit per scaffold
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    if _verbose:
        print('>>>>>>>>>>>>>>>>>>>>>>>')
        print('\tScaffolds:', repr(scaffolds), type(scaffolds))
        print('\tDists:', repr(dists), type(dists))
        print('\tbins:', repr(self._bins), type(self._bins))

    # keep the try body minimal so that only a failed bin lookup is
    # reported as a missing distance bin
    try:
        binIdx = self._findBinIdx(dists, self._bins, scaffolds)
    except ValueError as exc:
        fams = self.GetFeatFamilies()
        fams = [fams[x] for x in featIndices]
        raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s' %
                         (fams, dists, self._bins, self._scaffolds)) from exc

    return startIdx + offset + binIdx