def ReadSymbols(self, tree):
    """Translate the atom symbol in ``tree[0]`` into an RDKit atom.

    Handled symbols, checked in this order:

    * ``'any atom'`` / ``'$'``   -- atomic number greater than 0
    * ``'heteroatom'`` / ``'&'`` -- atomic number 7, 8, 15 or 16 (OR-combined)
    * ``'heavy atom'`` / ``'X'`` -- atomic number greater than 1
    * a symbol starting with a lower-case letter -- a plain ``Chem.Atom``
      of the capitalised symbol, flagged aromatic
    * ``'M'``                    -- atomic number greater than 19
    * anything else              -- query for the atomic number of that
      element symbol

    Returns the atom that was built.
    Raises RINGReaderError when RDKit raises RuntimeError while creating
    the element.
    """
    symbol = tree[0]

    if symbol in ('any atom', '$'):
        return rdqueries.AtomNumGreaterQueryAtom(0)

    if symbol in ('heteroatom', '&'):
        # N first, then OR in O, P and S
        hetero = rdqueries.AtomNumEqualsQueryAtom(7)
        for atomic_num in (8, 15, 16):
            hetero.ExpandQuery(
                rdqueries.AtomNumEqualsQueryAtom(atomic_num),
                how=Chem.rdchem.CompositeQueryType.COMPOSITE_OR)
        return hetero

    if symbol in ('heavy atom', 'X'):
        # anything heavier than H
        return rdqueries.AtomNumGreaterQueryAtom(1)

    if symbol[0].islower():
        # lower-case first letter marks an aromatic atom
        # NOTE(review): this branch yields a Chem.Atom, the other branches
        # yield query atoms -- confirm callers cope with both.
        element = symbol[0].upper() + symbol[1:]
        try:
            aromatic = Chem.Atom(element)
            aromatic.SetIsAromatic(True)
        except RuntimeError:
            raise RINGReaderError('Element aromatic ' + element + ' not found')
        return aromatic

    if symbol == 'M':
        # metal
        return rdqueries.AtomNumGreaterQueryAtom(19)

    try:
        atomic_num = Chem.Atom(symbol).GetAtomicNum()
        return rdqueries.AtomNumEqualsQueryAtom(atomic_num)
    except RuntimeError:
        raise RINGReaderError('Element ' + symbol + ' not found')
def ReadAtomSuffix(self, tree, atom):
    """Apply the atom suffix in ``tree[0]`` to ``atom``.

    Charge suffixes ('+', '-', '+.', '-.') add a formal-charge query to
    ``atom`` through ``ExpandQuery``.  Radical suffixes produce an
    ``AtomRadical`` constraint: '.', '+.' and '-.' use ``'=1'``, ':' uses
    ``'=2'`` and ':.' uses ``'=3'``.  '*' builds an onium query (default
    valence + 1, formal charge +1).  '?' does nothing.

    Returns the ``AtomRadical`` constraint, or ``None`` when the suffix
    carries no radical information.
    Raises NotImplementedError for any other suffix.
    """
    suffix = tree[0]

    if suffix == '?':
        return None

    if suffix == '*':
        from rdkit.Chem import GetPeriodicTable
        # NOTE(review): `atom` is rebound to a fresh query atom below, so the
        # onium query is only attached to a local object -- confirm whether
        # the caller is supposed to see it.
        atomicnum = atom.GetAtomicNum()
        atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
        default_valence = GetPeriodicTable().GetDefaultValence(atomicnum)
        atom.ExpandQuery(
            rdqueries.TotalValenceEqualsQueryAtom(default_valence + 1))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
        return None

    # suffix -> (radical count expression or None, formal charge or None)
    simple_suffixes = {
        '+.': ('=1', 1),
        '-.': ('=1', -1),
        '+': (None, 1),
        '-': (None, -1),
        '.': ('=1', None),
        ':': ('=2', None),
        ':.': ('=3', None),
    }
    if suffix not in simple_suffixes:
        raise NotImplementedError("Unsupported atom suffic: '" + suffix + "'")

    radical_expr, charge = simple_suffixes[suffix]
    constraint = None
    if radical_expr is not None:
        constraint = AtomRadical(False, ConstraintNumber(radical_expr))
    if charge is not None:
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(charge))
    return constraint
def get_simple(ms):
    """Collect simple descriptor vectors for molecules, grouped by name.

    For every molecule in ``ms`` a vector is built from: the heavy-atom
    count, the number of atoms with atomic number 5, 35, 6, 17, 9, 53, 7,
    8, 15 and 16 (in that order), ``NumHAcceptors``, ``NumHDonors``,
    ``MolLogP`` and ``RingCount``.  Vectors are appended to a list stored
    under the molecule's ``_Name`` property.

    A ``ValueError`` raised while processing a molecule is counted, and the
    name's entry is dropped again if it holds no vectors.  The number of
    such errors is printed once all molecules have been processed.

    Returns a dict mapping name -> list of descriptor vectors.
    """
    failed = 0
    by_name = {}
    counted_atomic_nums = (5, 35, 6, 17, 9, 53, 7, 8, 15, 16)

    for mol in ms:
        try:
            name = mol.GetProp('_Name')
            by_name.setdefault(name, [])

            # descriptor vector for this molecule
            vector = [mol.GetNumHeavyAtoms()]
            vector.extend(
                len(mol.GetAtomsMatchingQuery(
                    rdqueries.AtomNumEqualsQueryAtom(atomic_num)))
                for atomic_num in counted_atomic_nums
            )
            vector.extend((
                Descriptors.NumHAcceptors(mol),
                Descriptors.NumHDonors(mol),
                Descriptors.MolLogP(mol),
                Descriptors.RingCount(mol),
            ))
            by_name[name].append(vector)
        except ValueError:
            # do not keep an empty entry for a name that only failed
            if not by_name[name]:
                del by_name[name]
            failed += 1

    print(str(failed) + ' ValueError(s) has(have) occured')
    return by_name
def _findMissingReactiveReactants(rfps, pfps, currentReactants, unmappedPAtoms, output=False):
    """Extend reactant solutions with reactants that cover unmapped product atoms.

    ``currentReactants`` and ``unmappedPAtoms`` are walked in lockstep.  For
    each solution whose ``umPA[1]`` is positive, the atomic numbers of the
    missing product atoms are counted (from the ``(bit, count)`` pairs in
    ``umPA[-1]`` and each product's ``bitInfoScaffoldFP``).  The reactants
    not yet in the solution are then visited in order of decreasing
    reactivity per atom; those that satisfy the most element-count queries
    are added, with ties kept when their reactivity per atom is at least
    that of the current best.

    Returns ``currentReactants`` unchanged when ``unmappedPAtoms`` is empty,
    otherwise a list holding the same solution objects as
    ``currentReactants`` -- they are extended in place.
    ``output`` switches on diagnostic printing.
    """
    if output:
        print("--- _findMissingReactiveReactants ---")
    # nothing is unmapped, so there is nothing to add
    if len(unmappedPAtoms) == 0:
        return currentReactants

    def reactivityPerAtom(idx):
        reactantFP = rfps[idx]
        return reactantFP.reactivity / float(reactantFP.molecule.GetNumAtoms())

    verbose = output > 0
    finalReactants = []
    numReactants = len(rfps)

    # look at every solution produced by the preceding scoring
    for reacts, umPA in zip(currentReactants, unmappedPAtoms):
        finalReactants.append(reacts)
        if not (umPA[1] > 0):
            continue

        # unused reactants, most reactive (per atom) first
        candidates = sorted(set(range(numReactants)) - set(reacts),
                            key=reactivityPerAtom, reverse=True)

        # atomic numbers of the missing product atoms, repeated by count
        atomicNums = []
        for bit, count in umPA[-1]:
            for productFP in pfps:
                bitInfo = productFP.bitInfoScaffoldFP
                if bit in bitInfo:
                    firstEntry = bitInfo[bit][0]
                    centre = productFP.molecule.GetAtomWithIdx(firstEntry[0])
                    atomicNums.extend([centre.GetAtomicNum()] * count)
        missingPAtoms = Counter(atomicNums)
        if verbose:
            print(missingPAtoms)

        # one element query per missing atomic number
        queries = [(rdqueries.AtomNumEqualsQueryAtom(num), num)
                   for num in missingPAtoms]

        bestCount = 0
        bestReactivity = -1
        addReactants = []
        for r in candidates:
            if verbose:
                print(" >> Reactant", r, reactivityPerAtom(r))
            reactantMol = rfps[r].molecule
            fulfilled = sum(
                1 for query, num in queries
                if len(reactantMol.GetAtomsMatchingQuery(query)) >= missingPAtoms[num]
            )
            if verbose:
                print(" Max reactivity", bestReactivity)
                print(" Max fullfilled queries", bestCount)
            if fulfilled > bestCount:
                # strictly better coverage replaces everything found so far
                bestCount = fulfilled
                bestReactivity = reactivityPerAtom(r)
                addReactants = [r]
            elif bestCount and fulfilled == bestCount \
                    and reactivityPerAtom(r) >= bestReactivity:
                addReactants.append(r)
        if verbose:
            print(" Added reactants", addReactants)
        finalReactants[-1].extend(addReactants)

    if verbose:
        print(" >> Final reactants", finalReactants)
    return finalReactants
def _getAtomWeights(mol, molID, topicID, topicModel):
    """Compute per-atom weights of ``mol`` for one topic.

    Every fragment key stored in ``topicModel.moldata.loc[molID, 'fps']``
    is looked up in the topic/fragment probability matrix.  Fragments whose
    probability for ``topicID`` is at most ``1 / nFrags`` are skipped; for
    the others the probability is written to the atoms the fragment covers,
    keeping the highest value where fragments overlap.

    * 'Morgan' / 'RDK' fragment methods: atoms are found through the bond
      indices listed for the bit in the fingerprint's bit info.
    * fragment methods starting with 'Brics': atoms are found by matching
      the fragment SMARTS from ``topicModel.vocabulary``; atoms with atomic
      number 0 in the fragment (wildcards) are ignored.

    Fragments that cannot be resolved to a column of the probability matrix
    are skipped silently.

    Returns a tuple ``(atomWeights, maxWeightTopic)``: a numpy array with
    one weight per atom and the largest probability of ``topicID``.
    """
    weights = [0] * mol.GetNumAtoms()

    # ignore "wildcard atoms" in BRICS fragments
    wildcardQuery = rdqueries.AtomNumEqualsQueryAtom(0)

    # get all fragments of a certain molecule
    _, aBits = chemTopicModel._generateFPs(mol, topicModel.fragmentMethod)
    fp = topicModel.moldata.loc[molID, 'fps']

    probs = topicModel.getTopicFragmentProbabilities()
    _nTopics, nFrags = probs.shape
    # fragments at or below this probability are not counted
    minWeight = 1. / nFrags

    # use the max probability of a fragment associated with a certain topic
    # to normalize the fragment weights
    maxWeightTopic = max(probs[topicID])

    usesBitMapping = topicModel.fragmentMethod in ['Morgan', 'RDK']

    # calculate the weight of an atom concerning a certain topic
    for bit in fp.keys():
        # Best effort: a fragment that has no column in the probability
        # matrix is skipped.  `except Exception` (not a bare except) keeps
        # KeyboardInterrupt / SystemExit working inside the loop.
        try:
            idxBit = topicModel.fragIdx[bit] if usesBitMapping else bit
        except Exception:
            continue
        try:
            r = probs[topicID, idxBit]
        except Exception:
            continue
        if r <= minWeight:
            continue

        if usesBitMapping and bit in aBits:
            # Morgan/RDK fingerprints
            for path in aBits[bit]:
                for bondIdx in path:
                    bond = mol.GetBondWithIdx(bondIdx)
                    # for overlapping fragments take the highest weight for the atom
                    for atomIdx in (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()):
                        weights[atomIdx] = max(r, weights[atomIdx])
        elif topicModel.fragmentMethod.startswith('Brics'):
            # BRICS fragments
            submol = Chem.MolFromSmarts(topicModel.vocabulary[idxBit])
            ignoreWildcards = {
                atom.GetIdx() for atom in submol.GetAtomsMatchingQuery(wildcardQuery)
            }
            for match in mol.GetSubstructMatches(submol):
                for n, atomIdx in enumerate(match):
                    if n in ignoreWildcards:
                        continue
                    # for overlapping fragments take the highest weight for the atom,
                    # this should not happen for BRICS though
                    weights[atomIdx] = max(r, weights[atomIdx])

    atomWeights = np.array(weights)
    return atomWeights, maxWeightTopic
def __call__(self,comb_mol,mapped_index):
    """Apply this object's valence, charge and radical settings to one atom.

    The atom is looked up in ``comb_mol`` at ``mapped_index[self.idx]``.
    When ``self.valence`` is non-zero it is replaced by a query atom that
    matches the same atomic number, a total valence of the element's
    default valence plus ``self.valence``, and a formal charge of
    ``self.charge``.  Afterwards the radical electron count and formal
    charge are set on the atom object.

    The replacement is written back at the mapped index, the same position
    the atom was read from; the previous code used the unmapped
    ``self.idx`` and so overwrote a different atom whenever the mapping
    was not the identity.
    """
    target_idx = mapped_index[self.idx]
    atom = comb_mol.GetAtomWithIdx(target_idx)
    if self.valence != 0:
        from rdkit.Chem import GetPeriodicTable
        atomicnum = atom.GetAtomicNum()
        atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
        default_valence = GetPeriodicTable().GetDefaultValence(atomicnum)
        atom.ExpandQuery(
            rdqueries.TotalValenceEqualsQueryAtom(default_valence + self.valence))
        atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(self.charge))
        comb_mol.ReplaceAtom(target_idx, atom)
    # NOTE(review): if ReplaceAtom stores a copy of `atom`, the two setters
    # below only change the local object in the branch above -- confirm
    # against the RDKit documentation whether they should run before it.
    atom.SetNumRadicalElectrons(self.radical)
    atom.SetFormalCharge(self.charge)
FTYPE_CYCLIC_ACYCLIC = 'cyclic_and_acyclic' # Global SMARTS used by the program # acyclic bond smarts ACYC_SMARTS = Chem.MolFromSmarts("[*]!@!=!#[*]") # exocyclic/fused exocyclic bond smarts CYC_SMARTS = Chem.MolFromSmarts("[R1,R2]@[r;!R1]") # smarts used to find appropriate fragment for # would use SMARTS: [$([#0][r].[r][#0]),$([#0][r][#0])] # but RDkit doesn't support component SMARTS in recursive one - $([#0][r].[r][#0]) # hence split into two cSma1 = Chem.MolFromSmarts("[#0][r].[r][#0]") cSma2 = Chem.MolFromSmarts("[#0][r][#0]") dummyAtomQuery = rdqueries.AtomNumEqualsQueryAtom(0) def delete_bonds(mol, bonds, ftype, hac): """ Fragment molecule on bonds and reduce to fraggle fragmentation SMILES. If none exists, returns None """ # Replace the given bonds with attachment points (B1-B2 -> B1-[*].[*]-B2) bondIdx = [mol.GetBondBetweenAtoms(*bond).GetIdx() for bond in bonds] modifiedMol = Chem.FragmentOnBonds(mol, bondIdx, dummyLabels=[(0, 0)] * len(bondIdx)) # should be able to get away without sanitising mol as the valencies should be okay # do not do a full sanitization, but do find rings and calculate valences: Chem.SanitizeMol(
def main():
    """Build fragmentation trees for every usable InChI in the input file.

    Steps:

    1. Read the command-line parameters and assemble ``file_params``.
    2. Load the isotope dictionary from the given isotope file, falling
       back to ``get_isotope_dict()`` without arguments when that returns
       ``None``.
    3. Create the output directory if needed and truncate the error log.
    4. Read one InChI per line and keep those whose molecule has more than
       one carbon atom; InChIs that cannot be processed are printed.
    5. Run ``grow_tree_mp`` over the kept InChIs in a pool of 10 processes.
    """
    sys.path.append(os.path.dirname(__file__))

    # Parse the command line arguments
    cl_params = command_line_params()

    # Create the file parameter for the FragTreeLibrary
    file_params = {
        'input_inchi_file': cl_params['inchi_file'],
        'output_directory': cl_params['output_dir'],
        'output_hdf5_file_base': cl_params['output_base_name'],
        'output_error_log': cl_params['error_log']
    }

    isotope_dict = get_isotope_dict(isostope_file=cl_params['isotope_file'])

    # Make output directory if it does not exist.  When executed in parallel
    # another rank may create it first, so an OSError is only re-raised if
    # the directory is still missing afterwards.
    output_dir = file_params['output_directory']
    try:
        os.mkdir(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise

    # Get isotope dictionary (if none was provided)
    if isotope_dict is None:
        isotope_dict = get_isotope_dict()

    # make list of inchis
    with open(file_params['input_inchi_file'], 'r') as inchi_file:
        inchi_list = [line.strip() for line in inchi_file]

    # start with an empty error log
    with open(file_params['output_error_log'], 'w'):
        pass

    # The tree builder needs at least one bond to break, so only molecules
    # with more than one carbon atom are queued.
    carbon_query = rdqueries.AtomNumEqualsQueryAtom(6)
    mp_params = []
    for inchi in inchi_list:
        mol = Chem.MolFromInchi(inchi)
        try:
            n_carbons = len(mol.GetAtomsMatchingQuery(carbon_query))
        except Exception:
            # e.g. `mol` is None; report the InChI and move on
            print(inchi)
            continue
        if n_carbons > 1:
            mp_params.append(
                (inchi, cl_params['max_depth'], isotope_dict, file_params))

    pool = mp.Pool(processes=10)
    try:
        pool.map(grow_tree_mp, mp_params)
    finally:
        # map() has returned or raised, so no queued work is left; always
        # stop and reap the workers instead of leaving them behind.
        pool.terminate()
        pool.join()
    return