def FragmentUnsantize(suppl1):
    """Fragment ``suppl1`` on its BRICS bonds without sanitizing the pieces.

    Args:
        suppl1: molecule object handed to ``Chem.FragmentOnBRICSBonds``.

    Returns:
        The result of ``Chem.GetMolFrags(..., asMols=True,
        sanitizeFrags=False)`` on the fragmented molecule.

    Raises:
        RDKitError: with code ``2`` when either RDKit call fails; the
            original exception is attached as ``__cause__``.
    """
    try:
        newmol = Chem.FragmentOnBRICSBonds(suppl1)
        mfl = Chem.GetMolFrags(newmol, asMols=True, sanitizeFrags=False)
    except Exception as err:
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit are not converted into an RDKitError.
        print('Not good for false')
        raise RDKitError(2) from err
    print('Good False')
    return mfl
def FragmentSanitize(tempSDFPath):
    """Fragment the first molecule of an SD file on its BRICS bonds.

    The file is read with ``sanitize=True``; the resulting fragments are
    extracted with ``sanitizeFrags=False``.

    Args:
        tempSDFPath: path passed to ``Chem.SDMolSupplier``.

    Returns:
        The result of ``Chem.GetMolFrags(..., asMols=True,
        sanitizeFrags=False)`` on the fragmented molecule.

    Raises:
        RDKitError: with code ``1`` when reading or fragmenting fails; the
            original exception is attached as ``__cause__``.
    """
    try:
        suppl2 = Chem.SDMolSupplier(tempSDFPath, sanitize=True)
        newmol2 = Chem.FragmentOnBRICSBonds(suppl2[0])
        return Chem.GetMolFrags(newmol2, asMols=True, sanitizeFrags=False)
    except Exception as err:
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit are not converted into an RDKitError.
        raise RDKitError(1) from err
def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True):
    """ Cut the BRICS bonds of a molecule and return the capped result.

    With no ``bonds`` given, the work is delegated to
    ``Chem.FragmentOnBRICSBonds``:

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[3*]O[3*].[4*]CC.[4*]CCC'

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'

    ``bonds`` restricts the cut to an explicit list of
    ``((atomIdxA, atomIdxB), (labelA, labelB))`` entries:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
    >>> Chem.MolToSmiles(m2,True)
    '[3*]OCC.[4*]CCC'

    Following the call with Chem.GetMolFrags gives an alternate way of doing
    a BRICS decomposition:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> frags = Chem.GetMolFrags(m2,asMols=True)
    >>> [Chem.MolToSmiles(x,True) for x in frags]
    ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

    ``sanitize`` controls whether ``Chem.SanitizeMol`` is run on the result.
    ``silent`` is accepted but not read anywhere in the body.
    """
    if not bonds:
        fragmented = Chem.FragmentOnBRICSBonds(mol)
        if sanitize:
            Chem.SanitizeMol(fragmented)
        return fragmented

    editable = Chem.EditableMol(mol)
    hasConformers = mol.GetNumConformers()
    nextIdx = mol.GetNumAtoms()
    # (new dummy index, index of the atom whose coordinates it inherits)
    coordSources = []

    def capWithDummy(anchorIdx, label, order):
        """Attach a labelled dummy atom to ``anchorIdx``; return its index."""
        nonlocal nextIdx
        dummy = Chem.Atom(0)
        dummy.SetIsotope(int(label))
        dummy.SetNoImplicit(True)
        dummyIdx = nextIdx
        nextIdx += 1
        editable.AddAtom(dummy)
        editable.AddBond(anchorIdx, dummyIdx, order)
        return dummyIdx

    for (beginIdx, endIdx), labels in bonds:
        order = mol.GetBondBetweenAtoms(beginIdx, endIdx).GetBondType()
        editable.RemoveBond(beginIdx, endIdx)
        beginLabel, endLabel = labels
        beginDummy = capWithDummy(beginIdx, beginLabel, order)
        endDummy = capWithDummy(endIdx, endLabel, order)
        if hasConformers:
            # each dummy takes the position of the atom it stands in for
            coordSources.append((beginDummy, endIdx))
            coordSources.append((endDummy, beginIdx))

    result = editable.GetMol()
    if sanitize:
        Chem.SanitizeMol(result)
    if hasConformers:
        for sourceConf in mol.GetConformers():
            targetConf = result.GetConformer(sourceConf.GetId())
            for dummyIdx, sourceIdx in coordSources:
                targetConf.SetAtomPosition(
                    dummyIdx, sourceConf.GetAtomPosition(sourceIdx))
    return result
def _writeSdf(mol, sdfPath):
    """Write ``mol`` to ``sdfPath`` without kekulizing; always close the writer."""
    writer = Chem.SDWriter(sdfPath)
    try:
        writer.SetKekulize(False)
        writer.write(mol)
    finally:
        writer.close()


def _readMol2Atoms(mol2Path):
    """Return ``(x, y, z, atomType)`` for every record of the mol2 ATOM section."""
    with open(mol2Path, 'r') as inf:
        mol2Lines = inf.readlines()
    first = mol2Lines.index('@<TRIPOS>ATOM\n') + 1
    last = mol2Lines.index('@<TRIPOS>BOND\n')
    atoms = []
    for record in mol2Lines[first:last]:
        fields = record.split()
        atoms.append((float(fields[2]), float(fields[3]), float(fields[4]),
                      fields[5]))
    return atoms


def _nearestAtomType(x, y, z, mol2Atoms):
    """Return the type of the mol2 atom closest to ``(x, y, z)`` (first on ties)."""
    sqDists = [(x - ax) * (x - ax) + (y - ay) * (y - ay) + (z - az) * (z - az)
               for ax, ay, az, _ in mol2Atoms]
    return mol2Atoms[sqDists.index(min(sqDists))][3]


def _isPropertyLine(line, tag):
    """Tell whether ``line`` is an ``M <tag>`` property line, for any spacing."""
    return line.split()[:2] == ['M', tag]


def _annotateFragmentFile(filePath, mol2Atoms, isBrick):
    """Strip dummy/hydrogen atoms from a fragment SDF and append its appendix.

    The file at ``filePath`` is rewritten in place. Bricks (``isBrick`` true)
    get an atom-type list plus a branch list; linkers get a per-atom
    contact-count/atom-type list.
    """
    fileName = os.path.basename(filePath)
    with open(filePath, 'r') as inf:
        infoList = inf.readlines()

    molEndList = [i for i, x in enumerate(infoList) if x == '$$$$\n']
    headLine = [x for x in infoList if 'V2000' in x][0]
    headLineNum = infoList.index(headLine)
    # counts line: atom count in columns 0-2, bond count in columns 3-5
    atomNum = int(headLine[0:3])
    bondNum = int(headLine[3:6])
    atomList = infoList[headLineNum + 1:headLineNum + atomNum + 1]
    bondList = infoList[headLineNum + atomNum + 1:
                        headLineNum + atomNum + bondNum + 1]

    # Atom typing by nearest mol2 atom; record 'R' (dummy) and 'H' atoms.
    atomTypeList = []
    dummyAtomList = []
    dummyAtomLineList = []
    hydrAtomList = []
    hydrAtomLineList = []
    # enumerate() rather than atomList.index(): index() returns the first
    # match and so mislabels atoms whose lines are textually identical.
    for atomIdx, atomLine in enumerate(atomList):
        fields = atomLine.split()
        atomType = _nearestAtomType(float(fields[0]), float(fields[1]),
                                    float(fields[2]), mol2Atoms)
        atomTypeList.append(atomType + '\n')
        if fields[3] == "R":
            dummyAtomList.append(atomIdx)
            dummyAtomLineList.append(atomLine)
        if fields[3] == "H":
            hydrAtomList.append(atomIdx)
            hydrAtomLineList.append(atomLine)

    newInfoList = infoList[:molEndList[0]]

    # 1-based atom numbers of each bond, read from the fixed-width columns.
    bondPairList = [(int(bondLine[0:3]), int(bondLine[3:6]))
                    for bondLine in bondList]

    # dummyConnection: bond lines touching a dummy atom (removed later).
    # attachList: (real atom number, dummy atom number) for every bond joining
    # a dummy to a non-dummy atom; dummy-dummy bonds are left out.
    dummySet = set(dummyAtomList)
    dummyConnection = []
    attachList = []
    for dummyIdx in dummyAtomList:
        for bondLine, (begin, end) in zip(bondList, bondPairList):
            if dummyIdx + 1 not in (begin, end):
                continue
            dummyConnection.append(bondLine)
            beginIsDummy = begin - 1 in dummySet
            endIsDummy = end - 1 in dummySet
            if beginIsDummy and endIsDummy:
                continue
            attachList.append((end, begin) if beginIsDummy else (begin, end))
    dummyConnection = list(dict.fromkeys(dummyConnection))

    # Bond lines touching a hydrogen. Each line is kept once and lines already
    # scheduled as dummy bonds are skipped: a duplicate would be subtracted
    # twice from the bond count and make the list.remove() below raise.
    alreadyListed = set(dummyConnection)
    hydrConnection = []
    for hydrIdx in hydrAtomList:
        for bondLine, pair in zip(bondList, bondPairList):
            if hydrIdx + 1 in pair and bondLine not in alreadyListed:
                alreadyListed.add(bondLine)
                hydrConnection.append(bondLine)

    # Counts line and title line.
    newAtomNum = atomNum - len(dummyAtomLineList) - len(hydrAtomLineList)
    newBondNum = bondNum - len(dummyConnection) - len(hydrConnection)
    newInfoList[headLineNum] = (str(newAtomNum).rjust(3) +
                                str(newBondNum).rjust(3) + headLine[6:])
    newInfoList[0] = fileName + '\n'

    # NOTE(review): the type list is cut to its first newAtomNum entries and
    # surviving bond lines are not renumbered, so this presumably relies on
    # dummy and hydrogen atoms being the last atoms of the block - confirm.
    if isBrick:
        # Branch lines: "<real atom number> <type found at the dummy's position>".
        # NOTE(review): the sort key is the atom number as a string, so '10'
        # sorts before '9'; kept because readers may depend on this order.
        branchList = [str(atomNo) + ' ' + atomTypeList[dummyNo - 1]
                      for atomNo, dummyNo in
                      sorted(attachList, key=lambda pair: str(pair[0]))]
        newInfoList.append('\n')
        newInfoList.append('> <ATOMTYPES> \n')
        newInfoList = newInfoList + atomTypeList[:newAtomNum]
        newInfoList.append('\n')
        newInfoList.append(
            '> <BRANCH @atom-number eligible-atmtype-to-connect> \n')
        newInfoList = newInfoList + branchList
    else:
        # Contact lines: "<number of dummy bonds on this atom> <atom type>".
        contactCount = [atomNo for atomNo, _ in attachList]
        contactAppend = [
            str(contactCount.count(atomIdx + 1)) + ' ' + atomType
            for atomIdx, atomType in enumerate(atomTypeList[:newAtomNum])
        ]
        newInfoList.append('\n')
        newInfoList.append('> <MAX-NUMBER-Of-CONTACTS ATOMTYPES> \n')
        newInfoList = newInfoList + contactAppend
    newInfoList.append('\n')
    newInfoList.append('$$$$\n')

    # Drop dummy atoms, dummy bonds, hydrogen atoms, hydrogen bonds (in that
    # order); each remove() deletes the first line with that exact text.
    for staleLine in (dummyAtomLineList + dummyConnection +
                      hydrAtomLineList + hydrConnection):
        newInfoList.remove(staleLine)

    # Drop isotope and charge property lines. Matching on tokens instead of a
    # fixed substring, because V2000 writes them as 'M  ISO' / 'M  CHG' with
    # two spaces, which a single-space substring test never matches.
    newInfoList = [
        line for line in newInfoList
        if not (_isPropertyLine(line, 'ISO') or _isPropertyLine(line, 'CHG'))
    ]

    with open(filePath, 'w') as outf:
        outf.writelines(newInfoList)


def ChopWithRDKit(outputDir, inputPath):
    """Chop a mol2 ligand on its BRICS bonds and write annotated fragments.

    Steps, all relative to ``outputDir`` (which is concatenated as-is, so it
    is expected to end with a path separator):

    1. Read ``inputPath`` unsanitized, fragment it, and save the whole ligand
       to ``output-sdf/<name>.sdf``.
    2. Write one SDF per fragment into ``output-chop/``: ``b-<name>-NNN.sdf``
       when the fragment has at least 4 atoms other than ``*`` and ``H``,
       ``l-<name>-NNN.sdf`` otherwise. A summary line per fragment is
       appended to ``output-log/ListAll``.
    3. Rewrite each fragment file in place: dummy atoms, hydrogens, their
       bonds and ``M ISO``/``M CHG`` lines are removed and an appendix with
       mol2 atom types (taken from the nearest atom of the input file) is
       added.
    4. Append a line to ``output-log/Process.log`` and call
       ``combineLinkers(outputDir, [inputPath] + fragment files)``.

    Args:
        outputDir: output root directory prefix.
        inputPath: path of the mol2 file to chop.

    Returns:
        None.
    """
    lig = os.path.basename(inputPath)  # file name, no path
    output = outputDir + 'output-chop/'
    outputFolderPath_log = outputDir + 'output-log/'

    suppl = Chem.MolFromMol2File(inputPath, sanitize=False)
    newmol = Chem.FragmentOnBRICSBonds(suppl)
    mfl = Chem.GetMolFrags(newmol, asMols=True, sanitizeFrags=False)

    _writeSdf(suppl, outputDir + 'output-sdf/' + lig + '.sdf')

    # One SDF per fragment, numbered separately for bricks and linkers.
    fileList = []
    brickCount = 0
    linkerCount = 0
    for m in mfl:
        symbols = [m.GetAtomWithIdx(i).GetSymbol()
                   for i in range(m.GetNumAtoms())]
        # dummy atoms and hydrogens do not count towards the fragment size
        totalAtomNum = len(symbols) - symbols.count('*') - symbols.count('H')
        if totalAtomNum >= 4:
            tempFileName = (output + 'b-' + lig + '-' +
                            str(brickCount).zfill(3) + '.sdf')
            brickCount += 1
        else:
            tempFileName = (output + 'l-' + lig + '-' +
                            str(linkerCount).zfill(3) + '.sdf')
            linkerCount += 1
        _writeSdf(m, tempFileName)
        fileList.append(tempFileName)
        with open(outputFolderPath_log + 'ListAll', 'at') as outlist:
            outlist.write(tempFileName + ' T ' + str(totalAtomNum) + ' C ' +
                          str(symbols.count('C')) + ' N ' +
                          str(symbols.count('N')) + ' O ' +
                          str(symbols.count('O')) + ' \n')

    # Coordinates and atom types of the input, used to type fragment atoms.
    mol2Atoms = _readMol2Atoms(inputPath)

    for filePath in fileList:
        prefix = os.path.basename(filePath)[:1]
        if prefix == 'b':
            _annotateFragmentFile(filePath, mol2Atoms, True)
        elif prefix == 'l':
            _annotateFragmentFile(filePath, mol2Atoms, False)

    with open(outputFolderPath_log + 'Process.log', 'at') as outLog:
        outLog.write(time.asctime(time.localtime(time.time())))
        outLog.write(' CHOP-MOL ')
        outLog.write(inputPath)
        outLog.write('\n')

    combineLinkers(outputDir, [inputPath] + fileList)