def getRefChemAtom(self, nmrAtoms):
    """Return the single bonded neighbour of the first NMR atom's chem atom.

    The chem atom of ``nmrAtoms[0]`` is inspected; when it has exactly one
    chem bond, the atom at the other end of that bond (as reported by
    ``getOtherAtom``) is returned.  In every other case the result is
    ``None``.

    :param nmrAtoms: indexable collection; only element 0 is used and it
        must expose a ``chemAtom`` attribute.
    :return: the bonded partner atom, or ``None``.
    """
    firstChemAtom = nmrAtoms[0].chemAtom

    # Zero bonds or more than one bond: no unambiguous partner.
    if len(firstChemAtom.chemBonds) != 1:
        return None

    return getOtherAtom(firstChemAtom, firstChemAtom.chemBonds[0])
def createResidueProtonToHeavyAtom(residue):
    """Map proton names of a residue to the heavy atom they are bonded to.

    The result has the shape ``{ccpCode: {protonKey: heavyAtomName}}`` where
    ``protonKey`` is either a single proton name or a sorted tuple of proton
    names (used for equivalent and prochiral proton sets).

    :param residue: object exposing ``ccpCode`` and
        ``chemCompVar.chemComp.sortedChemAtoms()``.
    :return: the nested dictionary described above.
    """
    ccpCode = residue.ccpCode
    heavyAtomByProton = {}

    for chemAtom in residue.chemCompVar.chemComp.sortedChemAtoms():

        # Only heavy atoms are of interest, and the terminal OXT is skipped.
        if chemAtom.elementSymbol == 'H' or chemAtom.name == 'OXT':
            continue
        heavyName = chemAtom.name

        protons = []
        for chemBond in chemAtom.chemBonds:
            neighbour = getOtherAtom(chemAtom, chemBond)
            if neighbour.elementSymbol != 'H':
                continue
            # On the atom named 'N' only the proton named 'H' is accepted.
            if heavyName == 'N' and neighbour.name != 'H':
                continue
            protons.append(neighbour)

        if not protons:
            continue

        atomSet = protons[0].chemAtomSet
        if atomSet:
            if atomSet.isEquivalent or atomSet.isProchiral == True:
                # Grouped key: all proton names together, sorted.
                groupKey = tuple(sorted([proton.name for proton in protons]))
                heavyAtomByProton[groupKey] = heavyName

            # Equivalent protons are only addressable as a group.
            if atomSet.isEquivalent:
                continue

        for proton in protons:
            heavyAtomByProton[proton.name] = heavyName

    return {ccpCode: heavyAtomByProton}
def findRingCarbon(chemAtom, connectedAtoms):
    """Recursively walk carbon atoms from ``chemAtom`` until the ring closes.

    ``connectedAtoms`` holds the path found so far, with the atom that must
    be reached kept as its last element.  Each carbon neighbour that is not
    yet on the path is inserted just before that last element and explored
    in turn.

    :param chemAtom: atom whose bonds are explored in this step.
    :param connectedAtoms: list of atoms; the last element is the target.
    :return: a copy of the completed path once a neighbour equals the
        target, otherwise ``None``.
    """
    for chemBond in chemAtom.chemBonds:
        neighbour = getOtherAtom(chemAtom, chemBond)

        # Reached the closing atom: the path collected so far is the ring.
        if neighbour == connectedAtoms[-1]:
            return connectedAtoms[:]

        # Only follow carbons that are not already part of the path.
        if (not neighbour or neighbour in connectedAtoms
                or neighbour.elementSymbol != 'C'):
            continue

        extendedPath = connectedAtoms[:-1] + [neighbour, connectedAtoms[-1]]
        ringPath = findRingCarbon(neighbour, extendedPath)
        if ringPath:
            return ringPath

    return None
def getStereoPriorities(stereoAtom, chemBondsHandled):
    """Collect a priority key for every bond of ``stereoAtom``.

    A bond already present in ``chemBondsHandled`` contributes the key
    ``(0, 0)`` with no neighbour.  Any other bond contributes
    ``(atomic number of the neighbour, bondDict[bondType])`` and is appended
    to ``chemBondsHandled`` (the caller's list is modified in place).

    NOTE(review): ``bondDict`` is read as a global; it is not defined at
    module level in the part of the file visible here - confirm it exists.

    :param stereoAtom: atom exposing ``chemBonds``.
    :param chemBondsHandled: mutable list of bonds processed so far.
    :return: ``(priorityList, followPriorityKeys)`` where ``priorityList``
        holds ``(priorityKey, otherAtom)`` pairs in bond order and
        ``followPriorityKeys`` holds each key that occurred more than once.
    """
    keyedNeighbours = []
    seenKeys = []
    repeatedKeys = []

    for chemBond in stereoAtom.chemBonds:
        if chemBond not in chemBondsHandled:
            neighbour = getOtherAtom(stereoAtom, chemBond)
            key = (neighbour.chemElement.atomNumber,
                   bondDict[chemBond.bondType])
            chemBondsHandled.append(chemBond)
        else:
            # Bond was dealt with earlier: lowest possible priority.
            neighbour, key = None, (0, 0)

        # First sighting goes to seenKeys, the first repeat to repeatedKeys.
        if key not in seenKeys:
            seenKeys.append(key)
        elif key not in repeatedKeys:
            repeatedKeys.append(key)

        keyedNeighbours.append((key, neighbour))

    return (keyedNeighbours, repeatedKeys)
def makeFullSugar(carboBaseName, coordSystem, baseGlycoCtCode, testMode, replace=False, saveData=True):
    """Create the carbohydrate ChemComp for one sugar, with variants and coordinates.

    Steps, in order:

    1. Read every ``*mol2`` file found in ``<origMol2DataDir>/carbo/<carboBaseName>``.
       The first character of each file name ('a', 'b' or 'o') is taken as
       the anomeric form; the first file (after sorting) becomes the
       reference ChemComp.
    2. If a ChemComp file already exists and ``replace`` is false, stop and
       return the existing ChemComp.
    3. Locate the ring oxygen (C-O-C fragment), the ring carbons, the
       anomeric carbon/oxygen/hydrogen and all hydroxyl oxygens that can
       take part in a link.
    4. Create a ChemCompVar (and the LinkEnds it needs) for every allowed
       combination of stereo type, bound/free anomeric position and
       bound/free binding oxygens.
    5. Validate and, if ``saveData`` is true, save the ChemComp.
    6. Build a ChemCompCoord from the raw mol2 coordinates, validate and,
       if ``saveData`` is true, save it.

    Console output uses the single-argument parenthesised ``print`` form,
    which prints the same text under the Python 2 print statement used by
    the rest of this file.

    :param carboBaseName: sub-directory name of the mol2 files; also used as
        ccpCode of the resulting ChemComp.
    :param coordSystem: ``sourceName`` given to the new ChemCompCoord.
    :param baseGlycoCtCode: GlycoCT residue code; it is appended to
        ``"RES\\n1b:"`` and its first character is replaced by 'a'/'b'/'o'
        for the individual variants.
    :param testMode: selects the test instead of the edit data directories.
    :param replace: allow an existing ChemComp file to be replaced (its GUID
        is re-used when found).
    :param saveData: save ChemComp and ChemCompCoord when true.
    :return: the reference ChemComp.
    :raises Exception: when no ring oxygen is found for a cyclic sugar, or
        when no anomeric carbon can be determined.
    """

    carboMolType = 'carbohydrate'
    namingSystemName = 'EuroCarbDb'

    project = Implementation.MemopsRoot(name='chemComp')
    project.currentUserId = 'ccpnRef'

    #
    # Set archive dir info
    #

    if testMode:
        chemCompDataDir = testChemCompDataDir
        chemCompCoordDataDir = testChemCompCoordDataDir
    else:
        chemCompDataDir = editChemCompDataDir
        chemCompCoordDataDir = editChemCompCoordDataDir

    #
    # First import all mol2 files, pick one form as 'base' unit, adapt this one,
    # then add coords from other chemComps
    #
    # TODO: This is currently very specific!
    #
    # TODO SET A STEREOCHEMISTRY CLASS FOR THE A/B CHEMATOMS!!
    #

    importDir = os.path.join(origMol2DataDir, 'carbo')

    # Keep only names ending in 'mol2'; sorted so the 'a' form comes first.
    importFiles = sorted(
        importFile
        for importFile in os.listdir(os.path.join(importDir, carboBaseName))
        if importFile[-4:] == 'mol2')

    mol2Format = Mol2Format(project, guiParent=None, allowPopups=False)
    molTypes = [carboMolType]

    chemComps = []
    rawChemComps = []

    for importFile in importFiles:

        # Should be 'a', 'b' or 'o'
        anomericCenter = importFile[0]

        if anomericCenter != 'a':
            ccpCode = '%s-%s' % (anomericCenter, carboBaseName)
        else:
            ccpCode = carboBaseName

        fileName = os.path.join(importDir, carboBaseName, importFile)

        print("Reading mol2 file %s..." % fileName)
        ccs = mol2Format.readChemComps(fileName,
                                       molTypes=molTypes,
                                       ccpCodes=[ccpCode],
                                       saveChemComp=False,
                                       minimalPrompts=True,
                                       makeNamingSystem=namingSystemName)

        chemComps.append(ccs[0])
        rawChemComps.append(mol2Format.rawChemComp)

    #
    # Check whether only open form available (e.g. aldi ones)
    #

    if len(chemComps) == 1 and chemComps[0].ccpCode[0] == 'o':
        print(" Warning: only open form available, not creating a/b isoforms.")
        hasOnlyOpenForm = True
    else:
        hasOnlyOpenForm = False

    refChemComp = chemComps[0]  # Should be the a form

    #
    # Reset save location, check if file already exists
    #

    chemCompXmlFile = findChemCompOrCoordFilePath(refChemComp, testMode=testMode)

    # Original note: "In this case, getting nothing back with replace - False
    # means that it does exist!"
    # NOTE(review): the test below aborts when a path IS returned, which
    # reads as the opposite of that note - confirm against
    # findChemCompOrCoordFilePath.
    if chemCompXmlFile and not replace:
        print(" ChemComp %s, %s already exists - aborting creation." %
              (carboMolType, carboBaseName))
        try:
            # NOTE(review): always reads from editChemCompDataDir, also in
            # test mode - confirm this is intended.
            refChemComp = getChemComp(project,
                                      carboMolType,
                                      carboBaseName,
                                      download=False,
                                      chemCompArchiveDir=editChemCompDataDir,
                                      copyFile=False)
        except Exception:
            print("WARNING: chemcomp was already loaded!")
            refChemComp = project.findFirstChemComp(molType=carboMolType,
                                                    ccpCode=carboBaseName)
        return refChemComp

    #
    # Start creating/modifying...
    #

    print("")
    print(drawBox("Creating sugar information"))
    print("")

    #
    # Set the base Glyco CT code, this is always x-, except for -o only forms (aldehydes)
    #
    # TODO: might need to hack this for substituents so know which one is which... or just do it by order? Should be fine...
    #

    refGlycoCtCode = "RES\n1b:%s" % baseGlycoCtCode

    print("Setting GlycoCT code to:\n\n%s\n" % refGlycoCtCode)
    print("")

    project.override = True
    try:
        refChemComp.baseGlycoCtCode = refGlycoCtCode
        refChemComp.findFirstChemCompVar().glycoCtCode = refGlycoCtCode
    finally:
        project.override = False

    #
    # Look for a C-O-C fragment. Should be O5 for 6 rings.
    #
    # Note: ONLY works on cyclic sugars!!!
    #

    centralOAtom = None

    # TODO: should really do this from the Var level, but...
    for chemAtom in refChemComp.sortedChemAtoms():

        if chemAtom.elementSymbol != 'O' or len(chemAtom.chemBonds) != 2:
            continue

        connectedToC = True
        for chemBond in chemAtom.sortedChemBonds():
            otherChemAtom = getOtherAtom(chemAtom, chemBond)
            if otherChemAtom.elementSymbol != 'C':
                connectedToC = False
                break

        if not connectedToC:
            continue

        centralOAtom = chemAtom

        #
        # Try to generically determine the carbon atoms in the ring... trying not to depend on names
        # but if a C1 is connected to the O5, then start from there.
        #

        connectedAtoms = []
        for chemBond in centralOAtom.sortedChemBonds():
            otherChemAtom = getOtherAtom(centralOAtom, chemBond)
            if otherChemAtom.name == 'C1':
                connectedAtoms.insert(0, otherChemAtom)
            else:
                connectedAtoms.append(otherChemAtom)

        #
        # Start to loop... use recursive function to find right order
        #

        ringCarbons = findRingCarbon(connectedAtoms[0], connectedAtoms)

        if not ringCarbons:
            # C-O-C fragment that is not part of a ring: keep looking.
            centralOAtom = None
            continue

        otherCarbons = [
            searchCarbon
            for searchCarbon in refChemComp.findAllChemAtoms(elementSymbol='C')
            if searchCarbon not in ringCarbons
        ]

        break

    if not centralOAtom and not hasOnlyOpenForm:
        # Was a string 'raise', which is not a valid exception object.
        raise Exception(" Error: no central O atom found in ring!!")

    #
    # Now look for anomeric carbon and connection sites...
    #

    anomericCarbon = None
    anomericOxygen = None

    if not hasOnlyOpenForm:
        # Only the two ring carbons next to the ring oxygen are candidates.
        for searchCarbon in [ringCarbons[0], ringCarbons[-1]]:
            for chemBond in searchCarbon.chemBonds:
                otherChemAtom = getOtherAtom(searchCarbon, chemBond)
                if (otherChemAtom and otherChemAtom.elementSymbol == 'O'
                        and otherChemAtom != centralOAtom):
                    anomericCarbon = searchCarbon
                    anomericOxygen = otherChemAtom
                    break
            if anomericCarbon:
                break
    else:
        # Hardset these... not really anomeric but good enough
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1')
        anomericOxygen = refChemComp.findFirstChemAtom(name='O1')

    if not anomericCarbon:
        raise Exception(" Error: no anomeric carbon found.")

    # Must be initialised: it is tested below and again when building variants.
    anomericHydrogen = None
    for chemBond in anomericOxygen.chemBonds:
        otherChemAtom = getOtherAtom(anomericOxygen, chemBond)
        if otherChemAtom and otherChemAtom.elementSymbol == 'H':
            anomericHydrogen = otherChemAtom
            break

    if not anomericHydrogen:
        print(" Warning: no anomeric hydrogen found.")

    #
    # Set the stereo information for the anomeric carbon, then create subtypes for beta and open forms
    #

    # NOTE(review): stereoAtom and bondDict are only referenced by the
    # disabled stereo-priority sketch below. bondDict is local here, so
    # getStereoPriorities() cannot see it.
    stereoAtom = anomericCarbon

    bondDict = {
        'single': 1,
        'double': 2,
        'triple': 3,
        'aromatic': 1.5,
        'dative': 1.0,
        'singleplanar': 1.5
    }  # TODO CHECK THIS!

    # Disabled stereo-priority sketch (was an inert string literal):
    #
    # OK to do this because is neutral 'real' chemComp, but otherwise need to start from chemCompVar!!!
    #
    # TODO TODO also need to track chemBonds, but ONLY at beginning
    # then need to go back to coordinates to find out in which order the bonds appear based on the coords (when looking down
    # the main bond)!
    #
    # Might be easiest to use Martin's code (or someone else's), use the atom names to link back to here...
    #
    #   totalChemBonds = chemComp.chemBonds
    #   chemBondsHandled = []
    #   priorityList = getStereoPriorities(stereoAtom,chemBondsHandled)
    #   priorityList.sort()
    #   priorityList.reverse()
    #   priorityKeys = []
    #   for (priorityKey,otherAtom) in priorityList:
    #     priorityKeys.append(priorityKey)
    #   for priorityKey in priorityKeys:
    #     if priorityKeys.count(priorityKey) > 1:
    #       print priorityKey
    #       print priorityList
    #       sys.exit()

    #
    # Look for binding oxygens... has to be hydroxy group connected to carbon
    #

    bindingOxygens = []
    bindingHydrogens = {}

    #
    # Set carbons to search for connected OH groups
    #

    if not hasOnlyOpenForm:
        searchCarbons = ringCarbons + otherCarbons
    else:
        searchCarbons = list(refChemComp.findAllChemAtoms(elementSymbol='C'))

    if 'aldi' in baseGlycoCtCode:
        # Don't do anything on 1 position for these - alditols
        searchCarbons.pop(searchCarbons.index(anomericCarbon))

    for searchCarbon in searchCarbons:

        validConnectedAtoms = {}
        validOHgroups = []
        # Hydrogen of each accepted OH group, keyed by its oxygen. Recorded
        # here so the right hydrogen is stored even if a later bond
        # overwrites connectedChemAtom.
        hydroxylHydrogens = {}

        for chemBond in searchCarbon.sortedChemBonds():

            otherChemAtom = getOtherAtom(searchCarbon, chemBond)
            if not otherChemAtom:
                continue

            for elementSymbol in ['O', 'N']:

                if (otherChemAtom.elementSymbol != elementSymbol
                        or otherChemAtom == centralOAtom):
                    continue

                if elementSymbol not in validConnectedAtoms:
                    validConnectedAtoms[elementSymbol] = []
                validConnectedAtoms[elementSymbol].append(otherChemAtom)

                otherChemBonds = list(otherChemAtom.chemBonds)

                if elementSymbol == 'O' and len(otherChemBonds) == 2:
                    # With two bonds, 'not index' (False/True -> 0/1) picks
                    # the bond that is NOT the one back to the carbon.
                    otherChemBond = otherChemBonds[
                        not otherChemBonds.index(chemBond)]
                    connectedChemAtom = getOtherAtom(otherChemAtom,
                                                     otherChemBond)

                    if connectedChemAtom.elementSymbol == 'H':
                        validOHgroups.append(otherChemAtom)
                        hydroxylHydrogens[otherChemAtom] = connectedChemAtom

        #
        # Check if single OH (no double O, or amide, ...)
        #

        if len(validOHgroups) == 1:
            if len(validConnectedAtoms) == 1:
                # Carboxylic acid (except for ring O-C-OH!)
                if (len(validConnectedAtoms['O']) == 2
                        and not searchCarbon == anomericCarbon):
                    print(" Warning: ignoring oxygen %s - is carboxylic acid (or similar)"
                          % validOHgroups[0].name)
                    validOHgroups = []
            else:
                # Amide or something similar
                print(" Warning: ignoring oxygen %s - is amide (or similar)" %
                      validOHgroups[0].name)
                validOHgroups = []

        if validOHgroups:
            hydroxylOxygen = validOHgroups[0]
            bindingOxygens.append(hydroxylOxygen)
            bindingHydrogens[hydroxylOxygen] = hydroxylHydrogens[hydroxylOxygen]

            if not hasOnlyOpenForm and searchCarbon in otherCarbons:
                print(" Warning: setting oxygen %s as binding one (not directly connected to ring)."
                      % (hydroxylOxygen.name))

    #
    # Now create variants...
    #
    # Need to have all combinations of:
    #
    # - anomeric a/b and free/bound
    # - binding oxygens free/bound
    #

    bindingOxygenCombs = makePowerSet(bindingOxygens)

    origAnomericCarbon = anomericCarbon

    # (linkAtom name, subType) -> name of the real atom whose mol2
    # coordinates are used for that link atom.
    linkAtomsMapForCoordinates = {}

    if hasOnlyOpenForm:
        stereoTypes = ('open_1', )
    else:
        stereoTypes = ('stereo_1', 'stereo_2')

    for stereoType in stereoTypes:

        subType = int(stereoType.split('_')[1])
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1',
                                                       subType=subType)

        if not anomericCarbon:

            # Should only happen for stereo_2
            creationDict = {'name': 'C1', 'subType': subType}
            for attrName in ('elementSymbol', 'shortVegaType',
                             'waterExchangeable'):
                creationDict[attrName] = getattr(origAnomericCarbon, attrName)

            # TODO set chirality to OPPOSITE of whatever the first subtype is!

            anomericCarbon = refChemComp.newChemAtom(**creationDict)

            namingSystem = refChemComp.findFirstNamingSystem(
                name=namingSystemName)
            namingSystem.newAtomSysName(sysName=anomericCarbon.name,
                                        atomName=anomericCarbon.name,
                                        atomSubType=anomericCarbon.subType)

            # Bonds and other atoms are exactly the same!
            for chemBond in origAnomericCarbon.chemBonds:
                otherChemAtom = getOtherAtom(origAnomericCarbon, chemBond)
                refChemComp.newChemBond(chemAtoms=(anomericCarbon,
                                                   otherChemAtom),
                                        bondType=chemBond.bondType,
                                        stereochem=chemBond.stereochem)

        for anomericBound in (0, 1):

            for boundOxygens in bindingOxygenCombs:

                # Main list only contains real ChemAtoms!!
                currentChemAtoms = list(
                    refChemComp.findAllChemAtoms(className='ChemAtom'))

                # Drop the C1 atoms that belong to the other subtype(s).
                for tempAnomericCarbon in refChemComp.findAllChemAtoms(name='C1'):
                    if (tempAnomericCarbon != anomericCarbon
                            and tempAnomericCarbon in currentChemAtoms):
                        currentChemAtoms.pop(
                            currentChemAtoms.index(tempAnomericCarbon))

                # This is the neutral refChemComp - already exists!
                if not anomericBound and not boundOxygens:
                    continue

                # Can't be both bound on the anomeric carbon and have an oxygen link there...
                if (anomericBound and len(boundOxygens) == 1
                        and boundOxygens[0] == anomericOxygen):
                    continue

                # Can't have a link to 1 and something else...
                if len(boundOxygens) > 1 and anomericOxygen in boundOxygens:
                    continue

                linkedAtomKeys = []
                linkedAtoms = {}
                linkAtoms = {}

                if (anomericBound and not hasOnlyOpenForm
                        and 'aldi' not in baseGlycoCtCode):

                    # Only do C1 when relevant - for alditols it's not, always reducing end.
                    currentChemAtoms.pop(
                        currentChemAtoms.index(anomericOxygen))
                    if anomericHydrogen:
                        currentChemAtoms.pop(
                            currentChemAtoms.index(anomericHydrogen))

                    anomericCarbonKey = getChemAtomKey(anomericCarbon)
                    linkedAtomKeys.append(anomericCarbonKey)
                    linkedAtoms[anomericCarbonKey] = anomericCarbon

                    linkAtom = getLinkAtom(anomericCarbon, multi=True)
                    linkAtoms[anomericCarbonKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name, linkAtom.subType)] = anomericOxygen.name

                for bindingOxygen in boundOxygens:

                    bindingHydrogen = bindingHydrogens[bindingOxygen]
                    if bindingHydrogen and bindingHydrogen in currentChemAtoms:
                        currentChemAtoms.pop(
                            currentChemAtoms.index(bindingHydrogen))

                    bindingOxygenKey = getChemAtomKey(bindingOxygen)
                    linkedAtomKeys.append(bindingOxygenKey)
                    linkedAtoms[bindingOxygenKey] = bindingOxygen

                    linkAtom = getLinkAtom(bindingOxygen)
                    linkAtoms[bindingOxygenKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name, linkAtom.subType)] = bindingHydrogen.name

                linkedAtomKeys.sort()

                # Possible for C1 linkages if not handled (only reducing end)
                if not linkedAtomKeys:
                    continue

                #
                # Create linkEnds
                #

                linkInfo = []
                for linkedAtomKey in linkedAtomKeys:

                    if linkedAtomKey[0] == 'C1':
                        linkCode = "%s_%s" % (linkedAtomKey[0],
                                              linkedAtomKey[1])
                    else:
                        linkCode = linkedAtomKey[0]

                    linkInfo.append(linkCode)

                    if not refChemComp.findFirstLinkEnd(linkCode=linkCode):
                        refChemComp.newLinkEnd(
                            linkCode=linkCode,
                            boundChemAtom=linkedAtoms[linkedAtomKey],
                            boundLinkAtom=linkAtoms[linkedAtomKey])

                # TODO THIS IS NOT GREAT - no way of telling what's what if atom names are messed up
                linking = 'link:%s' % ','.join(linkInfo)

                if stereoType.count('stereo'):
                    descriptor = '%s:C1' % stereoType
                else:
                    descriptor = 'neutral'

                if not refChemComp.findFirstChemCompVar(linking=linking,
                                                        descriptor=descriptor):

                    print(" Trying %s,%s" % (linking, descriptor))

                    # Create the stereospecific GlycoCt code
                    if stereoType == "stereo_1":
                        stereoCode = 'a'
                    elif stereoType == "stereo_2":
                        stereoCode = 'b'
                    elif stereoType == 'open_1':
                        stereoCode = 'o'

                    varGlycoCtCode = "RES\n1b:%s" % (stereoCode +
                                                      baseGlycoCtCode[1:])

                    refChemComp.newChemCompVar(chemAtoms=currentChemAtoms,
                                               linking=linking,
                                               descriptor=descriptor,
                                               glycoCtCode=varGlycoCtCode,
                                               formalCharge=0,
                                               isParamagnetic=False,
                                               isAromatic=False)

    #
    # Make sure the chemElements are accessible
    #

    project.currentChemElementStore = project.findFirstChemElementStore()

    #
    # Reset the name and molType...
    #

    project.override = True
    try:
        refChemComp.ccpCode = carboBaseName
        refChemComp.molType = carboMolType
    finally:
        project.override = False

    # TODO SET THESE CORRECTLY? Where do I get info from for this though? Can this come from MSD? Ask Dimitris!!
    # Set the PDB/MSD name - NOTE that have to do this for the correct a/b forms! Var specific!!
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'PDB',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'MSD',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())

    #
    # Check chemComp validity and save
    # TODO make this all options in running script!
    #

    refChemComp.checkAllValid()

    if saveData:

        #
        # Get the original file GUID, if possible, when replacing existing file
        #

        if replace:
            (existingGuid, existingFile) = findExistingChemCompInfo(
                chemCompDataDir, refChemComp.ccpCode, refChemComp.molType)

            if existingGuid:
                project.override = True
                try:
                    refChemComp.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath, existingFilePath) = saveTemporaryChemCompOrCoord(
            refChemComp, testMode=testMode)

        # Do a check here? Or don't bother?
        consolidateTemporaryChemCompOrCoord(refChemComp,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    #
    # Get the coordinates as well!
    #

    print(" Creating coordinates!!")

    chemCompCoord = project.newChemCompCoord(sourceName=coordSystem,
                                             molType=refChemComp.molType,
                                             ccpCode=refChemComp.ccpCode)

    for rawChemComp in rawChemComps:

        # Identify which one we're dealing with!!
        (dirName, baseName) = os.path.split(rawChemComp.parent.name)

        if baseName[0] == 'a':
            stereoDescriptor = "stereo_1:C1"
        elif baseName[0] == 'b':
            stereoDescriptor = "stereo_2:C1"
        elif baseName[0] == 'o':
            # NOTE(review): open-form variants above get descriptor
            # 'neutral', not 'none' - confirm which one is meant here.
            stereoDescriptor = "none"
        else:
            print(" Not handling type '%s' for coordinates - ignored." %
                  baseName[0])
            continue

        #
        # Mark that generated by this script...
        #
        # NOTE(review): runs once per raw chemComp, so the same application
        # data is added for every handled form.

        applData = Implementation.AppDataString(application='ccpNmr',
                                                keyword='origin',
                                                value='makeFullSugar.py')
        chemCompCoord.addApplicationData(applData)

        # Don't do any link atoms (yet)... could in principle use atoms that are 'missing'
        # TODO: should decompose descriptor here, then check...
        chemCompVars = refChemComp.findAllChemCompVars(
            descriptor=stereoDescriptor)

        # Unique (name, subType) keys, in order of first appearance.
        chemAtomKeys = []
        for ccv in chemCompVars:
            for ca in ccv.sortedChemAtoms():
                caKey = (ca.name, ca.subType)
                if caKey not in chemAtomKeys:
                    chemAtomKeys.append(caKey)

        #
        # Create a dictionary for the coordinates, based on the 'raw' chemComp from the mol2 file
        #

        chemAtomCoordDict = {}

        for chemAtomKey in chemAtomKeys:

            coords = None

            # Link atoms take the coordinates of the atom they replace.
            if chemAtomKey in linkAtomsMapForCoordinates:
                useChemAtomName = linkAtomsMapForCoordinates[chemAtomKey]
            else:
                useChemAtomName = chemAtomKey[0]

            for rawAtom in rawChemComp.atoms:
                if rawAtom.name == useChemAtomName:
                    coords = (rawAtom.x, rawAtom.y, rawAtom.z)
                    break

            if not coords:
                print(" Warning: no coordinate for %s, atom key %s." %
                      (coordSystem, chemAtomKey))
            elif chemAtomKey not in chemAtomCoordDict:
                chemAtomCoordDict[chemAtomKey] = coords
            else:
                # The key is a tuple: wrap it so '%' sees a single argument.
                print(" Error: double atom key %s!" % (chemAtomKey, ))

        #
        # Set the coordinates
        #

        chemAtomCoords = {}

        for chemCompVar in chemCompVars:

            chemCompVarCoord = chemCompCoord.findFirstChemCompVarCoord(
                linking=chemCompVar.linking,
                descriptor=chemCompVar.descriptor)

            if not chemCompVarCoord:
                chemCompVarCoord = chemCompCoord.newChemCompVarCoord(
                    linking=chemCompVar.linking,
                    descriptor=chemCompVar.descriptor)

            for ca in chemCompVar.sortedChemAtoms():

                caKey = (ca.name, ca.subType)
                if caKey not in chemAtomCoordDict:
                    continue

                coords = chemAtomCoordDict[caKey]
                if not coords:
                    continue

                if caKey in chemAtomCoords:
                    chemAtomCoord = chemAtomCoords[caKey]
                else:
                    chemAtomCoord = chemCompCoord.newChemAtomCoord(
                        name=caKey[0],
                        subType=caKey[1],
                        x=coords[0],
                        y=coords[1],
                        z=coords[2])
                    chemAtomCoords[caKey] = chemAtomCoord

                if chemAtomCoord not in chemCompVarCoord.chemAtomCoords:
                    chemCompVarCoord.addChemAtomCoord(chemAtomCoord)

    chemCompCoord.checkAllValid()

    if saveData:

        if replace:
            (existingGuid, existingFile) = findExistingChemCompCoordInfo(
                chemCompCoordDataDir, coordSystem, chemCompCoord.ccpCode,
                chemCompCoord.molType)

            if existingGuid:
                project.override = True
                try:
                    chemCompCoord.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath, existingFilePath) = saveTemporaryChemCompOrCoord(
            chemCompCoord, testMode=testMode)

        # Do a check here? Or don't bother? This is blank regeneration from reference data, so should be OK!
        consolidateTemporaryChemCompOrCoord(chemCompCoord,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    return refChemComp
def addSubstituentToBaseUnit(baseUnitCcpCode, baseUnitMolType, testMode, mergeInfoList, coordSystem, substRemoveAtomName='SUB', saveData=True, replace=False, namingSystemName=None, resetGlycoCtCode=False): # # Set directories to read/write data from/to # if testMode: chemCompCoordDataDir = testChemCompCoordDataDir chemCompDataDir = testChemCompDataDir else: chemCompCoordDataDir = editChemCompCoordDataDir chemCompDataDir = editChemCompDataDir # # Now start setting up project # substIndexPatt = re.compile("(\d+)") substituentDir = os.path.join(origMol2DataDir, 'subst') ccpCode = baseUnitCcpCode substituentList = [] for mergeInfoDict in mergeInfoList: substituent = mergeInfoDict['substituent'] if not substituent in substituentList: substituentList.append(substituent) ccpCode += ":%s_%s" % (mergeInfoDict['baseBindingAtomName'], substituentInfo[substituent]['shortCode']) project = Implementation.MemopsRoot(name='chemComp') project.currentUserId = 'ccpnRef' project.currentChemElementStore = project.findFirstChemElementStore() # # First import all relevant subsituent mol2 files # TODO could in principle have these 'pre-imported' in CCPN, in special dir... # note though that the SUBST atoms have to be removed in that case... 
# mol2Format = Mol2Format(project, guiParent=None, allowPopups=False) chemComps = {} rawChemComps = {} for substituent in substituentList: fileName = os.path.join(substituentDir, "%s.mol2" % substituent) ccs = mol2Format.readChemComps(fileName, ccpCodes=[substituent], saveChemComp=False, minimalPrompts=True) chemComps[substituent] = ccs[0] rawChemComps[substituent] = mol2Format.rawChemComp # # Get the original base unit information # origBaseUnit = getChemComp(project, baseUnitMolType, baseUnitCcpCode, chemCompArchiveDir=chemCompDataDir, copyFile=False) # # If replacing, keep GUID of original as default # creationKeywds = {} if replace: if testMode: dataDir = testChemCompDataDir else: dataDir = editChemCompDataDir (existingGuid, existingFile) = findExistingChemCompInfo(dataDir, ccpCode, baseUnitMolType) if existingGuid: creationKeywds['guid'] = existingGuid # # Now create the new base unit, copy the original base unit file, change the guid in the file, then load it # baseUnit = project.newNonStdChemComp(molType=baseUnitMolType, ccpCode=ccpCode, **creationKeywds) repository = project.findFirstRepository(name='userData') isNewFile = copyBaseToModifiedFile(project, baseUnit, origBaseUnit, testMode, repository, replace=replace) if not isNewFile: return #print baseUnit.chemAtoms origBaseUnitCcc = getChemCompCoord( project, coordSystem, baseUnitMolType, baseUnitCcpCode, chemCompCoordArchiveDir=chemCompCoordDataDir, copyFile=False) if not origBaseUnitCcc: raise ("Error: no coordinates available for %s!" 
% baseUnitCcpCode) else: # # If replacing, keep GUID of original as default # creationKeywds = {} if replace: if testMode: dataDir = testChemCompCoordDataDir else: dataDir = editChemCompCoordDataDir (existingGuid, existingFile) = findExistingChemCompCoordInfo( dataDir, coordSystem, ccpCode, baseUnitMolType) if existingGuid: creationKeywds['guid'] = existingGuid baseUnitCcc = project.newChemCompCoord(sourceName=coordSystem, molType=baseUnitMolType, ccpCode=ccpCode, **creationKeywds) copyBaseToModifiedFile(project, baseUnitCcc, origBaseUnitCcc, testMode, repository, replace=replace) #print baseUnitCcc.chemAtomCoords # # Reset the glycoCtCode for the new base unit (if relevant) # if resetGlycoCtCode: substGlycoCtText = "" substGlycoCtInfo = {'RES': [], 'LIN': []} resIndex = 2 linIndex = 1 # Note: this assumes the mergeInfoList is ordered! for mergeInfoDict in mergeInfoList: baseBindingAtomName = mergeInfoDict['baseBindingAtomName'] removeBaseAtomNames = mergeInfoDict['removeBaseAtomNames'] substituent = mergeInfoDict['substituent'] substGlycoCtInfo['RES'].append("%ds:%s" % (resIndex, substituent)) parentAtomIndex = int(baseBindingAtomName[-1]) if baseBindingAtomName[0] == 'O': parentSubstitutionType = 'o' else: parentSubstitutionType = 'd' substGlycoCtInfo['LIN'].append( "%d:%d%s(%d+%d)%d%s" % (linIndex, 1, parentSubstitutionType, parentAtomIndex, 1, resIndex, 'n')) resIndex += 1 linIndex += 1 for tmpStr in substGlycoCtInfo['RES']: substGlycoCtText += "\n" + tmpStr substGlycoCtText += "\nLIN" for tmpStr in substGlycoCtInfo['LIN']: substGlycoCtText += "\n" + tmpStr newBaseGlycoCtCode = origBaseUnit.baseGlycoCtCode + substGlycoCtText project.override = True try: baseUnit.baseGlycoCtCode = newBaseGlycoCtCode for chemCompVar in baseUnit.chemCompVars: chemCompVar.glycoCtCode = chemCompVar.glycoCtCode + substGlycoCtText finally: project.override = False print "Setting base GlycoCT code to:\n\n%s\n" % newBaseGlycoCtCode print # # Set naming system # namingSystem = None if 
namingSystemName: namingSystem = baseUnit.findFirstNamingSystem(name=namingSystemName) if not namingSystem: namingSystem = baseUnit.newNamingSystem(name=namingSystemName) print "Created new naming system %s" % namingSystemName # # Add substituent info, remove atoms from base unit # addVariants = {} for mergeInfoDict in mergeInfoList: # # 0. Get the info from the mergeInfoDict, print a comment # baseBindingAtomName = mergeInfoDict['baseBindingAtomName'] removeBaseAtomNames = mergeInfoDict['removeBaseAtomNames'] substituent = mergeInfoDict['substituent'] renameSubstituentAtomNames = mergeInfoDict[ 'renameSubstituentAtomNames'] baseBindingAtoms = baseUnit.findAllChemAtoms(name=baseBindingAtomName) newBondType = substituentInfo[substituent]['bondType'] newStereochem = substituentInfo[substituent]['stereochem'] print print drawBox("Creating link between base atom %s to substituent %s" % (baseBindingAtomName, substituent)) print # # 1. Set the substUnitIndex - this is the identifier that is added to the substituents # when part of the base chemComp. It is taken from whichever number is part of the baseBindingAtomName # # TODO: should be molType specific - use A,B,G,... for amino acids!! # substUnitSearch = substIndexPatt.search(baseBindingAtomName) substUnitIndex = substUnitSearch.group(1) # # 2. Remove relevant atoms from the base unit, all subtypes # Keep track of atom directly linked to the baseBindingAtom for recalculating coordinates! 
# baseAtomCoords = {} for removeBaseAtomName in removeBaseAtomNames: removeBaseAtoms = baseUnit.findAllChemAtoms( name=removeBaseAtomName) for removeBaseAtom in removeBaseAtoms: # Search if bound to the baseBindingAtom isBoundToBaseBindingAtom = False for chemBond in baseUnit.chemBonds: bondChemAtoms = list(chemBond.chemAtoms) if removeBaseAtom in bondChemAtoms: otherBondChemAtom = bondChemAtoms[ not bondChemAtoms.index(removeBaseAtom)] if otherBondChemAtom in baseBindingAtoms: isBoundToBaseBindingAtom = True break # Track the coordinates if bound to the baseBindingAtom if isBoundToBaseBindingAtom: for baseBindingAtom in baseBindingAtoms: baseAtomCoords[baseBindingAtom] = {} baseBindingAtomCoords = baseUnitCcc.findAllChemAtomCoords( chemAtom=baseBindingAtom) for baseBindingAtomCoord in baseBindingAtomCoords: baseCoord = (baseBindingAtomCoord.x, baseBindingAtomCoord.y, baseBindingAtomCoord.z) for chemCompVarCoord in baseBindingAtomCoord.chemCompVarCoords: # Get the coordinates, if any removeBaseAtomCoord = chemCompVarCoord.findFirstChemAtomCoord( chemAtom=removeBaseAtom) if removeBaseAtomCoord: baseBoundCoord = (removeBaseAtomCoord.x, removeBaseAtomCoord.y, removeBaseAtomCoord.z) jointCoords = (baseCoord, baseBoundCoord) if not baseAtomCoords[ baseBindingAtom].has_key( jointCoords): baseAtomCoords[baseBindingAtom][ jointCoords] = [] baseAtomCoords[baseBindingAtom][ jointCoords].append(chemCompVarCoord) # Now start deleting on chemComp and chemCompCoord levels removeBaseAtomCoords = baseUnitCcc.findAllChemAtomCoords( chemAtom=removeBaseAtom) for removeBaseAtomCoord in removeBaseAtomCoords: removeBaseAtomCoord.delete() removeBaseAtom.delete() print " Removed atom %s, subType %d from base unit..." 
% ( removeBaseAtomName, removeBaseAtom.subType) for namingSystem in baseUnit.namingSystems: for asn in namingSystem.atomSysNames: if asn.atomName == removeBaseAtomName: asn.delete() # # 2.1 Also rename all linkEnds and chemCompVars that have this atom in the descriptor - are now irrelevant # for chemCompVar in baseUnit.chemCompVars: deleteVoidChemCompVar(chemCompVar, 'descriptor', removeBaseAtomName, baseUnit, baseUnitCcc) deleteVoidChemCompVar(chemCompVar, 'linking', removeBaseAtomName, baseUnit, baseUnitCcc) deleteLinkEnd(baseUnit, removeBaseAtomName) # # 3. Add the substituent info to the base unit # # Currently this works off the mol2 file. Could add, e.g. SUB_C, SUB_O, SUB_N, depending on substituted atom... # for better coordinates later on. TODO: try to implement this!!! # # TODO: look into avoiding mol2 step, just put into CCPN in temporary library, use linkAtoms as linking ones. # These can then also be identified by elementSymbol (O,C,N,...) # substUnit = chemComps[substituent] rawSubstChemComp = rawChemComps[substituent] # # 3.1 Initialise information # - determine which atom to remove from substituent (will need this for coordinates further down though) # - get single bond coming from this atom to identify the binding atom on the substituent side # - create substToBaseDict dictionary that maps substituent objects to newly created chemComp unit objects # - deal with the coordinates substRemoveAtom = substUnit.findFirstChemAtom(name=substRemoveAtomName) chemBond = substRemoveAtom.findFirstChemBond( ) # Should only have ONE single bond! 
substBindingAtom = getOtherAtom(substRemoveAtom, chemBond) substCoords = [ getAtomOrigCoords(rawSubstChemComp, substRemoveAtomName), getAtomOrigCoords(rawSubstChemComp, substBindingAtom.name) ] substCoordsBaseAtoms = [None, None] substToBaseDict = {} # # 3.2 Create chemAtoms and chemAtomSets # redoChemAtomSets = [] substChemAtoms = [] for chemAtomOrSet in substUnit.sortedChemAtoms( ) + substUnit.sortedChemAtomSets(): # Ignore atoms to be substituted if chemAtomOrSet.name[:len(substRemoveAtomName )] == substRemoveAtomName: continue createChemAtomOrSet = CreateChemAtomOrSet(chemAtomOrSet) if renameSubstituentAtomNames.has_key(chemAtomOrSet.name): createChemAtomOrSet.setForcedName( renameSubstituentAtomNames[chemAtomOrSet.name]) else: createChemAtomOrSet.setName(substUnitIndex) redoChemAtomSets.extend( createChemAtomOrSet.setChemAtomLinks( chemAtomOrSet, substToBaseDict, substRemoveAtomName)) # Only relevant for chemAtomSets! if createChemAtomOrSet.checkExistence(baseUnit): continue newChemAtomOrSet = createChemAtomOrSet.createNewObject(baseUnit) createChemAtomOrSet.setAtomSysName(namingSystem) substToBaseDict[chemAtomOrSet] = newChemAtomOrSet # Keep track of new chemAtoms, also track coordinates if newChemAtomOrSet.className in ('LinkAtom', 'ChemAtom'): substChemAtoms.append(newChemAtomOrSet) if chemAtomOrSet == substBindingAtom: baseSubstBindingAtom = newChemAtomOrSet substCoordsBaseAtoms[1] = newChemAtomOrSet else: substCoordsBaseAtoms.append(newChemAtomOrSet) substCoords.append( getAtomOrigCoords(rawSubstChemComp, chemAtomOrSet.name)) # # 3.2.1 Now set chemAtomSets that are linked to other chemAtomSets - have to do this later because # otherwise could have not been created yet. Could in principle do this in above loop by # organising list of chemAtomSets, but this is easier. 
# for chemAtomSet in redoChemAtomSets: createChemAtomOrSet = CreateChemAtomOrSet(chemAtomSet) if renameSubstituentAtomNames.has_key(chemAtomSet.name): createChemAtomOrSet.setForcedName( renameSubstituentAtomNames[chemAtomSet.name]) else: createChemAtomOrSet.setName(substUnitIndex) createChemAtomOrSet.setChemAtomSetLinks() if createChemAtomOrSet.checkExistence(baseUnit): continue newChemAtomSet = createChemAtomOrSet.createNewObject(baseUnit) createChemAtomOrSet.setAtomSysName(namingSystem) substToBaseDict[chemAtomSet] = newChemAtomSet # # 3.3 Now add all other objects connected to the chemComp (see global chemCompLinkList) # # TODO: this is not fully functional - have to add specific settings for some links (but probably never required, so will wait) # for ccLinkName in chemCompLinkList: for substObject in getattr(substUnit, ccLinkName): createCcpnObject = CreateCcpnObject(substObject) if not createCcpnObject.setLinks(substToBaseDict): continue newCcpnObject = createCcpnObject.createNewObject(baseUnit) substToBaseDict[substObject] = newCcpnObject """ # TODO: LINKS THAT REQUIRE SPECIAL TREATMENT/MERGING WITH EXISTING INFO # 'chemCompVars', 'applicationData', # TODO: object that require special treatment (have to be renamed, ...): chemAtomSysNames, chemCompSysNames (?) """ # # 4. Connect base binding atom(s) to the new subsituent atoms # for baseBindingAtom in baseBindingAtoms: # # 4.1 First remove all chemCompVars and linkEnds that have linking vars including this atom! 
# for chemCompVar in baseBindingAtom.chemCompVars: deleteVoidChemCompVar(chemCompVar, 'linking', baseBindingAtomName, baseUnit, baseUnitCcc, actionType='binding') deleteLinkEnd(baseUnit, baseBindingAtomName, actionType='binding') # # 4.2 Calculate the new coordinates for the substituent atoms # coordsForChemCompVar = {} if baseAtomCoords.has_key(baseBindingAtom): for jointCoords in baseAtomCoords[baseBindingAtom]: newSubstCoords = tuple([ tuple(coord) for coord in superposeNewVectorsOnOld( jointCoords, substCoords) ]) coordsForChemCompVar[newSubstCoords] = baseAtomCoords[ baseBindingAtom][jointCoords] # # 4.3 Now create the bond between the new substituent and the existing base unit # baseUnit.newChemBond(chemAtoms=(baseBindingAtom, baseSubstBindingAtom), bondType=newBondType, stereochem=newStereochem) #print " Creating chemBond between base atom %s and subsituent atom %s" % (baseBindingAtom.name,baseSubstBindingAtom.name) for chemCompVar in baseBindingAtom.chemCompVars: #print " ", baseSubstBindingAtom.name, baseSubstBindingAtom.subType,chemCompVar.linking,chemCompVar.descriptor for substChemAtom in substChemAtoms: chemCompVar.addChemAtom(substChemAtom) # TODO: have to RENAME the chemCompVar in case there are variants for the substituent!! # # 4.4 Finally set the new coordinates... ignore first one (is the substituted atom from the substituent!) 
# for newSubstCoords in coordsForChemCompVar.keys(): for i in range(1, len(substCoordsBaseAtoms)): newSubstCoord = newSubstCoords[i] substChemAtom = substCoordsBaseAtoms[i] substChemAtomCoord = baseUnitCcc.findFirstChemAtomCoord( chemAtom=substChemAtom, x=newSubstCoord[0], y=newSubstCoord[1], z=newSubstCoord[2]) if not substChemAtomCoord: substChemAtomCoord = baseUnitCcc.newChemAtomCoord( name=substChemAtom.name, subType=substChemAtom.subType, x=newSubstCoord[0], y=newSubstCoord[1], z=newSubstCoord[2]) for cccv in coordsForChemCompVar[newSubstCoords]: if not cccv.isDeleted: cccv.addChemAtomCoord(substChemAtomCoord) print " Connected new substituent chemAtom %s,%s to base unit atom %s,%s, and included in relevant chemCompVars" % ( baseSubstBindingAtom.name, baseSubstBindingAtom.subType, baseBindingAtom.name, baseBindingAtom.subType) # # NEXT ON LIST: make sure the chemCompVars make sense if there are any for the substituent!!! # # ALSO try this on amino acids when working for carbs - need split code and rename some variables though # # # Check validity and save # baseUnit.checkAllValid(complete=True) if baseUnitCcc: baseUnitCcc.checkAllValid(complete=True) if saveData: (filePath, existingFilePath) = saveTemporaryChemCompOrCoord(baseUnit, testMode=testMode) consolidateTemporaryChemCompOrCoord(baseUnit, filePath, existingFilePath, testMode=testMode, replace=replace) if baseUnitCcc: (filePath, existingFilePath) = saveTemporaryChemCompOrCoord( baseUnitCcc, testMode=testMode) consolidateTemporaryChemCompOrCoord(baseUnitCcc, filePath, existingFilePath, testMode=testMode, replace=replace)