def computeOneFile(self, pdbFName):
    '''
    Computes DSSP for a given pdb file

    The DSSP binary (self.dsspBinPath) is executed over pdbFName and its raw output is written to
    self.outPathRaw/<extendedPrefix>.dssp.tab. Afterwards the raw file is handed to self.processDSSP.
    If DSSP wrote anything to stderr, the problem is reported on stdout and, unless the message is the
    known 'no atoms read before TER record' one, self.createFileForError is invoked before processing.

    @param pdbFName: str. fname to pdb file
    @return None
    '''
    parser = PDBParser(QUIET=True)
    struct = parser.get_structure("pdbStruct", pdbFName)
    prefixAndChainTypeId = self.getExtendedPrefix(pdbFName)
    rawDsspOutName = os.path.join(self.outPathRaw, prefixAndChainTypeId + ".dssp.tab")
    proc = Popen([self.dsspBinPath, '-i', pdbFName, '-o', rawDsspOutName],
                 stdin=PIPE, stdout=PIPE, stderr=PIPE)
    # communicate() always returns a (stdout, stderr) tuple, so no None check is needed.
    output = proc.communicate()
    # Decode stderr only once; it is needed both to detect and to classify the error.
    errText = decodeFun(output[1])
    if errText != "":
        print("Error when computing DSSP: %s" % pdbFName)
        print(output)
        # 'no atoms read before TER record' is tolerated: no error file is created for it.
        if not errText.startswith('no atoms read before TER record'):
            self.createFileForError(struct, rawDsspOutName)
    self.processDSSP(prefixAndChainTypeId, struct, rawDsspOutName)
    return None
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    '''
    Parses fnameIn with PDBParser and saves the parsed structure to fnameOut with PDBIO, provided
    that the number of residues in the first model lies strictly between
    Configuration().minNumResiduesPartner and Configuration().maxNumResiduesPartner.

    @param fnameIn: str. path to the input structure file
    @param fnameOut: str. path where the structure will be written
    @param removeInput: bool. If True, fnameIn is deleted once fnameOut has been saved
    @return True if fnameOut was written, False if anything failed (the error is printed)
    '''
    settings = Configuration()
    lowerBound = settings.minNumResiduesPartner
    upperBound = settings.maxNumResiduesPartner
    try:
        structure = PDBParser(QUIET=True).get_structure("pdbStruct", fnameIn)
        # Count residues of every chain in the first model only.
        residueCount = sum(len(chain.get_list()) for chain in structure[0])
        if not (lowerBound < residueCount < upperBound):
            raise BadNumberOfResidues(residueCount)
        pdbWriter = PDBIO()
        pdbWriter.set_structure(structure)
        pdbWriter.save(fnameOut)
        if removeInput:
            os.remove(fnameIn)
        return True
    except Exception as e:
        # Best effort: any failure (parsing, size check, writing) is reported and turned into False.
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
def __init__(self, rFname, lFname, computedFeatsRootDir=None, boundAvailable=True,
             res2res_dist=6.0, isForPrediction=False, statusManager=None):
    '''
    Sets up the prefixes, output location and parsing helpers needed to compute a contact map.

    @param rFname: str. path to receptor pdb file
    @param lFname: str. path to ligand pdb file
    @param computedFeatsRootDir: str. path where features will be stored
    @param boundAvailable: bool. True if bound structures are available, False otherwise.
                           Bound structures must be located at the same path that unbound
                           structures and need to be named as in the following example:
                             1A2K_l_u.pdb  1A2K_r_b.pdb
    @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be
                         considered as interacting (Angstroms)
    @param isForPrediction: bool. Stored as self.isForPrediction.
    @param statusManager: class that implements .setStatus(msg) to communicate.
                          It is not used inside this constructor.
    '''
    FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

    # Short prefix = file name up to the first "." and then up to the first "_"
    receptorShort = os.path.basename(rFname).split(".")[0].split("_")[0]
    ligandShort = os.path.basename(lFname).split(".")[0].split("_")[0]
    self.prefixR = receptorShort
    self.prefixL = ligandShort

    if receptorShort != ligandShort:
        # Partners named differently: the complex prefix becomes ligand<->receptor, so the
        # separator characters are rejected in the names.
        if "<" in ligandShort:
            raise FeatureComputerException(
                "Error. Ligand pdbFile name %s must not contain '<' or '>' character" % lFname)
        if ">" in receptorShort:
            raise FeatureComputerException(
                "Error. Receptor pdbFile name %s must not contain '<' or'>' character" % rFname)
        self.prefixR = self.getExtendedPrefix(rFname)
        self.prefixL = self.getExtendedPrefix(lFname)
        self.prefix = self.prefixL + "<->" + self.prefixR
    else:
        self.prefix = receptorShort

    self.isForPrediction = isForPrediction
    self.res2res_dist = res2res_dist
    self.boundAvailable = boundAvailable

    self.outPath = myMakeDir(self.computedFeatsRootDir, "common/contactMaps")
    self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")

    self.parser = PDBParser(QUIET=True)
    # CaPPBuilder is used instead of PPBuilder(radius=200) to not worry for broken chains
    self.ppb = CaPPBuilder()
    self.computeFun = self.contactMapOneComplex
def __init__(self, rFname, lFname, computedFeatsRootDir=None):
    '''
    Prepares the output directories, the pdb parser and the (initially empty) lookup tables.

    @param rFname: str. path to receptor pdb or fasta file
    @param lFname: str. path to ligand pdb or fasta file
    @param computedFeatsRootDir: str. path where features will be stored. If None they will be
                                 stored at default path (assigned in ../Config.py)
    '''
    SeqFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

    # Output locations, all obtained through myMakeDir
    extractedSeqsDir = myMakeDir(self.computedFeatsRootDir, "extractedSeqs")
    self.outPath = extractedSeqsDir
    self.fastaOutDir = myMakeDir(extractedSeqsDir, "seqsData")
    self.seqToStructDir = myMakeDir(extractedSeqsDir, "seqToStructMap")

    self.parser = PDBParser(QUIET=True)

    # Lookup tables, filled in later by other methods of the class
    self.seqsDict, self.seqToStruct = {}, {}
    self.structToSeq, self.seqToStructFnames = {}, {}
class ContactMapper(FeaturesComputer):
    '''
    Extends FeaturesComputer class. Extracts res and chainIds for training and predicting and computes
    contact maps for training for a given complex
    '''

    def __init__(self, rFname, lFname, computedFeatsRootDir=None, boundAvailable=True,
                 res2res_dist=6.0, isForPrediction=False, statusManager=None):
        '''
        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise.
                               Bound structures must be located at the same path that unbound
                               structures and need to be named as in the following example:
                                 1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be
                             considered as interacting (Angstroms)
        @param isForPrediction: bool. If False, contacts between amino acids are computed and each
                                residue pair is tagged 1 (contact) or -1 (no contact). If True, no
                                contacts are computed and every pair is tagged np.nan
        @param statusManager: class that implements .setStatus(msg) to communicate.
                              Not used by this constructor.
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)
        self.prefixR = os.path.split(rFname)[1].split(".")[0].split("_")[0]
        self.prefixL = os.path.split(lFname)[1].split(".")[0].split("_")[0]
        if self.prefixR == self.prefixL:
            self.prefix = self.prefixR
        else:
            # Different names: prefix is built as ligand<->receptor, hence the separator
            # characters are not allowed in the names themselves.
            if "<" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character" % lFname)
            if ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character" % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)
            self.prefix = self.prefixL + "<->" + self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir, "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        # CaPPBuilder instead of PPBuilder(radius=200): to not worry for broken chains
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex

    def mapBoundToUnbound(self, structureUnbound, structureBound, skipBoundChainsIds=None):
        '''
        Obtains correspondence between unbound structure and bound structure when available.
        Returns a dictionary that maps bound_residue --> equivalent unbound_residue

        @param structureUnbound: Bio.PDB.Structure. Structure in unbound state
        @param structureBound: Bio.PDB.Structure or None. Structure in bound state. If None, the
                               unbound structure is mapped onto itself.
        @param skipBoundChainsIds: Set of Chars. Set of chain ids that will be skipped for
                                   calculations. None (default) means that no chain is skipped.
        @return bound2UnboundMapDict: Dict {Bio.PDB.Residue (from bound structure):
                                            Bio.PDB.Residue (from unbound structure)}
        '''
        if skipBoundChainsIds is None:  # avoid a shared mutable default argument
            skipBoundChainsIds = set()
        bound2UnboundMapDict = {}
        pp_list_unbound = self.ppb.build_peptides(structureUnbound, aa_only=False)
        if structureBound is None:
            # If there is no bound structure, use just unbound: each residue maps to itself.
            boundToUnboundMap = lambda x: x
            pp_list_bound = pp_list_unbound
        else:
            pp_list_bound = self.ppb.build_peptides(structureBound, aa_only=False)
            # res_bound -> res_unbound mapper object
            mapper = BoundUnboundMapper(pp_list_unbound, pp_list_bound)
            mapper.build_correspondence()
            # For a given bound residue will return its unbound equivalent
            boundToUnboundMap = mapper.mapBoundToUnbound
        for pp in pp_list_bound:
            for resBound in pp:
                chainBound = resBound.get_full_id()[2]  # str chainId
                if chainBound in skipBoundChainsIds:
                    continue
                resUnbound = boundToUnboundMap(resBound)
                # There may be no equivalent unbound residue for a given bound residue
                if resUnbound is not None:
                    bound2UnboundMapDict[resBound] = resUnbound
        return bound2UnboundMapDict

    def fixHomooligomers(self, structureL, structureR, positiveContacts, chainsInContactL,
                         chainsInContactR):
        '''
        For each interacting pair of residues (resL_1, resR_2), it will add to positiveContacts
        (res_1L', resR_2) and/or (resL_1, resR_2') where resL_1' is an equivalent residue in
        homooligomers of ligand

        @param structureL: Bio.PDB.Structure. Structure of ligand
        @param structureR: Bio.PDB.Structure. Structure of receptor
        @param positiveContacts: collection of (ligandResId, receptorResId), both full_ids of
                                 Bio.PDB.Residue
        @param chainsInContactL: chains of the ligand in contact. The incoming value is not read;
                                 it is replaced by the result of HomoOligomerFinder.update_interactions()
        @param chainsInContactR: chains of the receptor in contact. The incoming value is not read;
                                 it is replaced by the result of HomoOligomerFinder.update_interactions()
        @return positiveContacts, chainsInContactL, chainsInContactR. Updated with equivalent
                residues interactions added
        '''
        pp_list_l = self.ppb.build_peptides(structureL, aa_only=False)
        equivalentLmapper = HomoOligomerFinder(pp_list_l, positiveContacts, chainType="l")
        positiveContacts, chainsInContactL = equivalentLmapper.update_interactions()
        pp_list_r = self.ppb.build_peptides(structureR, aa_only=False)
        # The receptor pass starts from the contacts already extended by the ligand pass
        equivalentRmapper = HomoOligomerFinder(pp_list_r, positiveContacts, chainType="r")
        positiveContacts, chainsInContactR = equivalentRmapper.update_interactions()
        return positiveContacts, chainsInContactL, chainsInContactR

    def getPairsOfResiduesInContact(self, structureL, structureR):
        '''
        Computes which amino acids of ligand are in contact with which amino acids of receptor

        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts: Set {(Bio.PDB.Residue.fullResId (from structureL),
                                        Bio.PDB.Residue.fullResId (from structureR))}
        @return chainsNotContactL: Set { str(chainId structureL)}
        @return chainsNotContactR: Set { str(chainId structureR)}
        @raise NoValidPDBFile if any of the structures has no model
        '''
        # Only heavy atoms of the first model are considered (atoms named H* are dropped)
        try:
            atomListL = [atom for atom in structureL.child_list[0].get_atoms()
                         if not atom.name.startswith("H")]
        except IndexError:
            raise NoValidPDBFile("Problems parsing pdbFile 1")
        try:
            atomListR = [atom for atom in structureR.child_list[0].get_atoms()
                         if not atom.name.startswith("H")]
        except IndexError:
            raise NoValidPDBFile("Problems parsing pdbFile 2")

        searcher = NeighborSearch(atomListL + atomListR)
        allNeigs = searcher.search_all(self.res2res_dist, level="R")
        lStructId = structureL.get_id()
        rStructId = structureR.get_id()
        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for res1, res2 in allNeigs:
            fullResId1 = res1.get_full_id()
            fullResId2 = res2.get_full_id()
            pdbId1, pdbId2 = fullResId1[0], fullResId2[0]
            # Keep only ligand-receptor pairs, always stored as (ligand, receptor);
            # pairs within the same structure are ignored.
            if pdbId1 == lStructId and pdbId2 == rStructId:
                positiveContacts.add((fullResId1, fullResId2))
                chainsInContactL.add(fullResId1[2])
                chainsInContactR.add(fullResId2[2])
            elif pdbId1 == rStructId and pdbId2 == lStructId:
                positiveContacts.add((fullResId2, fullResId1))
                chainsInContactL.add(fullResId2[2])
                chainsInContactR.add(fullResId1[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
                structureL, structureR, positiveContacts, chainsInContactL, chainsInContactR)

        allChainsL = set(elem.get_id() for elem in structureL[0].get_list())
        allChainsR = set(elem.get_id() for elem in structureR[0].get_list())
        chainsNotContactL = allChainsL.difference(chainsInContactL)
        chainsNotContactR = allChainsR.difference(chainsInContactR)
        return positiveContacts, chainsNotContactL, chainsNotContactR

    def contactMapOneComplex(self):
        '''
        Computes the contact map of a complex. Initial input for complex codification. Contact map
        is a file written at self.computedFeatsRootDir/common/contactMaps/ with name
        prefix.cMap.tab where prefix is either the common name of ligand and receptor pdb files
        or the concatenation ligandPrefix + "<->" + receptorPrefix (see __init__), e.g.
          1A2K_l_u.pdb and 1A2K_r_u.pdb --> 1A2K.cMap.tab

        Each row of the file is
          chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ
        where categ is 1 (contact), -1 (no contact) or nan (self.isForPrediction is True).

        @return 0 if the contact map file already existed, None otherwise
        @raise BadNumberOfResidues if the number of mapped residues of any partner is not strictly
               between self.minNumResiduesPartner and self.maxNumResiduesPartner
        '''
        outName = self.outName
        print(outName)
        if os.path.isfile(outName):
            print('Already computed contact map')
            return 0

        lStructId = self.prefixL + "_l_u.pdb"
        rStructId = self.prefixR + "_r_u.pdb"
        structureL_u = self.parser.get_structure(lStructId, self.lFname)
        structureR_u = self.parser.get_structure(rStructId, self.rFname)

        structureL_b = None
        structureR_b = None
        if not (self.boundAvailable == False or self.isForPrediction):
            try:
                lStructId_b = self.prefix + "_l_b.pdb"
                rStructId_b = self.prefix + "_r_b.pdb"
                lFname_b = os.path.join(os.path.split(self.lFname)[0], lStructId_b)
                rFname_b = os.path.join(os.path.split(self.rFname)[0], rStructId_b)
                structureL_b = self.parser.get_structure(lStructId_b, lFname_b)
                structureR_b = self.parser.get_structure(rStructId_b, rFname_b)
            except IOError:
                # In this case there are just unbound pdbs available
                structureL_b = None
                structureR_b = None

        if self.isForPrediction:
            positiveContacts = None
            chainsNotContactR = set()
            chainsNotContactL = set()
        elif structureL_b is None or structureR_b is None:
            # No bound structures: compute contacts in unbound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = \
                self.getPairsOfResiduesInContact(structureL_u, structureR_u)
        else:
            # Compute contacts in bound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = \
                self.getPairsOfResiduesInContact(structureL_b, structureR_b)

        if JUST_INTERACTING_CHAINS == False:
            chainsNotContactR = set()
            chainsNotContactL = set()

        rResDict = self.mapBoundToUnbound(structureR_u, structureR_b,
                                          skipBoundChainsIds=chainsNotContactR)
        lResDict = self.mapBoundToUnbound(structureL_u, structureL_b,
                                          skipBoundChainsIds=chainsNotContactL)
        nResiduesL = len(lResDict)
        nResiduesR = len(rResDict)
        if not (self.minNumResiduesPartner < nResiduesL < self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesL, "1")
        if not (self.minNumResiduesPartner < nResiduesR < self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesR, "2")

        # The receptor side of every row does not depend on the ligand residue, so it is sorted
        # and translated only once instead of once per ligand residue.
        receptorRows = []
        for resR_bound in sorted(rResDict, key=lambda x: x.get_full_id()):
            resR_unbound = rResDict[resR_bound]
            try:
                letraR = three_to_one(resR_unbound.resname)
                # Skip residues whose amino acid differs between bound and unbound state
                if letraR != three_to_one(resR_bound.resname):
                    continue
            except KeyError:  # residue name without one-letter code
                continue
            chainIdR, resIdR = resR_unbound.get_full_id()[2:4]
            if chainIdR == " ":
                chainIdR = "*"
            receptorRows.append(
                (resR_bound.get_full_id(), chainIdR, self.makeStrResId(resIdR), letraR))

        outFile = open(outName, "w")
        try:
            # "with" guarantees that the file is closed before the clean-up below removes it
            with outFile:
                outFile.write(
                    "chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ\n")
                for resL_bound in sorted(lResDict, key=lambda x: x.get_full_id()):
                    resL_unbound = lResDict[resL_bound]
                    try:
                        letraL = three_to_one(resL_unbound.resname)
                        if letraL != three_to_one(resL_bound.resname):
                            continue
                    except KeyError:
                        continue
                    chainIdL, resIdL = resL_unbound.get_full_id()[2:4]
                    resIdL = self.makeStrResId(resIdL)
                    if chainIdL == " ":
                        chainIdL = "*"
                    fullIdL_bound = resL_bound.get_full_id()
                    for fullIdR_bound, chainIdR, resIdR, letraR in receptorRows:
                        if self.isForPrediction:
                            categ = np.nan
                        elif (fullIdL_bound, fullIdR_bound) in positiveContacts:
                            categ = 1
                        else:
                            categ = -1
                        outFile.write("%s %s %s %s %s %s %s\n" %
                                      (chainIdL, resIdL, letraL, chainIdR, resIdR, letraR, categ))
        except (KeyboardInterrupt, Exception):
            # Do not leave a partially written contact map behind
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise

    def makeStrResId(self, resId):
        '''
        Converts a residue id tuple into a string by concatenating all its elements but the first
        one and stripping surrounding blanks.

        @param resId: tuple. Residue id as given by Bio.PDB.Residue.get_full_id()[3]
        @return finalId: str. E.g. (" ", 12, "A") --> "12A"
        '''
        valList = [str(elem) for elem in resId[1:]]
        finalId = "".join(valList).strip()
        return finalId
class SeqsManager(SeqFeatComputer):
    '''
    Extends SeqFeatComputer class. Extracts sequences from pdbFiles to fasta files. Then it allows
    to easily enumerate sequences (all letters) and fasta files and also allows for mapping between
    seqIndices and structIndices and vice versa
    '''

    def __init__(self, rFname, lFname, computedFeatsRootDir=None):
        '''
        @param rFname: str. path to receptor pdb or fasta file
        @param lFname: str. path to ligand pdb or fasta file
        @param computedFeatsRootDir: str. path where features will be stored. If None they will be
                                     stored at default path (assigned in ../Config.py)
        '''
        SeqFeatComputer.__init__(self, rFname, lFname, computedFeatsRootDir)
        self.outPath = myMakeDir(self.computedFeatsRootDir, "extractedSeqs")
        self.fastaOutDir = myMakeDir(self.outPath, "seqsData")
        self.seqToStructDir = myMakeDir(self.outPath, "seqToStructMap")
        self.parser = PDBParser(QUIET=True)
        self.seqsDict = {}           # {chainType: {chainId: (sequence, fastaFname)}}
        self.seqToStruct = {}        # {(chainType, chainId, seqIndex): resId}
        self.structToSeq = {}        # {(chainType, chainId, resId): seqIndex}
        self.seqToStructFnames = {}  # {(chainType, chainId): (seqStruMapFname, prefixAndChainType)}

    def computeOneFile(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given pdb or fasta file

        @param fileName: str. fname to pdb (or fasta) file
        @param chainType: str. "l" for ligand and "r" for receptor
        '''
        if self.checkIfIsFasta(fileName):
            self.computeOneFileFromFasta(fileName, chainType)
        else:
            self.computeOneFileFromPDB(fileName, chainType)

    def addResiduesToSeqToStructMap(self, chainType, chainId, seqStr, resIds):
        '''
        Replaces the sequence and the seq to struct mapping of one chain that was previously
        loaded, rewriting its fasta and seqStruMap files.

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of the sequence to be replaced
        @param seqStr: str. New sequence of the chain
        @param resIds: list of str, one per letter of seqStr. "-" if the position has no
                       equivalent residue in the structure; otherwise the residue number
                       optionally followed by a one-character insertion code (e.g. "12", "12A")
        '''
        assert len(seqStr) == len(resIds)
        fastaFname = self.seqsDict[chainType][chainId][1]
        self.seqsDict[chainType][chainId] = (seqStr, fastaFname)
        with open(fastaFname, "w") as f:
            f.write(">" + os.path.split(fastaFname)[-1] + "\n" + seqStr)

        # Forget the previous seq -> struct mapping of this chain
        staleKeys = [key for key in self.seqToStruct if key[:2] == (chainType, chainId)]
        for key in staleKeys:
            del self.seqToStruct[key]

        listForFile = []
        for i, resId in enumerate(resIds):
            if resId == "-":
                continue
            flag = " "
            if not resId[-1].isdigit():
                # Trailing non-digit character is the insertion code
                flag = resId[-1]
                resId = resId[:-1]
                # Residue numbers are stored as int everywhere else (Bio.PDB residue ids), so the
                # number must be converted also when an insertion code is present.
                try:
                    resId = int(resId)
                except ValueError:
                    pass  # not a number: keep the raw text as before
            else:
                resId = int(resId)
            structId = (" ", resId, flag)
            self.seqToStruct[(chainType, chainId, i)] = structId
            key_structSeq = (chainType, chainId, structId)
            # Only residues already known for this chain get their sequence index updated
            if key_structSeq in self.structToSeq:
                self.structToSeq[key_structSeq] = i
            # Same "index;letter;resId" row format that is used when the map is first written
            listForFile.append("%d;%s;%s" % (i, seqStr[i], str(structId)))

        outName, prefixAndChainType = self.seqToStructFnames[(chainType, chainId)]
        with open(outName, "w") as f:
            f.write(">" + prefixAndChainType + "_" + chainId + "\n" + "\n".join(listForFile))

    def computeOneFileFromPDB(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given pdb file. For each accepted chain, a fasta file
        and a seqStruMap file are written.

        @param fileName: str. fname to pdb file
        @param chainType: str. "l" for ligand and "r" for receptor
        '''
        self.seqsDict[chainType] = {}
        baseName = os.path.split(fileName)[-1]
        if not (fileName.endswith("_r_u.pdb") or fileName.endswith("_l_u.pdb")):
            prefixAndChainType = baseName.split(".pdb")[0] + "_" + chainType
        else:
            prefixAndChainType = baseName.split("_u.pdb")[0]

        struct = self.parser.get_structure(prefixAndChainType, fileName)
        for chain in struct[0]:
            chainId = chain.get_id()
            if chainId == " ":
                chainId = "*"
            nResStandard = sum([1 for res in chain if is_aa(res, standard=True)])
            resList = [res for res in sorted(chain.child_list, key=lambda x: x.get_id()[1:])
                       if is_aa(res, standard=False)]
            nResAll = len(resList)
            if nResStandard < int(0.5 * nResAll):
                continue  # skip if most residues are not standard
            if nResAll <= SMALL_CHAINS_LIMIT:
                continue  # too small chains will not be considered

            sequence = []
            resIds = []
            for i, res in enumerate(resList):
                try:
                    letter = three_to_one(res.resname)
                except KeyError:
                    print("Exception", res)
                    letter = "X"
                    # This case is for TCGR....TLRX where X is GDP or other molecule:
                    # an unknown residue at the very end of the chain is dropped.
                    if i == (nResAll - 1):
                        break
                resId = res.get_full_id()[3]
                sequence.append(letter)
                resIds.append("%d;%s;%s" % (i, letter, resId))
                self.seqToStruct[(chainType, chainId, i)] = resId
                self.structToSeq[(chainType, chainId, resId)] = i

            sequence = "".join(sequence)
            outNameFasta = os.path.join(self.fastaOutDir,
                                        prefixAndChainType + "_" + chainId + "_u.fasta")
            with open(outNameFasta, "w") as f:
                f.write(">" + prefixAndChainType + "_" + chainId + "\n" + sequence)

            resIds = "\n".join(resIds)
            outName = os.path.join(self.seqToStructDir,
                                   prefixAndChainType + "_" + chainId + "_u.seqStruMap")
            self.seqToStructFnames[(chainType, chainId)] = (outName, prefixAndChainType)
            with open(outName, "w") as f:
                f.write(">" + prefixAndChainType + "_" + chainId + "\n" + resIds)
            self.seqsDict[chainType][chainId] = (sequence, outNameFasta)

    def computeOneFileFromFasta(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given fasta file (dummy, used for compatibility).
        The chain id is "L" for ligand and "R" for receptor and residue ids are
        (' ', seqIndex, ' ').

        @param fileName: str. fname to fasta file
        @param chainType: str. "l" for ligand and "r" for receptor
        @raise FeatureComputerException if chainType is not 'l' or 'r' or if the sequence is not
               longer than SMALL_CHAINS_LIMIT
        '''
        self.seqsDict[chainType] = {}
        baseName = os.path.split(fileName)[-1]
        if not (fileName.endswith("_r_u.fasta") or fileName.endswith("_l_u.fasta")):
            # NOTE(review): splits on ".pdb" although this is a fasta file, so the fasta extension
            # stays in the prefix. Left untouched because output file names depend on it -- confirm.
            prefixAndChainType = baseName.split(".pdb")[0] + "_" + chainType
        else:
            prefixAndChainType = baseName.split("_u.fasta")[0]

        seq = self.parseFasta(fileName)
        if chainType == "l":
            chainId = "L"
        elif chainType == "r":
            chainId = "R"
        else:
            raise FeatureComputerException(
                "Error, bad chainType %s for computeOneFileFromFasta, must be 'r' or 'l'" %
                chainType)

        if len(seq) <= SMALL_CHAINS_LIMIT:  # too small chains will not be considered
            raise FeatureComputerException(
                "Error, %s is to short (10 AA min) " % prefixAndChainType)

        sequence = []
        resIds = []
        for i, resname in enumerate(seq):
            if resname not in d1_to_index:
                resname = "X"
            resId = (' ', i, ' ')
            sequence.append(resname)
            resIds.append("%d;%s;%s" % (i, resname, resId))
            self.seqToStruct[(chainType, chainId, i)] = resId
            self.structToSeq[(chainType, chainId, resId)] = i

        sequence = "".join(sequence)
        outNameFasta = os.path.join(self.fastaOutDir,
                                    prefixAndChainType + "_" + chainId + "_u.fasta")
        # Already existing files are not overwritten
        if not os.path.isfile(outNameFasta):
            with open(outNameFasta, "w") as f:
                f.write(">" + prefixAndChainType + "_" + chainId + "\n" + sequence)

        resIds = "\n".join(resIds)
        outName = os.path.join(self.seqToStructDir,
                               prefixAndChainType + "_" + chainId + "_u.seqStruMap")
        if not os.path.isfile(outName):
            with open(outName, "w") as f:
                f.write(">" + prefixAndChainType + "_" + chainId + "\n" + resIds)
        self.seqsDict[chainType][chainId] = (sequence, outNameFasta)

    def getSeq(self, chainType, chainId):
        '''
        gets the desired seq of a pdb complex that matches chainType and chainId

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence to be extracted
        @return (seqStr:str, fastaFileName:str). Tuple. 1st element sequence as str and second
                element path to a fasta file where sequence was extracted
        @raise KeyError if there is no sequence for chainType and chainId
        '''
        return self.seqsDict[chainType][chainId]

    def enumSeqs(self, chainType):
        '''
        yields the identifiers of all the sequences loaded for chainType. Use getSeq() to obtain
        the sequence itself.

        @param chainType: str. "l" for ligand and "r" for receptor
        @yields (chainType:str, chainId:str)
        '''
        for chainId in self.seqsDict[chainType]:
            yield chainType, chainId

    def getSeqsOutDir(self):
        '''
        returns the path where fasta files are saved for each of the chains of a pdb file

        @return fastaOutDir:str. Path to the directory of fasta files
        '''
        return self.fastaOutDir

    def getSeqsMapperOutDir(self):
        '''
        returns the path where seq to struct maps have been saved (No needed)

        @return seqToStructDir:str. Path to the directory of seq to struct maps
        '''
        return self.seqToStructDir

    def seqToStructIndex(self, chainType, chainId, seqIndex, asString=False):
        '''
        gets the struct id that matches to the chainType, chainId, seqIndex asked

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence
        @param seqIndex: int. Position of the residue at the sequence
        @param asString: boolean. If False, the returned value will be a tuple as provided by
                         Bio.PDB.Residue.get_full_id()[3]. If True it will be a string obtained
                         by concatenating the tuple (but its first element) and using strip()
        @return None if there is no mapping. Otherwise
                Bio.PDB.Residue.get_full_id()[3]                        if asString == False
                "".join(Bio.PDB.Residue.get_full_id()[3][1:]).strip()   if asString == True
        '''
        try:
            structId = self.seqToStruct[(chainType, chainId, seqIndex)]
        except KeyError:
            return None
        if asString:
            return "".join([str(elem) for elem in structId[1:]]).strip()
        return structId

    def structToSeqIndex(self, chainType, chainId, structIndex):
        '''
        gets the seq index that matches to the chainType, chainId, structIndex asked

        @param chainType: str. "l" for ligand and "r" for receptor
        @param chainId: str. chain id of sequence
        @param structIndex: tuple. resId as the one provided by Bio.PDB.Residue.get_full_id()[3]
        @return seqIndex: integer. The sequential index of residue with resId==structIndex
        @raise KeyError if there is no mapping
        '''
        return self.structToSeq[(chainType, chainId, structIndex)]