def isPDBFile(pathFileName):
    """Check that ``pathFileName`` looks like a PDB file.

    The file is accepted as soon as ``FcfrpFile.find`` reports an ``ATOM``
    record or, failing that, a ``SEQRES`` record. A result of ``-1`` from
    ``find`` is taken to mean "not present". Nothing is returned on success.

    Raises:
        Exception: when neither ``ATOM`` nor ``SEQRES`` is found.
    """
    directory, baseName = os.path.split(pathFileName)
    candidate = FcfrpFile(directory, baseName)

    # One marker is enough; SEQRES is only searched when ATOM is absent.
    if candidate.find("ATOM") != -1:
        return
    if candidate.find("SEQRES") != -1:
        return

    raise Exception(
        "The %s file is not a PDB File. Please, check it." % pathFileName)
def createSitesFile(self, last):
    """Generate the ``sites`` file from the renumbered PQR file.

    Reads ``<self._pqrName>.renumbered`` from ``self._pathPqr`` line by line.
    On the first line of every residue (a change in the fifth column) it adds
    either an N-terminal entry (``insertNterminal``) or, for residues listed
    in ``self.resIonic``, a ``"<numRes> <resName>"`` line. Every ``END_CHAIN``
    line, and the end of the input, adds a C-terminal entry
    (``insertCterminal``) for the most recently read residue line. The text is
    saved through ``saveFile`` under ``self._pqrName`` with its last three
    characters replaced by ``sites``.

    Blank lines in the input are ignored.

    Args:
        last: value compared with ``int()`` of the second column. When an
            ionic residue starts on a line whose second column equals it,
            reading stops and that residue is not listed.

    Raises:
        NameError: (``UnboundLocalError``) if an ``END_CHAIN`` line or the end
            of the input is reached before any residue line was read.
    """
    pqr_file = FcfrpFile(self._pathPqr, self._pqrName + '.renumbered')
    record = ''
    refLine = -1             # fifth column of the residue handled last
    NTref = 1                # second-column value that marks an N-terminal
    nextIsNTerminal = False  # set after END_CHAIN: next line opens a chain

    for line in pqr_file.readLines():
        fields = line.split()
        if not fields:
            # A blank line carries no record; indexing it would fail.
            continue

        if fields[0] == "END_CHAIN":
            # Close the chain that just ended with a C-terminal entry.
            record = self.insertCterminal(numRes, resName, record)
            nextIsNTerminal = True
            continue

        # presumably PQR columns: 2 = sequence number, 4 = residue name,
        # 5 = residue number -- TODO confirm against the PQR writer
        numSeq = fields[1]
        resName = fields[3]
        numRes = fields[4]

        if nextIsNTerminal:
            NTref = int(numSeq)
            nextIsNTerminal = False

        if numRes == refLine:
            # Still inside the residue that was already handled.
            continue

        if int(numSeq) == NTref:
            # The first residue of each chain is an N-terminal.
            record = self.insertNterminal(numRes, resName, record)
        elif resName in self.resIonic:
            if int(numSeq) == last:
                # The last ionic residue is deliberately not inserted.
                break
            record = record + str(numRes) + " " + str(resName) + '\n'
        refLine = numRes

    # C-terminal of the final chain (no END_CHAIN follows it).
    record = self.insertCterminal(numRes, resName, record)
    self.saveFile(self._pqrName[:len(self._pqrName) - 3] + 'sites', record)
def saveFile(self, fileName, conteudo):
    """Write ``conteudo`` to ``fileName`` inside ``self._pathOut``.

    The file is opened in ``"w"`` mode through ``FcfrpFile.getAsFile`` and is
    closed even when the write fails, so a failed write does not leave the
    file open.

    Args:
        fileName: name of the file to create under ``self._pathOut``.
        conteudo: content handed to ``FcfrpFile.write``.
    """
    target = FcfrpFile(self._pathOut, fileName)
    target.getAsFile("w")
    try:
        target.write(conteudo)
    finally:
        target.close()
def getStructure(id, path, modelChoose=None):
    """Return the structure loaded from the ``<id>.PDB`` file in ``path``.

    When the file does not exist it is first fetched with ``getPDBFromSite``
    and stored with ``savePDBFile``. The file is then validated with
    ``isPDBFile`` and checked with ``checkPDBFile``; any reported errors are
    displayed through ``FcfrpShowErrosDetails`` before the structure is
    loaded. The structure is loaded and returned whether or not errors were
    reported.

    Args:
        id: PDB identifier; ``str(id) + ".PDB"`` is the file name used.
        path: directory handed to ``FcfrpFile``.
        modelChoose: accepted for interface compatibility; not used here.
    """
    pdbSource = FcfrpFile(path, str(id) + ".PDB")

    if not pdbSource.existsFile():
        savePDBFile(getPDBFromSite(id), pdbSource._path)

    # NOTE(review): validation is applied to pre-existing and freshly
    # downloaded files alike -- confirm this matches the intended nesting.
    isPDBFile(pdbSource._path)

    errors, vs = checkPDBFile(id, pdbSource.getPath())
    if len(errors) != 0:
        FcfrpShowErrosDetails(vs, errors).showErrors()

    return loadStructure(id, pdbSource.getPath(), True)
def loadStructureFromFile(self, id, pathfileName, chainId=None):
    """Parse a PDB file and return the structure built from it.

    The file is read once. ``SEQRES``, ``SSBOND`` and ``ATOM`` records that
    belong to the selected chain (as decided by ``checkChain``) are stored in
    ``self._pdbFileLayout``. The section loaders then feed the structure
    builder, and ``_loadErrorsfromFile`` is run on the result.

    Blank lines in the file are ignored.

    Args:
        id: identifier handed to the structure builder and the loaders.
        pathfileName: full path of the PDB file.
        chainId: chain to keep; passed unchanged to ``checkChain``.

    Returns:
        The object returned by ``self._structure_builder.get_structure()``.
    """
    directory, fileName = os.path.split(pathfileName)
    filePDB = FcfrpFile(directory, fileName)

    # Model number of the ATOM records being read. It stays None until the
    # first MODEL record is seen.
    currentModel = None

    self._structure_builder.init_structure(id)

    # First pass: sort the records of the file into the layout dictionaries.
    for line in filePDB.readLines():
        record = line.split()
        if not record:
            # A blank line has no record name; indexing it would fail.
            continue

        tag = record[0]
        if tag == "SEQRES":
            if self.checkChain(record[2], chainId):
                self._pdbFileLayout.setRes(record)
                self._pdbFileLayout.setSeqRes(record)
        elif tag == "SSBOND":
            # The chain id is read from a fixed column of the raw line.
            if self.checkChain(line[15:16], chainId):
                self._pdbFileLayout.setSSBonds(line)
        elif tag == "MODEL":
            # "MODEL 1" splits into ['MODEL', '1'].
            currentModel = int(record[1])
        elif tag == "ATOM":
            if self.checkChain(line[21:22], chainId):
                self._pdbFileLayout.setAtom(currentModel, line)

    # Second pass: each loader turns one PDB section into structure data.
    self._loadSeqResfromFile(id)
    self._loadSSBondsfromfile(id)
    self._loadAtomsfromFile(id, self._pdbFileLayout.getAtoms())

    structure = self._structure_builder.get_structure()
    self._loadErrorsfromFile(id, structure, pathfileName, chainId)
    return structure
def __init__(self, structure, pathFileName, chainId=None):
    """Store the structure and PDB file, and create empty result containers.

    Args:
        structure: structure object whose models are iterated by the checks.
        pathFileName: full path of the PDB file the structure came from.
        chainId: chain to restrict the checks to; ``None`` by default.
    """
    self._structure = structure
    self._descricaoAmino = FcfrpAminoTopol()

    folder, baseName = os.path.split(pathFileName)
    self._file = FcfrpFile(folder, baseName)

    # Residue names accepted by the checks; used to remove heteroatoms.
    self._valid_residues = [
        "ALA", "ARG", "ASN", "ASP", "CYS",
        "GLN", "GLU", "GLY", "HIS", "ILE",
        "LEU", "LYS", "MET", "PHE", "PRO",
        "SER", "THR", "TRP", "TYR", "VAL",
    ]

    # Residue names per chain, from the SEQRES and ATOM sections.
    self._dicResidueFromSeqres = {}
    self._dicResidueFromAtom = {}

    # Findings collected by the checks.
    self._dicMissingResidues = {}
    self._dicMissingAtoms = {}
    self._dicDuplicatedAtoms = {}
    self._listUnknowResidues = []
    self._erros = []

    self._chainId = chainId
class FcfrpStructureChains():
    """Save and process per-chain structures under one base directory.

    Structure names are expected in the form ``<pdbid>_<suffix>``: the part
    before the first underscore selects the sub-directory used for that
    structure.
    """

    def __init__(self, path, fileNameSet):
        """Remember the base directory and create the set-file object.

        Args:
            path: base directory under which per-id directories are created.
            fileNameSet: name of the set file inside ``path``.
        """
        self._path = path
        self.fileSet = FcfrpFile(path, fileNameSet)

    def getPDBIdFromName(self, name):
        """Return the text of ``name`` that precedes the first underscore."""
        return str(name).split("_")[0]

    def splitPDBChains(self, id):
        """Return the result of ``getStructureChains(id)``."""
        return getStructureChains(id)

    def getDirectory(self, id):
        """Return ``<base path>/<id>``, creating the directory when absent."""
        path = os.path.join(self._path, id)
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    def getPathFileName(self, name):
        """Return the ``.pdb`` path for ``name`` inside its id directory.

        The id directory is created as a side effect (see ``getDirectory``).
        """
        directory = self.getDirectory(self.getPDBIdFromName(name))
        return os.path.join(directory, name + ".pdb")

    def saveStructures(self, dicStructures):
        """Save every structure of ``dicStructures`` with ``saveStructure``.

        Args:
            dicStructures: mapping of structure name to structure.
        """
        for name, structure in dicStructures.items():
            saveStructure(structure, self.getPathFileName(name))

    def buildFileSetProteins(self, id, dicStructures):
        """Append ``id`` and the names in ``dicStructures`` to the set file.

        The main PDB id is written as ``;<id>``, followed by one line per
        structure name with a leading space. The handle returned by
        ``getAsFile`` is closed when done, even if a write fails.
        """
        f = self.fileSet.getAsFile('a')
        try:
            f.write(";" + id + "\n")  # The main pdb
            for name in dicStructures:
                f.write(" " + name + "\n")
        finally:
            f.close()

    def executeTitration(self, dicStructures, table):
        """Run ``FcfrpTitration`` for every structure in its id directory."""
        for name, structure in dicStructures.items():
            path = self.getDirectory(self.getPDBIdFromName(name))
            FcfrpTitration(3, structure=structure, TableId=table, PDBid=name,
                           path=path, FileName=None).execute(path)

    def executeCapacitance(self, dicStructures, table):
        """Run ``FcfrpCapacitance`` for every structure in its id directory."""
        for name, structure in dicStructures.items():
            path = self.getDirectory(self.getPDBIdFromName(name))
            FcfrpCapacitance(3, structure=structure, TableId=table, PDBid=name,
                             path=path, FileName=None).execute(path)
def save(self, structure, dir, fileName):
    """Write ``structure`` to ``fileName`` in ``dir`` as a PDB file.

    The remark, SEQRES and SSBOND sections are written first by their own
    helpers, followed by the atoms of every model. The file is closed even
    when one of the writers raises.

    Args:
        structure: iterable of models; each model iterates chains, each chain
            residues and each residue atoms.
        dir: directory of the output file.
        fileName: name of the output file.
    """
    filePDB = FcfrpFile(dir, fileName)
    filePDB.getAsFile("w")
    try:
        self._saveRemark(structure, filePDB)
        self._saveSeqRes(structure, filePDB)
        self._saveSSBonds(structure, filePDB)

        # ATOM section
        iModels = 0  # index for models
        iAtom = 0    # index for atoms
        iRes = 0     # index for residues
        for model in structure:
            # A MODEL line is only written when there is more than one model;
            # the atom and residue counters then restart for that model.
            if self._amountModels > 1:
                iModels += 1
                filePDB.write(self._MODEL % str(iModels))
                iAtom = 0
                iRes = 0
            for chain in model:
                for residue in chain:
                    iRes += 1
                    for atom in residue:
                        iAtom += 1
                        self._saveAtom(iAtom, atom, residue, chain, iRes,
                                       filePDB, int(residue.id[1]))
                # NOTE(review): TER is written after each chain and ENDMDL
                # after each model -- confirm against the intended layout.
                filePDB.write(self._TER)
            filePDB.write(self._ENDMDL)
    finally:
        filePDB.close()
def __init__(self, path, fileNameSet):
    """Remember the base directory and create the set-file object.

    Args:
        path: base directory, kept in ``self._path``.
        fileNameSet: name of the set file inside ``path``; wrapped in a
            ``FcfrpFile`` and kept in ``self.fileSet``.
    """
    self._path = path
    setFile = FcfrpFile(path, fileNameSet)
    self.fileSet = setFile
class FcfrpValidation():
    """Compare the SEQRES records of a PDB file with a parsed structure.

    The checks collect missing residues, missing atoms, atoms with an
    alternative location and unknown residue names. Results are read back
    through the ``get*`` / ``have*`` methods and ``hasErrors``.
    """

    def __init__(self, structure, pathFileName, chainId=None):
        """Store the inputs and create empty result containers.

        Args:
            structure: structure object whose models are iterated.
            pathFileName: full path of the PDB file the structure came from.
            chainId: chain to restrict the checks to; ``None`` for all chains.
        """
        self._structure = structure
        self._descricaoAmino = FcfrpAminoTopol()
        directory, name = os.path.split(pathFileName)
        self._file = FcfrpFile(directory, name)
        # Used to remove heteroatoms
        self._valid_residues = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLN",
                                "GLU", "GLY", "HIS", "ILE", "LEU", "LYS",
                                "MET", "PHE", "PRO", "SER", "THR", "TRP",
                                "TYR", "VAL"]
        self._dicResidueFromSeqres = {}
        self._dicResidueFromAtom = {}
        self._dicMissingResidues = {}
        self._dicMissingAtoms = {}
        self._dicDuplicatedAtoms = {}
        self._listUnknowResidues = []
        self._erros = []
        self._chainId = chainId

    def checkStructure(self):
        """Run every check: missing residues, missing and duplicated atoms."""
        self.setResiduesFromSeqres()
        self.setResiduesFromAtom()
        seqres = self.getResiduesFromSeqres()
        atoms = self.getResiduesFromATOM()
        # Positions are only searched when some chain differs in length.
        if self.getDifferenceBetweenDictionaries(seqres, atoms) == 1:
            self.setPositionMissingResidues(seqres, atoms)
        self.findMissingAtoms()
        self.findDuplicatedAtoms()

    def setResiduesFromSeqres(self):
        """Collect, per chain, the valid residue names listed in SEQRES.

        Names that are not in ``self._valid_residues`` are appended to the
        unknown-residue list as a (residue, chain) pair. Blank lines are
        skipped, and a SEQRES line without any valid residue adds nothing to
        the per-chain dictionary.
        """
        for line in self._file.readLines():
            field = line.split()
            if not field or field[0] != "SEQRES":
                continue
            chain = field[2]
            if not self.checkChain(chain, self._chainId):
                continue
            residues = []
            # Residue names start at the fifth field of the line.
            for res in field[4:]:
                if res in self._valid_residues:
                    residues.append(str(res))
                else:
                    self._listUnknowResidues.append(res)    # residue
                    self._listUnknowResidues.append(chain)  # chain
            if residues:
                self._dicResidueFromSeqres.setdefault(chain, []).extend(residues)

    def setResiduesFromAtom(self):
        """Collect, per chain, the valid residue names of the structure.

        A chain id seen in more than one model keeps the list of the model
        visited last.
        """
        for model in self._structure:
            for chain in model:
                if self.checkChain(chain.id, self._chainId):
                    self._dicResidueFromAtom[chain.id] = [
                        str(residue.get_resname())
                        for residue in chain
                        if residue.get_resname() in self._valid_residues
                    ]

    def getResiduesFromSeqres(self):
        """Return the residues dictionary built from SEQRES."""
        return self._dicResidueFromSeqres

    def getResiduesFromATOM(self):
        """Return the residues dictionary built from ATOM."""
        return self._dicResidueFromAtom

    def getMissingResidues(self):
        """Return the dictionary of missing residues."""
        return self._dicMissingResidues

    def getMissingAtoms(self):
        """Return the dictionary of missing atoms."""
        return self._dicMissingAtoms

    def getDuplicatedAtoms(self):
        """Return the dictionary of duplicated atoms."""
        return self._dicDuplicatedAtoms

    def getUnknownResidues(self):
        """Return the flat list of unknown residues and their chains."""
        return self._listUnknowResidues

    def haveMissingResidues(self):
        """Return 1 if there are missing residues, otherwise 0."""
        return 1 if len(self._dicMissingResidues) > 0 else 0

    def haveMissingAtoms(self):
        """Return 1 if there are missing atoms, otherwise 0."""
        return 1 if len(self._dicMissingAtoms) > 0 else 0

    def haveDuplicatedAtoms(self):
        """Return 1 if there are duplicated atoms, otherwise 0."""
        return 1 if len(self._dicDuplicatedAtoms) > 0 else 0

    def haveUnknownResidues(self):
        """Return 1 if there are unknown residues, otherwise 0."""
        return 1 if len(self._listUnknowResidues) > 0 else 0

    def hasErrors(self):
        """Return the list of error codes found by the checks.

        Codes: 1 missing residues, 2 missing atoms, 3 duplicated atoms,
        4 unknown residues. The list is rebuilt on every call, so calling the
        method repeatedly does not duplicate codes.
        """
        self._erros = []
        if self.haveMissingResidues() == 1:
            self._erros.append(1)
        if self.haveMissingAtoms() == 1:
            self._erros.append(2)
        if self.haveDuplicatedAtoms() == 1:
            self._erros.append(3)
        if self.haveUnknownResidues() == 1:
            self._erros.append(4)
        return self._erros

    def getDifferenceBetweenDictionaries(self, seqres, atom):
        """Return 1 if any chain differs in length between SEQRES and ATOM.

        A chain listed in ``seqres`` but absent from ``atom`` is compared
        against an empty list. Returns 0 when every chain has equal length.
        """
        for chain in seqres:
            if len(seqres[chain]) != len(atom.get(chain, [])):
                return 1
        return 0

    def setPositionMissingResidues(self, dicResSeqres, dicResAtom):
        """Record which SEQRES residues have no counterpart in ATOM.

        Both lists are walked in order. A SEQRES residue equal to the next
        unmatched ATOM residue consumes it; any other SEQRES residue is
        stored as a (name, 1-based position) pair. Once the ATOM list is used
        up, every remaining SEQRES residue is reported. A chain absent from
        ``dicResAtom`` is treated as having no ATOM residues.
        """
        for chain in dicResSeqres:
            fromSeqres = dicResSeqres[chain]
            fromAtom = dicResAtom.get(chain, [])
            missing = []
            j = 0
            for i, res in enumerate(fromSeqres):
                if j < len(fromAtom) and res == fromAtom[j]:
                    j += 1
                else:
                    missing.append(str(res))
                    missing.append(str(i + 1))
            if missing:
                self._dicMissingResidues[chain] = missing

    def findMissingAtoms(self):
        """Record residues that lack atoms, then add the missing residues.

        Keys of the result are ``(chain id, residue number, residue name)``.
        """
        for model in self._structure:
            for chain in model:
                if not self.checkChain(chain.id, self._chainId):
                    continue
                for residue in chain:
                    if residue.get_resname() not in self._valid_residues:
                        continue
                    result = self.checkAtomsFromResidue(
                        residue.get_resname(),
                        self.getAtomsFromResidueInATOM(chain.id,
                                                       residue.get_id()))
                    if len(result) > 0:
                        key_comp = residue.get_id()
                        self._dicMissingAtoms[
                            chain.id, key_comp[1], residue.get_resname()
                        ] = result
        self.getAtomsFromMissingResidues()

    def getAtomsFromResidueInATOM(self, chainRef, residueRef):
        """Return the atom names of residue ``residueRef`` in ``chainRef``.

        NOTE(review): every model is visited, so a structure with several
        models yields the atom names of all of them concatenated -- confirm
        this is intended.
        """
        atoms = []
        for model in self._structure:
            for chain in model:
                if chain.id == chainRef:
                    residue = chain[(residueRef[0], residueRef[1],
                                     residueRef[2])]
                    for atom in residue:
                        atoms.append(str(atom.get_name()))
        return atoms

    def checkAtomsFromResidue(self, residue, atoms):
        """Return ``[amount, element]`` for an element the residue is short of.

        The expected count per element comes from the amino-acid topology.
        Hydrogens are ignored, and elements absent from ``atoms`` are not
        examined. One oxygen is subtracted from the expected count
        (presumably the one lost in the peptide bond -- TODO confirm).

        NOTE(review): when several elements are short, only the one visited
        last is returned.
        """
        expected = self._descricaoAmino.getDicionario(residue)
        found = self.organizaAtomos(atoms)
        missings = []
        for key in expected:
            if key == 'H' or key not in found:
                continue
            shortfall = int(expected[key]) - int(found[key])
            if key == 'O':
                shortfall -= 1
            if shortfall > 0:
                # [0] amount, [1] atom
                missings = [str(shortfall), str(key)]
        return missings

    def getAtomsFromMissingResidues(self):
        """Add every atom of every missing residue to the missing atoms.

        Keys are ``(chain, position as string, residue name)``; values are
        flat ``[amount, element, amount, element, ...]`` lists.
        """
        for chain in self._dicMissingResidues:
            pairs = self._dicMissingResidues[chain]
            # The list is flat: name, position, name, position, ...
            for k in range(0, len(pairs), 2):
                resName = pairs[k]
                position = pairs[k + 1]
                topology = self._descricaoAmino.getDicionario(resName)
                atoms = []
                for element in topology:
                    atoms.append(topology[element])
                    atoms.append(element)
                self._dicMissingAtoms[chain, str(position), resName] = atoms

    def organizaAtomos(self, atomos):
        """Count atoms by the first character of their name.

        Returns a dictionary mapping that character to the count as a string.
        """
        counts = {}
        for atm in atomos:
            element = str(atm)[0]
            counts[element] = str(int(counts.get(element, 0)) + 1)
        return counts

    def findDuplicatedAtoms(self):
        """Record atoms that carry an alternative location.

        Keys are ``(chain id, residue number, residue name, insertion code)``;
        values are flat ``[atom name, altloc, ...]`` lists.
        """
        for model in self._structure:
            for chain in model:
                if not self.checkChain(chain.id, self._chainId):
                    continue
                for residue in chain:
                    for atom in residue:
                        result = self.checkDuplicatedAtomsInResidue(
                            chain, residue, atom)
                        if len(result) > 0:
                            key_comp = residue.get_id()
                            key = (chain.id, key_comp[1],
                                   residue.get_resname(), key_comp[2])
                            self._dicDuplicatedAtoms.setdefault(
                                key, []).extend(result)

    def checkDuplicatedAtomsInResidue(self, chain, residue, atom):
        """Return ``[name, altloc]`` if ``atom`` has an alternative location.

        An altloc of a single space means "none" and gives an empty list.
        """
        altloc = str(atom.get_altloc())
        if altloc == " ":
            return []
        return [str(atom.get_name()), altloc]

    def checkChain(self, chainIdFile, chainIdSelected):
        """Return True when ``chainIdFile`` passes the chain selection.

        Every chain passes when no chain was selected (``None``).
        """
        return chainIdSelected is None or chainIdFile == chainIdSelected