def analyzeXplorLog(logFile, extraIgnoreLineList=[], extraIgnoreCountList=[]):
    '''
    @param logFile:              path to the Xplor log file to analyze.
    @param extraIgnoreLineList:  additional line patterns to ignore (currently unused).
    @param extraIgnoreCountList: additional ignore counts (currently unused).

    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
                 0         1        2          3           4          5
    Return None on error.
    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    '''
    result = [None, None, 0, 0, 0, 0]
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None
    for r in AwkLike(logFile):
        line = r.dollar[0]
        lineLower = line.lower()
        if line.startswith(' %') and line.count('ERR'):
            toIgnore = False
            for ignoreLine in ignoreLineXplorList:
                ignoreLineLower = ignoreLine.lower()
                if lineLower.count(ignoreLineLower):
                    # nTdebug("Ignoring line for error count: %s" % line)
                    toIgnore = True
                    break # finding one is enough
                # end if
            # end for
            if toIgnore:
                result[4] += 1
                continue
            nTwarning("Found error in line: %s" % line)
            result[2] += 1
        elif line.startswith(' %') and line.count('WRN'):
            nTdebug("Found warning in line: %s" % line)
            result[3] += 1
        elif line.startswith(prefixDebug):
            nTdebug("Found debug in line: %s" % line)
            result[5] += 1
        else:
            result[4] += 1
            if line.count('total CPU time='):
                # nTdebug("Matched time in line: %s" % line)
                timeTakenStr = r.dollar[r.NF - 1]
                result[0] = float(timeTakenStr)
                # nTdebug("Found time: %s" % result[0])
            elif line.count('Program execution will be terminated'):
                nTdebug("Matched termination on line: %s" % line)
                result[1] = True
            # end elif
        # end else
    # end for
    return result
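# Hedged usage sketch (not part of the original module): unpacking the list
# returned by analyzeXplorLog(). The log file name is a hypothetical example.
def _exampleAnalyzeXplorLog():
    result = analyzeXplorLog('refine_1.log')
    if result is None:
        return
    timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug = result
    # The four line counts together should equal the total number of log lines.
    nTmessage("time (s): %s crashed: %s counts (e/w/m/d): %d/%d/%d/%d" % (
        timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug))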
def getPostscriptFileNames(self):
    """
    Return a NTlist with (postscriptFileName, description) tuples.
    """
    result = NTlist()
    path = os.path.join(self.rootPath, 'postscriptFiles.lis')
    if os.path.exists(path):
        for line in AwkLike(path, separator=':'):
            result.append((line.dollar[2][:-1].strip(), line.dollar[1][:-6].strip()))
    else:
        nTwarning("Failed to find %s in getPostscriptFileNames" % path)
    return result
def importCyanaStereoFile( project, stereoFileName, convention ):
    """Import stereo assignments from CYANA.
    Return project or None on error.

    CYANA stereo file:

    var info echo
    echo:=off
    info:=none

    atom stereo "HB2  HB3  509"   # GLU-
    atom stereo "QG1  QG2  511"   # VAL
    atom stereo "HB2  HB3  513"   # HIS
    atom stereo "QG1  QG2  514"   # VAL
    atom stereo "HG2  HG3  516"   # GLU-
    atom stereo "HA1  HA2  519"   # GLY
    """
    if project.molecule == None:
        return None
    molecule = project.molecule
    atomDict = molecule.getAtomDict(convention)
    count = 0
    for line in AwkLike( stereoFileName, minNF=5 ):
        if line.dollar[1] == 'atom' and line.dollar[2] == 'stereo':
            resnum = int( line.dollar[5].strip('"') )
            for i in [3, 4]:
                atm = None
                t = (resnum, line.dollar[i].strip('"'))
                if atomDict.has_key(t):
                    atm = atomDict[t]
                # atm = molecule.decodeNameTuple( (convention, 'A', resnum, line.dollar[i].strip('"')) )
                if atm == None:
                    nTerror('importCyanaStereoFile: atom %s; line %d (%s)\n',
                            line.dollar[i], line.NR, line.dollar[0] )
                else:
                    atm.stereoAssigned = True
                    count += 1
                    #print atm.nameTuple()
                    # Val, Leu methyls: carbon implicit in CYANA defs
                    if atm.residue.db.name in ['VAL', 'LEU'] and atm.isMethylProton():
                        heavy = atm.heavyAtom()
                        heavy.stereoAssigned = True
                        count += 1
                        #print heavy.nameTuple()
                    #end if
                #end if
            #end for
        #end if
    #end for
    nTmessage('==> Derived %d stereo assignments from "%s"', count, stereoFileName )
    return project
def readFile(self, tabFile):
    """
    Read table from tabFile.
    """
    # nTmessage('==> Reading nmrPipe table file ... ' )
    for line in AwkLike(tabFile, minNF=1, commentString='#'):
        if line.dollar[1] == 'REMARK' and line.NF > 1:
            self.remarks.append(line.dollar[2:])
        elif line.dollar[1] == 'VARS':
            for v in line.dollar[2:]:
                self.addColumn(name=v)
            #end for
        elif line.dollar[1] == 'FORMAT':
            i = 0
            for f in line.dollar[2:]:
                self.columnDefs[i].fmt = f
                i += 1
            #end for
        elif line.dollar[1] == 'DATA' and line.NF > 3:
            self.data[line.dollar[2]] = line.dollar[3:]
        elif line.NF == len(self.columnDefs):
            row = self.addRow()
            for i in range(0, line.NF):
                col = self.columnDefs[i]
                if line.dollar[i + 1] == self.noneIndicator:
                    row[col.name] = None
                else:
                    # derive conversion function from fmt field
                    if col.fmt[-1:] in ['f', 'e', 'E', 'g', 'G']:
                        func = float
                    elif col.fmt[-1:] in ['d', 'o', 'x', 'X']:
                        func = int
                    else:
                        func = str
                    #end if
                    row[col.name] = func(line.dollar[i + 1])
                #end if
            #end for
        #end if
    #end for
    self.tabFile = tabFile
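# Minimal sketch of the conversion rule readFile() applies above; this helper
# is an illustration only and not part of the original module. printf-style
# float formats map to float(), integer formats to int(), anything else to str().
def _fmtToFunc(fmt):
    if fmt[-1:] in ['f', 'e', 'E', 'g', 'G']:
        return float
    if fmt[-1:] in ['d', 'o', 'x', 'X']:
        return int
    return str

# e.g. _fmtToFunc('%8.3f')('1.25') -> 1.25 and _fmtToFunc('%4d')('42') -> 42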
def parseShiftxOutput( fileName, molecule, chainId ):
    """
    Parse shiftx generated output (gv_version!).
    Store result in shiftx attribute (which is a NTlist type) of each atom.

    format file:

    # Entries marked with a * may have inaccurate shift predictions.
    # Entries marked with a value < -600 should be ignored
      501   H  N      116.3173
      501   H  CA      55.4902
      501   H  CB      29.9950
      501   H  C      169.8446
      501   H  H        8.4401

    or in 1y4o:
        1   G  N      109.7404
        1   G  CA      45.2787

    or in 1afp:
       10   K  HZ3      3.7795 # A HZ3 that might not be present.

    Return True on error; e.g. when the file is absent.
    """
    if not os.path.exists(fileName):
        nTerror("Failed to find %s" % fileName)
        return True
    atomDict = molecule.getAtomDict(IUPAC, chainId)
    for line in AwkLike( fileName, commentString = '#', minNF = 4 ):
        if line.float(4) != -666.000:
            lineCol1 = int(line.dollar[1].strip('*'))
            if chainId != None:
                atm = molecule.decodeNameTuple( (IUPAC, chainId, lineCol1, line.dollar[3]) )
            else:
                atm = None
                if atomDict.has_key( (lineCol1, line.dollar[3]) ):
                    atm = atomDict[ (lineCol1, line.dollar[3]) ]
                #end if
            # if not atm:
            #     atm = molecule.decodeNameTuple( (IUPAC, None, lineCol1, line.dollar[3]), fromCYANA2CING=True )
            if not atm:
                pass
                # nTerror('parseShiftxOutput: chainId [%s] line %d (%s)', chainId, line.NR, line.dollar[0] )
                # happens for all LYS without HZ3.
            else:
                atm.shiftx.append( line.float(4) )
def analyzeWattosLog(logFile):
    """
    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.
    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    result = [None, None, 0, 0, 0, 0]
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None
    for r in AwkLike(logFile):
        line = r.dollar[0]
        if line.startswith(prefixError):
            result[2] += 1
        elif line.startswith(prefixWarning):
            result[3] += 1
        elif line.startswith(prefixDebug):
            result[5] += 1
        else:
            result[4] += 1
            # Wattos took (#ms): 2332
            if line.startswith('Wattos took'): # TODO: check.
                nTdebug("Matched line: %s" % line)
                timeTakenStr = r.dollar[r.NF - 1]
                result[0] = float(timeTakenStr)
                if result[0]:
                    result[0] /= 1000. # get seconds
                nTdebug("Found time: %s" % timeTakenStr)
            elif line.startswith('Exception in thread'): # TODO: check.
                nTdebug("Matched line: %s" % line)
                result[1] = True
        # end else
    return result
def parseResult(self):
    """
    Get summary.
    Parse procheck .rin files and store result in procheck NTdict of each residue of mol.
    """
    path = os.path.join(self.rootPath, sprintf('%s.sum', self.molecule.name))
    if not os.path.exists(path):
        nTerror('gvProcheck.parseResult: %s not found', path)
    else:
        fp = open(path, 'r')
        self.summary = ''.join(fp.readlines())
        fp.close()
    #end if
    for i in range(1, self.molecule.modelCount + 1):
        path = os.path.join(self.rootPath, sprintf('%s_%03d.rin', self.molecule.name, i))
        #print '> parsing >', path
        for line in AwkLike(path, minLength=64, commentString="#"):
            result = self._parseProcheckLine(line.dollar[0])
            chain = result['chain']
            resNum = result['resNum']
            residue = self.molecule.decodeNameTuple((cing.PDB, chain, resNum, None))
            if not residue:
                nTerror('Procheck.parseResult: residue not found (%s,%d)', chain, resNum)
            else:
                residue.setdefault('procheck', NTstruct())
                for field, value in result.iteritems():
                    residue.procheck.setdefault(field, NTlist())
                    residue.procheck[field].append(value)
                #end for
            #end if
            del(result)
        #end for
    #end for
def initBMRB(project, bmrbFile, moleculeName=None):
    """
    Initialize from edited BMRB file.
    Return molecule instance.
    """
    mol = Molecule(name=moleculeName)
    project.appendMolecule(mol)
    error = False
    record = None
    for record in AwkLike(bmrbFile, minNF=8, commentString='#'):
        resName = record.dollar[3]
        resNum = record.int(2)
        atomName = record.dollar[4]
        # shift  = record.float(6)
        # serror = record.float(7)
        # ambig  = record.int(8)
        res = mol.addResidue(Chain.defaultChainId, resName, resNum, IUPAC)
        if not res:
            nTerror('Error initBMRB: invalid residue %s %s line %d (%s)\n',
                    resName, atomName, record.NR, record.dollar[0])
            error = True
        #end if
    #end for
    error = error or (project.importBMRB(bmrbFile) == None)
    if error:
        nTmessage('==> initBMRB: completed with error(s)')
    else:
        nTmessage('==> initBMRB: successfully parsed %d lines from %s',
                  record.NR, record.FILENAME)
    #end if
    nTmessage("%s", mol.format())
    if error:
        return None
    return mol
def analyzeCingLog(logFile):
    """
    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.
    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    result = [None, None, 0, 0, 0, 0]
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None
    for r in AwkLike(logFile):
        line = r.dollar[0]
        if line.startswith(prefixError):
            result[2] += 1
        elif line.startswith(prefixWarning):
            result[3] += 1
        elif line.startswith(prefixDebug):
            result[5] += 1
        else:
            result[4] += 1
            if line.startswith('CING took :'):
                # nTdebug("Matched line: %s" % line)
                timeTakenStr = r.dollar[r.NF - 1]
                result[0] = float(timeTakenStr)
                # nTdebug("Found time: %s" % result[0])
            elif line.startswith('Traceback (most recent call last)'):
                # nTdebug("Matched line: %s" % line)
                result[1] = True
        # end else
    return result
def analyzeFcLog(logFile):
    """
    Returns [timeTaken, crashed, nr_error, nr_warning, nr_message, nr_debug]
    Return None on error.
    The numbers of lines should add up to the total number of lines.
    For a specific other log file type look at the example in cing.Scripts.FC.utils
    """
    result = [None, None, 0, 0, 0, 0]
    if not logFile:
        nTerror("logFile %s was not specified." % logFile)
        return None
    if not os.path.exists(logFile):
        nTerror("logFile %s was not found." % logFile)
        return None
    for r in AwkLike(logFile):
        line = r.dollar[0]
        line = line.lower()
        if line.count('error'):
            result[2] += 1
        elif line.count('warning'):
            result[3] += 1
        elif line.count('debug'):
            result[5] += 1
        else:
            result[4] += 1
            # if line.startswith('CING took :'):
            #     # nTdebug("Matched line: %s" % line)
            #     timeTakenStr = r.dollar[r.NF - 1]
            #     result[0] = float(timeTakenStr)
            if line.startswith('traceback (most recent call last)'): # watch out: this needs to be lowercase here.
                # nTdebug("Matched line: %s" % line)
                result[1] = True
        # end else
    return result
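# Hedged usage sketch (not in the original source): the analyze*Log routines
# all share the return contract [timeTaken, crashed, nr_error, nr_warning,
# nr_message, nr_debug], so they can be dispatched from one table. The log
# file names below are hypothetical.
def _exampleAnalyzeLogs():
    for analyzeFunc, logFile in [(analyzeCingLog, 'entry.log'),
                                 (analyzeFcLog, 'fc.log'),
                                 (analyzeWattosLog, 'wattos.log')]:
        result = analyzeFunc(logFile)
        if result is None:
            continue
        nTmessage("%s: time %s crashed %s errors %d" % (
            logFile, result[0], result[1], result[2]))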
def parseOutput(config, project, parameters):
    """
    Parse the output in the Jobs directory.
    parameters is a NTxplor instance.

    Return None on error or results on success.
    """
    nTmessage("\n-- parseOutput --")
    xplor = Xplor(config, parameters, project=project, outPath=config.directories.refined)

    logFileNameFmt = 'refine_%d.log'
    resultFileName = 'parsedOutput.txt'
    bestModelsFileNameFmt = 'best%dModels.txt'
    bestModelsParameterName = 'bestModels'
    bestModels = parameters.best                  # Integer
    allPreviousModels = parameters.models         # String
    allPreviousModelCount = parameters.bestAnneal # Integer
    if getDeepByKeysOrAttributes( parameters, USE_ANNEALED_STR):
        logFileNameFmt = 'anneal_%d.log'
        resultFileName = 'parsedAnnealOutput.txt'
        bestModelsFileNameFmt = 'best%dModelsAnneal.txt'
        bestModelsParameterName = 'models'
        bestModels = parameters.bestAnneal
        allPreviousModels = parameters.modelsAnneal
        allPreviousModelCount = parameters.modelCountAnneal
    nTdebug( 'logFileNameFmt:          %s' % logFileNameFmt)
    nTdebug( 'resultFileName:          %s' % resultFileName)
    nTdebug( 'bestModelsFileNameFmt:   %s' % bestModelsFileNameFmt)
    nTdebug( 'bestModelsParameterName: %s' % bestModelsParameterName)
    nTdebug( 'bestModels:              %s (int)' % bestModels)
    nTdebug( 'allPreviousModels:       %s (string)' % allPreviousModels)
    nTdebug( 'allPreviousModelCount:   %s (int)' % allPreviousModelCount)

    results = NTlist()
    keys = ['model', 'eTotal', 'Enoe', 'NOErmsd', 'NOEnumber', 'NOEbound1', 'NOEviol1',
            'NOEbound2', 'NOEviol2', 'DIHEDrmsd', 'DIHEDnumber', 'DIHEDbound', 'DIHEDviol']

    # parse all output files
    for i in asci2list(allPreviousModels):
        file = xplor.checkPath(xplor.directories.jobs, logFileNameFmt % i)
        nTmessage('==> Parsing %s', file)
        data = NTdict()
        for key in keys:
            data[key] = None
        data.model = i
        foundEnergy = 0
        foundNOE1 = 0
        foundNOE2 = 0
        foundDIHED = 0
        awkf = AwkLike(file)
        for line in awkf:
            # nTdebug("line: %s" % line.dollar[0])
            if (not foundEnergy) and find(line.dollar[0], '--------------- cycle= 1 ----------------') >= 0:
                awkf.next()
                # nTdebug("Getting total energy from line: %s" % line.dollar[0])
                data['eTotal'] = float(line.dollar[0][11:22])
                awkf.next()
                # nTdebug("Getting NOE energy from line: %s" % line.dollar[0])
                if line.dollar[0].count("E(NOE"): # Dirty hack; use regexp next time.
                    # nTdebug("Bingo")
                    data['Enoe'] = float(line.dollar[0][68:75])
                else:
                    awkf.next()
                    # nTdebug("Getting NOE energy (try 2) from line: %s" % line.dollar[0])
                    data['Enoe'] = float(line.dollar[0][68:75])
                # end if
                foundEnergy = 1
            elif (not foundNOE1) and find(line.dollar[0], 'NOEPRI: RMS diff. =') >= 0:
                data['NOErmsd'] = float(line.dollar[5][:-1])
                data['NOEbound1'] = float(line.dollar[7][:-2])
                data['NOEviol1'] = int(line.dollar[8])
                foundNOE1 = 1
            elif (not foundNOE2) and find(line.dollar[0], 'NOEPRI: RMS diff. =') >= 0:
                data['NOEbound2'] = float(line.dollar[7][:-2])
                data['NOEviol2'] = int(line.dollar[8])
                data['NOEnumber'] = float(line.dollar[10])
                foundNOE2 = 1
            elif (not foundDIHED) and find(line.dollar[0], 'Number of dihedral angle restraints=') >= 0:
                data['DIHEDnumber'] = int(line.dollar[6])
                awkf.next()
                data['DIHEDbound'] = float(line.dollar[6][:-1])
                data['DIHEDviol'] = int(line.dollar[7])
                awkf.next()
                data['DIHEDrmsd'] = float(line.dollar[3])
                foundDIHED = 1
            #endif
        #end for
        eTotal = getDeepByKeysOrAttributes( data, 'eTotal' )
        if eTotal == None:
            nTwarning("Failed to read energy for model: %s (probably crashed/stopped)." % i)
            continue
        results.append(data)
    #end for i

    # Since the parse above might have omissions, check here how many results may continue.
    resultCount = len(results)
    if allPreviousModelCount > resultCount:
        nTwarning("Will only consider %s results." % resultCount)
    elif allPreviousModelCount != resultCount:
        nTwarning("Got more results (%s) than expected input (%s). Will use all results." % (
            resultCount, allPreviousModelCount))
    # end if

    # sort the results
    if parameters.sortField in keys:
        # nTdebug("Now sorting on field: %s" % parameters.sortField)
        # The CompareDict-based sort failed at some point and was not much in use. Removed.
        NTsort( results, parameters.sortField, inplace=True )
    else:
        parameters.sortField = None
    #endif

    # print results to file and screen
    resultFile = open(xplor.joinPath(resultFileName), 'w')
    msg = '\n=== Results: sorted on "%s" ===' % parameters.sortField
    nTmessage( msg )
    fprintf(resultFile, msg + '\n')
    fmt = '%-11s '
    for k in keys:
        nTmessageNoEOL(fmt % str(k))
        fprintf(resultFile, fmt, str(k))
    #end for
    nTmessage('')
    fprintf(resultFile, '\n')
    for data in results:
        for k in keys:
            value = val2Str(getDeepByKeysOrAttributes(data, k), fmt, count=11)
            nTmessageNoEOL(value)
            fprintf(resultFile, fmt, value)
        #end for
        nTmessage('')
        fprintf(resultFile, '\n')
    #end for

    # best results to put in parameter file.
    resultCountBest = min( resultCount, bestModels )
    if resultCountBest > 0:
        msgLine = '\n=== Averages best %d models ===' % resultCountBest
        nTmessage(msgLine)
        fprintf(resultFile, msgLine + '\n' )
        for key in keys:
            getKey = Key(key)
            values = map(getKey, results[:resultCountBest])
            av, sd, dummy_n = nTaverage(values)
            msgLine = '%-12s: %10.3f +/- %-10.3f' % ( key, av, sd)
            nTmessage(msgLine)
            fprintf(resultFile, msgLine + '\n')
        #end for
        nTmessage('\n')
        fprintf(resultFile, '\n\n')

        fname = xplor.joinPath(bestModelsFileNameFmt % resultCountBest)
        f = open(fname, 'w')
        parameters[bestModelsParameterName] = ''
        for i in range(resultCountBest):
            fprintf(f, '%s/%s\n', xplor.outPath, xplor.baseName % results[i].model)
            parameters[bestModelsParameterName] = '%s%s,' % (
                parameters[bestModelsParameterName], results[i].model)
        #end for
        f.close()
        parameters[bestModelsParameterName] = parameters[bestModelsParameterName][:-1] # Remove trailing comma.
        nTmessage('==> Best %d models (%s) listed in %s\n', resultCountBest,
                  parameters[bestModelsParameterName], fname)
    else:
        parameters[bestModelsParameterName] = allPreviousModels
    #end if
    resultFile.close()
    parameters.toFile(xplor.joinPath(PARAMETERS_FILE_NAME))
    return results
def parseResult(self):
    """
    Parse procheck .rin and .edt files and store result in procheck NTdict
    of each residue of molecule.
    Return True on error.
    """
    # nTdebug("Starting pc parseResult")
    # modelCount = self.molecule.modelCount
    modelCount = self.getMaxModelCount()
    # nTdebug("==> Parsing procheck results")
    # if modelCount > MAX_PROCHECK_NMR_MODELS:
    #     nTwarning("Limiting number of models analyzed from %d to %d" % (modelCount, MAX_PROCHECK_NMR_MODELS))
    #     modelCount = MAX_PROCHECK_NMR_MODELS

    # reset the procheck dictionary of each residue
    for res in self.molecule.allResidues():
        if res.has_key(PROCHECK_STR):
            del (res[PROCHECK_STR])
        res.procheck = ProcheckResidueResult(res)
    #end for

    for i in range(1, modelCount + 1):
        modelCountStr = "%03d" % i
        # special case in procheck_nmr
        if modelCount == 1:
            # special case for different versions Alan vs Jurgen...
            # JFD adds: this fails with my pc. Adding enabling code to handle both.
            modelCountStr = "000"
        path = os.path.join(self.rootPath, '%s_%s.rin' % (self.molecule.name, modelCountStr))
        if not os.path.exists(path):
            # nTdebug('Procheck.parseResult: file "%s" not found; assuming it was the pc -server- version.', path)
            modelCountStr = "***"
            path = os.path.join(self.rootPath, '%s_%s.rin' % (self.molecule.name, modelCountStr))
        if not os.path.exists(path):
            nTerror('Procheck.parseResult: file "%s" not found', path)
            return True

        for line in AwkLike(path, minLength=64, commentString="#"):
            # nTdebug("working on line: %s" % line.dollar[0])
            result = self._parseProcheckLine(line.dollar[0], self.procheckDefs)
            if not result:
                nTerror("Failed to parse procheck rin file on the below line; giving up.")
                nTerror(line.dollar[0])
                return True
            chain = result['chain']
            resNum = result['resNum']
            residue = self.molecule.decodeNameTuple((None, chain, resNum, None))
            if not residue:
                nTerror('in Procheck.parseResult: residue not found (%s,%d); giving up.' % (chain, resNum))
                return True
            # nTdebug("working on residue %s" % residue)
            for field, value in result.iteritems():
                if not self.procheckDefs[field][3]: # Checking store parameter.
                    continue
                # Insert an empty NTlist for key "field" if missing.
                residue.procheck.setdefault(field, NTlist())
                # nTdebug("For residue %s field %s found value %s" % (residue, field, value))
                residue.procheck[field].append(value)
                # nTdebug("field %s has values: %s" % (field, residue.procheck[field]))
            #end for result
        #end for line
    #end for

    path = os.path.join(self.rootPath, '%s.edt' % self.molecule.name)
    if not os.path.exists(path):
        nTerror('Procheck.parseResult: file "%s" not found', path)
        return True
    # nTdebug('> parsing edt >' + path)
    for line in AwkLike(path, minLength=64, commentString="#"):
        result = self._parseProcheckLine(line.dollar[0], self.procheckEnsembleDefs)
        if not result:
            nTerror("Failed to parse procheck edt file on the below line; giving up.")
            nTerror(line.dollar[0])
            return True
        chain = result['chain']
        resNum = result['resNum']
        residue = self.molecule.decodeNameTuple((None, chain, resNum, None))
        if not residue:
            nTerror('Procheck.parseResult: residue not found (%s,%d); giving up.' % (chain, resNum))
            return True
        #end if
        for field, value in result.iteritems():
            if not self.procheckEnsembleDefs[field][3]: # Checking store parameter.
                continue
            # end if
            # Truncate for those rare instances (< 10 for > 9,000 entries) where a
            # pc gf phipsi can be extremely high, e.g.:
            #     1b64 SER 82 34.36
            # This might be an installation bug, as its value in PDBe is normal.
            if value and (field in gf_LIST_STR):
                if value > PCgFactorMaxErrorValue:
                    nTwarning("A pc g-factor for %s of %s will be truncated to %s" % (
                        field, value, PCgFactorMaxErrorValue))
                    value = PCgFactorMaxErrorValue
                # end if
            # end if
            residue.procheck[field] = value
        #end for
    #end for

    for field in gf_LIST_STR:
        resultList = NTlist()
        for residue in self.molecule.allResidues():
            value = residue.getDeepByKeys(PROCHECK_STR, field)
            # nTdebug("For residue %s field %s found value %s" % (residue, field, value))
            resultList.append(value)
        averageTuple = resultList.average()
        # nTdebug("resultList %s" % resultList)
        # Nones will be ignored. An empty list will return (None, ...).
        if averageTuple: # get average only
            self.molecule[PROCHECK_STR][field] = averageTuple[0]
        else:
            nTwarning("No average over molecule obtained in procheck for %s" % field)

    # summary
    path = os.path.join(self.rootPath, '%s.sum' % self.molecule.name)
    if not os.path.exists(path):
        nTerror('Procheck.parseResult: file "%s" not found', path)
        return True
    # nTdebug('> parsing sum >' + path)
    text = open(path, 'r').read()
    # nTdebug('got: \n' + text)
    if text:
        self.summary = ProcheckSummaryResult(text, self.molecule, self.ranges)
    else:
        nTerror('Procheck.parseResult: Failed to read and parse Procheck_nmr summary file (%s)', path)
        return True
    #end if
    self.postProcess()
    self.fileList = self.getPostscriptFileNames()
    return False
def getCingAnnoEntryInfo(self):
    """Returns True for error.
    Checks the completeness and errors from annotation.
    """
    max_link_errors = 20 # VpR247Cheshire had 16 termini etc. problems that can be ignored.
    max_chain_mapping_errors = 1
    max_any_errors = 2 * max_link_errors + max_chain_mapping_errors

    nTmessage("Get the entries tried, todo, crashed, and stopped from file system.")
    self.entry_anno_list_obsolete = NTlist()
    self.entry_anno_list_tried = NTlist()
    self.entry_anno_list_crashed = NTlist()
    self.entry_anno_list_stopped = NTlist() # mutually exclusive from entry_list_crashed
    self.entry_anno_list_done = NTlist()
    self.entry_anno_list_todo = NTlist()

    cwdCache = os.getcwd()
    os.chdir(baseDir)
    subDirList = os.listdir(DATA_STR)
    subDirList.sort()
    for subDir in subDirList:
        if len(subDir) != 2:
            if subDir != DS_STORE_STR:
                nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
            continue
        entryList = os.listdir(os.path.join(DATA_STR, subDir))
        for entryDir in entryList:
            entry_code = entryDir
            if entry_code.startswith("."):
                # nTdebug('Skipping hidden file: [' + entry_code + ']')
                continue
            if entry_code.endswith("Org") or entry_code.endswith("Test"):
                # nTdebug('Skipping original entry: [' + entry_code + ']')
                continue
            entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
            # if not entry_code in self.entry_list_nrg_docr:
            #     nTwarning("Found entry %s in NRG-CING but not in NRG. Will be obsoleted in NRG-CING too" % entry_code)
            #     if len(self.entry_list_obsolete) < self.ENTRY_DELETED_COUNT_MAX:
            #         rmdir(entrySubDir)
            #         self.entry_list_obsolete.append(entry_code)
            #     else:
            #         nTerror("Entry %s in NRG-CING not obsoleted since there were already removed: %s" % (
            #             entry_code, self.ENTRY_DELETED_COUNT_MAX))
            # # end if
            # cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
            # if not os.path.exists(cingDirEntry):
            #     nTmessage("Failed to find directory: %s" % cingDirEntry)
            #     continue

            # Look for last log file
            logList = glob(entrySubDir + '/log_doAnno*/*.log')
            if not logList:
                nTmessage("Failed to find any log file in directory: %s" % entrySubDir)
                continue
            # A log file is present, so a start was tried; it might not have finished.
            self.entry_anno_list_tried.append(entry_code)
            logLastFile = logList[-1]
            # nTdebug("Found logLastFile %s" % logLastFile)
            # set timeTaken = (` grep 'CING took :' $logFile | gawk '{print $(NF-1)}' `)
            # text = readTextFromFile(logLastFile)
            entryCrashed = False
            linkErrorList = []
            chainMappingErrorList = []
            anyErrorList = []
            for r in AwkLike(logLastFile):
                line = r.dollar[0]
                if line.startswith('CING took :'):
                    # nTdebug("Matched line: %s" % line)
                    timeTakenStr = r.dollar[r.NF - 1]
                    self.timeTakenDict[entry_code] = float(timeTakenStr)
                    # nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                if line.startswith('Traceback (most recent call last)'):
                    # nTdebug("Matched line: %s" % line)
                    if entry_code in self.entry_anno_list_crashed:
                        nTwarning("%s was already found before; not adding again." % entry_code)
                    else:
                        self.entry_anno_list_crashed.append(entry_code)
                        entryCrashed = True
                if line.count('ERROR:'):
                    nTerror("Matched line: %s" % line)
                # Ignore the errors for pseudo atoms, e.g. in CGR26ALyon.
                # Hopefully " .Q" is unique enough; tested well.
                hasPseudoErrorListed = line.count(" .Q")
                if line.count("Error: Not linking atom"):
                    if not hasPseudoErrorListed:
                        linkErrorList.append(line)
                if line.count("Error: no chain mapping"):
                    chainMappingErrorList.append(line)
                lineLower = line.lower()
                hasApiErrorListed = line.count('ApiError: ccp.nmr.NmrConstraint.DistanceConstraintItem.__init__:')
                if lineLower.count("error"):
                    if not (hasPseudoErrorListed or hasApiErrorListed):
                        anyErrorList.append(line)
                if line.count('Aborting'):
                    nTdebug("Matched line: %s" % line)
                    entryCrashed = True
                    if entry_code in self.entry_anno_list_crashed:
                        nTwarning("%s was already found before; not adding again." % entry_code)
                    else:
                        self.entry_anno_list_crashed.append(entry_code)
            if entryCrashed:
                continue # don't mark it as stopped anymore.

            linkErrorListCount = len(linkErrorList)
            if linkErrorListCount > max_link_errors:
                nTerror("%-25s has more than %s link errors; %s" % (entry_code, max_link_errors, linkErrorListCount))
                entryCrashed = True
            chainMappingListCount = len(chainMappingErrorList)
            if chainMappingListCount > max_chain_mapping_errors:
                nTerror("%-25s has more than %s chain mapping errors; %s" % (entry_code, max_chain_mapping_errors, chainMappingListCount))
                entryCrashed = True
            anyErrorListCount = len(anyErrorList)
            if anyErrorListCount > max_any_errors:
                nTerror("%-25s has more than %s errors of any kind; %s" % (entry_code, max_any_errors, anyErrorListCount))
                entryCrashed = True
            if entryCrashed:
                continue # don't mark it as stopped anymore.

            if not self.timeTakenDict.has_key(entry_code):
                # was stopped by time out or by user or by system (any other type of stop but stack trace)
                nTmessage("%s Since CING end message was not found assumed to have stopped" % entry_code)
                self.entry_anno_list_stopped.append(entry_code)
                continue
            # Look for the end statement from CING which shows it wasn't killed before it finished.
            ccpnFileEntry = os.path.join(entrySubDir, "%s.tgz" % entry_code)
            if not os.path.exists(ccpnFileEntry):
                nTmessage("%s Since ccpn file %s was not found assumed to have stopped" % (entry_code, ccpnFileEntry))
                self.entry_anno_list_stopped.append(entry_code)
                continue
            self.entry_anno_list_done.append(entry_code)
        # end for entryDir
    # end for subDir

    timeTakenList = NTlist() # local variable.
    timeTakenList.addList(self.timeTakenDict.values())
    nTmessage("Time taken by CING by statistics\n%s" % timeTakenList.statsFloat())
    if not self.entry_anno_list_tried:
        nTerror("Failed to find entries that CING tried.")
    self.entry_anno_list_todo.addList(self.entry_anno_list_all)
    self.entry_anno_list_todo = self.entry_anno_list_todo.difference(self.entry_anno_list_done)
    nTmessage("Found %s entries overall for annotation." % len(self.entry_anno_list_all))
    nTmessage("Found %s entries that CING tried (T)." % len(self.entry_anno_list_tried))
    nTmessage("Found %s entries that CING crashed/failed (C)." % len(self.entry_anno_list_crashed))
    nTmessage("Found %s entries that CING stopped (S)." % len(self.entry_anno_list_stopped))
    if not self.entry_anno_list_done:
        nTerror("Failed to find entries that CING did.")
    nTmessage("Found %s entries that CING did (B=A-C-S)." % len(self.entry_anno_list_done))
    nTmessage("Found %s entries todo (A-B)." % len(self.entry_anno_list_todo))
    nTmessage("Found %s entries obsolete (not removed yet)." % len(self.entry_anno_list_obsolete))
    nTmessage("Found entries todo:\n%s" % self.entry_anno_list_todo)
    os.chdir(cwdCache)
def getCingEntriesTriedAndDone(self):
    "Returns a (tried, done, crashed) tuple of lists, or None on error."
    nTdebug("From disk get the entries done in CASP-NMR-CING")
    entry_list_tried = []
    entry_list_done = []
    entry_list_crashed = []
    nTdebug("Now in: " + os.getcwd())
    subDirList = os.listdir(DATA_STR)
    for subDir in subDirList:
        if len(subDir) != 2:
            if subDir != DS_STORE_STR:
                nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
            continue
        entryList = os.listdir(os.path.join(DATA_STR, subDir))
        for entryDir in entryList:
            entry_code = entryDir
            if entry_code == DS_STORE_STR:
                continue
            entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
            cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
            if not os.path.exists(cingDirEntry):
                continue
            logFileValidate = 'log_validateEntryForCasp'
            for logFile in (logFileValidate, 'log_storeCING2db'):
                # Look for last log file
                logList = glob(entrySubDir + '/%s/*.log' % logFile)
                if not logList:
                    nTmessage("Failed to find any log file in subdirectory of: %s" % entrySubDir)
                    continue
                # .cing directory and .log file present, so a start was tried; it might not have finished.
                # self.entry_anno_list_tried.append(entry_code)
                if logFile == logFileValidate:
                    entry_list_tried.append(entry_code)
                logLastFile = logList[-1]
                entryCrashed = False
                entryWithErrorMessage = False
                for r in AwkLike(logLastFile):
                    line = r.dollar[0]
                    if line.startswith('CING took :'):
                        # nTdebug("Matched line: %s" % line)
                        timeTakenStr = r.dollar[r.NF - 1]
                        self.timeTakenDict[entry_code] = float(timeTakenStr)
                        # nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                    if line.startswith('Traceback (most recent call last)'):
                        # nTdebug("Matched line: %s" % line)
                        if entry_code in entry_list_crashed:
                            nTwarning("%s was already found before; not adding again." % entry_code)
                        else:
                            entry_list_crashed.append(entry_code)
                            entryCrashed = True
                    if line.count('ERROR:'):
                        nTerror("Matched line: %s" % line)
                        entryWithErrorMessage = True
                    if line.count('Aborting'):
                        nTdebug("Matched line: %s" % line)
                        entryCrashed = True
                        if entry_code in entry_list_crashed:
                            nTwarning("%s was already found before; not adding again." % entry_code)
                        else:
                            entry_list_crashed.append(entry_code)
                if entryWithErrorMessage:
                    nTerror("Above for entry: %s" % entry_code)
                if entryCrashed:
                    continue # don't mark it as stopped anymore.
                indexFileEntry = os.path.join(cingDirEntry, "index.html")
                if os.path.exists(indexFileEntry):
                    entry_list_done.append(entry_code)
                # end if index exists
            # end for log
        # end for entry
    # end for subdir
    return (entry_list_tried, entry_list_done, entry_list_crashed)
def __init__(self, seqFile, protFile, convention):
    NTdict.__init__(self)
    #print '>', seqFile, protFile
    self.error = 0

    # parse the seqFile
    self.seq = {}
    resNum = 1
    self.resCount = 0
    for f in AwkLike(seqFile, commentString='#'):
        #print '>>', f.dollar[0]
        if not f.isEmpty() and not f.isComment('#'):
            if f.dollar[1] in CYANA_NON_RESIDUES: # skip the bloody CYANA non-residue stuff
                pass
            elif not NTdb.isValidResidueName(f.dollar[1], convention):
                nTerror('Xeasy: residue "%s" invalid for convention "%s" in "%s:%d"',
                        f.dollar[1], convention, seqFile, f.NR)
                self.error = 1
            else:
                if f.NF > 1:
                    resNum = f.int(2)
                    if resNum == None:
                        self.error = 1
                    #end if
                #endif
                self.seq[resNum] = f.dollar[1] # store original 'convention' name
                resNum += 1
                self.resCount += 1
            #end if
        #end if
    #end for
    self.seqFile = seqFile
    self.convention = convention

    # parse the prot file
    self.prot = {}
    self.protCount = 0
    for f in AwkLike(protFile, commentString='#'):
        if f.NF == 5:
            # Xeasy/Cyana atom index
            index = f.int(1)
            atomName = f.dollar[4]
            resNum = f.int(5)
            if resNum not in self.seq:
                nTwarning('Xeasy: undefined residue number %d in "%s:%d" (%s)' % (
                    resNum, protFile, f.NR, f.dollar[0]))
                self.error = 1
            else:
                resName = self.seq[resNum]
                if not NTdb.isValidAtomName(resName, atomName, convention):
                    nTwarning('Xeasy parsing "%s:%d": invalid atom "%s" for residue %s%d' % (
                        protFile, f.NR, atomName, resName, resNum))
                    self.error = 1
                else:
                    p = NTdict(index=index, shift=f.float(2), error=f.float(3),
                               atomName=atomName, resNum=resNum, resName=resName, atom=None)
                    self.prot[index] = p
                    self.protCount += 1
                #end if
            #end if
        #end if
    #end for
    self.protFile = protFile
    nTmessage('Xeasy.__init__: parsed %d residues, %d atoms from %s, %s',
              self.resCount, self.protCount, self.seqFile, self.protFile)
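# Hedged usage sketch (the file names are assumptions): constructing the Xeasy
# parser from a CYANA .seq/.prot pair and checking its error flag before use.
def _exampleXeasy():
    xeasy = Xeasy('protein.seq', 'protein.prot', 'CYANA')
    if xeasy.error:
        nTerror("Xeasy parse failed for %s / %s", xeasy.seqFile, xeasy.protFile)
        return None
    return xeasy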
def getCingEntryInfo(self):
    """Returns True for error.

    Will remove entry directories if they do not occur in the PDB, up to a
    maximum number, so as not to wipe out everything in a single blow by accident.
    """
    nTmessage("Get the entries tried, todo, crashed, and stopped in PDB-CING from file system.")
    self.entry_list_obsolete = NTlist()
    self.entry_list_tried = NTlist()
    self.entry_list_crashed = NTlist()
    self.entry_list_stopped = NTlist() # mutually exclusive from entry_list_crashed
    self.entry_list_done = NTlist()
    self.entry_list_todo = NTlist()

    subDirList = os.listdir(DATA_STR)
    for subDir in subDirList:
        if len(subDir) != 2:
            if subDir != DS_STORE_STR:
                nTdebug('Skipping subdir with other than 2 chars: [' + subDir + ']')
            continue
        entryList = os.listdir(os.path.join(DATA_STR, subDir))
        for entryDir in entryList:
            entry_code = entryDir
            if not is_pdb_code(entry_code):
                if entry_code != DS_STORE_STR:
                    nTerror("String doesn't look like a pdb code: " + entry_code)
                continue
            # nTdebug("Working on: " + entry_code)
            entrySubDir = os.path.join(DATA_STR, subDir, entry_code)
            if not entry_code in self.entry_list_pdb:
                nTwarning("Found entry %s in PDB-CING but not in PDB. Will be obsoleted in PDB-CING too" % entry_code)
                if len(self.entry_list_obsolete) < self.entry_to_delete_count_max:
                    rmdir(entrySubDir)
                    self.entry_list_obsolete.append(entry_code)
                else:
                    nTerror("Entry %s in PDB-CING not obsoleted since the maximum number were already removed: %s" % (
                        entry_code, self.entry_to_delete_count_max))
            # end if
            cingDirEntry = os.path.join(entrySubDir, entry_code + ".cing")
            if not os.path.exists(cingDirEntry):
                nTmessage("Failed to find directory: %s" % cingDirEntry)
                continue
            # Look for last log file
            logList = glob(entrySubDir + '/log_validateEntry/*.log')
            if not logList:
                nTmessage("Failed to find any log file in directory: %s" % entrySubDir)
                continue
            # .cing directory and .log file present, so a start was tried; it might not have finished.
            self.entry_list_tried.append(entry_code)
            logLastFile = logList[-1]
            # nTdebug("Found logLastFile %s" % logLastFile)
            # set timeTaken = (` grep 'CING took :' $logFile | gawk '{print $(NF-1)}' `)
            # text = readTextFromFile(logLastFile)
            entryCrashed = False
            for r in AwkLike(logLastFile):
                line = r.dollar[0]
                if entryCrashed:
                    nTdebug(line)
                if line.startswith('CING took :'):
                    # nTdebug("Matched line: %s" % line)
                    timeTakenStr = r.dollar[r.NF - 1]
                    self.timeTakenDict[entry_code] = float(timeTakenStr)
                    # nTdebug("Found time: %s" % self.timeTakenDict[entry_code])
                if line.startswith('Traceback (most recent call last)'):
                    nTdebug("%s Matched line: %s" % (entry_code, line))
                    if entry_code in self.entry_list_crashed:
                        nTwarning("%s was already found before; not adding again." % entry_code)
                    else:
                        self.entry_list_crashed.append(entry_code)
                        entryCrashed = True
            # end for AwkLike
            if entryCrashed:
                continue # don't mark it as stopped anymore.
            if not self.timeTakenDict.has_key(entry_code):
                # was stopped by time out or by user or by system (any other type of stop but stack trace)
                nTmessage("%s Since CING end message was not found assumed to have stopped" % entry_code)
                self.entry_list_stopped.append(entry_code)
                continue
            # Look for the end statement from CING which shows it wasn't killed before it finished.
            indexFileEntry = os.path.join(cingDirEntry, "index.html")
            if not os.path.exists(indexFileEntry):
                nTmessage("%s Since index file %s was not found assumed to have stopped" % (entry_code, indexFileEntry))
                self.entry_list_stopped.append(entry_code)
                continue
            projectHtmlFile = os.path.join(cingDirEntry, entry_code, "HTML/index.html")
            if not os.path.exists(projectHtmlFile):
                nTmessage("%s Since project html file %s was not found assumed to have stopped" % (entry_code, projectHtmlFile))
                self.entry_list_stopped.append(entry_code)
                continue
            if False: # Default is True
                molGifFile = os.path.join(cingDirEntry, entry_code, "HTML/mol.gif")
                if not os.path.exists(molGifFile):
                    nTmessage("%s Since mol.gif file %s was not found assumed to have stopped" % (entry_code, molGifFile))
                    self.entry_list_stopped.append(entry_code)
                    continue
            self.entry_list_done.append(entry_code)
        # end for entryDir
    # end for subDir

    timeTakenList = NTlist() # local variable.
    timeTakenList.addList(self.timeTakenDict.values())
    nTmessage("Time taken by CING by statistics\n%s" % timeTakenList.statsFloat())
    if not self.entry_list_tried:
        nTerror("Failed to find entries that CING tried.")
    self.entry_list_todo.addList(self.entry_list_pdb)
    self.entry_list_todo = self.entry_list_todo.difference(self.entry_list_done)
    nTmessage("Found %s entries that CING tried (T)." % len(self.entry_list_tried))
    nTmessage("Found %s entries that CING crashed (C)." % len(self.entry_list_crashed))
    nTmessage("Found %s entries that CING stopped (S)." % len(self.entry_list_stopped))
    if not self.entry_list_done:
        nTerror("Failed to find entries that CING did.")
    nTmessage("Found %s entries that CING did (B=A-C-S)." % len(self.entry_list_done))
    nTmessage("Found %s entries todo (A-B)." % len(self.entry_list_todo))
    nTmessage("Found %s entries in PDB-CING made obsolete." % len(self.entry_list_obsolete))
def parseResult(self):
    """
    Parse .dssp files and store result in dssp NTdict of each residue of mol.
    Return True on error.
    """
    modelCount = self.molecule.modelCount
    # nTdebug("Parse dssp files and store result in each residue for " + repr(modelCount) + " model(s)")
    for model in range(modelCount):
        fullnameOut = 'model_%03d.dssp' % model
        path = os.path.join(self.rootPath, fullnameOut)
        if not os.path.exists(path):
            nTerror('Dssp.parseResult: file "%s" not found', path)
            return True
        # nTmessage("Parsing " + path)
        isDataStarted = False
        for line in AwkLike(path):
            if line.dollar[0].find("RESIDUE AA STRUCTURE BP1 BP2") >= 0:
                isDataStarted = True
                continue
            if not isDataStarted:
                continue
            # nTdebug("working on line: %s" % line.dollar[0])
            if not len(line.dollar[0][6:10].strip()):
                # nTdebug('Skipping line without residue number')
                continue
            result = self._parseLine(line.dollar[0], self.dsspDefs)
            if not result:
                nTerror("Failed to parse dssp file on the below line; giving up.")
                nTerror(line.dollar[0])
                return True
            chain = result['chain']
            resNum = result['resNum']
            residue = self.molecule.decodeNameTuple((None, chain, resNum, None))
            if not residue:
                nTerror('in Dssp.parseResult: residue not found (%s,%d); giving up.' % (chain, resNum))
                return True
            # For the first model reset the dssp dictionary in the residue
            if model == 0 and residue.has_key('dssp'):
                del (residue['dssp'])
            residue.setdefault('dssp', NTdict())
            # nTdebug("working on residue %s" % residue)
            for field, value in result.iteritems():
                if not self.dsspDefs[field][3]: # Checking store parameter.
                    continue
                # Insert an empty NTlist for key "field" if missing.
                residue.dssp.setdefault(field, NTlist())
                residue.dssp[field].append(value)
                # nTdebug("field %s has values: %s" % (field, residue.dssp[field]))
            #end for
        #end for
    #end for
    for residue in self.molecule.allResidues():
        if residue.has_key(DSSP_STR):
            # residue[DSSP_STR].consensus = residue[DSSP_STR].secStruct.setConsensus(CONSENSUS_SEC_STRUCT_FRACTION)
            residue[DSSP_STR].consensus = residue[DSSP_STR].secStruct.setConsensus(useLargest=True)
            residue[DSSP_STR].keysformat()
    #end for
    self.project.status.dssp.parsed = True
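# Hedged sketch (not in the original source): after parseResult() each residue
# carries per-model DSSP values plus the consensus set above; this shows one
# way to read them back.
def _exampleDsspConsensus(molecule):
    for residue in molecule.allResidues():
        if residue.has_key(DSSP_STR):
            nTmessage("%s consensus secondary structure: %s" % (
                residue, residue[DSSP_STR].consensus))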
)
(options, args) = parser.parse_args()
#print options
#print args

if options.doc:
    parser.print_help(file=sys.stdout)
    print __doc__
    sys.exit(0)

parser.check_required('-o')

if options.modelList != None:
    files = []
    for line in AwkLike(options.modelList):
        files.append(line.dollar[1])
    #end for
else:
    files = args
#endif

# convention
convention = 'PDB'
if options.convention != None:
    convention = options.convention
#end if

verbose = 1
#=======================================================================
def importUpl( project, uplFile, convention, lower = 0.0 ):
    """
    Read Cyana upl file.
    Return a DistanceRestraintList or None on error.
    """
    #print 'Convention: ' + convention
    maxErrorCount = 50
    errorCount = 0
    # check the molecule
    if not project or not project.molecule:
        nTerror("importUpl: initialize molecule first")
        return None
    #end if
    molecule = project.molecule # Sometimes set from other than CYANA coordinate file.
    # chainId = molecule.chains[0].name # assumed unknown, right?

    if not os.path.exists( uplFile ):
        nTerror('importUpl: file "%s" not found', uplFile)
        return None
    #end if

    _dir, name, _ext = nTpath( uplFile )
    result = project.distances.new( name=name, status='keep')
    atomDict = molecule.getAtomDict(convention)
    for line in AwkLike( uplFile, commentString="#", minNF=7 ):
        # if line.isComment():
        #     # nTdebug("Skipping upl file line with comment: [" + line.dollar[0] + ']')
        #     continue
        # if line.NF < 7:
        #     # nTdebug("Skipping upl file line with too few fields: [" + line.dollar[0] + ']')
        #     continue
        atmIdxList = [[1, 3], [4, 6]]
        atmList = []
        for atmIdx in atmIdxList:
            # nTdebug("Doing atmIdx: " + repr(atmIdx))
            t = (line.int(atmIdx[0]), line.dollar[atmIdx[1]])
            atm = None
            if atomDict.has_key(t):
                atm = atomDict[t]
            # atm = molecule.decodeNameTuple( (convention, None, line.int(atmIdx[0]), line.dollar[atmIdx[1]]),
            #                                 fromCYANA2CING=True)
            if not atm:
                if errorCount <= maxErrorCount:
                    nTerror('Failed to decode for atom %s; line: %s', t, line.dollar[0] )
                if errorCount == maxErrorCount + 1:
                    nTerror("And so on")
                errorCount += 1
                continue
            atmList.append( atm )
        if len(atmList) != 2:
            continue
        # Unpack convenience variables.
        atm1 = atmList[0]
        atm2 = atmList[1]
        # nTdebug("atom 1: " + repr(atm1))
        # nTdebug("atom 2: " + repr(atm2))
        upper = line.float(7)
        # An upper bound of zero marks an ambiguous restraint: append the pair to the last one.
        if upper == 0:
            result().appendPair( (atm1, atm2) )
            continue
        if not upper:
            nTerror("Skipping line without valid upper bound on line: [" + line.dollar[0] + ']')
            continue
        r = DistanceRestraint( atomPairs=[(atm1, atm2)], lower=lower, upper=upper )
        result.append( r )
        # also store the Candid info if present
        if line.NF >= 9:
            r.peak = line.int( 9 )
        if line.NF >= 11:
            r.SUP = line.float( 11 )
        if line.NF >= 13:
            r.QF = line.float( 13 )
    #end for
    if errorCount:
        nTerror("Found number of errors importing upl file: %s" % errorCount)
    # nTmessage("Imported upl items: " + repr(len(result)))
    nTmessage('==> importUpl: new %s from "%s"', result, uplFile )
    return result
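# Hedged usage sketch (the file name is hypothetical): importing CYANA
# upper-distance limits into an initialized project.
def _exampleImportUpl(project):
    distanceRestraintList = importUpl(project, 'final.upl', 'CYANA2')
    if distanceRestraintList is None:
        nTerror("importUpl failed")
        return None
    nTmessage("Imported %d distance restraints" % len(distanceRestraintList))
    return distanceRestraintList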
def importFromBMRB(project, bmrbFile):
    """
    Import chemical shifts from edited BMRB file.
    No reassigned pseudo atoms yet.
    Return molecule instance or None on error.
    """
    if not project.molecule:
        nTerror("Error importBMRB: no molecule defined")
        return None
    #end if
    mol = project.molecule
    mol.newResonances(source=bmrbFile)

    error = False
    # Earlier 8-column format:
    # for f in AwkLike( bmrbFile, minNF = 8, commentString = '#' ):
    #     resName  = f.dollar[3]
    #     resNum   = f.int(2)
    #     atomName = f.dollar[4]
    #     shift    = f.float(6)
    #     serror   = f.float(7)
    #     _ambig   = f.int(8)
    for f in AwkLike(bmrbFile, minNF=9, commentString='#'):
        resName = f.dollar[4]
        resNum = f.int(2)
        atomName = f.dollar[5]
        shift = f.float(7)
        serror = f.float(8)
        _ambig = f.int(9)
        atm = mol.decodeNameTuple((IUPAC, Chain.defaultChainId, resNum, atomName))
        if not atm:
            nTerror('Error initBMRB: invalid atom %s %s line %d (%s)',
                    resName, atomName, f.NR, f.dollar[0])
            error = True
        else:
            atm.resonances().value = shift
            atm.resonances().error = serror
            if _ambig == 1 and atm.isProChiral():
                atm.stereoAssigned = True
        #end if
    #end for

    # now fix the assignments:
    for atm in mol.allAtoms():
        # Check if all realAtoms are assigned in case there is a pseudo atom
        if atm.isAssigned(resonanceListIdx=RESONANCE_LIST_IDX_ANY) and \
           not atm.isStereoAssigned() and atm.hasPseudoAtom():
            fix = False
            pseudo = atm.pseudoAtom()
            for a in pseudo.realAtoms():
                if not a.isAssigned(resonanceListIdx=RESONANCE_LIST_IDX_ANY):
                    fix = True
                    break
                #end if
            #end for
            if fix:
                pseudo.resonances().value = atm.resonances().value
                pseudo.resonances().error = atm.resonances().error
                atm.resonances().value = NaN
                atm.resonances().error = NaN
                nTmessage('Deassigned %s, assigned %s', atm, pseudo)
            #end if
        #end if
    #end for
    if error:
        nTerror('==> importFromBMRB: completed with error(s)')
    else:
        nr = getDeepByKeysOrAttributes(f, 'NR') # pylint: disable=W0631
        nTmessage('==> importFromBMRB: successfully parsed %d lines from %s', nr, bmrbFile)
    #end if
    if error:
        return None
    return mol
def importAco( project, acoFile ):
    """Read Cyana acoFile
    ( 512 THR PSI 116.0 148.0)
    convention = CYANA or CYANA2

    Return a DihedralRestraintList or None on error.
    """
    maxErrorCount = 50
    errorCount = 0
    # check the molecule
    if not project or not project.molecule:
        nTerror("importAco: initialize molecule first")
        return None
    #end if
    molecule = project.molecule # Sometimes set from other than CYANA coordinate file.
    # chainId = molecule.chains[0].name

    if not os.path.exists( acoFile ):
        nTerror('importAco: file "%s" not found\n', acoFile)
        return None
    #end if

    _dir, name, _ext = nTpath( acoFile )
    result = project.dihedrals.new( name=name, status='keep')
    resNumDict = molecule.getResNumDict()
    nTmessage("Now reading: " + acoFile)
    for line in AwkLike( acoFile, commentString = '#', minNF = 5):
        resNum = line.int(1)
        res = None
        if resNum in resNumDict:
            res = resNumDict[resNum]
        angle = line.dollar[3]
        lower = line.float(4)
        upper = line.float(5)
        if res and angle in res.db:
            atoms = translateTopology( res, res.db[angle].atoms )
            # print '>', atoms, res, res.db[angle]
            if None in atoms:
                if errorCount <= maxErrorCount:
                    nTerror("Failed to decode all atoms from line:" + line.dollar[0])
                if errorCount == (maxErrorCount + 1):
                    nTerror("And so on")
                errorCount += 1
                continue
            else:
                r = DihedralRestraint( atoms = atoms, lower=lower, upper=upper,
                                       angle = angle, residue = res )
                #print r.format()
                result.append( r )
            #end if
        #end if
    #end for
    if errorCount:
        nTerror("Found number of errors importing aco file: %s" % errorCount)
    # nTmessage("Imported items: " + repr(len(result)))
    nTmessage('==> importAco: new %s from "%s"', result, acoFile )
    return result
def importPeaks(self, molecule, peakFile, status='keep'):
    """Read Xeasy peak file.
    Return a PeakList instance or None on error.

    JFD: description of the XEASY peak list format:

       43   1.760   3.143 1 T  0.000e+00  0.00e+00 -  0 2260 2587 0
       46   1.649   4.432 1 T  1.035e+05  0.00e+00 r  0 2583 2257 0
        ^   ^ chemical shifts   ^ height   ^ height      ^ resonance ids
        peak id                               deviation

    A resonance id of zero means unassigned; the remaining fields are of
    unknown meaning.
    """
    #print '>>', molecule, peakFile
    self.map2molecule(molecule)
    _path, name, _ext = nTpath(peakFile)
    peaks = PeakList(name=name, status=status)
    dimension = 0
    # f stands for field.
    for f in AwkLike(peakFile):
        if f.NR == 1 and f.NF == 5:
            dimension = f.int(5)
        elif not f.isComment('#'):
            # if f.NF == 12:
            #     dimension = 2
            # elif f.NF == 13 or f.NF == 14:
            #     dimension = 3
            # else:
            #     nTerror('Xeasy.importPeaks: invalid number of fields (%d) in file "%s" on line %d (%s)',
            #             f.NF, peakFile, f.NR, f.dollar[0])
            #     return None
            # #end if
            if not dimension:
                nTerror('Xeasy.importPeaks: invalid dimensionality in file "%s" (line %d, "%s")' % (
                    peakFile, f.NR, f.dollar[0]))
                return None
            #end if
            cur = 1
            # preserve the Xeasy peak id
            peakId = f.int(cur)
            if peakId == None:
                return None
            cur += 1
            peakpos = []
            for _i in range(X_AXIS, dimension):
                p = f.float(cur)
                if p == None:
                    return None
                peakpos.append(p)
                cur += 1
            #end for
            cur += 2 # skip two fields
            height = f.float(cur)
            if height == None:
                return None
            cur += 1
            heightError = f.float(cur)
            if heightError == None:
                return None
            cur += 1
            resonances = []
            error = 0
            cur += 2 # skip two fields
            for _i in range(X_AXIS, dimension):
                aIndex = f.int(cur)
                if aIndex == None:
                    return None
                cur += 1
                # 0 means unassigned according to Xeasy convention
                if aIndex == 0:
                    resonances.append(None)
                else:
                    if not aIndex in self.prot:
                        nTerror('Xeasy.importPeaks: invalid atom id %d on line %d (%s)',
                                aIndex, f.NR, f.dollar[0])
                        error = 1
                        break
                    else:
                        atom = self.prot[aIndex].atom
                        if atom != None:
                            resonances.append(atom.resonances())
                        else:
                            resonances.append(None)
                        #end if
                    #end if
            #end for
            if not error:
                peak = Peak(dimension=dimension,
                            positions=peakpos,
                            height=height, heightError=heightError,
                            resonances=resonances)
                # store original peak id
                peak.xeasyIndex = peakId
                peaks.append(peak)
            #end if
        #end if
    #end for
    nTmessage('Xeasy.importPeaks: extracted %d peaks from %s', len(peaks), peakFile)
    return peaks
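# Hedged usage sketch: reading an XEASY peak list through a parsed Xeasy
# instance (see __init__ above); the molecule and file name are assumptions.
def _exampleImportPeaks(xeasy, molecule):
    peaks = xeasy.importPeaks(molecule, 'noesy.peaks', status='keep')
    if peaks is None:
        nTerror("importPeaks failed")
        return None
    return peaks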