def reportError(self, msg, d ): """ Report error. @param msg: error message @type msg: str @param d: error data @type d: any """ try: s = '%s on %s, job %r\n' % (msg, os.uname()[1], d) s += '\nErrorTrace:\n' + T.lastErrorTrace() + '\n' self.errorLog.add( s ) try: print msg except: pass except Exception, why: f = open('ErrorReportError_XRefineSlave','a') f.write( str(why) ) try: f.write( T.lastErrorTrace() ) except: pass f.close()
def reportError(self, msg, d): """ Report error. @param msg: error message @type msg: str @param d: error data @type d: any """ try: s = '%s on %s, job %r\n' % (msg, os.uname()[1], d) s += '\nErrorTrace:\n' + T.lastErrorTrace() + '\n' self.errorLog.add(s) try: print msg except: pass except Exception, why: f = open('ErrorReportError_XRefineSlave', 'a') f.write(str(why)) try: f.write(T.lastErrorTrace()) except: pass f.close()
def reportError(self, msg, window ): """ Report errors. @param msg: error message @type msg: str @param window: start and end of two frame chunks within the whole trajectory @type window: ((int, int),(int,int)) """ try: s = '%s on %s, frames %s \n' % \ (msg, os.uname()[1], str(window) ) s += '\nErrorTrace:\n' + T.lastErrorTrace() + '\n' try: print s except: pass self.errorLog.add( s ) except Exception, why: f = open('ErrorReportError_TrajFlexSlave','a') f.write( str(why) ) try: f.write( T.lastErrorTrace() ) except: pass f.close()
def update(self, model, source, skipRes=None, updateMissing=0, force=0, headPatterns=[]): """ Update empty or missing fields of model from the source. The model will be connected to the source via model.source. Profiles that are taken from the source are labeled 'changed'=0. The same holds for coordinates (xyzChanged=0). However, existing profiles or coordinates or fields remain untouched. @param model: existing model @type model: PDBModel @param source: source PDB file or pickled PDBModel or PDBModel object @type source: str || file || PDBModel @param skipRes: list residue names that should not be parsed @type skipRes: [ str ] @param updateMissing: check source for additional profiles [0] @type updateMissing: 1|0 """ try: if force or updateMissing or self.needsUpdate(model): s = T.load(source) super(PDBParsePickle, self).update(model, s, skipRes=skipRes, updateMissing=updateMissing, force=force) except Exception, why: print T.lastErrorTrace() raise PDBParserError, "Cannot unpickle source model from %s, "\ % str(source) + "Reason:\n" + str(why)
def reportError(self, msg, id): try: try: print msg except: pass msg = 'trouble with ' + msg s = '%s on %s, run %s\n' % (msg, os.uname()[1], id) s += '\Error:' + T.lastError() s += '\nErrorTrace:\n' + T.lastErrorTrace() + '\n' s += '\n' self.errorLog.add(s) except Exception, why: f = open('ErrorReportError_AmberEntropySlave', 'a') f.write(str(type(why))) try: f.write(T.lastErrorTrace()) except: pass f.close()
def reportError(self, msg, id ): try: try: print msg except: pass msg = 'trouble with ' + msg s = '%s on %s, run %s\n' % (msg, os.uname()[1], id) s += '\Error:' + T.lastError() s += '\nErrorTrace:\n' + T.lastErrorTrace() + '\n' s += '\n' self.errorLog.add( s ) except Exception, why: f = open('ErrorReportError_AmberEntropySlave','a') f.write( str(type(why)) ) try: f.write( T.lastErrorTrace() ) except: pass f.close()
def fatal(self, message):
    """
    Handle a fatal error (likely a bug), stop program execution.

    @param message: message to be given to user
    @type  message: str

    @raise FatalError:
    """
    report = '\nFatal Error: ' + str(message)
    report += '\n\t' + T.lastError() + '\n'
    report += 'TraceBack: \n' + T.lastErrorTrace() + '\n'

    self.log.add(report)
    raise FatalError
def fatal(self, message):
    """
    Handle a fatal error (likely a bug), stop program execution.

    @param message: message to be given to user
    @type  message: str

    @raise FatalError:
    """
    parts = ['\nFatal Error: ' + str(message),
             '\n\t' + T.lastError() + '\n',
             'TraceBack: \n' + T.lastErrorTrace() + '\n']

    self.log.add(''.join(parts))
    raise FatalError
def error(self, message):
    """
    Handle a normal error (like non-existing file) that is not
    necessarily a bug.

    @param message: message to be given to user
    @type  message: str

    @raise NormalError:
    """
    report = '\nError: ' + str(message)
    report += '\n\t' + T.lastError()
    report += '\nTraceBack: \n' + T.lastErrorTrace() + '\n'

    self.log.add(report)
    raise NormalError
def error(self, message):
    """
    Handle a normal error (like non-existing file) that is not
    necessarily a bug.

    @param message: message to be given to user
    @type  message: str

    @raise NormalError:
    """
    parts = ['\nError: ' + str(message),
             '\n\t' + T.lastError(),
             '\nTraceBack: \n' + T.lastErrorTrace() + '\n']

    self.log.add(''.join(parts))
    raise NormalError
def reportError(self, msg, soln): """ Report any errors to log @param msg: error message @type msg: str @param soln: solution number for complex giving the error @type soln: int """ try: s = '%s on %s, soln %i\n' % (msg, os.uname()[1], soln) s += '\t' + T.lastError() + '\n' s += 'TraceBack: \n' + T.lastErrorTrace() + '\n' f = open(self.ferror, 'a') f.write(s) f.close() except: f = open('ErrorReportError_ContactSlave', 'a') f.write('') f.close()
def reportError(self, msg, soln ): """ Report any errors to log @param msg: error message @type msg: str @param soln: solution number for complex giving the error @type soln: int """ try: s = '%s on %s, soln %i\n' % (msg, os.uname()[1], soln) s += '\t' + T.lastError() + '\n' s += 'TraceBack: \n' + T.lastErrorTrace() + '\n' f = open( self.ferror, 'a' ) f.write( s ) f.close() except: f = open('ErrorReportError_ContactSlave','a') f.write('') f.close()
def warning(self, message, error=1, trace=0):
    """
    Issue a warning. No exception is raised.

    @param message: message to be given to user
    @type  message: str
    @param error: report Exception with line (default: 1)
    @type  error: 1||0
    @param trace: report full back trace to exception (default: 0)
    @type  trace: 1||0
    """
    text = '\nWarning (ignored): ' + str(message)
    try:
        ## requesting a trace implies reporting the exception itself
        if trace:
            error = 1
        if error:
            text += '\n\t' + T.lastError() + '\n'
        if trace:
            text += '\nTraceBack: \n' + T.lastErrorTrace() + '\n'
    except:
        ## no exception pending -- report the plain message only
        pass

    self.log.add(text)
def warning(self, message, error=1, trace=0):
    """
    Issue a warning. No exception is raised.

    @param message: message to be given to user
    @type  message: str
    @param error: report Exception with line (default: 1)
    @type  error: 1||0
    @param trace: report full back trace to exception (default: 0)
    @type  trace: 1||0
    """
    report = '\nWarning (ignored): ' + str(message)

    try:
        error = error or trace  ## trace implies the exception line too
        if error:
            report += '\n\t' + T.lastError() + '\n'
        if trace:
            report += '\nTraceBack: \n' + T.lastErrorTrace() + '\n'
    except:
        ## lastError()/lastErrorTrace() failed -- log the message alone
        pass

    self.log.add(report)
complex_lst += sub os.unlink( f ) t.dump( complex_lst, options['o'] ) else: subLst = checkListStatus(complex_lst, update, force, version ) if subLst: ## initialize nodes, and start distributed calculation master = ContactMaster(complex_lst, int( options['c'] ), cpus_all[:host_number], refComplex = refComplex, updateOnly = update, force = force, niceness = nice_dic, outFile = options['o'], com_version = version, show_output = show_x, add_hosts = add_hosts) master.start() else: t.flushPrint( "\n #### Nothing to update! #### " ) except IOError, why: t.errWriteln("IOError while working on %s:" % t.absfile(options['i']) \ + str(why) ) t.errWriteln( t.lastErrorTrace() )
fin = t.absfile(sys.argv[1]) fout = fin + '_backup' exit = 0 try: o = t.load(fin) print "%s looks alright.\nnothing to be done." % fin exit = 1 except ImportError: pass except AttributeError: pass except: print "Something else is wrong with %s:" % fin print t.lastErrorTrace() exit = 1 if exit: sys.exit() print fin, '->', fout s = open(fin).read() s2 = s.replace('cBiskit.PDBDope\narray_constructor', 'cNumeric\narray_constructor') s2 = s2.replace('cBiskit.PDBModel\narray_constructor', 'cNumeric\narray_constructor')
def localPSIBlast(self, seqFile, db, method='blastp', resultOut=None, e='0.001', **kw): """ Performa a local psi-blast search (requires that the blast binaries and databases are installed localy). Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search @param seqFile: file name with search sequence in FASTA format @type seqFile: str @param db: database(s) to search e.g. ['swissprot', 'pdb'] @type db: [str] @param e: expectation value cutoff (default: 0.001) @type e: float @param resultOut: save blast output to this new file @type resultOut: str @param kw: optional keywords:: --- New Blast+ routine --- (see NcbipsiblastCommandline) num_iterations Number of passes (default 1). matrix Matrix to use (default BLOSUM62). --- old blastall routine --- --- Scoring --- matrix Matrix to use (default BLOSUM62). gap_open Gap open penalty (default 11). gap_extend Gap extension penalty (default 1). window_size Multiple hits window size (default 40). npasses Number of passes (default 1). passes Hits/passes (Integer 0-2, default 1). --- Algorithm --- gapped Whether to do a gapped alignment (T/F, default T). wordsize Word size (default 3). keep_hits Number of beset hits from a region to keep (def 0) xdrop Dropoff value (bits) for gapped alignments (def 15) hit_extend Threshold for extending hits (default 11). nbits_gapping Number of bits to trigger gapping (default 22). pseudocounts Pseudocounts constants for multiple passes (def 9). xdrop_final X dropoff for final gapped alignment (default 25). xdrop_extension Dropoff for blast extensions (default 7). model_threshold E-value threshold to include in multipass model (default 0.005). required_start Start of required region in query (default 1). required_end End of required region in query (default -1). --- Processing --- filter Filter query sequence with SEG? (T/F, default F) believe_query Believe the query defline? (T/F, default F) nprocessors Number of processors to use (default 1). 
--- Formatting --- alignments Number of alignments (default 250). @type kw: any @raise BlastError: if program call failes """ ## the following should work for new Blast+ tools: #from Bio.Blast.Applications import NcbipsiblastCommandline #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT #blastx_cline = NcbipsiblastCommandline(query=seqFile, #db=db, #evalue=e, #outfmt=5, #out=resultOut, #**kw) #stdout, stderr = blastx_cline() #parsed = NCBIXML.parse( results ).next() #self.__blast2dict( parsed, db ) results = err = None resultOut = resultOut or self.outFolder + self.F_BLAST_RAW_OUT kw = self.__dictvalues2str(kw) e = str(e) try: results, err = NCBIStandalone.blastpgp( settings.psi_blast_bin, db, seqFile, program='blastpgp', align_view='7', ## XML output expectation=e, **kw) results = self.__copyFileHandle(results, resultOut) err = self.__copyFileHandle(err, self.outFolder + self.F_BLAST_ERROR) if self.verbose: self.log.writeln('Raw blast output copied to: ' + resultOut) parsed = NCBIXML.parse(results).next() self.__blast2dict(parsed, db) except Exception, why: self.log.add(T.lastErrorTrace()) globals().update(locals()) self.log.writeln('local namespace is pushed into global ') raise BlastError(str(why))
def localBlast(self, seqFile, db, method='blastp', resultOut=None, e='0.01', **kw): """ Performa a local blast search (requires that the blast binaries and databases are installed localy). Uses Bio.Blast.NCBIStandalone.blastall (Biopython) for the search. @param seqFile: file name with search sequence in FASTA format @type seqFile: str @param db: database(s) to search, e.g. ['swissprot', 'pdb'] @type db: [str] @param method: search program to use, e.g. 'blastp', 'fasta' (default: blastp) @type method: str @param e: expectation value cutoff @type e: float @param resultOut: save blast output to this new file @type resultOut: str @param kw: optional keywords:: --- Scoring --- matrix Matrix to use (default BLOSUM62). gap_open Gap open penalty (default 0). gap_extend Gap extension penalty (default 0). --- Algorithm --- gapped Whether to do a gapped alignment. T/F (default T) wordsize Word size (blastp default 11). keep_hits Number of best hits from a region to keep (default off). xdrop Dropoff value (bits) for gapped alignments (blastp default 25). hit_extend Threshold for extending hits (blastp default 11) --- Processing --- filter Filter query sequence? (T/F, default F) restrict_gi Restrict search to these GI's. believe_query Believe the query defline? (T/F, default F) nprocessors Number of processors to use (default 1). --- Formatting --- alignments Number of alignments. 
(default 250) @type kw: any @raise BlastError: if program call failes """ results = err = p = None resultOut = resultOut or self.outFolder + self.F_BLAST_RAW_OUT kw = self.__dictvalues2str(kw) e = str(e) try: if self.verbose: self.log.add('running blast...') results, err = NCBIStandalone.blastall( settings.blast_bin, method, db, seqFile, expectation=e, align_view='7', ## XML output **kw) results = self.__copyFileHandle(results, resultOut) err = self.__copyFileHandle(err, self.outFolder + self.F_BLAST_ERROR) if self.verbose: self.log.writeln('Raw blast output copied to: ' + resultOut) parsed = NCBIXML.parse(results).next() self.__blast2dict(parsed, db) except Exception, why: self.log.add(T.lastErrorTrace()) globals().update(locals()) self.log.writeln('local namespace is pushed into global ') raise BlastError(str(why))
def remoteBlast(self, seqFile, db, method, e='0.01', **kw): """ Perform a remote BLAST search using the QBLAST server at NCBI. Uses Bio.Blast.NCBIWWW.qblast (Biopython) for the search @param seqFile: file name with search sequence as FASTA @type seqFile: str @param db: database(s) to search in, e.g. ['swissprot', 'pdb'] @type db: [str] @param method: search method, e.g. 'blastp', 'fasta' @type method: str @param e: expectation value cutoff @type e: float @param kw: optional keywords:: program BLASTP, BLASTN, BLASTX, TBLASTN, or TBLASTX. database Which database to search against. sequence The sequence to search. ncbi_gi TRUE/FALSE whether to give 'gi' identifier. (default: FALSE) descriptions Number of descriptions to show. Def 500. alignments Number of alignments to show. Def 500. expect An expect value cutoff. Def 10.0. matrix Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45). filter 'none' turns off filtering. Default uses 'seg' or 'dust'. format_type 'HTML', 'Text', 'ASN.1', or 'XML'. Def. 'HTML @type kw: any @note: Using the remoteBlast is asking for trouble, as every change in the output file might kill the parser. If you still want to use remoteBlast we strongly recomend that you install BioPython from CVS. Information on how to do this can be found on the BioPython homepage. @todo: Remote Blasting is running as expected but sequences are still retrieved from a local database. Implement remote collection of fasta seuqences from NCBI (there should be something alike in Biopython). 
Otherwise something like this will also work:: ## collect the entry with gi 87047648 url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=text&db=protein&dopt=FASTA&dispmax=20&uid=87047648' import urllib handle = urllib.urlopen(url) lines = handle.readlines() seq = '' for i in range(len(lines)): if re.match( "[<]pre[>][<]div class='recordbody'[>]{2}gi", l[i] ): i+= 1 while not re.match( "^[<]/pre[>][<]/div[>]", l[i]): seq += l[i][:-1] i+= 1 print seq """ kw = self.__dictvalues2str(kw) e = str(e) try: fasta = SeqIO.parse(open(seqFile), "fasta") query = fasta.next() if self.verbose: self.log.add('starting blast query...') results = NCBIWWW.qblast(program=method, database=db, sequence=query, expect=e, ncbi_gi='FALSE', **kw) results = self.__copyFileHandle( results, self.outFolder + self.F_BLAST_RAW_OUT) if self.verbose: self.log.writeln('Raw blast output copied to: ' +\ self.outFolder + self.F_BLAST_RAW_OUT ) parsed = NCBIXML.parse(results).next() self.__blast2dict(parsed, db, remote=True) except Exception, why: self.log.add(T.lastErrorTrace()) globals().update(locals()) self.log.writeln('local namespace is pushed into global ') raise BlastError(str(why))
if 'log' in options: options['log'] = LogFile(options['log']) ## create a complex com = inputComplex(options) dg = DelphiBindingEnergy(com, **options) r = dg.run() print "Saving result complex to ", f_ocom T.dump(dg.delphicom, f_ocom) print "Final Result" print "============" print report(dg.delphicom) f = open(f_out, 'w') f.write(report(dg.delphicom)) f.close() print "energy values written to ", f_out except KeyError, why: print 'Insufficient options. Missing: ', (str(why)) _use(options) except Exception, why: print "There was an error..." print T.lastError() print T.lastErrorTrace()
print "Saving result complex to ", f_ocom T.dump( dg.delphicom, f_ocom ) print "Final Result" print "============" print report( dg.delphicom ) f = open( f_out, 'w' ) f.write( report( dg.delphicom ) ) f.close() print "energy values written to ", f_out except KeyError, why: print 'Insufficient options. Missing: ', (str(why)) _use( options ) except Exception, why: print "There was an error..." print T.lastError() print T.lastErrorTrace()
os.unlink(f) t.dump(complex_lst, options['o']) else: subLst = checkListStatus(complex_lst, update, force, version) if subLst: ## initialize nodes, and start distributed calculation master = ContactMaster(complex_lst, int(options['c']), cpus_all[:host_number], refComplex=refComplex, updateOnly=update, force=force, niceness=nice_dic, outFile=options['o'], com_version=version, show_output=show_x, add_hosts=add_hosts) master.start() else: t.flushPrint("\n #### Nothing to update! #### ") except IOError, why: t.errWriteln("IOError while working on %s:" % t.absfile(options['i']) \ + str(why) ) t.errWriteln(t.lastErrorTrace())
fin = t.absfile( sys.argv[1] ) fout = fin + '_backup' exit = 0 try: o = t.load( fin ) print "%s looks alright.\nnothing to be done." % fin exit = 1 except ImportError: pass except AttributeError: pass except: print "Something else is wrong with %s:" % fin print t.lastErrorTrace() exit = 1 if exit: sys.exit() print fin, '->', fout s = open( fin ).read() s2 = s.replace( 'cBiskit.PDBDope\narray_constructor', 'cNumeric\narray_constructor' ) s2 = s2.replace( 'cBiskit.PDBModel\narray_constructor', 'cNumeric\narray_constructor' )
def remoteBlast( self, seqFile, db, method, e='0.01', **kw ): """ Perform a remote BLAST search using the QBLAST server at NCBI. Uses Bio.Blast.NCBIWWW.qblast (Biopython) for the search @param seqFile: file name with search sequence as FASTA @type seqFile: str @param db: database(s) to search in, e.g. ['swissprot', 'pdb'] @type db: [str] @param method: search method, e.g. 'blastp', 'fasta' @type method: str @param e: expectation value cutoff @type e: float @param kw: optional keywords:: program BLASTP, BLASTN, BLASTX, TBLASTN, or TBLASTX. database Which database to search against. sequence The sequence to search. ncbi_gi TRUE/FALSE whether to give 'gi' identifier. (default: FALSE) descriptions Number of descriptions to show. Def 500. alignments Number of alignments to show. Def 500. expect An expect value cutoff. Def 10.0. matrix Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45). filter 'none' turns off filtering. Default uses 'seg' or 'dust'. format_type 'HTML', 'Text', 'ASN.1', or 'XML'. Def. 'HTML @type kw: any @note: Using the remoteBlast is asking for trouble, as every change in the output file might kill the parser. If you still want to use remoteBlast we strongly recomend that you install BioPython from CVS. Information on how to do this can be found on the BioPython homepage. @todo: Remote Blasting is running as expected but sequences are still retrieved from a local database. Implement remote collection of fasta seuqences from NCBI (there should be something alike in Biopython). 
Otherwise something like this will also work:: ## collect the entry with gi 87047648 url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=text&db=protein&dopt=FASTA&dispmax=20&uid=87047648' import urllib handle = urllib.urlopen(url) lines = handle.readlines() seq = '' for i in range(len(lines)): if re.match( "[<]pre[>][<]div class='recordbody'[>]{2}gi", l[i] ): i+= 1 while not re.match( "^[<]/pre[>][<]/div[>]", l[i]): seq += l[i][:-1] i+= 1 print seq """ kw = self.__dictvalues2str( kw ) e = str(e) try: fasta = SeqIO.parse( open(seqFile), "fasta" ) query = fasta.next() if self.verbose: self.log.add('starting blast query...') results = NCBIWWW.qblast( program=method, database=db, sequence=query, expect=e, ncbi_gi='FALSE', **kw ) results = self.__copyFileHandle(results, self.outFolder+self.F_BLAST_RAW_OUT) if self.verbose: self.log.writeln('Raw blast output copied to: ' +\ self.outFolder + self.F_BLAST_RAW_OUT ) parsed = NCBIXML.parse( results ).next() self.__blast2dict( parsed, db, remote=True ) except Exception, why: self.log.add( T.lastErrorTrace() ) globals().update( locals() ) self.log.writeln('local namespace is pushed into global ') raise BlastError( str(why) )
class PDBCleaner: """ PDBCleaner performs the following tasks: * remove HETAtoms from PDB * replace non-standard AA by its closest standard AA * remove non-standard atoms from standard AA residues * delete atoms that follow missing atoms (in a chain) * remove multiple occupancy atoms (except the one with highest occupancy) * add ACE and NME capping residues to C- and N-terminals or chain breaks (see capTerminals(), this is NOT done automatically in process()) Usage: ======= >>> c = PDBCleaner( model ) >>> c.process() >>> c.capTerminals( auto=True ) This will modify the model in-place and report changes to STDOUT. Alternatively, you can specify a log file instance for the output. PDBCleaner.process accepts several options to modify the processing. Capping ======= Capping will add N-methyl groups to free C-terminal carboxy ends or Acetyl groups to free N-terminal Amines and will thus 'simulate' the continuation of the protein chain -- a common practice in order to prevent fake terminal charges. The automatic discovery of missing residues is guess work at best. The more conservative approach is to use, for example: >>> c.capTerminals( breaks=1, capC=[0], capN=[2] ) In this case, only the chain break detection is used for automatic capping -- the last residue before a chain break is capped with NME and the first residue after the chain break is capped with ACE. Chain break detection relies on PDBModel.chainBreaks() (via PDBModel.chainIndex( breaks=1 )). The normal terminals to be capped are now specified explicitely. The first chain (not counting chain breaks) will receive a NME C-terminal cap and the third chain of the PDB will receive a N-terminal ACE cap. Note: Dictionaries with standard residues and atom content are defined in Biskit.molUtils. This is a duplicate effort with the new strategy to parse Amber prep files for very similar information (AmberResidueType, AmberResidueLibrary) and should change once we implement a real framework for better residue handling. 
""" #: these atoms always occur at the tip of of a chain or within a ring #: and, if missing, will not trigger the removal of other atoms TOLERATE_MISSING = [ 'O', 'CG2', 'CD1', 'CD2', 'OG1', 'OE1', 'NH1', 'OD1', 'OE1', 'H5T', "O5'", ] ## PDB with ACE capping residue F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb' ## PDB with NME capping residue F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb' def __init__(self, fpdb, log=None, verbose=True): """ @param fpdb: pdb file OR PDBModel instance @type fpdb: str OR Biskit.PDBModel @param log: Biskit.LogFile object (default: STDOUT) @type log: Biskit.LogFile @param verbose: log warnings and infos (default: True) @type verbose: bool """ self.model = PDBModel(fpdb) self.log = log or StdLog() self.verbose = verbose def logWrite(self, msg, force=1): if self.log: self.log.add(msg) else: if force: print msg def remove_multi_occupancies(self): """ Keep only atoms with alternate A field (well, or no alternate). """ if self.verbose: self.logWrite(self.model.pdbCode + ': Removing multiple occupancies of atoms ...') i = 0 to_be_removed = [] for a in self.model: if a['alternate']: try: str_id = "%i %s %s %i" % (a['serial_number'], a['name'], a['residue_name'], a['residue_number']) if a['alternate'].upper() == 'A': a['alternate'] = '' else: if float(a['occupancy']) < 1.0: to_be_removed += [i] if self.verbose: self.logWrite( 'removing %s (%s %s)' % (str_id, a['alternate'], a['occupancy'])) else: if self.verbose: self.logWrite(( 'keeping non-A duplicate %s because of 1.0 ' + 'occupancy') % str_id) except: self.logWrite("Error removing duplicate: " + t.lastError()) i += 1 try: self.model.remove(to_be_removed) if self.verbose: self.logWrite('Removed %i atoms' % len(to_be_removed)) except: if self.verbose: self.logWrite('No atoms with multiple occupancies to remove') def replace_non_standard_AA(self, amber=0, keep=[]): """ Replace amino acids with none standard names with standard amino acids according to L{MU.nonStandardAA} @param 
        amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0]
        @type  amber: 1||0
        @param keep: names of additional residues to keep
        @type  keep: [ str ]
        """
        ## standard names = every key of the atom dictionary plus any extra
        ## names the caller explicitly wants preserved
        standard = MU.atomDic.keys() + keep

        if amber:
            standard.extend(['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'])

        replaced = 0

        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Looking for non-standard residue names...')

        resnames = self.model['residue_name']
        for i in self.model.atomRange():

            resname = resnames[i].upper()

            if resname not in standard:

                ## known non-standard name -> rename to its standard parent
                if resname in MU.nonStandardAA:
                    resnames[i] = MU.nonStandardAA[resname]

                    if self.verbose:
                        self.logWrite('renamed %s %i to %s' % \
                                      (resname, i, MU.nonStandardAA[ resname ]))
                else:
                    ## completely unknown residue name -> force-rename to ALA
                    resnames[i] = 'ALA'

                    self.logWrite('Warning: unknown residue name %s %i: ' \
                                  % (resname, i ) )
                    if self.verbose:
                        self.logWrite('\t->renamed to ALA.')

                ## counts atom records, not residues
                replaced += 1

        if self.verbose:
            self.logWrite('Found %i atoms with non-standard residue names.'% \
                          replaced )


    def __standard_res(self, resname, amber=0):
        """
        Check if resname is a standard residue (according to L{MU.atomDic})
        if not return the closest standard residue (according to
        L{MU.nonStandardAA}).

        @param resname: 3-letter residue name
        @type  resname: str
        @param amber: currently unused by this method
        @type  amber: 1||0

        @return: name of closest standard residue or resname itself
        @rtype: str
        """
        if resname in MU.atomDic:
            return resname

        if resname in MU.nonStandardAA:
            return MU.nonStandardAA[resname]

        ## unknown name: hand it back unchanged
        return resname


    def remove_non_standard_atoms(self):
        """
        First missing standard atom triggers removal of standard atoms that
        follow in the standard order. All non-standard atoms are removed too.
        Data about standard atoms are taken from L{MU.atomDic} and synonym
        atom names are defined in L{MU.atomSynonyms}.

        @return: number of atoms removed
        @rtype: int
        """
        ## per-atom removal mask: 1 -> remove, 0 -> keep
        mask = []

        if self.verbose:
            self.logWrite("Checking content of standard amino-acids...")

        for res in self.model.resList():

            resname = self.__standard_res(res[0]['residue_name']).upper()
            if resname == 'DC5':
                ## no-op branch kept from original code
                pass
            ## bugfix: ignore non-standard residues that have no matching
            ## standard residue
            ## NOTE(review): residues without an MU.atomDic entry append
            ## nothing to `mask`, leaving it shorter than the atom count --
            ## confirm PDBModel.remove() tolerates a short mask.
            if resname in MU.atomDic:

                standard = copy.copy(MU.atomDic[resname])

                ## replace known synonyms by standard atom name
                for a in res:
                    n = a['name']
                    if not n in standard and MU.atomSynonyms.get( n, 0) in standard:
                        a['name'] = MU.atomSynonyms[n]
                        if self.verbose:
                            self.logWrite('%s: renaming %s to %s in %s %i' %\
                                          ( self.model.pdbCode, n, a['name'],
                                            a['residue_name'],
                                            a['residue_number']))

                anames = [a['name'] for a in res]
                keep = 1

                ## kick out all standard atoms that follow a missing one
                ## (once `keep` drops to 0 it never recovers for this residue)
                rm = []
                for n in standard:
                    if (not n in anames) and not (n in self.TOLERATE_MISSING):
                        keep = 0

                    if not keep:
                        rm += [n]

                for n in rm:
                    standard.remove(n)

                ## keep only atoms that are standard (and not kicked out above)
                for a in res:

                    if a['name'] not in standard:
                        mask += [1]
                        if self.verbose:
                            self.logWrite('%s: removing atom %s in %s %i '%\
                                          ( self.model.pdbCode, a['name'],
                                            a['residue_name'],
                                            a['residue_number']))
                    else:
                        mask += [0]

        self.model.remove(mask)

        if self.verbose:
            self.logWrite('Removed ' + str(N0.sum(mask)) +
                          ' atoms because they were non-standard' +
                          ' or followed a missing atom.')

        return N0.sum(mask)


    def capACE(self, model, chain, breaks=True):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks, the chain index
        is, by default, based on model.chainIndex(breaks=True), that means
        with chain break detection activated! This is not the default
        behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient
        handling of the index.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool

        @return: model with added ACE capping residue
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)

        ## flag whether this fragment's ends are chain breaks rather than
        ## true chain boundaries (compare break-aware vs plain indices)
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        m_ace = PDBModel(self.F_ace_cap)

        ## split model into the target chain and everything before/after it
        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        ## first residue of the chain to be capped
        m_term = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##
        if 'HN' in m_term.atomNames():
            m_ace.remove(['CB'])  ## use backbone 'C' rather than CB for fitting

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                ## cap residue sits just before the first chain residue
                a['residue_number'] = m_term.atoms['residue_number'][0] - 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit(m_term)

        cap = m_ace.resModels()[0]
        ## renumber serials so the cap precedes the chain contiguously
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = cap.concat(m_chain, newChain=False)

        ## re-assemble whole model; only start a new chain where the joint
        ## was a real chain boundary, not a detected break
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)

        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping ACE would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r


    def capNME(self, model, chain, breaks=True):
        """
        Cap C-terminal of given chain.

        Note: In order to allow the capping of chain breaks, the chain index
        is, by default, based on model.chainIndex(breaks=True), that means
        with chain break detection activated! This is not the default
        behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient
        handling of the index.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool

        @return: model with added NME capping residue
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain)

        m_nme = PDBModel(self.F_nme_cap)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)

        ## flag whether this fragment's ends are chain breaks rather than
        ## true chain boundaries (compare break-aware vs plain indices)
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        ## split model into the target chain and everything before/after it
        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        ## last residue of the chain to be capped
        m_term = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                ## cap residue sits just after the last chain residue
                a['residue_number'] = m_term.atoms['residue_number'][0] + 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove(['OXT'])

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit(m_term)

        cap = m_nme.resModels()[-1]
        ## renumber serials so the cap follows the chain contiguously
        serial = m_term['serial_number'][-1] + 1
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = m_chain.concat(cap, newChain=False)

        ## should be obsolete now
        ## NOTE(review): direct poke at PDBModel's name-mangled private
        ## attribute -- moves the TER record to the new last atom
        if getattr(m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [len(m_chain) - 1]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model; only start a new chain where the joint
        ## was a real chain boundary, not a detected break
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)

        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping NME would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r


    def convertChainIdsNter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.

        @param model: model
        @type  model: PDBModel
        @param chains: chain indices (without break detection)
        @type  chains: [int]

        @return: chain indices with chain break detection enabled
        @rtype: [int]
        """
        if len(chains) == 0:
            return chains
        ## first atom of each requested chain
        i = N0.take(model.chainIndex(), chains)
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)


    def convertChainIdsCter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.

        @param model: model
        @type  model: PDBModel
        @param chains: chain indices (without break detection)
        @type  chains: [int]

        @return: chain indices with chain break detection enabled
        @rtype: [int]
        """
        if len(chains) == 0:
            return chains
        ## fetch last atom of given chains
        index = N0.concatenate((model.chainIndex(), [len(model)]))
        i = N0.take(index, N0.array(chains) + 1) - 1
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)


    def unresolvedTerminals(self, model):
        """
        Autodetect (aka "guess") which N- and C-terminals are most likely not
        the real end of each chain.

        This guess work is based on residue numbering:
          * unresolved N-terminal: a protein residue with a residue number > 1
          * unresolved C-terminal: a protein residue that does not contain
            either OXT or OT or OT1 or OT2 atoms

        @param model: PDBModel

        @return: chains with unresolved N-term, with unresolved C-term
        @rtype : ([int], [int])
        """
        c_first = model.chainIndex()
        c_last = model.chainEndIndex()

        ## N-term is suspect if the first residue is not numbered 1 ...
        capN = [ i for (i,pos) in enumerate(c_first)\
                 if model['residue_number'][pos] > 1 ]

        ## ... unless the chain already starts with an ACE cap
        capN = [i for i in capN
                if model['residue_name'][c_first[i]] != 'ACE']

        capN = self.filterProteinChains(model, capN, c_first)

        ## C-term is suspect if the last residue has no terminal oxygen
        capC = []
        for (i, pos) in enumerate(c_last):
            atoms = model.takeResidues(model.atom2resIndices([pos ])).atomNames()

            if not( 'OXT' in atoms or 'OT' in atoms or 'OT1' in atoms or \
                    'OT2' in atoms ):
                capC += [i]

        capC = self.filterProteinChains(model, capC, c_last)

        return capN, capC

    #@todo filter for protein positions in breaks=1

    def filterProteinChains(self, model, chains, chainindex):
        """
        Reduce a list of chain indices to those whose first atom (looked up
        through `chainindex`) belongs to a protein residue.

        @param model: model to check
        @type  model: PDBModel
        @param chains: chain indices to filter
        @type  chains: [int]
        @param chainindex: atom index of each chain's reference atom
        @type  chainindex: [int]

        @return: subset of `chains` that are protein chains
        @rtype: [int]
        """
        maskProtein = model.maskProtein()
        chains = [i for i in chains if maskProtein[chainindex[i]]]
        return chains


    def capTerminals(self, auto=False, breaks=False, capN=[], capC=[]):
        """
        Add NME and ACE capping residues to chain breaks or normal N- and
        C-terminals. Note: these capping residues contain hydrogen atoms.

        Chain indices for capN and capC arguments can be interpreted either
        with or without chain break detection enabled. For example, let's
        assume we have a two-chain protein with some missing residues (chain
        break) in the first chain:

        A: MGSKVSK---FLNAGSK
        B: FGHLAKSDAK

        Then:
        capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and
        C-terminal caps to chain B.
        However:
        capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and
        C-terminal caps to the second fragment of chain A.

        Note: this operation *replaces* the internal model.

        @param auto: put ACE and NME capping residue on chain breaks
                     and on suspected false N- and C-termini (default: False)
        @type  auto: bool
        @param breaks: switch on chain break detection before interpreting
                       capN and capC
        @type  breaks: bool
        @param capN: indices of chains that should get ACE cap (default: [])
        @type  capN: [int]
        @param capC: indices of chains that should get NME cap (default: [])
        @type  capC: [int]
        """
        m = self.model
        c_len = m.lenChains()
        i_breaks = m.chainBreaks()

        if auto:
            if not breaks:
                capN = self.convertChainIdsNter(m, capN)
                capC = self.convertChainIdsCter(m, capC)
            breaks = True
            ## NOTE(review): in auto mode the user-supplied (and just
            ## converted) capN/capC are overwritten here -- confirm intended.
            capN, capC = self.unresolvedTerminals(m)

            ## additionally cap both sides of every detected chain break
            end_broken = m.atom2chainIndices(m.chainBreaks(), breaks=1)

            capC = M.union(capC, end_broken)
            capN = M.union(capN, N0.array(end_broken) + 1)

        ## never cap non-protein chains (water, ligands, nucleic acids)
        capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
        capC = self.filterProteinChains(m, capC, m.chainEndIndex(breaks=breaks))

        for i in capN:
            m = self.capACE(m, i, breaks=breaks)
            ## capping must neither create chains nor hide breaks
            assert m.lenChains() == c_len, '%i != %i' % \
                   (m.lenChains(), c_len)
            assert len(m.chainBreaks(force=True)) == len(i_breaks)
            assert m[ 'serial_number'].dtype == N0.Int32, 'serial_number not int'

        for i in capC:
            m = self.capNME(m, i, breaks=breaks)
            assert m.lenChains() == c_len
            assert len(m.chainBreaks(force=True)) == len(i_breaks)

        self.model = m
        return self.model


    def process(self, keep_hetatoms=0, amber=0, keep_xaa=[]):
        """
        Remove Hetatoms, waters. Replace non-standard names.
        Remove non-standard atoms.

        @param keep_hetatoms: option
        @type  keep_hetatoms: 0||1
        @param amber: don't rename amber residue names (HIE, HID, CYX,..)
        @type  amber: 0||1
        @param keep_xaa: names of non-standard residues to be kept
        @type  keep_xaa: [ str ]

        @return: PDBModel (reference to internal)
        @rtype: PDBModel

        @raise CleanerError: if something doesn't go as expected ...
        """
        try:
            if not keep_hetatoms:
                self.model.remove(self.model.maskHetatm())

            self.model.remove(self.model.maskH2O())

            ## strip all hydrogens before checking standard-atom content
            self.model.remove(self.model.maskH())

            self.remove_multi_occupancies()

            self.replace_non_standard_AA(amber=amber, keep=keep_xaa)

            self.remove_non_standard_atoms()

        except KeyboardInterrupt, why:
            ## let Ctrl-C through instead of wrapping it in CleanerError
            raise KeyboardInterrupt(why)
        except Exception, why:
            ## NOTE(review): lower-case 't' here vs 'T' elsewhere in the
            ## package -- verify the tools-module alias used by this file.
            self.logWrite('Error: ' + t.lastErrorTrace())
            raise CleanerError('Error cleaning model: %r' % why)
def localPSIBlast( self, seqFile, db, method='blastp', resultOut=None, e='0.001', **kw ): """ Performa a local psi-blast search (requires that the blast binaries and databases are installed localy). Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search @param seqFile: file name with search sequence in FASTA format @type seqFile: str @param db: database(s) to search e.g. ['swissprot', 'pdb'] @type db: [str] @param e: expectation value cutoff (default: 0.001) @type e: float @param resultOut: save blast output to this new file @type resultOut: str @param kw: optional keywords:: --- New Blast+ routine --- (see NcbipsiblastCommandline) num_iterations Number of passes (default 1). matrix Matrix to use (default BLOSUM62). --- old blastall routine --- --- Scoring --- matrix Matrix to use (default BLOSUM62). gap_open Gap open penalty (default 11). gap_extend Gap extension penalty (default 1). window_size Multiple hits window size (default 40). npasses Number of passes (default 1). passes Hits/passes (Integer 0-2, default 1). --- Algorithm --- gapped Whether to do a gapped alignment (T/F, default T). wordsize Word size (default 3). keep_hits Number of beset hits from a region to keep (def 0) xdrop Dropoff value (bits) for gapped alignments (def 15) hit_extend Threshold for extending hits (default 11). nbits_gapping Number of bits to trigger gapping (default 22). pseudocounts Pseudocounts constants for multiple passes (def 9). xdrop_final X dropoff for final gapped alignment (default 25). xdrop_extension Dropoff for blast extensions (default 7). model_threshold E-value threshold to include in multipass model (default 0.005). required_start Start of required region in query (default 1). required_end End of required region in query (default -1). --- Processing --- filter Filter query sequence with SEG? (T/F, default F) believe_query Believe the query defline? (T/F, default F) nprocessors Number of processors to use (default 1). 
--- Formatting --- alignments Number of alignments (default 250). @type kw: any @raise BlastError: if program call failes """ ## the following should work for new Blast+ tools: #from Bio.Blast.Applications import NcbipsiblastCommandline #resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT #blastx_cline = NcbipsiblastCommandline(query=seqFile, #db=db, #evalue=e, #outfmt=5, #out=resultOut, #**kw) #stdout, stderr = blastx_cline() #parsed = NCBIXML.parse( results ).next() #self.__blast2dict( parsed, db ) results = err = None resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT kw = self.__dictvalues2str( kw ) e = str(e) try: results, err = NCBIStandalone.blastpgp( settings.psi_blast_bin, db, seqFile, program='blastpgp', align_view='7', ## XML output expectation=e, **kw) results = self.__copyFileHandle(results,resultOut ) err = self.__copyFileHandle(err, self.outFolder+self.F_BLAST_ERROR) if self.verbose: self.log.writeln('Raw blast output copied to: ' + resultOut ) parsed = NCBIXML.parse( results ).next() self.__blast2dict( parsed, db ) except Exception, why: self.log.add( T.lastErrorTrace() ) globals().update( locals() ) self.log.writeln('local namespace is pushed into global ') raise BlastError( str(why) )
def localBlast( self, seqFile, db, method='blastp', resultOut=None, e='0.01', **kw ): """ Performa a local blast search (requires that the blast binaries and databases are installed localy). Uses Bio.Blast.NCBIStandalone.blastall (Biopython) for the search. @param seqFile: file name with search sequence in FASTA format @type seqFile: str @param db: database(s) to search, e.g. ['swissprot', 'pdb'] @type db: [str] @param method: search program to use, e.g. 'blastp', 'fasta' (default: blastp) @type method: str @param e: expectation value cutoff @type e: float @param resultOut: save blast output to this new file @type resultOut: str @param kw: optional keywords:: --- Scoring --- matrix Matrix to use (default BLOSUM62). gap_open Gap open penalty (default 0). gap_extend Gap extension penalty (default 0). --- Algorithm --- gapped Whether to do a gapped alignment. T/F (default T) wordsize Word size (blastp default 11). keep_hits Number of best hits from a region to keep (default off). xdrop Dropoff value (bits) for gapped alignments (blastp default 25). hit_extend Threshold for extending hits (blastp default 11) --- Processing --- filter Filter query sequence? (T/F, default F) restrict_gi Restrict search to these GI's. believe_query Believe the query defline? (T/F, default F) nprocessors Number of processors to use (default 1). --- Formatting --- alignments Number of alignments. 
(default 250) @type kw: any @raise BlastError: if program call failes """ results = err = p = None resultOut = resultOut or self.outFolder+ self.F_BLAST_RAW_OUT kw = self.__dictvalues2str( kw ) e = str(e) try: if self.verbose: self.log.add('running blast...') results, err = NCBIStandalone.blastall( settings.blast_bin, method, db, seqFile, expectation=e, align_view='7', ## XML output **kw) results = self.__copyFileHandle(results, resultOut) err = self.__copyFileHandle(err, self.outFolder+self.F_BLAST_ERROR) if self.verbose: self.log.writeln('Raw blast output copied to: ' + resultOut ) parsed = NCBIXML.parse( results ).next() self.__blast2dict( parsed, db ) except Exception, why: self.log.add( T.lastErrorTrace() ) globals().update( locals() ) self.log.writeln('local namespace is pushed into global ') raise BlastError( str(why) )