def parse(query_sequence, blast_output_file, self_hit, hitid_format):
    '''
    Processes a blast xml formatted output into a {BlastResult} object.

    @param:    query_sequence
    @pdef:     sequence of the query protein/nucleotide.
    @ptype:    {String}

    @param:    blast_output_file
    @pdef:     output file from BLAST.
    @ptype:    {String}

    @param:    self_hit
    @pdef:     forwarded to BlastParser.same_query_hit_names to decide
               whether a hit matching the query is kept. No default is
               declared here; callers must pass it explicitly.
    @ptype:    {Boolean}

    @param:    hitid_format
    @pdef:     format of the name of the hit, forwarded to
               BlastParser.hit_name. No default is declared here;
               callers must pass it explicitly.
    @poptions: 'single' -> first word of the name,
               'double' -> first two words of the hit name,
               'all'    -> all the text in the hit name
    @ptype:    {String}

    @raises:   whatever BlastError().parse_error() returns, when at least
               one hit added to the result reports a false
               are_segments_ok.
    @returns:  {BlastResult}
    '''
    # NOTE(review): the File handle is never explicitly closed in this
    # function -- confirm that File releases it on its own.
    blast_file = File(blast_output_file)
    soup = BeautifulSoup(blast_file.read())

    header = BlastHeader(
        version=str(soup.find('blastoutput_version').string),
        matrix=str(soup.find('parameters_matrix').string),
        gap_open=int(soup.find('parameters_gap-open').string),
        gap_extend=int(soup.find('parameters_gap-extend').string),
        database=str(soup.find('blastoutput_db').string),
        self_hit=self_hit)
    result = BlastResult(
        query_name=str(soup.find('blastoutput_query-def').string),
        query_sequence=query_sequence,
        header=header)

    SBIg.alert('debug', BlastParser(), result.str_blast_header())

    # Problems are collected for every offending hit so that a single
    # warning can report all of them before the error is raised.
    error_str = []
    added_msg = 'Added hit {0} in iteration {1}'

    for iteration in soup.find_all('iteration'):
        iternum = int(iteration.find('iteration_iter-num').string)
        for hit in iteration.find_all('hit'):
            hit_name = BlastParser.hit_name(str(hit.find('hit_def').string),
                                            hitid_format)
            hit_length = int(hit.find("hit_len").string)
            for subhit in hit.find_all("hsp"):
                data = BlastParser.parse_subhit(subhit)
                blast_hit = BlastHit(
                    hit=[hit_name, hit_length],
                    sequences=[data['qs'], data['hs'], data['sc']],
                    sequence_inits=[data['qp'], data['hp']],
                    iteration=iternum,
                    stats=[data['hi'], data['h+'], data['hg'], data['ev']])

                if BlastParser.same_query_hit_names(result.query, hit_name,
                                                    self_hit):
                    continue

                SBIg.alert('debug', BlastParser(),
                           added_msg.format(hit_name, blast_hit.iteration))
                result.add_hit(blast_hit)

                if not blast_hit.are_segments_ok:
                    error_str.append("Check the alignment's fragmentation")
                    # The original mixed '%s' placeholders with str.format,
                    # so the names were never interpolated into the message.
                    error_str.append("for the query {0} with {1}\n".format(
                        result.query, hit_name))
                    error_str.append("{0}\n".format(blast_hit))

    result.set_last_iteration()

    if error_str:
        SBIg.warn(BlastParser(), error_str)
        be = BlastError()
        raise be.parse_error()

    return result