def _parse_qresult(self):
    """Yield QueryResult objects built from consecutive alignment blocks (PRIVATE).

    Every parsed alignment block contributes one HSP.  HSPs are collected
    into a Hit until the hit ID or the query ID changes, and hits are
    collected into a QueryResult until the query ID changes or the input
    ends.

    Side effects: advances ``self.line`` (and ``self.handle`` when not
    parsing C4 alignments) and, when ``self.has_c4_alignment`` is true,
    sets ``self._ALN_MARK`` to ``'C4 Alignment:'``.

    Nothing is yielded when the input ends before any alignment block is
    found.
    """
    # if the file has c4 alignments, use that as the alignment mark
    if self.has_c4_alignment:
        self._ALN_MARK = 'C4 Alignment:'

    # values parsed from the previous block; None until one has been parsed
    prev = None
    prev_qid, prev_hid = None, None
    hit_list, hsp_list = [], []

    while True:
        # NOTE(review): read_until presumably leaves self.line on the next
        # line satisfying the predicate, or empty at EOF -- confirm.
        self.read_until(lambda line: line.startswith(self._ALN_MARK))

        # decide on EOF *before* parsing, since parsing moves self.line
        at_eof = not self.line
        if at_eof:
            # mock values, since there is nothing left to parse
            cur, cur_qid, cur_hid = None, None, None
        else:
            assert self.line.startswith(self._ALN_MARK), self.line
            # temp dicts for storing parsed values
            header = {'qresult': {}, 'hit': {}, 'hsp': {}}
            # if the file has c4 alignments, try to parse the header
            if self.has_c4_alignment:
                self.read_until(
                    lambda line: line.strip().startswith('Query:'))
                header = self._parse_alignment_header()
            # parse the block contents
            cur = self.parse_alignment_block(header)
            cur_qid = cur['qresult']['id']
            cur_hid = cur['hit']['id']

        # a new hit is a hit with a different ID or any hit in a new query
        new_qresult = prev_qid != cur_qid
        new_hit = new_qresult or prev_hid != cur_hid

        if prev is not None:
            # the previous block is flushed only now, because whether it
            # closes its hit / query is known only after reading ahead
            hsp_list.append(_create_hsp(prev_hid, prev_qid, prev['hsp']))

            if new_hit:
                hit = Hit(hsp_list)
                for attr, value in prev['hit'].items():
                    setattr(hit, attr, value)
                hit_list.append(hit)
                hsp_list = []

            if new_qresult or at_eof:
                qresult = QueryResult(id=prev_qid)
                for hit in hit_list:
                    # not using append since Exonerate may separate the
                    # same hit if it has different strands
                    qresult.absorb(hit)
                for attr, value in prev['qresult'].items():
                    setattr(qresult, attr, value)
                yield qresult
                hit_list = []

        # Stop at EOF even when nothing was ever parsed; tying the exit to
        # ``prev is not None`` would spin forever on input without blocks.
        if at_eof:
            break

        prev, prev_qid, prev_hid = cur, cur_qid, cur_hid

        # only readline() here if we're not parsing C4 alignments;
        # for C4 alignments parse_alignment_block handles the reading
        if not self.has_c4_alignment:
            self.line = self.handle.readline()
def _parse_qresult(self):
    """Yield QueryResult objects built from consecutive alignment blocks (PRIVATE).

    Every parsed alignment block contributes one HSP.  HSPs are collected
    into a Hit until the hit ID or the query ID changes, and hits are
    collected into a QueryResult until the query ID changes or the input
    ends.

    Side effects: advances ``self.line`` (and ``self.handle`` when not
    parsing C4 alignments) and, when ``self.has_c4_alignment`` is true,
    sets ``self._ALN_MARK`` to ``'C4 Alignment:'``.

    Nothing is yielded when the input ends before any alignment block is
    found.
    """
    # if the file has c4 alignments, use that as the alignment mark
    if self.has_c4_alignment:
        self._ALN_MARK = 'C4 Alignment:'

    # values parsed from the previous block; None until one has been parsed
    prev = None
    prev_qid, prev_hid = None, None
    hit_list, hsp_list = [], []

    while True:
        # NOTE(review): read_until presumably leaves self.line on the next
        # line satisfying the predicate, or empty at EOF -- confirm.
        self.read_until(lambda line: line.startswith(self._ALN_MARK))

        # decide on EOF *before* parsing, since parsing moves self.line
        at_eof = not self.line
        if at_eof:
            # mock values, since there is nothing left to parse
            cur, cur_qid, cur_hid = None, None, None
        else:
            assert self.line.startswith(self._ALN_MARK), self.line
            # temp dicts for storing parsed values
            header = {'qresult': {}, 'hit': {}, 'hsp': {}}
            # if the file has c4 alignments, try to parse the header
            if self.has_c4_alignment:
                self.read_until(lambda line: line.strip().startswith('Query:'))
                header = self._parse_alignment_header()
            # parse the block contents
            cur = self.parse_alignment_block(header)
            cur_qid = cur['qresult']['id']
            cur_hid = cur['hit']['id']

        # a new hit is a hit with a different ID or any hit in a new query
        new_qresult = prev_qid != cur_qid
        new_hit = new_qresult or prev_hid != cur_hid

        if prev is not None:
            # the previous block is flushed only now, because whether it
            # closes its hit / query is known only after reading ahead
            hsp_list.append(_create_hsp(prev_hid, prev_qid, prev['hsp']))

            if new_hit:
                hit = Hit(hsp_list)
                for attr, value in prev['hit'].items():
                    setattr(hit, attr, value)
                hit_list.append(hit)
                hsp_list = []

            if new_qresult or at_eof:
                qresult = QueryResult(id=prev_qid)
                for hit in hit_list:
                    # not using append since Exonerate may separate the
                    # same hit if it has different strands
                    qresult.absorb(hit)
                for attr, value in prev['qresult'].items():
                    setattr(qresult, attr, value)
                yield qresult
                hit_list = []

        # Stop at EOF even when nothing was ever parsed; tying the exit to
        # ``prev is not None`` would spin forever on input without blocks.
        if at_eof:
            break

        prev, prev_qid, prev_hid = cur, cur_qid, cur_hid

        # only readline() here if we're not parsing C4 alignments;
        # for C4 alignments parse_alignment_block handles the reading
        if not self.has_c4_alignment:
            self.line = self.handle.readline()
def _parse_qresult(self):
    """Yield QueryResult objects, one per run of rows sharing a query name.

    Each parsed row contributes one HSP.  HSPs are collected into a Hit
    until the target name (``'tname'``) or the query name (``'qname'``)
    changes, and hits are collected into a QueryResult until the query
    name changes or the input ends.

    Side effects: consumes lines from ``self.handle`` and updates
    ``self.line``.

    Nothing is yielded when ``self.line`` is already empty on entry.
    """
    # values parsed from the previous row; None until a row has been parsed
    prev = None
    prev_qid, prev_hid = None, None
    hit_list, hsp_list = [], []

    while True:
        # only parse the result row if it's not EOF
        at_eof = not self.line
        if at_eof:
            # mock values, since we have nothing to parse
            cur, cur_qid, cur_hid = None, None, None
        else:
            cur = self._parse_row()
            cur_qid = cur['qname']
            cur_hid = cur['tname']

        # new hits are hits with different ids or hits in a new qresult
        new_qresult = prev_qid != cur_qid
        new_hit = new_qresult or prev_hid != cur_hid

        if prev is not None:
            # the previous row is flushed only now, because whether it
            # closes its hit / qresult is known only after reading ahead
            hsp_list.append(_create_hsp(prev_hid, prev_qid, prev))

            if new_hit:
                # create Hit and set its attributes
                hit = Hit(hsp_list)
                hit.seq_len = prev['tsize']
                hit_list.append(hit)
                hsp_list = []

            # create qresult and yield if we're at a new qresult or at EOF
            if new_qresult or at_eof:
                # keyword form, so the ID cannot land in another
                # positional slot of the constructor
                qresult = QueryResult(id=prev_qid)
                for hit in hit_list:
                    qresult.absorb(hit)
                qresult.seq_len = prev['qsize']
                yield qresult
                hit_list = []

        # Stop at EOF even when no row was ever parsed; tying the exit to
        # ``prev is not None`` would spin forever on empty input.
        if at_eof:
            break

        prev, prev_qid, prev_hid = cur, cur_qid, cur_hid
        self.line = self.handle.readline()
def _parse_qresult(self):
    """Yield QueryResult objects (PRIVATE).

    Each parsed row contributes one HSP.  HSPs are collected into a Hit
    until the target name (``"tname"``) or the query name (``"qname"``)
    changes, and hits are collected into a QueryResult until the query
    name changes or the input ends.

    Side effects: consumes lines from ``self.handle`` and updates
    ``self.line``.

    Nothing is yielded when ``self.line`` is already empty on entry.
    """
    # values parsed from the previous row; None until a row has been parsed
    prev = None
    prev_qid, prev_hid = None, None
    hit_list, hsp_list = [], []

    while True:
        # only parse the result row if it's not EOF
        at_eof = not self.line
        if at_eof:
            # mock values, since we have nothing to parse
            cur, cur_qid, cur_hid = None, None, None
        else:
            cur = self._parse_row()
            cur_qid = cur["qname"]
            cur_hid = cur["tname"]

        # new hits are hits with different ids or hits in a new qresult
        new_qresult = prev_qid != cur_qid
        new_hit = new_qresult or prev_hid != cur_hid

        if prev is not None:
            # the previous row is flushed only now, because whether it
            # closes its hit / qresult is known only after reading ahead
            hsp_list.append(_create_hsp(prev_hid, prev_qid, prev))

            if new_hit:
                # create Hit and set its attributes
                hit = Hit(hsp_list)
                hit.seq_len = prev["tsize"]
                hit_list.append(hit)
                hsp_list = []

            # create qresult and yield if we're at a new qresult or at EOF
            if new_qresult or at_eof:
                qresult = QueryResult(id=prev_qid)
                for hit in hit_list:
                    qresult.absorb(hit)
                qresult.seq_len = prev["qsize"]
                yield qresult
                hit_list = []

        # Stop at EOF even when no row was ever parsed; tying the exit to
        # ``prev is not None`` would spin forever on empty input.
        if at_eof:
            break

        prev, prev_qid, prev_hid = cur, cur_qid, cur_hid
        self.line = self.handle.readline()