def _parse_hit(self, root_hit_elem, query_id): """Yield a generator object that transforms Iteration_hits XML elements into Hit objects (PRIVATE). :param root_hit_elem: root element of the Iteration_hits tag. :type root_hit_elem: XML element tag :param query_id: QueryResult ID of this Hit :type query_id: string """ # Hit level processing # Hits are stored in the Iteration_hits tag, with the following # DTD # <!ELEMENT Hit ( # Hit_num, # Hit_id, # Hit_def, # Hit_accession, # Hit_len, # Hit_hsps?)> # feed the loop below an empty list so iteration still works if root_hit_elem is None: root_hit_elem = [] for hit_elem in root_hit_elem: # BLAST sometimes mangles the sequence IDs and descriptions, so we need # to extract the actual values. raw_hit_id = hit_elem.findtext('Hit_id') raw_hit_desc = hit_elem.findtext('Hit_def') if not self._use_raw_hit_ids: ids, descs, blast_hit_id = _extract_ids_and_descs(raw_hit_id, raw_hit_desc) else: ids, descs, blast_hit_id = [raw_hit_id], [raw_hit_desc], raw_hit_id hit_id, alt_hit_ids = ids[0], ids[1:] hit_desc, alt_hit_descs = descs[0], descs[1:] hsps = [hsp for hsp in self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id)] hit = Hit(hsps) hit.description = hit_desc hit._id_alt = alt_hit_ids hit._description_alt = alt_hit_descs hit.blast_id = blast_hit_id for key, val_info in _ELEM_HIT.items(): value = hit_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(hit, val_info[0], value) # delete element after we finish parsing it hit_elem.clear() yield hit
def _parse_hit(self, root_hit_elem, query_id): """Generator that transforms Iteration_hits XML elements into Hit objects. :param root_hit_elem: root element of the Iteration_hits tag. :type root_hit_elem: XML element tag :param query_id: QueryResult ID of this Hit :type query_id: string """ # Hit level processing # Hits are stored in the Iteration_hits tag, with the following # DTD # <!ELEMENT Hit ( # Hit_num, # Hit_id, # Hit_def, # Hit_accession, # Hit_len, # Hit_hsps?)> # feed the loop below an empty list so iteration still works if root_hit_elem is None: root_hit_elem = [] for hit_elem in root_hit_elem: # create empty hit object hit_id = hit_elem.findtext('Hit_id') hit_desc = hit_elem.findtext('Hit_def') # handle blast searches against databases with Blast's IDs if hit_id.startswith('gnl|BL_ORD_ID|'): blast_hit_id = hit_id id_desc = hit_desc.split(' ', 1) hit_id = id_desc[0] try: hit_desc = id_desc[1] except IndexError: hit_desc = '' else: blast_hit_id = '' # combine primary ID and defline first before splitting full_id_desc = hit_id + ' ' + hit_desc id_descs = [(x.strip(), y.strip()) for x, y in \ [a.split(' ', 1) for a in full_id_desc.split(' >')]] hit_id, hit_desc = id_descs[0] hsps = [hsp for hsp in self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id)] hit = Hit(hsps) hit.description = hit_desc hit._id_alt = [x[0] for x in id_descs[1:]] hit._description_alt = [x[1] for x in id_descs[1:]] # blast_hit_id is only set if the hit ID is Blast-generated hit._blast_id = blast_hit_id for key, val_info in _ELEM_HIT.items(): value = hit_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(hit, val_info[0], value) # delete element after we finish parsing it hit_elem.clear() yield hit
def _parse_hit(self, root_hit_elem, query_id): """Generator that transforms Iteration_hits XML elements into Hit objects. :param root_hit_elem: root element of the Iteration_hits tag. :type root_hit_elem: XML element tag :param query_id: QueryResult ID of this Hit :type query_id: string """ # Hit level processing # Hits are stored in the Iteration_hits tag, with the following # DTD # <!ELEMENT Hit ( # Hit_num, # Hit_id, # Hit_def, # Hit_accession, # Hit_len, # Hit_hsps?)> # feed the loop below an empty list so iteration still works if root_hit_elem is None: root_hit_elem = [] for hit_elem in root_hit_elem: # create empty hit object hit_id = hit_elem.findtext('Hit_id') hit_desc = hit_elem.findtext('Hit_def') # handle blast searches against databases with Blast's IDs if hit_id.startswith('gnl|BL_ORD_ID|'): blast_hit_id = hit_id id_desc = hit_desc.split(' ', 1) hit_id = id_desc[0] try: hit_desc = id_desc[1] except IndexError: hit_desc = '' else: blast_hit_id = '' # combine primary ID and defline first before splitting full_id_desc = hit_id + ' ' + hit_desc id_descs = _extract_ids_and_descs(full_id_desc) hit_id, hit_desc = id_descs[0] hsps = [ hsp for hsp in self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id) ] hit = Hit(hsps) hit.description = hit_desc hit._id_alt = [x[0] for x in id_descs[1:]] hit._description_alt = [x[1] for x in id_descs[1:]] # blast_hit_id is only set if the hit ID is Blast-generated hit._blast_id = blast_hit_id for key, val_info in _ELEM_HIT.items(): value = hit_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(hit, val_info[0], value) # delete element after we finish parsing it hit_elem.clear() yield hit
def _parse_hit(self, root_hit_elem, query_id): """Yield a generator object that transforms Iteration_hits XML elements into Hit objects (PRIVATE). :param root_hit_elem: root element of the Iteration_hits tag. :type root_hit_elem: XML element tag :param query_id: QueryResult ID of this Hit :type query_id: string """ # Hit level processing # Hits are stored in the Iteration_hits tag, with the following # DTD # <!ELEMENT Hit ( # Hit_num, # Hit_id, # Hit_def, # Hit_accession, # Hit_len, # Hit_hsps?)> # feed the loop below an empty list so iteration still works if root_hit_elem is None: root_hit_elem = [] for hit_elem in root_hit_elem: # BLAST sometimes mangles the sequence IDs and descriptions, so we need # to extract the actual values. raw_hit_id = hit_elem.findtext('Hit_id') raw_hit_desc = hit_elem.findtext('Hit_def') if not self._use_raw_hit_ids: ids, descs, blast_hit_id = _extract_ids_and_descs( raw_hit_id, raw_hit_desc) else: ids, descs, blast_hit_id = [raw_hit_id], [raw_hit_desc ], raw_hit_id hit_id, alt_hit_ids = ids[0], ids[1:] hit_desc, alt_hit_descs = descs[0], descs[1:] hsps = [ hsp for hsp in self._parse_hsp(hit_elem.find('Hit_hsps'), query_id, hit_id) ] hit = Hit(hsps) hit.description = hit_desc hit._id_alt = alt_hit_ids hit._description_alt = alt_hit_descs hit.blast_id = blast_hit_id for key, val_info in _ELEM_HIT.items(): value = hit_elem.findtext(key) if value is not None: caster = val_info[1] # recast only if value is not intended to be str if value is not None and caster is not str: value = caster(value) setattr(hit, val_info[0], value) # delete element after we finish parsing it hit_elem.clear() yield hit