Пример #1
0
    def _parse_qresult(self):
        """Parses query results."""
        # parse the queries
        for event, qresult_elem in self.xml_iter:
            # </Iteration> marks the end of a single query
            # which means we can process it
            if event == 'end' and qresult_elem.tag == 'Iteration':

                # we'll use the following schema
                # <!ELEMENT Iteration (
                #        Iteration_iter-num,
                #        Iteration_query-ID?,
                #        Iteration_query-def?,
                #        Iteration_query-len?,
                #        Iteration_hits?,
                #        Iteration_stat?,
                #        Iteration_message?)>

                # assign query attributes with fallbacks
                query_id = qresult_elem.findtext('Iteration_query-ID')
                if query_id is None:
                    query_id = self._fallback['id']

                query_desc = qresult_elem.findtext('Iteration_query-def')
                if query_desc is None:
                    query_desc = self._fallback['description']

                query_len = qresult_elem.findtext('Iteration_query-len')
                if query_len is None:
                    query_len = self._fallback['len']

                # handle blast searches against databases with Blast's IDs
                # 'Query_' marks the beginning of a BLAST+-generated ID,
                # 'lcl|' marks the beginning of a BLAST legacy-generated ID
                if query_id.startswith('Query_') or query_id.startswith('lcl|'):
                    # store the Blast-generated query ID
                    blast_query_id = query_id
                    id_desc = query_desc.split(' ', 1)
                    query_id = id_desc[0]
                    try:
                        query_desc = id_desc[1]
                    except IndexError:
                        query_desc = ''
                else:
                    blast_query_id = ''

                hit_list, key_list = [], []
                for hit in self._parse_hit(qresult_elem.find('Iteration_hits'),
                        query_id):
                    if hit:
                        # need to keep track of hit IDs, since there could be duplicates,
                        if hit.id in key_list:
                            warnings.warn("Adding hit with BLAST-generated ID "
                                    "%r since hit ID %r is already present "
                                    "in query %r. Your BLAST database may contain "
                                    "duplicate entries." %
                                    (hit._blast_id, hit.id, query_id), BiopythonParserWarning)
                            # fallback to Blast-generated IDs, if the ID is already present
                            # and restore the desc, too
                            hit.description = '%s %s' % (hit.id, hit.description)
                            hit.id = hit._blast_id
                            # and change the hit_id of the HSPs contained
                            for hsp in hit:
                                hsp.hit_id = hit._blast_id
                        else:
                            key_list.append(hit.id)

                        hit_list.append(hit)

                # create qresult and assign its attributes
                qresult = QueryResult(hit_list, query_id)
                qresult.description = query_desc
                qresult.seq_len = int(query_len)
                qresult._blast_id = blast_query_id
                for key, value in self._meta.items():
                    setattr(qresult, key, value)

                # statistics are stored in Iteration_stat's 'grandchildren' with the
                # following DTD
                # <!ELEMENT Statistics (
                #        Statistics_db-num,
                #        Statistics_db-len,
                #        Statistics_hsp-len,
                #        Statistics_eff-space,
                #        Statistics_kappa,
                #        Statistics_lambda,
                #        Statistics_entropy)>

                stat_iter_elem = qresult_elem.find('Iteration_stat')
                if stat_iter_elem is not None:
                    stat_elem = stat_iter_elem.find('Statistics')

                    for key, val_info in _ELEM_QRESULT_OPT.items():
                        value = stat_elem.findtext(key)
                        if value is not None:
                            caster = val_info[1]
                            # recast only if value is not intended to be str
                            if value is not None and caster is not str:
                                value = caster(value)
                            setattr(qresult, val_info[0], value)

                # delete element after we finish parsing it
                qresult_elem.clear()
                yield qresult
Пример #2
0
    def _parse_qresult(self):
        """Parses query results."""
        # parse the queries
        for event, qresult_elem in self.xml_iter:
            # </Iteration> marks the end of a single query
            # which means we can process it
            if event == 'end' and qresult_elem.tag == 'Iteration':

                # we'll use the following schema
                # <!ELEMENT Iteration (
                #        Iteration_iter-num,
                #        Iteration_query-ID?,
                #        Iteration_query-def?,
                #        Iteration_query-len?,
                #        Iteration_hits?,
                #        Iteration_stat?,
                #        Iteration_message?)>

                # assign query attributes with fallbacks
                query_id = qresult_elem.findtext('Iteration_query-ID')
                if query_id is None:
                    query_id = self._fallback['id']

                query_desc = qresult_elem.findtext('Iteration_query-def')
                if query_desc is None:
                    query_desc = self._fallback['description']

                query_len = qresult_elem.findtext('Iteration_query-len')
                if query_len is None:
                    query_len = self._fallback['len']

                # handle blast searches against databases with Blast's IDs
                # 'Query_' marks the beginning of a BLAST+-generated ID,
                # 'lcl|' marks the beginning of a BLAST legacy-generated ID
                if query_id.startswith('Query_') or query_id.startswith(
                        'lcl|'):
                    # store the Blast-generated query ID
                    blast_query_id = query_id
                    id_desc = query_desc.split(' ', 1)
                    query_id = id_desc[0]
                    try:
                        query_desc = id_desc[1]
                    except IndexError:
                        query_desc = ''
                else:
                    blast_query_id = ''

                hit_list, key_list = [], []
                for hit in self._parse_hit(qresult_elem.find('Iteration_hits'),
                                           query_id):
                    if hit:
                        # need to keep track of hit IDs, since there could be duplicates,
                        if hit.id in key_list:
                            warnings.warn(
                                "Adding hit with BLAST-generated ID "
                                "%r since hit ID %r is already present "
                                "in query %r. Your BLAST database may contain "
                                "duplicate entries." %
                                (hit._blast_id, hit.id, query_id),
                                BiopythonParserWarning)
                            # fallback to Blast-generated IDs, if the ID is already present
                            # and restore the desc, too
                            hit.description = '%s %s' % (hit.id,
                                                         hit.description)
                            hit.id = hit._blast_id
                            # and change the hit_id of the HSPs contained
                            for hsp in hit:
                                hsp.hit_id = hit._blast_id
                        else:
                            key_list.append(hit.id)

                        hit_list.append(hit)

                # create qresult and assign its attributes
                qresult = QueryResult(hit_list, query_id)
                qresult.description = query_desc
                qresult.seq_len = int(query_len)
                qresult._blast_id = blast_query_id
                for key, value in self._meta.items():
                    setattr(qresult, key, value)

                # statistics are stored in Iteration_stat's 'grandchildren' with the
                # following DTD
                # <!ELEMENT Statistics (
                #        Statistics_db-num,
                #        Statistics_db-len,
                #        Statistics_hsp-len,
                #        Statistics_eff-space,
                #        Statistics_kappa,
                #        Statistics_lambda,
                #        Statistics_entropy)>

                stat_iter_elem = qresult_elem.find('Iteration_stat')
                if stat_iter_elem is not None:
                    stat_elem = stat_iter_elem.find('Statistics')

                    for key, val_info in _ELEM_QRESULT_OPT.items():
                        value = stat_elem.findtext(key)
                        if value is not None:
                            caster = val_info[1]
                            # recast only if value is not intended to be str
                            if value is not None and caster is not str:
                                value = caster(value)
                            setattr(qresult, val_info[0], value)

                # delete element after we finish parsing it
                qresult_elem.clear()
                yield qresult