class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   8.9e-20     100.47      60           [1:61]                [13:73]
                  1   3.3e-06      55.39      60           [0:60]                [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the HSP
    object inside a Hit using the HSP's integer index. Hit objects can also be
    sliced, which will return a new Hit object containing only the sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   3.3e-06      55.39      60           [0:60]                [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   8.9e-20     100.47      60           [1:61]                [13:73]

    There are also other methods which are counterparts of Python lists' methods
    with the same names: ``append``, ``index``, ``pop``, and ``sort``. Consult their
    respective documentations for more details and examples of their usage.

    """

    # attributes we don't want to transfer when creating a new Hit object
    # from this one
    _NON_STICKY_ATTRS = ("_items", )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties; otherwise a ValueError is raised.
        ``hsps`` may be any iterable, including a single-use iterator.
        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None
        self.attributes = {}
        self.dbxrefs = []

        # Materialize once: ``hsps`` is walked several times below, and a
        # single-use iterator (e.g. a generator) would otherwise be exhausted
        # by the consistency check and leave the Hit silently empty.
        hsps = list(hsps)

        for attr in ("query_id", "query_description", "hit_id",
                     "hit_description"):
            # HACK: setting the if clause to '> 1' allows for empty hit objects.
            # This makes it easier to work with file formats with unpredictable
            # hit-hsp ordering. The empty hit object itself is nonfunctional,
            # however, since all its cascading properties are empty.
            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
                raise ValueError(
                    "Hit object can not contain HSPs with more than one %s." %
                    attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it
            self.append(hsp)

    def __repr__(self):
        """Return string representation of Hit object."""
        return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
                                                     len(self))

    def __iter__(self):
        """Iterate over hsps."""
        return iter(self.hsps)

    def __len__(self):
        """Return number of hsps."""
        return len(self.hsps)

    # Python 3:
    def __bool__(self):
        """Return True if there are hsps."""
        return bool(self.hsps)

    # Python 2:
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        """Return True if hsp in items."""
        return hsp in self._items

    def __str__(self):
        """Return a human readable summary of the Hit object."""
        lines = []

        # set query id line
        qid_line = "Query: %s" % self.query_id
        if self.query_description:
            qid_line += trim_str("\n       %s" % self.query_description, 80,
                                 "...")
        lines.append(qid_line)

        # set hit id line
        hid_line = "  Hit: %s" % self.id
        # seq_len is not set by __init__; only show it when it is present
        if hasattr(self, "seq_len"):
            hid_line += " (%i)" % self.seq_len
        if self.description:
            hid_line += trim_str("\n       %s" % self.description, 80, "...")
        lines.append(hid_line)

        # set attributes lines
        for key, value in sorted(self.attributes.items()):
            lines.append(" %s: %s" % (key, value))

        # set dbxrefs line
        if self.dbxrefs:
            lines.append("Database cross-references: " +
                         ", ".join(self.dbxrefs))

        # set hsp line and table
        if not self.hsps:
            lines.append(" HSPs: ?")
        else:
            # the same dashed rule is drawn above and below the column titles
            rule = ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
            lines.append(" HSPs: %s  %s  %s  %s  %s  %s" % rule)
            pattern = "%11s  %8s  %9s  %6s  %15s  %21s"
            lines.append(pattern % ("#", "E-value", "Bit score", "Span",
                                    "Query range", "Hit range"))
            lines.append(pattern % rule)
            for idx, hsp in enumerate(self.hsps):
                # evalue
                evalue = getattr_str(hsp, "evalue", fmt="%.2g")
                # bitscore
                bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
                # alignment length
                aln_span = getattr_str(hsp, "aln_span")
                # query region
                query_start = getattr_str(hsp, "query_start")
                query_end = getattr_str(hsp, "query_end")
                query_range = "[%s:%s]" % (query_start, query_end)
                # max column length is 15
                query_range = trim_str(query_range, 15, "~]")
                # hit region
                hit_start = getattr_str(hsp, "hit_start")
                hit_end = getattr_str(hsp, "hit_end")
                hit_range = "[%s:%s]" % (hit_start, hit_end)
                # max column length is 21
                hit_range = trim_str(hit_range, 21, "~]")
                # append the hsp row
                lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
                                        query_range, hit_range))

        return "\n".join(lines)

    def __getitem__(self, idx):
        """Return the HSP object at the given index.

        If ``idx`` is a slice, a new Hit containing the sliced HSPs is
        returned instead.
        """
        # if key is slice, return a new Hit instance
        if isinstance(idx, slice):
            obj = self.__class__(self.hsps[idx])
            self._transfer_attrs(obj)
            return obj
        return self._items[idx]

    def __setitem__(self, idx, hsps):
        """Assign hsps to index idx.

        For a slice ``idx``, ``hsps`` must be an iterable of HSP objects; for
        any other index it must be a single HSP object. A TypeError is raised
        when a value is not an HSP object.
        """
        # Validate according to the kind of index, not the kind of value:
        # storing a list under an integer index, or splicing the fragments of
        # a single HSP into a slice, would corrupt the HSP list.
        if isinstance(idx, slice):
            hsps = list(hsps)
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Delete item of index idx."""
        del self._items[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Validate an HSP object (PRIVATE).

        Valid HSP objects have the same hit_id as the Hit object ID and the
        same query_id as the Hit object's query_id. The hit and query
        descriptions are checked the same way. Any of these values that is
        still None on the Hit is taken over from the HSP instead.

        Raises TypeError if ``hsp`` is not an HSP object, and ValueError if
        one of its values conflicts with the Hit's.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work
        if self._items:
            if self.id is not None:
                if hsp.hit_id != self.id:
                    raise ValueError(
                        "Expected HSP with hit ID %r, found %r instead." %
                        (self.id, hsp.hit_id))
            else:
                self.id = hsp.hit_id

            if self.description is not None:
                if hsp.hit_description != self.description:
                    raise ValueError(
                        "Expected HSP with hit description %r, found %r instead."
                        % (self.description, hsp.hit_description))
            else:
                self.description = hsp.hit_description

            if self.query_id is not None:
                if hsp.query_id != self.query_id:
                    raise ValueError(
                        "Expected HSP with query ID %r, found %r instead." %
                        (self.query_id, hsp.query_id))
            else:
                self.query_id = hsp.query_id

            if self.query_description is not None:
                if hsp.query_description != self.query_description:
                    raise ValueError(
                        "Expected HSP with query description %r, found %r instead."
                        % (self.query_description, hsp.query_description))
            else:
                self.query_description = hsp.query_description

    # properties #
    description = optionalcascade("_description", "hit_description",
                                  """Hit description""")
    query_description = optionalcascade(
        "_query_description",
        "query_description",
        """Description of the query that produced the hit""",
    )
    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
    query_id = optionalcascade(
        "_query_id", "query_id",
        """ID string of the query that produced the hit""")
    # returns all hsps
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """Primary ID followed by the alternative ID(s) of the Hit."""
        return [self.id] + self._id_alt

    @property
    def description_all(self):
        """Primary description followed by the alternative ones of the Hit."""
        return [self.description] + self._description_alt

    @property
    def fragments(self):
        """Access the HSPFragment objects contained in the Hit."""
        # flatten the fragments of every HSP into a single list
        return list(chain.from_iterable(self._items))

    # public methods #
    def append(self, hsp):
        """Add a HSP object to the end of Hit.

        :param hsp: object to append
        :type hsp: HSP

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        self._validate_hsp(hsp)
        self._items.append(hsp)

    def filter(self, func=None):
        """Create new Hit object whose HSP objects pass the filter function.

        :param func: function for filtering
        :type func: callable, accepts HSP, returns bool

        ``filter`` is analogous to Python's built-in ``filter`` function, except
        that instead of returning a list it returns a ``Hit`` object. If no HSP
        passes the filter, None is returned. Here is an example of using
        ``filter`` to select for HSPs having bitscores bigger than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
                   mir_1
              Hit: gi|301171322|ref|NR_035857.1| (86)
                   Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                      #   E-value  Bit score    Span      Query range              Hit range
                   ----  --------  ---------  ------  ---------------  ---------------------
                      0   8.9e-20     100.47      60           [1:61]                [13:73]

        """
        hsps = list(filter(func, self.hsps))
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj
        # nothing passed the filter: no Hit is built
        return None

    def index(self, hsp):
        """Return the index of a given HSP object, zero-based.

        :param hsp: object to look up
        :type hsp: HSP

        """
        return self._items.index(hsp)

    def map(self, func=None):
        """Create new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping
        :type func: callable, accepts HSP, returns HSP

        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
        all HSPs contained in the Hit object and returns a new Hit object. If
        ``func`` is None the HSPs are carried over unchanged. If the Hit has no
        HSPs, None is returned.

        """
        if func is not None:
            hsps = [func(x)
                    for x in self.hsps[:]]  # this creates a shallow copy
        else:
            hsps = self.hsps[:]
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj
        # no HSPs to map over: no Hit is built
        return None

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        :param index: index of HSP object to pop
        :type index: int

        """
        return self._items.pop(index)

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or not
        :type reverse: bool
        :param in_place: whether to do in-place sorting or not
        :type in_place: bool

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method, and returns None in that case. If you set the ``in_place``
        argument to False, it will return a new, sorted Hit object and keep
        the initial one unsorted.

        """
        if in_place:
            self._items.sort(key=key, reverse=reverse)
        else:
            hsps = self.hsps[:]
            hsps.sort(key=key, reverse=reverse)
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj
# Example #2
# 0
class HSP(_BaseHSP):
    """Class representing high-scoring region(s) between query and hit.

    HSP (high-scoring pair) objects are contained by Hit objects (see Hit).
    In most cases, HSP objects store the bulk of the statistics and results
    (e.g. e-value, bitscores, query sequence, etc.) produced by a search
    program.

    Depending on the search output file format, a given HSP will contain one
    or more HSPFragment object(s). Examples of search programs that produce HSP
    with one HSPFragments are BLAST, HMMER, and FASTA. Other programs such as
    BLAT or Exonerate may produce HSPs containing more than one HSPFragment.
    However, their native terminologies may differ: in BLAT these fragments
    are called 'blocks' while in Exonerate they are called exons or NER.

    Here are examples from each type of HSP. The first one comes from a BLAST
    search::

        >>> from Bio import SearchIO
        >>> blast_qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> blast_hsp = blast_qresult[1][0]     # the first HSP from the second hit
        >>> blast_hsp
        HSP(hit_id='gi|301171311|ref|NR_035856.1|', query_id='33211', 1 fragments)
        >>> print(blast_hsp)
              Query: 33211 mir_1
                Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
        Query range: [1:61] (1)
          Hit range: [0:60] (1)
        Quick stats: evalue 1.7e-22; bitscore 109.49
          Fragments: 1 (60 columns)
             Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
                     ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
               Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG

    For HSPs with a single HSPFragment, you can invoke ``print`` on it and see the
    underlying sequence alignment, if it exists. This is not the case for HSPs
    with more than one HSPFragment. Below is an example, using an HSP from a
    BLAT search. Invoking ``print`` on these HSPs will instead show a table of the
    HSPFragment objects it contains::

        >>> blat_qresult = SearchIO.read('Blat/mirna.pslx', 'blat-psl', pslx=True)
        >>> blat_hsp = blat_qresult[1][0]       # the first HSP from the second hit
        >>> blat_hsp
        HSP(hit_id='chr11', query_id='blat_1', 2 fragments)
        >>> print(blat_hsp)
              Query: blat_1 <unknown description>
                Hit: chr11 <unknown description>
        Query range: [42:67] (-1)
          Hit range: [59018929:59018955] (1)
        Quick stats: evalue ?; bitscore ?
          Fragments: ---  --------------  ----------------------  ----------------------
                       #            Span             Query range               Hit range
                     ---  --------------  ----------------------  ----------------------
                       0               6                 [61:67]     [59018929:59018935]
                       1              16                 [42:58]     [59018939:59018955]

    Notice that in HSPs with more than one HSPFragment, the HSP's ``query_range``
    and ``hit_range`` properties encompass all fragments it contains.

    You can check whether an HSP has more than one HSPFragments or not using the
    ``is_fragmented`` property::

        >>> blast_hsp.is_fragmented
        False
        >>> blat_hsp.is_fragmented
        True

    Since HSP objects are also containers similar to Python lists, you can
    access a single fragment in an HSP using its integer index::

        >>> blat_fragment = blat_hsp[0]
        >>> print(blat_fragment)
              Query: blat_1 <unknown description>
                Hit: chr11 <unknown description>
        Query range: [61:67] (-1)
          Hit range: [59018929:59018935] (1)
          Fragments: 1 (6 columns)
             Query - tatagt
               Hit - tatagt

    This applies to HSPs objects with a single fragment as well::

        >>> blast_fragment = blast_hsp[0]
        >>> print(blast_fragment)
              Query: 33211 mir_1
                Hit: gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA mir-520b ...
        Query range: [1:61] (1)
          Hit range: [0:60] (1)
          Fragments: 1 (60 columns)
             Query - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
                     ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
               Hit - CCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG

    Regardless of the search output file format, HSP objects provide the
    properties listed below. These properties always return values in a list,
    due to the HSP object itself being a list-like container. However, for
    HSP objects with a single HSPFragment, shortcut properties that fetches
    the item from the list are also provided.

    +----------------------+---------------------+-----------------------------+
    | Property             | Shortcut            | Value                       |
    +======================+=====================+=============================+
    | aln_all              | aln                 | HSP alignments as           |
    |                      |                     | MultipleSeqAlignment object |
    +----------------------+---------------------+-----------------------------+
    | aln_annotation_all   | aln_annotation      | dictionary of annotation(s) |
    |                      |                     | of all fragments' alignments|
    +----------------------+---------------------+-----------------------------+
    | fragments            | fragment            | HSPFragment objects         |
    +----------------------+---------------------+-----------------------------+
    | hit_all              | hit                 | hit sequence as SeqRecord   |
    |                      |                     | objects                     |
    +----------------------+---------------------+-----------------------------+
    | hit_features_all     | hit_features        | SeqFeatures of all hit      |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | hit_start_all        | hit_start*          | start coordinates of the    |
    |                      |                     | hit fragments               |
    +----------------------+---------------------+-----------------------------+
    | hit_end_all          | hit_end*            | end coordinates of the hit  |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | hit_span_all         | hit_span*           | sizes of each hit fragments |
    +----------------------+---------------------+-----------------------------+
    | hit_strand_all       | hit_strand          | strand orientations of the  |
    |                      |                     | hit fragments               |
    +----------------------+---------------------+-----------------------------+
    | hit_frame_all        | hit_frame           | reading frames of the hit   |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | hit_range_all        | hit_range           | tuples of start and end     |
    |                      |                     | coordinates of each hit     |
    |                      |                     | fragment                    |
    +----------------------+---------------------+-----------------------------+
    | query_all            | query               | query sequence as SeqRecord |
    |                      |                     | object                      |
    +----------------------+---------------------+-----------------------------+
    | query_features_all   | query_features      | SeqFeatures of all query    |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | query_start_all      | query_start*        | start coordinates of the    |
    |                      |                     | query fragments             |
    +----------------------+---------------------+-----------------------------+
    | query_end_all        | query_end*          | end coordinates of the      |
    |                      |                     | query fragments             |
    +----------------------+---------------------+-----------------------------+
    | query_span_all       | query_span*         | sizes of each query         |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | query_strand_all     | query_strand        | strand orientations of the  |
    |                      |                     | query fragments             |
    +----------------------+---------------------+-----------------------------+
    | query_frame_all      | query_frame         | reading frames of the query |
    |                      |                     | fragments                   |
    +----------------------+---------------------+-----------------------------+
    | query_range_all      | query_range         | tuples of start and end     |
    |                      |                     | coordinates of each query   |
    |                      |                     | fragment                    |
    +----------------------+---------------------+-----------------------------+

    For all types of HSP objects, the property will return the values in a list.
    Shortcuts are only applicable for HSPs with one fragment. Except for the
    ones marked with an asterisk, if they are used on an HSP with more than one
    fragment, an exception will be raised.

    For properties that may be used in HSPs with multiple or single fragments
    (``*_start``, ``*_end``, and ``*_span`` properties), their interpretation depends
    on how many fragment the HSP has:

    +------------+---------------------------------------------------+
    | Property   | Value                                             |
    +============+===================================================+
    | hit_start  | smallest coordinate value of all hit fragments    |
    +------------+---------------------------------------------------+
    | hit_end    | largest coordinate value of all hit fragments     |
    +------------+---------------------------------------------------+
    | hit_span   | difference between ``hit_start`` and ``hit_end``  |
    +------------+---------------------------------------------------+
    | query_start| smallest coordinate value of all query fragments  |
    +------------+---------------------------------------------------+
    | query_end  | largest coordinate value of all query fragments   |
    +------------+---------------------------------------------------+
    | query_span | difference between ``query_start`` and            |
    |            | ``query_end``                                     |
    +------------+---------------------------------------------------+

    In addition to the objects listed above, HSP objects also provide the
    following properties:

    +--------------------+------------------------------------------------------+
    | Property           | Value                                                |
    +====================+======================================================+
    | aln_span           | total number of residues in all HSPFragment objects  |
    +--------------------+------------------------------------------------------+
    | alphabet           | alphabet used in hit and query SeqRecord objects     |
    +--------------------+------------------------------------------------------+
    | is_fragmented      | boolean, whether there are multiple fragments or not |
    +--------------------+------------------------------------------------------+
    | hit_id             | ID of the hit sequence                               |
    +--------------------+------------------------------------------------------+
    | hit_description    | description of the hit sequence                      |
    +--------------------+------------------------------------------------------+
    | hit_inter_ranges   | list of hit sequence coordinates of the regions      |
    |                    | between fragments                                    |
    +--------------------+------------------------------------------------------+
    | hit_inter_spans    | list of lengths of the regions between hit fragments |
    +--------------------+------------------------------------------------------+
    | query_id           | ID of the query sequence                             |
    +--------------------+------------------------------------------------------+
    | query_description  | description of the query sequence                    |
    +--------------------+------------------------------------------------------+
    | query_inter_ranges | list of query sequence coordinates of the regions    |
    |                    | between fragments                                    |
    +--------------------+------------------------------------------------------+
    | query_inter_spans  | list of lengths of the regions between query         |
    |                    | fragments                                            |
    +--------------------+------------------------------------------------------+

    Shortcuts marked with ``*`` may be used in HSPs with multiple fragments.

    """
    # attributes we don't want to transfer when creating a new HSP object
    # from this one
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, fragments=()):
        """Initialize an HSP object.

        :param fragments: fragments contained in the HSP object
        :type fragments: iterable yielding HSPFragment

        HSP objects must be initialized with an iterable containing at least
        one HSPFragment object. If multiple HSPFragment objects are used for
        initialization, they must all have the same ``query_id``,
        ``query_description``, ``hit_id``, ``hit_description``, and alphabet
        properties. A ValueError is raised if either condition is not met.

        """
        # Materialize once so that a single-use iterator survives the repeated
        # passes below and the emptiness check also works for it. Falsy
        # arguments (e.g. None) are treated as "no fragments".
        fragments = list(fragments or ())
        if not fragments:
            raise ValueError("HSP objects must have at least one HSPFragment "
                             "object.")
        # check that all fragments contain the same IDs, descriptions, alphabet
        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description', 'alphabet'):
            if len({getattr(frag, attr) for frag in fragments}) != 1:
                raise ValueError("HSP object can not contain fragments with "
                                 "more than one %s." % attr)

        self._items = []
        for fragment in fragments:
            self._validate_fragment(fragment)
            self._items.append(fragment)

    def __repr__(self):
        """Return string representation of the HSP object."""
        # use the runtime class name so subclasses are labelled correctly
        cls_name = self.__class__.__name__
        n_fragments = len(self)
        return "%s(hit_id=%r, query_id=%r, %r fragments)" % (
            cls_name, self.hit_id, self.query_id, n_fragments)

    def __iter__(self):
        """Iterate over the items stored in the HSP."""
        return iter(self._items)

    def __contains__(self, fragment):
        """Return True if fragment is among the items stored in the HSP."""
        return fragment in self._items

    def __len__(self):
        """Return the number of items stored in the HSP."""
        return len(self._items)

    # Truth-value hook looked up by Python 3
    def __bool__(self):
        """Return True when the HSP holds at least one fragment."""
        stored = self._items
        return bool(stored)

    # Python 2 looks for __nonzero__ instead; expose the same function
    __nonzero__ = __bool__

    def __str__(self):
        """Return a human-readable, multi-line summary of the HSP.

        The text starts with the HSP header and a "Quick stats" line. For a
        single-fragment HSP the fragment's alignment text follows; otherwise
        a table with one row per fragment (index, span, query range and hit
        range) is appended.

        """
        # statistics line, common to both layouts
        stats = [
            'evalue ' + getattr_str(self, 'evalue', fmt='%.2g'),
            'bitscore ' + getattr_str(self, 'bitscore', fmt='%.2f'),
        ]
        out = [self._str_hsp_header(), 'Quick stats: ' + '; '.join(stats)]

        # single fragment: show its alignment directly, no table needed
        if len(self.fragments) == 1:
            out.append(self.fragments[0]._str_aln())
            return '\n'.join(out)

        # several fragments: tabulate them
        rule = ('-' * 3, '-' * 14, '-' * 22, '-' * 22)
        row_fmt = '%16s  %14s  %22s  %22s'
        out.append('  Fragments: %s  %s  %s  %s' % rule)
        out.append(row_fmt % ('#', 'Span', 'Query range', 'Hit range'))
        out.append(row_fmt % rule)
        for number, frag in enumerate(self.fragments):
            span = getattr_str(frag, 'aln_span')
            # both range cells are trimmed to the 22-character column width
            q_start = getattr_str(frag, 'query_start')
            q_end = getattr_str(frag, 'query_end')
            q_range = trim_str('[%s:%s]' % (q_start, q_end), 22, '~]')
            h_start = getattr_str(frag, 'hit_start')
            h_end = getattr_str(frag, 'hit_end')
            h_range = trim_str('[%s:%s]' % (h_start, h_end), 22, '~]')
            out.append(row_fmt % (str(number), span, q_range, h_range))

        return '\n'.join(out)

    def __getitem__(self, idx):
        """Return one fragment (integer index) or a new HSP (slice)."""
        if not isinstance(idx, slice):
            return self._items[idx]
        # a slice yields a fresh instance of the same class, which then
        # receives this object's attributes through _transfer_attrs
        sliced = self.__class__(self._items[idx])
        self._transfer_attrs(sliced)
        return sliced

    def __setitem__(self, idx, fragments):
        """Validate ``fragments`` and store it at ``idx``.

        ``fragments`` may be a single fragment or a list/tuple of fragments;
        in the latter case every member is validated before the assignment.

        """
        if isinstance(fragments, (list, tuple)):
            candidates = fragments
        else:
            candidates = (fragments, )
        for candidate in candidates:
            self._validate_fragment(candidate)

        self._items[idx] = fragments

    def __delitem__(self, idx):
        """Remove the fragment(s) at ``idx``.

        No emptiness check is made here, so deleting the last fragment
        leaves an empty HSP, a state that the constructor would reject.

        """
        stored = self._items
        del stored[idx]

    def _validate_fragment(self, fragment):
        """Check that ``fragment`` may be stored in this HSP.

        :param fragment: candidate object
        :raises TypeError: if ``fragment`` is not an HSPFragment
        :raises ValueError: if the HSP already holds fragments and the
            candidate's hit ID, hit description, query ID or query
            description differs from the HSP's own value

        """
        if not isinstance(fragment, HSPFragment):
            raise TypeError("HSP objects can only contain HSPFragment "
                            "objects.")
        # HACK: to make validation during __init__ work -- while the HSP is
        # still empty there is nothing to compare the fragment against
        if self._items:
            # The messages report self.hit_id / self.hit_description: the HSP
            # code visible here defines those properties but no ``id`` or
            # ``description``, so using the latter would turn the intended
            # ValueError into an AttributeError.
            if fragment.hit_id != self.hit_id:
                raise ValueError("Expected HSPFragment with hit ID %r, "
                                 "found %r instead." %
                                 (self.hit_id, fragment.hit_id))

            if fragment.hit_description != self.hit_description:
                raise ValueError("Expected HSPFragment with hit "
                                 "description %r, found %r instead." %
                                 (self.hit_description,
                                  fragment.hit_description))

            if fragment.query_id != self.query_id:
                raise ValueError("Expected HSPFragment with query ID %r, "
                                 "found %r instead." %
                                 (self.query_id, fragment.query_id))

            if fragment.query_description != self.query_description:
                raise ValueError(
                    "Expected HSPFragment with query description %r, "
                    "found %r instead." %
                    (self.query_description, fragment.query_description))

    def _aln_span_get(self):
        """Return the sum of ``aln_span`` over all fragments."""
        total = 0
        for frag in self.fragments:
            total = total + frag.aln_span
        return total

    aln_span = property(
        fget=_aln_span_get,
        doc="""Total number of columns in all HSPFragment objects.""")

    # coordinate properties #
    def _get_coords(self, seq_type, coord_type):
        """Collect one coordinate attribute from every fragment.

        :param seq_type: ``'hit'`` or ``'query'``
        :param coord_type: ``'start'`` or ``'end'``
        :returns: list with the ``<seq_type>_<coord_type>`` value of each
            fragment, in fragment order

        A BiopythonWarning is issued when the list contains ``None``.
        NOTE: despite the wording of that warning, ``None`` values are not
        removed here; the list is returned unchanged.

        """
        assert seq_type in ('hit', 'query')
        assert coord_type in ('start', 'end')
        coord_name = '%s_%s' % (seq_type, coord_type)
        coords = []
        for frag in self.fragments:
            coords.append(getattr(frag, coord_name))
        if None in coords:
            message = "'None' exist in %s coordinates; ignored" % (coord_name)
            warnings.warn(message, BiopythonWarning)
        return coords

    def _hit_start_get(self):
        """Return the minimum of the fragments' hit start coordinates."""
        starts = self._get_coords('hit', 'start')
        return min(starts)

    hit_start = property(
        fget=_hit_start_get,
        doc="""Smallest coordinate value of all hit fragments""")

    def _query_start_get(self):
        """Return the minimum of the fragments' query start coordinates."""
        starts = self._get_coords('query', 'start')
        return min(starts)

    query_start = property(
        fget=_query_start_get,
        doc="""Smallest coordinate value of all query fragments""")

    def _hit_end_get(self):
        """Return the maximum of the fragments' hit end coordinates."""
        ends = self._get_coords('hit', 'end')
        return max(ends)

    hit_end = property(
        fget=_hit_end_get,
        doc="""Largest coordinate value of all hit fragments""")

    def _query_end_get(self):
        """Return the maximum of the fragments' query end coordinates."""
        return max(self._get_coords('query', 'end'))

    # the doc text names the query side, matching the coordinates read above
    query_end = property(
        fget=_query_end_get,
        doc="""Largest coordinate value of all query fragments""")

    # coordinate-dependent properties #
    def _hit_span_get(self):
        """Return ``hit_end - hit_start``, or None if that is not computable."""
        try:
            span = self.hit_end - self.hit_start
        except TypeError:
            # raised when a coordinate is None
            span = None
        return span

    hit_span = property(
        fget=_hit_span_get,
        doc="""The number of hit residues covered by the HSP.""")

    def _query_span_get(self):
        """Return ``query_end - query_start``, or None if not computable."""
        try:
            span = self.query_end - self.query_start
        except TypeError:
            # raised when a coordinate is None
            span = None
        return span

    query_span = property(
        fget=_query_span_get,
        doc="""The number of query residues covered by the HSP.""")

    def _hit_range_get(self):
        """Return the ``(hit_start, hit_end)`` pair of the HSP."""
        start = self.hit_start
        end = self.hit_end
        return (start, end)

    hit_range = property(
        fget=_hit_range_get,
        doc="""Tuple of HSP hit start and end coordinates.""")

    def _query_range_get(self):
        """Return the ``(query_start, query_end)`` pair of the HSP."""
        start = self.query_start
        end = self.query_end
        return (start, end)

    query_range = property(
        fget=_query_range_get,
        doc="""Tuple of HSP query start and end coordinates.""")

    def _inter_ranges_get(self, seq_type):
        """Return the coordinate ranges lying between consecutive fragments.

        :param seq_type: ``'query'`` or ``'hit'``
        :returns: list of ``(start, end)`` tuples, one per adjacent pair of
            fragments, each ordered so that start <= end

        Only the strand of the first fragment is inspected, i.e. mixed
        strands within one hit/query are assumed not to occur.

        """
        assert seq_type in ('query', 'hit')
        strand = getattr(self, '%s_strand_all' % seq_type)[0]
        coords = getattr(self, '%s_range_all' % seq_type)
        # choose which end of each neighbouring fragment bounds the gap:
        # on the minus strand the smaller coordinate of the current fragment
        # and the larger one of the next; otherwise the other way round
        if strand == -1:
            pick_first, pick_second = min, max
        else:
            pick_first, pick_second = max, min
        gaps = []
        for current, following in zip(coords, coords[1:]):
            bound_a = pick_first(current)
            bound_b = pick_second(following)
            gaps.append((min(bound_a, bound_b), max(bound_a, bound_b)))

        return gaps

    def _hit_inter_ranges_get(self):
        """Return the between-fragment ranges on the hit sequence."""
        ranges = self._inter_ranges_get('hit')
        return ranges

    hit_inter_ranges = property(
        fget=_hit_inter_ranges_get,
        doc="""Hit sequence coordinates of the regions between fragments""")

    def _query_inter_ranges_get(self):
        """Return the between-fragment ranges on the query sequence."""
        ranges = self._inter_ranges_get('query')
        return ranges

    query_inter_ranges = property(
        fget=_query_inter_ranges_get,
        doc="""Query sequence coordinates of the regions between fragments""")

    def _inter_spans_get(self, seq_type):
        """Return the length of every between-fragment range.

        :param seq_type: ``'query'`` or ``'hit'``
        :returns: list of ``end - start`` values taken from the matching
            ``<seq_type>_inter_ranges`` property

        """
        assert seq_type in ('query', 'hit')
        ranges = getattr(self, '%s_inter_ranges' % seq_type)
        spans = []
        for rng in ranges:
            spans.append(rng[1] - rng[0])
        return spans

    def _hit_inter_spans_get(self):
        """Return the lengths of the between-fragment regions on the hit."""
        spans = self._inter_spans_get('hit')
        return spans

    hit_inter_spans = property(
        fget=_hit_inter_spans_get,
        doc="""Lengths of regions between hit fragments""")

    def _query_inter_spans_get(self):
        """Return the lengths of the between-fragment regions on the query."""
        spans = self._inter_spans_get('query')
        return spans

    query_inter_spans = property(
        fget=_query_inter_spans_get,
        doc="""Lengths of regions between query fragments""")

    # shortcuts for fragments' properties #

    # Boolean flag: True when len(self) > 1, i.e. the HSP holds more than one
    # fragment. Only a getter is supplied, so the property is read-only.
    is_fragmented = property(
        lambda self: len(self) > 1,
        doc="""Whether the HSP has more than one HSPFragment objects""")

    # First-item properties with setters, declared through the fullcascade
    # helper (defined outside this view). These are the five attributes that
    # __init__ requires to be identical across all fragments.
    # NOTE(review): presumably the setter writes the value to every fragment
    # -- confirm against fullcascade.
    hit_description = fullcascade('hit_description',
                                  doc="""Description of the hit sequence""")

    query_description = fullcascade(
        'query_description', doc="""Description of the query sequence""")

    hit_id = fullcascade('hit_id', doc="""ID of the hit sequence""")

    query_id = fullcascade('query_id', doc="""ID of the query sequence""")

    alphabet = fullcascade(
        'alphabet', doc="""Alphabet used in hit and query SeqRecord objects""")

    # Properties for single-fragment HSPs, declared through the singleitem
    # helper (defined outside this view). Per their doc strings each one
    # exposes the named attribute of the first fragment; called without an
    # attribute name (``fragment``) it is documented as the fragment itself.
    # NOTE(review): behavior on HSPs with several fragments cannot be seen
    # from here -- confirm against singleitem.
    fragment = singleitem(doc="""HSPFragment object, first fragment""")

    hit = singleitem(
        'hit', doc="""Hit sequence as a SeqRecord object, first fragment""")

    query = singleitem(
        'query',
        doc="""Query sequence as a SeqRecord object, first fragment""")

    aln = singleitem(
        'aln',
        doc=
        """Alignment of the first fragment as a MultipleSeqAlignment object""")

    aln_annotation = singleitem(
        'aln_annotation',
        doc="""Dictionary of annotation(s) of the first fragment's alignment"""
    )

    hit_features = singleitem('hit_features',
                              doc="""Hit sequence features, first fragment""")

    query_features = singleitem(
        'query_features', doc="""Query sequence features, first fragment""")

    hit_strand = singleitem('hit_strand',
                            doc="""Hit strand orientation, first fragment""")

    query_strand = singleitem(
        'query_strand', doc="""Query strand orientation, first fragment""")

    hit_frame = singleitem(
        'hit_frame', doc="""Hit sequence reading frame, first fragment""")

    query_frame = singleitem(
        'query_frame', doc="""Query sequence reading frame, first fragment""")

    # Properties for multi-fragment HSPs, declared through the allitems
    # helper (defined outside this view). Each ``*_all`` name mirrors one of
    # the single-fragment properties; per the doc strings it collects the
    # named attribute from every fragment. Called without an attribute name
    # (``fragments``) it is documented as the list of fragments themselves.
    # Several methods of this class rely on these: ``fragments`` (__str__,
    # _get_coords), ``*_strand_all`` and ``*_range_all`` (_inter_ranges_get).
    # NOTE(review): the doc of ``aln_annotation_all`` says "Dictionary" while
    # its siblings say "List" -- confirm which one allitems really returns.
    fragments = allitems(doc="""List of all HSPFragment objects""")

    hit_all = allitems(
        'hit',
        doc="""List of all fragments' hit sequences as SeqRecord objects""")

    query_all = allitems(
        'query',
        doc="""List of all fragments' query sequences as SeqRecord objects""")

    aln_all = allitems(
        'aln',
        doc=
        """List of all fragments' alignments as MultipleSeqAlignment objects"""
    )

    aln_annotation_all = allitems(
        'aln_annotation',
        doc="""Dictionary of annotation(s) of all fragments' alignments""")

    hit_features_all = allitems('hit_features',
                                doc="""List of all hit sequence features""")

    query_features_all = allitems(
        'query_features', doc="""List of all query sequence features""")

    hit_strand_all = allitems(
        'hit_strand', doc="""List of all fragments' hit sequence strands""")

    query_strand_all = allitems(
        'query_strand',
        doc="""List of all fragments' query sequence strands""")

    hit_frame_all = allitems(
        'hit_frame',
        doc="""List of all fragments' hit sequence reading frames""")

    query_frame_all = allitems(
        'query_frame',
        doc="""List of all fragments' query sequence reading frames""")

    hit_start_all = allitems(
        'hit_start', doc="""List of all fragments' hit start coordinates""")

    query_start_all = allitems(
        'query_start',
        doc="""List of all fragments' query start coordinates""")

    hit_end_all = allitems(
        'hit_end', doc="""List of all fragments' hit end coordinates""")

    query_end_all = allitems(
        'query_end', doc="""List of all fragments' query end coordinates""")

    hit_span_all = allitems('hit_span',
                            doc="""List of all fragments' hit sequence size""")

    query_span_all = allitems(
        'query_span', doc="""List of all fragments' query sequence size""")

    hit_range_all = allitems(
        'hit_range',
        doc="""List of all fragments' hit start and end coordinates""")

    query_range_all = allitems(
        'query_range',
        doc="""List of all fragments' query start and end coordinates""")
# Example #3
# 0
class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke `print` on it:

    >>> from Bio import SearchIO
    >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
    >>> hit = qresult[3]
    >>> print hit
    Query: 33211
           mir_1
      Hit: gi|301171322|ref|NR_035857.1| (86)
           Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ----  --------  ---------  ------  ---------------  ---------------------
              #   E-value  Bit score    Span      Query range              Hit range
           ----  --------  ---------  ------  ---------------  ---------------------
              0   8.9e-20     100.47      60           [1:61]                [13:73]
              1   3.3e-06      55.39      60           [0:60]                [13:73]

    You can invoke `len` on a Hit object to see how many HSP objects it contains:

    >>> len(hit)
    2

    Hit objects behave very similarly to Python lists. You can retrieve the
    HSP object inside a Hit using the HSP's integer index. Hit objects can
    also be sliced, which will return a new Hit object containing only the
    sliced HSPs:

    # HSP items inside the Hit can be retrieved using its integer index
    >>> hit[0]
    HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

    # slicing returns a new Hit
    >>> hit
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
    >>> hit[:1]
    Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
    >>> print hit[1:]
    Query: 33211
           mir_1
      Hit: gi|301171322|ref|NR_035857.1| (86)
           Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ----  --------  ---------  ------  ---------------  ---------------------
              #   E-value  Bit score    Span      Query range              Hit range
           ----  --------  ---------  ------  ---------------  ---------------------
              0   3.3e-06      55.39      60           [0:60]                [13:73]

    Hit objects provide `filter` and `map` methods, which are analogous to
    Python's built-in `filter` and `map` except that they return a new Hit
    object instead of a list.

    Here is an example of using `filter` to select for HSPs whose e-value is
    less than 1e-10:

    >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
    >>> filtered_hit = hit.filter(evalue_filter)
    >>> len(hit)
    2
    >>> len(filtered_hit)
    1
    >>> print filtered_hit
    Query: 33211
           mir_1
      Hit: gi|301171322|ref|NR_035857.1| (86)
           Pan troglodytes microRNA mir-520c (MIR520C), microRNA
     HSPs: ----  --------  ---------  ------  ---------------  ---------------------
              #   E-value  Bit score    Span      Query range              Hit range
           ----  --------  ---------  ------  ---------------  ---------------------
              0   8.9e-20     100.47      60           [1:61]                [13:73]

    There are also other methods which are counterparts of Python lists' methods
    with the same names: `append`, `index`, `pop`, and `sort`. Consult their
    respective documentations for more details and examples of their usage.

    """

    # Names of instance attributes that should not be carried over when a new
    # Hit object is derived from this one; '_items' is listed because a
    # derived Hit is constructed with its own HSP list (see __getitem__,
    # filter, map and sort).
    # NOTE(review): presumably consulted by _transfer_attrs, which is defined
    # outside this view -- confirm.
    _NON_STICKY_ATTRS = ('_items', )

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initializes a Hit object.

        Arguments:
        hsps -- Iterable yielding HSP objects (may be empty).
        id -- String of the Hit ID
        query_id -- String of the Hit's query ID

        If multiple HSP objects are used for initialization, they must all
        have the same `query_id`, `query_description`, `hit_id`, and
        `hit_description` properties; a ValueError is raised otherwise.
        A TypeError is raised (by `append`) for items that are not HSP
        objects.
        """
        # default attribute values
        self._id = id
        self._query_id = query_id
        self._description = None
        self._query_description = None

        # The input is walked once per checked attribute and once more when
        # the HSPs are stored; materialize it so that one-shot iterables
        # (generators, Python 3 filter/map objects) are not exhausted after
        # the first pass, which would silently produce an empty Hit.
        hsps = list(hsps)

        for attr in ('query_id', 'query_description', 'hit_id',
                     'hit_description'):
            # HACK: setting the if clause to '> 1' allows for empty hit objects.
            # This makes it easier to work with file formats with unpredictable
            # hit-hsp ordering. The empty hit object itself is nonfunctional,
            # however, since all its cascading properties are empty.
            if len(set(getattr(hsp, attr) for hsp in hsps)) > 1:
                raise ValueError("Hit object can not contain HSPs with "
                                 "more than one %s." % attr)

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it, so no separate
            # validation call is needed here
            self.append(hsp)

    def __repr__(self):
        """Return a one-line summary: hit ID, query ID and HSP count."""
        summary = (self.id, self.query_id, len(self))
        return "Hit(id=%r, query_id=%r, %r hsps)" % summary

    def __iter__(self):
        """Iterate over the HSP objects exposed by the `hsps` property."""
        contained = self.hsps
        return iter(contained)

    def __len__(self):
        """Return the number of HSP objects exposed by `hsps`."""
        contained = self.hsps
        return len(contained)

    # Python 3:
    def __bool__(self):
        """Return True when the Hit contains at least one HSP."""
        return bool(self.hsps)

    # Python 2 looks up __nonzero__ instead; keep it as an alias so that
    # existing callers and both interpreter lines share one implementation
    __nonzero__ = __bool__

    def __contains__(self, hsp):
        """Return True if `hsp` is among the stored HSP objects."""
        stored = self._items
        return hsp in stored

    def __str__(self):
        """Return a human-readable, multi-line summary of the Hit.

        The text consists of the query ID (plus description, if any), the hit
        ID (plus sequence length and description, if available) and a table
        with one row per HSP: index, e-value, bit score, alignment span,
        query range and hit range. An empty Hit shows ' HSPs: ?' instead of
        the table.

        """
        # query ID, optionally followed by its trimmed description
        query_part = 'Query: %s' % self.query_id
        if self.query_description:
            query_part += trim_str('\n       %s' % self.query_description, 80,
                                   '...')

        # hit ID, optionally followed by sequence length and description
        hit_part = '  Hit: %s' % self.id
        if hasattr(self, 'seq_len'):
            hit_part += ' (%i)' % self.seq_len
        if self.description:
            hit_part += trim_str('\n       %s' % self.description, 80, '...')

        out = [query_part, hit_part]

        # nothing to tabulate for an empty Hit
        if not self.hsps:
            out.append(' HSPs: ?')
            return '\n'.join(out)

        rule = ('-' * 4, '-' * 8, '-' * 9, '-' * 6, '-' * 15, '-' * 21)
        row_fmt = '%11s  %8s  %9s  %6s  %15s  %21s'
        out.append(' HSPs: %s  %s  %s  %s  %s  %s' % rule)
        out.append(row_fmt % ('#', 'E-value', 'Bit score', 'Span',
                              'Query range', 'Hit range'))
        out.append(row_fmt % rule)
        for number, hsp in enumerate(self.hsps):
            evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
            bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
            span = getattr_str(hsp, 'aln_span')
            # query range cell, trimmed to its 15-character column
            q_start = getattr_str(hsp, 'query_start')
            q_end = getattr_str(hsp, 'query_end')
            q_range = trim_str('[%s:%s]' % (q_start, q_end), 15, '~]')
            # hit range cell, trimmed to its 21-character column
            h_start = getattr_str(hsp, 'hit_start')
            h_end = getattr_str(hsp, 'hit_end')
            h_range = trim_str('[%s:%s]' % (h_start, h_end), 21, '~]')
            out.append(row_fmt % (str(number), evalue, bitscore, span,
                                  q_range, h_range))

        return '\n'.join(out)

    def __getitem__(self, idx):
        """Return one HSP (integer index) or a new Hit (slice)."""
        if not isinstance(idx, slice):
            return self._items[idx]
        # a slice yields a fresh instance of the same class, which then
        # receives this object's attributes through _transfer_attrs
        sliced = self.__class__(self.hsps[idx])
        self._transfer_attrs(sliced)
        return sliced

    def __setitem__(self, idx, hsps):
        """Validate `hsps` and store it at `idx`.

        `hsps` may be a single HSP or a list/tuple of HSPs; in the latter
        case every member is validated before the assignment.

        """
        if isinstance(hsps, (list, tuple)):
            candidates = hsps
        else:
            candidates = (hsps, )
        for candidate in candidates:
            self._validate_hsp(candidate)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Remove the HSP(s) at `idx` from the Hit."""
        stored = self._items
        del stored[idx]

    ## hsp properties ##
    def _validate_hsp(self, hsp):
        """Validates an HSP object.

        Arguments:
        hsp -- Candidate object to check.

        A TypeError is raised if `hsp` is not an HSP instance. Once the Hit
        already holds at least one HSP, the candidate's hit ID, hit
        description, query ID and query description are each compared with
        the Hit's own value: a Hit value of None is filled in from the
        candidate, any other mismatch raises a ValueError.

        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work, nothing is compared
        # while the Hit is still empty
        if not self._items:
            return

        # (attribute on the Hit, attribute on the HSP, mismatch message)
        checks = (
            ('id', 'hit_id',
             "Expected HSP with hit ID %r, found %r instead."),
            ('description', 'hit_description',
             "Expected HSP with hit description %r, found %r instead."),
            ('query_id', 'query_id',
             "Expected HSP with query ID %r, found %r instead."),
            ('query_description', 'query_description',
             "Expected HSP with query description %r, found %r instead."),
        )
        for own_name, hsp_name, template in checks:
            expected = getattr(self, own_name)
            found = getattr(hsp, hsp_name)
            if expected is None:
                # the Hit has no value yet: adopt the candidate's
                setattr(self, own_name, found)
            elif found != expected:
                raise ValueError(template % (expected, found))

    ## properties ##
    # The four ID/description properties are declared through the
    # optionalcascade helper (defined outside this view). Each call names the
    # private backing attribute initialized in __init__, the corresponding
    # attribute on the contained HSP objects, and the doc text.
    # NOTE(review): presumably the value is kept in sync between the Hit and
    # its HSPs -- confirm against optionalcascade.
    description = optionalcascade('_description', 'hit_description',
                                  """Hit description""")
    query_description = optionalcascade(
        '_query_description', 'query_description',
        """Description of the query that produced the hit""")
    id = optionalcascade('_id', 'hit_id', """Hit ID string.""")
    query_id = optionalcascade(
        '_query_id', 'query_id',
        """ID string of the query that produced the hit""")
    # returns all hsps; used by __iter__, __len__, __str__, filter, map, sort
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def fragments(self):
        """HSPFragment objects contained in the Hit"""
        # flatten: every stored HSP is iterated and its items are concatenated
        return list(chain.from_iterable(self._items))

    ## public methods ##
    def append(self, hsp):
        """Adds a HSP object to the end of Hit.

        Arguments:
        hsp -- HSP object to append.

        The HSP is validated first (see `_validate_hsp`): it must be an HSP
        instance and, if the Hit is not empty, its `hit_id` and `query_id`
        must match the Hit's `id` and `query_id` (likewise for the
        descriptions).

        """
        self._validate_hsp(hsp)
        stored = self._items
        stored.append(hsp)

    def filter(self, func=None):
        """Creates a new Hit object whose HSP objects pass the filter
        function.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter,
                does a boolean check, and returns True or False. If None,
                the HSPs are tested for truth themselves, as with Python's
                built-in `filter`.

        Returns a new Hit object, or None if no HSP passes the filter.

        `filter` is analogous to Python's built-in `filter` function, except
        that instead of returning a list it returns a `Hit` object. Here is an
        example of using `filter` to select for HSPs having bitscores bigger
        than 60:

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
        >>> filtered_hit = hit.filter(bitscore_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   8.9e-20     100.47      60           [1:61]                [13:73]

        """
        # Materialize the result: on Python 3 the built-in returns a lazy
        # iterator, which is always truthy (so the emptiness test below would
        # never fail) and can be traversed only once (so the new Hit would
        # find it exhausted after its first validation pass).
        hsps = list(filter(func, self.hsps))
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj

    def index(self, hsp):
        """Returns the zero-based position of a given HSP object.

        Arguments:
        hsp -- HSP object to be looked up.

        The lookup is delegated to the `index` method of the internal list.

        """
        stored = self._items
        return stored.index(hsp)

    def map(self, func=None):
        """Creates a new Hit object, mapping the given function to its HSPs.

        Arguments:
        func -- Callback function that accepts a HSP object as its parameter and
                also returns a HSP object. If None, the HSPs are used as they
                are.

        Returns a new Hit object, or None if the Hit contains no HSP.

        `map` is analogous to Python's built-in `map` function. It is applied to
        all HSPs contained in the Hit object and returns a new Hit object.

        """
        # work on a shallow copy of the HSP list
        hsps = self.hsps[:]
        if func is not None:
            # Build a real list: on Python 3 the built-in map returns a lazy
            # iterator, which is always truthy and can be traversed only
            # once, so the new Hit would end up empty.
            hsps = [func(hsp) for hsp in hsps]
        if hsps:
            obj = self.__class__(hsps)
            self._transfer_attrs(obj)
            return obj

    def pop(self, index=-1):
        """Removes the HSP object at the given position and returns it.

        Arguments:
        index -- Integer denoting the index of the HSP object to remove;
                 defaults to the last one.

        """
        stored = self._items
        removed = stored.pop(index)
        return removed

    def sort(self, key=None, reverse=False, in_place=True):
        """Sorts the HSP objects.

        Arguments:
        key -- Function used to sort the HSP objects.
        reverse -- Boolean, whether to reverse the sorting or not.
        in_place -- Boolean, whether to perform sorting in place (in the same
                    object) or not (creating a new object).

        `sort` defaults to sorting in place, to mimic Python's `list.sort`
        method; nothing is returned in that case. If you set the `in_place`
        argument to False, a new, sorted Hit object is returned and the
        initial one is kept unsorted.

        """
        if not in_place:
            # sort a copy and wrap it in a new object of the same class
            ordered = sorted(self.hsps, key=key, reverse=reverse)
            clone = self.__class__(ordered)
            self._transfer_attrs(clone)
            return clone
        self._items.sort(key=key, reverse=reverse)