Exemplo n.º 1
0
class HSPFragment(_BaseHSP):
    """Class representing a contiguous alignment of hit-query sequence.

    HSPFragment forms the core of any parsed search output file. Depending on
    the search output file format, it may contain the actual query and/or hit
    sequences that produce the search hits. These sequences are stored as
    SeqRecord objects (see SeqRecord):

    >>> from Bio import SearchIO
    >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
    >>> fragment = qresult[0][0][0]   # first hit, first hsp, first fragment
    >>> print(fragment)
          Query: 33211 mir_1
            Hit: gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 520b (MIR520...
    Query range: [0:61] (1)
      Hit range: [0:61] (1)
      Fragments: 1 (61 columns)
         Query - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
                 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
           Hit - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG

    # the query sequence is a SeqRecord object
    >>> fragment.query.__class__
    <class 'Bio.SeqRecord.SeqRecord'>
    >>> print(fragment.query)
    ID: 33211
    Name: aligned query sequence
    Description: mir_1
    Number of features: 0
    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG', DNAAlphabet())

    # the hit sequence is a SeqRecord object as well
    >>> fragment.hit.__class__
    <class 'Bio.SeqRecord.SeqRecord'>
    >>> print(fragment.hit)
    ID: gi|262205317|ref|NR_030195.1|
    Name: aligned hit sequence
    Description: Homo sapiens microRNA 520b (MIR520B), microRNA
    Number of features: 0
    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG', DNAAlphabet())

    # when both query and hit are present, we get a MultipleSeqAlignment object
    >>> fragment.aln.__class__
    <class 'Bio.Align.MultipleSeqAlignment'>
    >>> print(fragment.aln)
    DNAAlphabet() alignment with 2 rows and 61 columns
    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG 33211
    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG gi|262205317|ref|NR_030195.1|

    """

    def __init__(self,
                 hit_id='<unknown id>',
                 query_id='<unknown id>',
                 hit=None,
                 query=None,
                 alphabet=single_letter_alphabet):
        """Initialize the fragment.

        :param hit_id: hit sequence ID
        :type hit_id: string
        :param query_id: query sequence ID
        :type query_id: string
        :param hit: hit sequence, stored as a SeqRecord
        :type hit: string, SeqRecord, or None
        :param query: query sequence, stored as a SeqRecord
        :type query: string, SeqRecord, or None
        :param alphabet: alphabet applied to the stored sequences

        """
        self._alphabet = alphabet
        self.aln_annotation = {}

        self._hit_id = hit_id
        self._query_id = query_id

        # Pair each sequence type with its argument explicitly instead of
        # looking the argument up by name with eval(). The query is still
        # processed before the hit.
        for seq_type, seq in (('query', query), ('hit', hit)):
            # query or hit attributes with non-None defaults
            setattr(self, '_%s_description' % seq_type,
                    '<unknown description>')
            setattr(self, '_%s_features' % seq_type, [])
            # query or hit attributes whose default value is None; these go
            # through the validating property setters
            for attr in ('strand', 'frame', 'start', 'end'):
                setattr(self, '%s_%s' % (seq_type, attr), None)
            # self.query or self.hit; any falsy value is stored as None
            setattr(self, seq_type, seq if seq else None)

    def __repr__(self):
        """Return hit ID, query ID and, when known, the column count."""
        info = "hit_id=%r, query_id=%r" % (self.hit_id, self.query_id)
        try:
            info += ", %i columns" % len(self)
        except AttributeError:
            # aln_span was never set and there is no sequence to derive it
            # from, so the column count is simply left out
            pass
        return "%s(%s)" % (self.__class__.__name__, info)

    def __len__(self):
        """Return the alignment span (number of alignment columns)."""
        return self.aln_span

    def __str__(self):
        """Return the HSP header followed by the alignment display."""
        # _str_hsp_header is not defined in this class; presumably it is
        # inherited from _BaseHSP -- confirm
        return self._str_hsp_header() + '\n' + self._str_aln()

    def __getitem__(self, idx):
        """Return a new fragment restricted to the columns selected by idx.

        The sequences (with their features) and the alignment annotations
        are sliced. IDs, descriptions, strands, frames and the alphabet are
        copied unchanged; start and end coordinates are not transferred.

        :raises TypeError: if the fragment has neither a query nor a hit
            sequence

        """
        if self.aln is None:
            raise TypeError("Slicing for HSP objects without "
                            "alignment is not supported.")

        obj = self.__class__(hit_id=self.hit_id,
                             query_id=self.query_id,
                             alphabet=self.alphabet)
        # transfer query and hit attributes
        # let SeqRecord handle feature slicing, then retrieve the sliced
        # features into the sliced HSPFragment
        if self.query is not None:
            obj.query = self.query[idx]
            obj.query_features = obj.query.features
        if self.hit is not None:
            obj.hit = self.hit[idx]
            obj.hit_features = obj.hit.features
        # description, strand, frame
        for attr in ('description', 'strand', 'frame'):
            for seq_type in ('hit', 'query'):
                attr_name = '%s_%s' % (seq_type, attr)
                setattr(obj, attr_name, getattr(self, attr_name))
        # alignment annotation should be transferred, since we can compute
        # the resulting annotation
        obj.aln_annotation = {}
        for key, value in self.aln_annotation.items():
            sliced = value[idx]
            assert len(sliced) == len(obj)
            obj.aln_annotation[key] = sliced
        return obj

    def _str_aln(self):
        """Return the alignment part of the string representation (PRIVATE).

        The first line reports the column count. When both sequences are
        present they are appended, shortened in the middle if the alignment
        spans more than 67 columns.

        """
        lines = []
        # alignment length
        # getattr_str is defined elsewhere; presumably it returns a
        # placeholder string when the attribute is unavailable -- confirm
        aln_span = getattr_str(self, 'aln_span')
        lines.append('  Fragments: 1 (%s columns)' % aln_span)
        # sequences
        if self.query is not None and self.hit is not None:
            # The check above already rules out None; the fallbacks below
            # only matter for an object that lacks a `seq` attribute.
            try:
                qseq = str(self.query.seq)
            except AttributeError:
                qseq = '?'
            try:
                hseq = str(self.hit.seq)
            except AttributeError:
                hseq = '?'

            # similarity line, shown only when it is stored as a string
            simil = self.aln_annotation.get('similarity')
            if not isinstance(simil, basestring):
                simil = ''

            if self.aln_span <= 67:
                lines.append("%10s - %s" % ('Query', qseq))
                if simil:
                    lines.append("             %s" % simil)
                lines.append("%10s - %s" % ('Hit', hseq))
            else:
                # adjust continuation character length, so we don't display
                # the same residues twice
                cont = '~' * min(3, self.aln_span - 66)
                lines.append("%10s - %s%s%s" %
                             ('Query', qseq[:59], cont, qseq[-5:]))
                if simil:
                    lines.append("             %s%s%s" %
                                 (simil[:59], cont, simil[-5:]))
                lines.append("%10s - %s%s%s" %
                             ('Hit', hseq[:59], cont, hseq[-5:]))

        return '\n'.join(lines)

    # sequence properties #
    def _set_seq(self, seq, seq_type):
        """Check and prepare the given sequence for storage (PRIVATE).

        A string is wrapped in a new SeqRecord. A SeqRecord is modified in
        place: its id, name, description, features and alphabet are
        overwritten with the fragment's values.

        :param seq: sequence to check
        :type seq: string or SeqRecord
        :param seq_type: sequence type
        :type seq_type: string, choice of 'hit' or 'query'
        :returns: the SeqRecord to store, or None if seq is None
        :raises TypeError: if seq is neither a string nor a SeqRecord
        :raises ValueError: if the length of seq differs from the length of
            the opposite sequence already stored

        """
        assert seq_type in ('hit', 'query')
        if seq is None:
            return seq  # return immediately if seq is None
        if not isinstance(seq, (basestring, SeqRecord)):
            raise TypeError("%s sequence must be a string or a SeqRecord"
                            " object." % seq_type)
        # check length if the opposite sequence is not None
        opp_type = 'hit' if seq_type == 'query' else 'query'
        opp_seq = getattr(self, '_%s' % opp_type, None)
        if opp_seq is not None:
            if len(seq) != len(opp_seq):
                raise ValueError("Sequence lengths do not match. Expected: "
                                 "%r (%s); found: %r (%s)." %
                                 (len(opp_seq), opp_type, len(seq), seq_type))

        seq_id = getattr(self, '%s_id' % seq_type)
        seq_desc = getattr(self, '%s_description' % seq_type)
        seq_feats = getattr(self, '%s_features' % seq_type)
        seq_name = 'aligned %s sequence' % seq_type

        if isinstance(seq, SeqRecord):
            seq.id = seq_id
            seq.description = seq_desc
            seq.name = seq_name
            seq.features = seq_feats
            seq.seq.alphabet = self.alphabet
        elif isinstance(seq, basestring):
            seq = SeqRecord(Seq(seq, self.alphabet),
                            id=seq_id,
                            name=seq_name,
                            description=seq_desc,
                            features=seq_feats)

        return seq

    def _hit_get(self):
        """Get the hit sequence record (PRIVATE)."""
        return self._hit

    def _hit_set(self, value):
        """Validate and store the hit sequence (PRIVATE)."""
        self._hit = self._set_seq(value, 'hit')

    hit = property(
        fget=_hit_get,
        fset=_hit_set,
        doc="""Hit sequence as a SeqRecord object, defaults to None""")

    def _query_get(self):
        """Get the query sequence record (PRIVATE)."""
        return self._query

    def _query_set(self, value):
        """Validate and store the query sequence (PRIVATE)."""
        self._query = self._set_seq(value, 'query')

    query = property(
        fget=_query_get,
        fset=_query_set,
        doc="""Query sequence as a SeqRecord object, defaults to None""")

    def _aln_get(self):
        """Build the alignment from the stored sequences (PRIVATE).

        A new MultipleSeqAlignment is created on every access; None is
        returned when neither sequence is present.

        """
        if self.query is None and self.hit is None:
            return None
        elif self.hit is None:
            return MultipleSeqAlignment([self.query], self.alphabet)
        elif self.query is None:
            return MultipleSeqAlignment([self.hit], self.alphabet)
        else:
            return MultipleSeqAlignment([self.query, self.hit], self.alphabet)

    aln = property(fget=_aln_get,
                   doc="Query-hit alignment as a MultipleSeqAlignment "
                       "object, defaults to None")

    def _alphabet_get(self):
        """Get the alphabet of the fragment (PRIVATE)."""
        return self._alphabet

    def _alphabet_set(self, value):
        """Set the alphabet on the fragment and its sequences (PRIVATE)."""
        self._alphabet = value
        # a sequence that is None (or not set yet) has no `seq` attribute;
        # in that case there is nothing to update
        try:
            self.query.seq.alphabet = value
        except AttributeError:
            pass
        try:
            self.hit.seq.alphabet = value
        except AttributeError:
            pass

    alphabet = property(
        fget=_alphabet_get,
        fset=_alphabet_set,
        doc="""Alphabet object used in the fragment's sequences and alignment,
            defaults to single_letter_alphabet""")

    def _aln_span_get(self):
        """Get the alignment span, computing it if necessary (PRIVATE).

        :raises AttributeError: if the span was never set and neither the
            query nor the hit sequence is available to compute it from

        """
        # length of alignment (gaps included)
        # alignment span can be its own attribute, or computed from
        # query / hit length
        if not hasattr(self, '_aln_span'):
            if self.query is not None:
                self._aln_span = len(self.query)
            elif self.hit is not None:
                self._aln_span = len(self.hit)

        return self._aln_span

    def _aln_span_set(self, value):
        """Set the alignment span explicitly (PRIVATE)."""
        self._aln_span = value

    aln_span = property(
        fget=_aln_span_get,
        fset=_aln_span_set,
        doc="""The number of alignment columns covered by the fragment""")

    # id, description, and features properties #
    # fragcascade is defined elsewhere; presumably it builds a property that
    # keeps the fragment attribute and the matching SeqRecord attribute in
    # sync -- confirm against its definition
    hit_description = fragcascade('description',
                                  'hit',
                                  doc="""Hit sequence description""")

    query_description = fragcascade('description',
                                    'query',
                                    doc="""Query sequence description""")

    hit_id = fragcascade('id', 'hit', doc="""Hit sequence ID""")

    query_id = fragcascade('id', 'query', doc="""Query sequence ID""")

    hit_features = fragcascade('features',
                               'hit',
                               doc="""Hit sequence features""")

    query_features = fragcascade('features',
                                 'query',
                                 doc="""Query sequence features""")

    # strand properties #
    def _prep_strand(self, strand):
        """Validate a strand value and return it unchanged (PRIVATE).

        :raises ValueError: if strand is not -1, 0, 1, or None

        """
        # follow SeqFeature's convention
        if strand not in (-1, 0, 1, None):
            raise ValueError("Strand should be -1, 0, 1, or None; not %r" %
                             strand)
        return strand

    def _get_strand(self, seq_type):
        """Return the strand, deriving it from the frame if unset (PRIVATE).

        :param seq_type: sequence type
        :type seq_type: string, choice of 'hit' or 'query'

        """
        assert seq_type in ('hit', 'query')
        strand = getattr(self, '_%s_strand' % seq_type)

        if strand is None:
            # try to compute strand from frame
            frame = getattr(self, '%s_frame' % seq_type)
            if frame is not None:
                try:
                    # sign of the frame: -1 or 1
                    strand = frame // abs(frame)
                except ZeroDivisionError:
                    # frame 0 maps to strand 0
                    strand = 0
                # store the derived value through the validating setter
                setattr(self, '%s_strand' % seq_type, strand)

        return strand

    def _hit_strand_get(self):
        """Get the hit sequence strand (PRIVATE)."""
        return self._get_strand('hit')

    def _hit_strand_set(self, value):
        """Set the hit sequence strand (PRIVATE)."""
        self._hit_strand = self._prep_strand(value)

    hit_strand = property(fget=_hit_strand_get,
                          fset=_hit_strand_set,
                          doc="""Hit sequence strand, defaults to None""")

    def _query_strand_get(self):
        """Get the query sequence strand (PRIVATE)."""
        return self._get_strand('query')

    def _query_strand_set(self, value):
        """Set the query sequence strand (PRIVATE)."""
        self._query_strand = self._prep_strand(value)

    query_strand = property(fget=_query_strand_get,
                            fset=_query_strand_set,
                            doc="""Query sequence strand, defaults to None""")

    # frame properties #
    def _prep_frame(self, frame):
        """Validate a reading frame and return it unchanged (PRIVATE).

        :raises ValueError: if frame is not an integer between -3 and 3,
            or None

        """
        if frame not in (-3, -2, -1, 0, 1, 2, 3, None):
            # the message used to say "Strand", which misreported the
            # attribute being validated
            raise ValueError("Frame should be an integer between -3 and 3, "
                             "or None; not %r" % frame)
        return frame

    def _hit_frame_get(self):
        """Get the hit sequence reading frame (PRIVATE)."""
        return self._hit_frame

    def _hit_frame_set(self, value):
        """Set the hit sequence reading frame (PRIVATE)."""
        self._hit_frame = self._prep_frame(value)

    hit_frame = property(
        fget=_hit_frame_get,
        fset=_hit_frame_set,
        doc="""Hit sequence reading frame, defaults to None""")

    def _query_frame_get(self):
        """Get the query sequence reading frame (PRIVATE)."""
        return self._query_frame

    def _query_frame_set(self, value):
        """Set the query sequence reading frame (PRIVATE)."""
        self._query_frame = self._prep_frame(value)

    query_frame = property(
        fget=_query_frame_get,
        fset=_query_frame_set,
        doc="""Query sequence reading frame, defaults to None""")

    # coordinate properties #
    def _prep_coord(self, coord, opp_coord_name, op):
        """Validate a coordinate against its opposite coordinate (PRIVATE).

        :param coord: coordinate value to check
        :type coord: int or None
        :param opp_coord_name: attribute name of the opposite coordinate
        :type opp_coord_name: string
        :param op: comparison called as op(coord, opposite coordinate); the
            setters pass ``le`` for start and ``ge`` for end coordinates
        :returns: coord, unchanged

        The checks are assertions, so they raise AssertionError and are
        skipped when Python runs with -O.

        """
        # coord must either be None or int
        if coord is None:
            return coord
        assert isinstance(coord, int)
        # try to get opposite coordinate, if it's not present, return
        try:
            opp_coord = getattr(self, opp_coord_name)
        except AttributeError:
            return coord
        # if opposite coordinate is None, return
        if opp_coord is None:
            return coord
        # otherwise compare it to coord ('>=' or '<=')
        assert op(coord, opp_coord)
        return coord

    def _hit_start_get(self):
        """Get the hit sequence start coordinate (PRIVATE)."""
        return self._hit_start

    def _hit_start_set(self, value):
        """Set the hit sequence start coordinate (PRIVATE)."""
        self._hit_start = self._prep_coord(value, 'hit_end', le)

    hit_start = property(
        fget=_hit_start_get,
        fset=_hit_start_set,
        doc="""Hit sequence start coordinate, defaults to None""")

    def _query_start_get(self):
        """Get the query sequence start coordinate (PRIVATE)."""
        return self._query_start

    def _query_start_set(self, value):
        """Set the query sequence start coordinate (PRIVATE)."""
        self._query_start = self._prep_coord(value, 'query_end', le)

    query_start = property(
        fget=_query_start_get,
        fset=_query_start_set,
        doc="""Query sequence start coordinate, defaults to None""")

    def _hit_end_get(self):
        """Get the hit sequence end coordinate (PRIVATE)."""
        return self._hit_end

    def _hit_end_set(self, value):
        """Set the hit sequence end coordinate (PRIVATE)."""
        self._hit_end = self._prep_coord(value, 'hit_start', ge)

    hit_end = property(
        fget=_hit_end_get,
        fset=_hit_end_set,
        doc="""Hit sequence end coordinate, defaults to None""")

    def _query_end_get(self):
        """Get the query sequence end coordinate (PRIVATE)."""
        return self._query_end

    def _query_end_set(self, value):
        """Set the query sequence end coordinate (PRIVATE)."""
        self._query_end = self._prep_coord(value, 'query_start', ge)

    query_end = property(
        fget=_query_end_get,
        fset=_query_end_set,
        doc="""Query sequence end coordinate, defaults to None""")

    # coordinate-dependent properties #
    def _hit_span_get(self):
        """Return hit end minus hit start, or None if unavailable (PRIVATE)."""
        try:
            return self.hit_end - self.hit_start
        except TypeError:  # triggered if any of the coordinates are None
            return None

    hit_span = property(
        fget=_hit_span_get,
        doc="""The number of residues covered by the hit sequence""")

    def _query_span_get(self):
        """Return query end minus query start, or None (PRIVATE)."""
        try:
            return self.query_end - self.query_start
        except TypeError:  # triggered if any of the coordinates are None
            return None

    query_span = property(
        fget=_query_span_get,
        doc="""The number of residues covered by the query sequence""")

    def _hit_range_get(self):
        """Return the hit start and end coordinates as a tuple (PRIVATE)."""
        return (self.hit_start, self.hit_end)

    hit_range = property(fget=_hit_range_get,
                         doc="""Tuple of hit start and end coordinates""")

    def _query_range_get(self):
        """Return the query start and end coordinates as a tuple (PRIVATE)."""
        return (self.query_start, self.query_end)

    query_range = property(fget=_query_range_get,
                           doc="""Tuple of query start and end coordinates""")
class HSPFragment(_BaseHSP):
    """Class representing a contiguous alignment of hit-query sequence.

    HSPFragment forms the core of any parsed search output file. Depending on
    the search output file format, it may contain the actual query and/or hit
    sequences that produce the search hits. These sequences are stored as
    SeqRecord objects (see SeqRecord):

    >>> from Bio import SearchIO
    >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
    >>> fragment = qresult[0][0][0]   # first hit, first hsp, first fragment
    >>> print(fragment)
          Query: 33211 mir_1
            Hit: gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 520b (MIR520...
    Query range: [0:61] (1)
      Hit range: [0:61] (1)
      Fragments: 1 (61 columns)
         Query - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG
                 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
           Hit - CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTTTAGAGGG

    # the query sequence is a SeqRecord object
    >>> fragment.query.__class__
    <class 'Bio.SeqRecord.SeqRecord'>
    >>> print(fragment.query)
    ID: 33211
    Name: aligned query sequence
    Description: mir_1
    Number of features: 0
    /molecule_type=DNA
    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG')

    # the hit sequence is a SeqRecord object as well
    >>> fragment.hit.__class__
    <class 'Bio.SeqRecord.SeqRecord'>
    >>> print(fragment.hit)
    ID: gi|262205317|ref|NR_030195.1|
    Name: aligned hit sequence
    Description: Homo sapiens microRNA 520b (MIR520B), microRNA
    Number of features: 0
    /molecule_type=DNA
    Seq('CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAGTGCTTCCTTT...GGG')

    # when both query and hit are present, we get a MultipleSeqAlignment object
    >>> fragment.aln.__class__
    <class 'Bio.Align.MultipleSeqAlignment'>
    >>> print(fragment.aln)
    Alignment with 2 rows and 61 columns
    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG 33211
    CCCTCTACAGGGAAGCGCTTTCTGTTGTCTGAAAGAAAAGAAAG...GGG gi|262205317|ref|NR_030195.1|

    """

    def __init__(
        self,
        hit_id="<unknown id>",
        query_id="<unknown id>",
        hit=None,
        query=None,
        molecule_type=None,
    ):
        """Initialize the class.

        :param hit_id: hit sequence ID
        :type hit_id: string
        :param query_id: query sequence ID
        :type query_id: string
        :param hit: hit sequence, stored as a SeqRecord
        :type hit: string, SeqRecord, or None
        :param query: query sequence, stored as a SeqRecord
        :type query: string, SeqRecord, or None
        :param molecule_type: value stored under the "molecule_type"
            annotation of the sequence records

        """
        self._molecule_type = molecule_type
        self.aln_annotation = {}

        self._hit_id = hit_id
        self._query_id = query_id

        # Pair each sequence type with its argument explicitly instead of
        # looking the argument up by name with eval(). The query is still
        # processed before the hit.
        for seq_type, seq in (("query", query), ("hit", hit)):
            # query or hit attributes with non-None defaults
            setattr(self, "_%s_description" % seq_type,
                    "<unknown description>")
            setattr(self, "_%s_features" % seq_type, [])
            # query or hit attributes whose default value is None; these go
            # through the validating property setters
            for attr in ("strand", "frame", "start", "end"):
                setattr(self, "%s_%s" % (seq_type, attr), None)
            # self.query or self.hit; any falsy value is stored as None
            setattr(self, seq_type, seq if seq else None)

    def __repr__(self):
        """Return HSPFragment info; hit id, query id, number of columns."""
        info = "hit_id=%r, query_id=%r" % (self.hit_id, self.query_id)
        try:
            info += ", %i columns" % len(self)
        except AttributeError:
            # aln_span was never set and there is no sequence to derive it
            # from, so the column count is simply left out
            pass
        return "%s(%s)" % (self.__class__.__name__, info)

    def __len__(self):
        """Return alignment span."""
        return self.aln_span

    def __str__(self):
        """Return string of HSP header and alignments."""
        # _str_hsp_header is not defined in this class; presumably it is
        # inherited from _BaseHSP -- confirm
        return self._str_hsp_header() + "\n" + self._str_aln()

    def __getitem__(self, idx):
        """Return a new fragment restricted to the columns selected by idx.

        The sequences (with their features) and the alignment annotations
        are sliced. IDs, descriptions, strands, frames and the molecule type
        are copied unchanged; start and end coordinates are not transferred.

        :raises TypeError: if the fragment has neither a query nor a hit
            sequence

        """
        if self.aln is None:
            raise TypeError(
                "Slicing for HSP objects without alignment is not supported.")

        obj = self.__class__(
            hit_id=self.hit_id,
            query_id=self.query_id,
            molecule_type=self.molecule_type,
        )
        # transfer query and hit attributes
        # let SeqRecord handle feature slicing, then retrieve the sliced
        # features into the sliced HSPFragment
        if self.query is not None:
            obj.query = self.query[idx]
            obj.query_features = obj.query.features
        if self.hit is not None:
            obj.hit = self.hit[idx]
            obj.hit_features = obj.hit.features
        # description, strand, frame
        for attr in ("description", "strand", "frame"):
            for seq_type in ("hit", "query"):
                attr_name = "%s_%s" % (seq_type, attr)
                setattr(obj, attr_name, getattr(self, attr_name))
        # alignment annotation should be transferred, since we can compute
        # the resulting annotation
        obj.aln_annotation = {}
        for key, value in self.aln_annotation.items():
            sliced = value[idx]
            assert len(sliced) == len(obj)
            obj.aln_annotation[key] = sliced
        return obj

    def _str_aln(self):
        """Return the alignment part of the string representation (PRIVATE).

        The first line reports the column count. When both sequences are
        present they are appended, shortened in the middle if the alignment
        spans more than 67 columns.

        """
        lines = []
        # alignment length
        # getattr_str is defined elsewhere; presumably it returns a
        # placeholder string when the attribute is unavailable -- confirm
        aln_span = getattr_str(self, "aln_span")
        lines.append("  Fragments: 1 (%s columns)" % aln_span)
        # sequences
        if self.query is not None and self.hit is not None:
            # The check above already rules out None; the fallbacks below
            # only matter for an object that lacks a `seq` attribute.
            try:
                qseq = str(self.query.seq)
            except AttributeError:
                qseq = "?"
            try:
                hseq = str(self.hit.seq)
            except AttributeError:
                hseq = "?"

            # similarity line, shown only when it is stored as a string
            simil = self.aln_annotation.get("similarity")
            if not isinstance(simil, str):
                simil = ""

            if self.aln_span <= 67:
                lines.append("%10s - %s" % ("Query", qseq))
                if simil:
                    lines.append("             %s" % simil)
                lines.append("%10s - %s" % ("Hit", hseq))
            else:
                # adjust continuation character length, so we don't display
                # the same residues twice
                cont = "~" * min(3, self.aln_span - 66)
                lines.append("%10s - %s%s%s" %
                             ("Query", qseq[:59], cont, qseq[-5:]))
                if simil:
                    lines.append("             %s%s%s" %
                                 (simil[:59], cont, simil[-5:]))
                lines.append("%10s - %s%s%s" %
                             ("Hit", hseq[:59], cont, hseq[-5:]))

        return "\n".join(lines)

    # sequence properties #
    def _set_seq(self, seq, seq_type):
        """Check the given sequence for attribute setting (PRIVATE).

        A string is wrapped in a new SeqRecord. A SeqRecord is modified in
        place: its id, name, description, features and "molecule_type"
        annotation are overwritten with the fragment's values.

        :param seq: sequence to check
        :type seq: string or SeqRecord
        :param seq_type: sequence type
        :type seq_type: string, choice of 'hit' or 'query'
        :returns: the SeqRecord to store, or None if seq is None
        :raises TypeError: if seq is neither a string nor a SeqRecord
        :raises ValueError: if the length of seq differs from the length of
            the opposite sequence already stored

        """
        assert seq_type in ("hit", "query")
        if seq is None:
            return seq  # return immediately if seq is None
        if not isinstance(seq, (str, SeqRecord)):
            raise TypeError(
                "%s sequence must be a string or a SeqRecord object." %
                seq_type)
        # check length if the opposite sequence is not None
        opp_type = "hit" if seq_type == "query" else "query"
        opp_seq = getattr(self, "_%s" % opp_type, None)
        if opp_seq is not None:
            if len(seq) != len(opp_seq):
                raise ValueError(
                    "Sequence lengths do not match. Expected: %r (%s); found: %r (%s)."
                    % (len(opp_seq), opp_type, len(seq), seq_type))

        seq_id = getattr(self, "%s_id" % seq_type)
        seq_desc = getattr(self, "%s_description" % seq_type)
        seq_feats = getattr(self, "%s_features" % seq_type)
        seq_name = "aligned %s sequence" % seq_type

        if isinstance(seq, SeqRecord):
            seq.id = seq_id
            seq.description = seq_desc
            seq.name = seq_name
            seq.features = seq_feats
            seq.annotations["molecule_type"] = self.molecule_type
        elif isinstance(seq, str):
            seq = SeqRecord(
                Seq(seq),
                id=seq_id,
                name=seq_name,
                description=seq_desc,
                features=seq_feats,
                annotations={"molecule_type": self.molecule_type},
            )

        return seq

    def _hit_get(self):
        """Get the hit sequence record (PRIVATE)."""
        return self._hit

    def _hit_set(self, value):
        """Validate and store the hit sequence (PRIVATE)."""
        self._hit = self._set_seq(value, "hit")

    hit = property(
        fget=_hit_get,
        fset=_hit_set,
        doc="Hit sequence as a SeqRecord object, defaults to None.",
    )

    def _query_get(self):
        """Get the query sequence record (PRIVATE)."""
        return self._query

    def _query_set(self, value):
        """Validate and store the query sequence (PRIVATE)."""
        self._query = self._set_seq(value, "query")

    query = property(
        fget=_query_get,
        fset=_query_set,
        doc="Query sequence as a SeqRecord object, defaults to None.",
    )

    def _aln_get(self):
        """Build the alignment from the stored sequences (PRIVATE).

        A new MultipleSeqAlignment is created on every access; None is
        returned when neither sequence is present. The molecule type is
        attached to the alignment only when it is not None.

        """
        if self.query is None and self.hit is None:
            return None
        if self.hit is None:
            msa = MultipleSeqAlignment([self.query])
        elif self.query is None:
            msa = MultipleSeqAlignment([self.hit])
        else:
            msa = MultipleSeqAlignment([self.query, self.hit])
        molecule_type = self.molecule_type
        if molecule_type is not None:
            msa.molecule_type = molecule_type
        return msa

    aln = property(
        fget=_aln_get,
        doc="Query-hit alignment as a MultipleSeqAlignment object, "
        "defaults to None.",
    )

    def _molecule_type_get(self):
        """Get the molecule type of the fragment (PRIVATE)."""
        return self._molecule_type

    def _molecule_type_set(self, value):
        """Set the molecule type on the fragment and its records (PRIVATE)."""
        self._molecule_type = value
        # a sequence that is None (or not set yet) has no `annotations`
        # attribute; in that case there is nothing to update
        try:
            self.query.annotations["molecule_type"] = value
        except AttributeError:
            pass
        try:
            self.hit.annotations["molecule_type"] = value
        except AttributeError:
            pass

    molecule_type = property(
        fget=_molecule_type_get,
        fset=_molecule_type_set,
        doc="molecule type used in the fragment's "
        "sequence records and alignment, defaults to None.",
    )

    def _aln_span_get(self):
        """Get the alignment span, computing it if necessary (PRIVATE).

        :raises AttributeError: if the span was never set and neither the
            query nor the hit sequence is available to compute it from

        """
        # length of alignment (gaps included)
        # alignment span can be its own attribute, or computed from
        # query / hit length
        try:
            self._aln_span
        except AttributeError:
            if self.query is not None:
                self._aln_span = len(self.query)
            elif self.hit is not None:
                self._aln_span = len(self.hit)

        return self._aln_span

    def _aln_span_set(self, value):
        """Set the alignment span explicitly (PRIVATE)."""
        self._aln_span = value

    aln_span = property(
        fget=_aln_span_get,
        fset=_aln_span_set,
        doc="The number of alignment columns covered by the fragment.",
    )

    # id, description, and features properties #
    # fragcascade is defined elsewhere; presumably it builds a property that
    # keeps the fragment attribute and the matching SeqRecord attribute in
    # sync -- confirm against its definition
    hit_description = fragcascade("description",
                                  "hit",
                                  doc="Hit sequence description.")

    query_description = fragcascade("description",
                                    "query",
                                    doc="Query sequence description.")

    hit_id = fragcascade("id", "hit", doc="Hit sequence ID.")

    query_id = fragcascade("id", "query", doc="Query sequence ID.")

    hit_features = fragcascade("features", "hit", doc="Hit sequence features.")

    query_features = fragcascade("features",
                                 "query",
                                 doc="Query sequence features.")

    # strand properties #
    def _prep_strand(self, strand):
        """Validate a strand value and return it unchanged (PRIVATE).

        :raises ValueError: if strand is not -1, 0, 1, or None

        """
        # follow SeqFeature's convention
        if strand not in (-1, 0, 1, None):
            raise ValueError("Strand should be -1, 0, 1, or None; not %r" %
                             strand)
        return strand

    def _get_strand(self, seq_type):
        """Return the strand, deriving it from the frame if unset (PRIVATE).

        :param seq_type: sequence type
        :type seq_type: string, choice of 'hit' or 'query'

        """
        assert seq_type in ("hit", "query")
        strand = getattr(self, "_%s_strand" % seq_type)

        if strand is None:
            # try to compute strand from frame
            frame = getattr(self, "%s_frame" % seq_type)
            if frame is not None:
                try:
                    # sign of the frame: -1 or 1
                    strand = frame // abs(frame)
                except ZeroDivisionError:
                    # frame 0 maps to strand 0
                    strand = 0
                # store the derived value through the validating setter
                setattr(self, "%s_strand" % seq_type, strand)

        return strand

    def _hit_strand_get(self):
        """Get the hit sequence strand (PRIVATE)."""
        return self._get_strand("hit")

    def _hit_strand_set(self, value):
        """Set the hit sequence strand (PRIVATE)."""
        self._hit_strand = self._prep_strand(value)

    hit_strand = property(
        fget=_hit_strand_get,
        fset=_hit_strand_set,
        doc="Hit sequence strand, defaults to None.",
    )

    def _query_strand_get(self):
        """Get the query sequence strand (PRIVATE)."""
        return self._get_strand("query")

    def _query_strand_set(self, value):
        """Set the query sequence strand (PRIVATE)."""
        self._query_strand = self._prep_strand(value)

    query_strand = property(
        fget=_query_strand_get,
        fset=_query_strand_set,
        doc="Query sequence strand, defaults to None.",
    )

    # frame properties #
    def _prep_frame(self, frame):
        """Validate a reading frame and return it unchanged (PRIVATE).

        :raises ValueError: if frame is not an integer between -3 and 3,
            or None

        """
        if frame not in (-3, -2, -1, 0, 1, 2, 3, None):
            # the message used to say "Strand", which misreported the
            # attribute being validated
            raise ValueError(
                "Frame should be an integer between -3 and 3, or None; not %r"
                % frame)
        return frame

    def _hit_frame_get(self):
        """Get hit sequence reading frame (PRIVATE)."""
        return self._hit_frame

    def _hit_frame_set(self, value):
        """Set hit sequence reading frame (PRIVATE)."""
        self._hit_frame = self._prep_frame(value)

    hit_frame = property(
        fget=_hit_frame_get,
        fset=_hit_frame_set,
        doc="Hit sequence reading frame, defaults to None.",
    )

    def _query_frame_get(self):
        """Get query sequence reading frame (PRIVATE)."""
        return self._query_frame

    def _query_frame_set(self, value):
        """Set query sequence reading frame (PRIVATE)."""
        self._query_frame = self._prep_frame(value)

    query_frame = property(
        fget=_query_frame_get,
        fset=_query_frame_set,
        doc="Query sequence reading frame, defaults to None.",
    )

    # coordinate properties #
    def _prep_coord(self, coord, opp_coord_name, op):
        """Validate a coordinate against its opposite coordinate (PRIVATE).

        :param coord: coordinate value to check
        :type coord: int or None
        :param opp_coord_name: attribute name of the opposite coordinate
        :type opp_coord_name: string
        :param op: comparison called as op(coord, opposite coordinate); the
            setters pass ``le`` for start and ``ge`` for end coordinates
        :returns: coord, unchanged

        The checks are assertions, so they raise AssertionError and are
        skipped when Python runs with -O.

        """
        # coord must either be None or int
        if coord is None:
            return coord
        assert isinstance(coord, int)
        # try to get opposite coordinate, if it's not present, return
        try:
            opp_coord = getattr(self, opp_coord_name)
        except AttributeError:
            return coord
        # if opposite coordinate is None, return
        if opp_coord is None:
            return coord
        # otherwise compare it to coord ('>=' or '<=')
        assert op(coord, opp_coord)
        return coord

    def _hit_start_get(self):
        """Get the sequence hit start coordinate (PRIVATE)."""
        return self._hit_start

    def _hit_start_set(self, value):
        """Set the sequence hit start coordinate (PRIVATE)."""
        self._hit_start = self._prep_coord(value, "hit_end", le)

    hit_start = property(
        fget=_hit_start_get,
        fset=_hit_start_set,
        doc="Hit sequence start coordinate, defaults to None.",
    )

    def _query_start_get(self):
        """Get the query sequence start coordinate (PRIVATE)."""
        return self._query_start

    def _query_start_set(self, value):
        """Set the query sequence start coordinate (PRIVATE)."""
        self._query_start = self._prep_coord(value, "query_end", le)

    query_start = property(
        fget=_query_start_get,
        fset=_query_start_set,
        doc="Query sequence start coordinate, defaults to None.",
    )

    def _hit_end_get(self):
        """Get the hit sequence end coordinate (PRIVATE)."""
        return self._hit_end

    def _hit_end_set(self, value):
        """Set the hit sequence end coordinate (PRIVATE)."""
        self._hit_end = self._prep_coord(value, "hit_start", ge)

    hit_end = property(
        fget=_hit_end_get,
        fset=_hit_end_set,
        doc="Hit sequence end coordinate, defaults to None.",
    )

    def _query_end_get(self):
        """Get the query sequence end coordinate (PRIVATE)."""
        return self._query_end

    def _query_end_set(self, value):
        """Set the query sequence end coordinate (PRIVATE)."""
        self._query_end = self._prep_coord(value, "query_start", ge)

    query_end = property(
        fget=_query_end_get,
        fset=_query_end_set,
        doc="Query sequence end coordinate, defaults to None.",
    )

    # coordinate-dependent properties #
    def _hit_span_get(self):
        """Return the number of residues covered by the hit sequence (PRIVATE)."""
        try:
            return self.hit_end - self.hit_start
        except TypeError:  # triggered if any of the coordinates are None
            return None

    hit_span = property(
        fget=_hit_span_get,
        doc="The number of residues covered by the hit sequence.")

    def _query_span_get(self):
        """Return the number of residues covered by the query (PRIVATE)."""
        try:
            return self.query_end - self.query_start
        except TypeError:  # triggered if any of the coordinates are None
            return None

    query_span = property(
        fget=_query_span_get,
        doc="The number of residues covered by the query sequence.",
    )

    def _hit_range_get(self):
        """Return the start and end of a hit (PRIVATE)."""
        return (self.hit_start, self.hit_end)

    hit_range = property(fget=_hit_range_get,
                         doc="Tuple of hit start and end coordinates.")

    def _query_range_get(self):
        """Return the start and end of a query (PRIVATE)."""
        return (self.query_start, self.query_end)

    query_range = property(fget=_query_range_get,
                           doc="Tuple of query start and end coordinates.")