Пример #1
0
    def __encrypt_unmapped(alignment: pysam.AlignedSegment, secret: bytes):
        """
        Run the stream cipher over the bases of an unmapped alignment, in place.

        The operation is its own inverse:
        alignment + secret => encrypted_alignment
        encrypted_alignment + secret => alignment

        Mapped alignments are left untouched.

        :param alignment: record to process; only modified when it is unmapped
        :param secret: key bytes, hashed together with the query name
        :return: nothing; the alignment is modified in place
        :raises ValueError: if the alignment is unmapped and secret is None
        """
        if not alignment.is_unmapped:
            return

        if secret is None:
            raise ValueError(
                'Secret key must be present when unmapped alignments are iterated.'
            )

        # SHA-512 gives a 64-byte digest (noted originally as covering 256 bases);
        # it is derived from the secret plus the read name.
        digest = hashlib.sha512(secret + alignment.query_name.encode()).digest()
        ciphered_bases = cmn.stream_cipher(alignment.query_sequence, digest)

        # Read the qualities before replacing the sequence and write them back
        # afterwards so they are preserved.
        # TODO: maybe something else with the quality?
        saved_qualities = alignment.query_qualities
        alignment.query_sequence = ciphered_bases
        alignment.query_qualities = saved_qualities
Пример #2
0
def softclip_end_of_alignment_by_query(
        rec: AlignedSegment,
        bases_to_clip: int,
        clipped_base_quality: Optional[int] = None,
        tags_to_invalidate: Iterable[str] = TAGS_TO_INVALIDATE
) -> ClippingInfo:
    """
    Soft-clips additional bases at the end of a read's alignment.

    The new clipping is placed ahead of any hard or soft clipping that is already present.
    E.g. clipping a read whose cigar is 100M5S with bases_to_clip=10 is intended to produce
    a cigar of 90M15S.

    Nothing is done (and a zeroed ClippingInfo is returned) when the read is unmapped or
    when bases_to_clip < 1.

    When bases_to_clip is at least the read's query alignment length, the whole read is
    handed to `_clip_whole_read` instead (originally documented as unmapping the read).

    Args:
        rec: the BAM record to clip; it is modified in place
        bases_to_clip: the number of additional bases of clipping desired in the read/query
        clipped_base_quality: if not None, passed on to `_clip` as the quality to give to
            bases in the clipped region
        tags_to_invalidate: the set of extended attributes to remove upon clipping

    Returns:
        ClippingInfo: a named tuple containing the number of query/read bases and the number
            of target/reference bases clipped.
    """
    if rec.is_unmapped or bases_to_clip < 1:
        return ClippingInfo(0, 0)

    # Requesting every aligned base (or more) leaves nothing to keep aligned
    if rec.query_alignment_length <= bases_to_clip:
        return _clip_whole_read(rec, tags_to_invalidate)

    # _clip works from the start, so present the cigar and qualities back to front
    flipped_cigar = Cigar.from_cigartuples(rec.cigartuples).reversed()
    qualities = rec.query_qualities
    qualities.reverse()
    clip_result = _clip(flipped_cigar, qualities, bases_to_clip, clipped_base_quality)
    clipped_cigar, info = clip_result

    # Restore the original orientation before writing the values back to the record
    qualities.reverse()
    rec.query_qualities = qualities
    rec.cigarstring = str(clipped_cigar.reversed())

    _cleanup(rec, tags_to_invalidate)
    return info
Пример #3
0
    def _set_length_dependent_fields(
        self,
        rec: pysam.AlignedSegment,
        length: int,
        bases: Optional[str] = None,
        quals: Optional[List[int]] = None,
        cigar: Optional[str] = None,
    ) -> None:
        """Populates the bases, qualities and (for mapped records) cigar of a record.

        Every one of bases, quals and cigar that is supplied must imply the same query
        length.  When none is supplied, the length parameter is used instead.  Values that
        are missing (or empty) are synthesized at the resolved length: bases via
        `self._bases`, qualities as `self.base_quality` repeated, and the cigar as a single
        match block.

        Args:
            rec: a SAM record; it is modified in place
            length: the length to use if all of bases/quals/cigar are None
            bases: an optional string of bases for the read
            quals: an optional list of qualities for the read
            cigar: an optional cigar string for the read

        Raises:
            ValueError: if the supplied bases/quals/cigar imply more than one length
        """
        # Collect the query length implied by each argument that was actually supplied
        implied = {len(given) for given in (bases, quals) if given is not None}
        if cigar is not None:
            parsed = sam.Cigar.from_cigarstring(cigar)
            implied.add(sum(elem.length_on_query for elem in parsed.elements))

        # Nothing supplied, so the caller's length is the only candidate
        if not implied:
            implied = {length}

        if len(implied) != 1:
            raise ValueError(
                "Provided bases/quals/cigar are not length compatible.")

        (resolved,) = implied

        # Use what was given, synthesizing whatever is missing at the resolved length
        if bases:
            rec.query_sequence = bases
        else:
            rec.query_sequence = self._bases(resolved)

        if quals:
            rec.query_qualities = quals
        else:
            rec.query_qualities = [self.base_quality] * resolved

        # The cigar is only written for mapped records
        if not rec.is_unmapped:
            if cigar:
                rec.cigarstring = cigar
            else:
                rec.cigarstring = f"{resolved}M"
Пример #4
0
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read, modifying the record in place.

    A read flagged as reverse strand has its bases reverse-complemented and its qualities
    reversed, and the reverse flag is cleared.  The reference id/position, cigar, mapping
    quality, template length and the duplicate/secondary/supplementary/proper-pair flags
    are then reset and the read is flagged as unmapped.

    Records without a stored sequence and/or qualities (where pysam returns None) are
    handled: the missing field is simply left as it is.

    Args:
        rec: the record to unmap
    """
    if rec.is_reverse:
        # Fetch the qualities before touching the sequence: per the pysam documentation,
        # assigning query_sequence invalidates the stored qualities.
        quals = rec.query_qualities
        bases = rec.query_sequence
        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)
        if quals is not None:
            quals.reverse()
            rec.query_qualities = quals
        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    # `cigar` is a deprecated alias in pysam; `cigartuples` is the supported attribute
    rec.cigartuples = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True