示例#1
0
class Line:
    """
    Represents a line in the SAM file.

    A line whose text starts with ``@`` is treated as a header line and is
    stored verbatim as a single field.  Any other line is treated as an
    alignment line: it is split on whitespace into ``fields`` and an
    ``Alignment`` is built from its position, CIGAR and ``MD:Z:`` tag.
    """

    TYPE_HEADER = 0
    TYPE_ALIGNMENT = 1

    # Prefix of the optional field that carries the mismatch (MD) string.
    _MD_PREFIX = 'MD:Z:'

    # FLAG bits that describe the template / the other segment rather than
    # this read: 0x1, 0x2, 0x8, 0x20, 0x40 and 0x80.
    _PAIRED_END_FLAGS = 0x1 | 0x2 | 0x8 | 0x20 | 0x40 | 0x80

    def __init__(self, line_string):
        """
        Parse one line of a SAM file.

        line_string: the raw text of the line.

        Raises CigarUnavailableError when an alignment line has '*' as its
        CIGAR, IndexError when an alignment line has fewer than six fields,
        and StopIteration when an alignment line has no ``MD:Z:`` field.
        """
        if line_string.startswith('@'):
            # Header lines are kept untouched so that they print back as-is.
            self.type = self.TYPE_HEADER
            self.fields = [line_string]
            return

        self.type = self.TYPE_ALIGNMENT
        self.fields = line_string.split()
        pos, cigar = self.fields[3], self.fields[5]

        if cigar == '*':
            raise CigarUnavailableError

        # NOTE(review): a missing MD tag surfaces as a bare StopIteration,
        # which can silently end an enclosing iterator (e.g. ``map``).
        # Kept as-is so existing callers see the same exception type.
        md_field = next(field for field in self.fields
                        if field.startswith(self._MD_PREFIX))
        md = md_field.replace(self._MD_PREFIX, '')
        self.alignment = Alignment(pos, cigar, md)

    def soft_clip(self, start, stop):
        """
        Soft-clip this alignment to ``start``..``stop`` and update the fields.

        Header lines are left unchanged.  For alignment lines the paired-end
        information is stripped, ``fields[2]`` is rewritten as
        ``'<name>:<start>-<stop>'``, the clipping itself is delegated to
        ``self.alignment.soft_clip(start, stop)``, and the position
        (``fields[3]``), CIGAR (``fields[5]``) and ``MD:Z:`` field are then
        refreshed from the alignment.
        """
        if self.type == self.TYPE_HEADER:
            return

        self.strip_paired_end_info()

        self.fields[2] = '{}:{}-{}'.format(self.fields[2], start, stop)
        self.alignment.soft_clip(start, stop)
        self.fields[3] = str(self.alignment.pos)
        self.fields[5] = self.alignment.cigar

        md_field = self._MD_PREFIX + self.alignment.md
        self.fields = [
            md_field if field.startswith(self._MD_PREFIX) else field
            for field in self.fields
        ]

    def strip_paired_end_info(self):
        '''
        Remove everything that refers to the other segment of the template.

        fields[1]: Bitwise flags according to the SAM specifications:
              1 -- template having multiple segments in sequencing
              2 -- each segment properly aligned according to the aligner
              4 -- segment unmapped
              8 -- next segment in template unmapped
             16 -- SEQ being reverse complemented
             32 -- SEQ of the next segment in the template being reversed
                   complemented
             64 -- the first segment in the template
            128 -- the last segment in the template
            ...
            Bits 1, 2, 8, 32, 64 and 128 are cleared; every other bit
            (including those above 128) is preserved.
        fields[6]: reference sequence name of the primary alignment of the next
                   read in the template; '*' when information is unavailable.
        fields[7]: 1-based position of the primary alignment of the next read in
                   the template; '0' when information is unavailable.
        fields[8]: signed observed template length; '0' for single-segment
                   template, or when information is unavailable.
        '''
        flags = int(self.fields[1])
        # Clear only the multi-segment bits; keep the read's own flags.
        flags &= ~self._PAIRED_END_FLAGS
        self.fields[1] = str(flags)

        # Mark the mate reference, mate position and template length as
        # unavailable.
        self.fields[6:9] = ['*', '0', '0']

    def __repr__(self):
        """Return the fields joined by tabs, i.e. the line in SAM layout."""
        return '\t'.join(self.fields)
示例#2
0
def test_alignment_soft_clip(pre_clip, to_clip, post_clip):
    """
    Check that soft-clipping an alignment yields the expected alignment.

    pre_clip:  positional arguments used to build the alignment under test.
    to_clip:   positional arguments passed to ``Alignment.soft_clip``.
    post_clip: positional arguments used to build the expected alignment.
    """
    clipped = Alignment(*pre_clip)
    # soft_clip is used for its effect on the instance, not its return value.
    clipped.soft_clip(*to_clip)

    expected = Alignment(*post_clip)
    assert clipped == expected