class Line:
    """Represents a line in the SAM file.

    A line is either a header line (it starts with ``@``) or an alignment
    record.  Header lines are kept verbatim as a one-element ``fields`` list.
    Alignment records are split into ``fields`` and additionally carry an
    ``alignment`` attribute built from the POS field, the CIGAR field and the
    value of the ``MD:Z:`` tag.
    """

    TYPE_HEADER = 0
    TYPE_ALIGNMENT = 1

    # Prefix of the optional field holding the mismatch (MD) string.
    _MD_PREFIX = 'MD:Z:'

    # FLAG bits that describe the template / the mate rather than the read
    # itself: 0x1, 0x2, 0x8, 0x20, 0x40 and 0x80.
    _PAIRED_END_FLAGS = 0x1 | 0x2 | 0x8 | 0x20 | 0x40 | 0x80

    def __init__(self, line_string):
        """Parse one line of a SAM file.

        Args:
            line_string: Raw text of the line.

        Raises:
            CigarUnavailableError: If the CIGAR field of an alignment record
                is ``'*'``.
            ValueError: If an alignment record has no ``MD:Z:`` field.
        """
        if line_string.startswith('@'):
            self.type = self.TYPE_HEADER
            # Header lines are stored untouched (including any line ending).
            self.fields = [line_string]
            return

        self.type = self.TYPE_ALIGNMENT
        # NOTE(review): split() breaks on any whitespace, so a tag value that
        # contains a space would be torn apart and later re-joined with tabs
        # by __repr__ -- confirm inputs never carry such tags.
        self.fields = line_string.split()
        pos, cigar = self.fields[3], self.fields[5]
        if cigar == '*':
            raise CigarUnavailableError

        # Use a default so a missing tag cannot leak StopIteration out of the
        # constructor (which turns into RuntimeError inside a generator).
        md_field = next(
            (field for field in self.fields
             if field.startswith(self._MD_PREFIX)),
            None)
        if md_field is None:
            raise ValueError(
                'alignment line has no MD:Z: field: {!r}'.format(line_string))

        # Drop only the leading prefix, never later occurrences.
        md = md_field[len(self._MD_PREFIX):]
        self.alignment = Alignment(pos, cigar, md)

    def soft_clip(self, start, stop):
        """Soft-clip this record and update its fields; no-op for headers.

        Paired-end information is stripped, the reference name (fields[2])
        becomes ``'<name>:<start>-<stop>'``, the clipping itself is delegated
        to ``self.alignment.soft_clip(start, stop)``, and the resulting
        position, CIGAR and MD string are written back into ``fields``.

        Args:
            start: Forwarded to ``Alignment.soft_clip`` and embedded in the
                new reference name.
            stop: Forwarded to ``Alignment.soft_clip`` and embedded in the
                new reference name.
        """
        if self.type == self.TYPE_HEADER:
            return

        self.strip_paired_end_info()
        self.fields[2] = '{}:{}-{}'.format(self.fields[2], start, stop)
        self.alignment.soft_clip(start, stop)
        self.fields[3] = str(self.alignment.pos)
        self.fields[5] = self.alignment.cigar

        new_md_field = self._MD_PREFIX + self.alignment.md
        self.fields = [
            new_md_field if field.startswith(self._MD_PREFIX) else field
            for field in self.fields
        ]

    def strip_paired_end_info(self):
        """Remove everything that refers to the mate / template.

        fields[1]: Bitwise flags according to the SAM specifications:
            1    -- template having multiple segments in sequencing
            2    -- each segment properly aligned according to the aligner
            4    -- segment unmapped
            8    -- next segment in template unmapped
            16   -- SEQ being reverse complemented
            32   -- SEQ of the next segment in the template being reverse
                    complemented
            64   -- the first segment in the template
            128  -- the last segment in the template
            256  -- secondary alignment
            512  -- not passing filters
            1024 -- PCR or optical duplicate
            2048 -- supplementary alignment
            Bits 1, 2, 8, 32, 64 and 128 are cleared; all other bits are
            kept as they are.

        fields[6]: reference sequence name of the primary alignment of the
                   next read in the template; '*' when information is
                   unavailable.

        fields[7]: 1-based position of the primary alignment of the next read
                   in the template; '0' when information is unavailable.

        fields[8]: signed observed template length; '0' for single-segment
                   template, or when information is unavailable.
        """
        flags = int(self.fields[1])
        # Clear only the template-related bits so that read-level flags such
        # as secondary/supplementary/duplicate survive.
        flags &= ~self._PAIRED_END_FLAGS
        self.fields[1] = str(flags)
        self.fields[6:9] = ['*', '0', '0']

    def __repr__(self):
        """Return the fields joined by tab characters."""
        return '\t'.join(self.fields)
def test_alignment_soft_clip(pre_clip, to_clip, post_clip):
    """Check that soft-clipping an alignment yields the expected alignment.

    An ``Alignment`` is built from ``pre_clip``, soft-clipped with the
    arguments in ``to_clip``, and then compared for equality against an
    ``Alignment`` built from ``post_clip``.

    Args:
        pre_clip: Positional arguments for the alignment before clipping.
        to_clip: Positional arguments passed to ``Alignment.soft_clip``.
        post_clip: Positional arguments for the expected alignment.
    """
    # presumably the three arguments come from a pytest parametrization that
    # is not visible here -- TODO confirm
    clipped = Alignment(*pre_clip)
    clipped.soft_clip(*to_clip)

    expected = Alignment(*post_clip)
    assert clipped == expected