def set_pair_info(r1: AlignedSegment, r2: AlignedSegment, proper_pair: bool = True) -> None:
    """Resets mate pair information between reads in a pair.

    Requires that both r1 and r2 are mapped.  Can be handed reads that already have pairing
    flags setup or independent R1 and R2 records that are currently flagged as SE reads.

    Args:
        r1: read 1
        r2: read 2 with the same queryname as r1
        proper_pair: value assigned to the proper-pair flag on both reads

    Raises:
        AssertionError: if either read is unmapped, or the two reads have different query names
    """
    # The checks are raised explicitly instead of using `assert` so that they still run when
    # Python is started with -O.  AssertionError is kept so callers see the same exception type.
    if r1.is_unmapped:
        raise AssertionError(f"Cannot process unmapped mate {r1.query_name}/1")
    if r2.is_unmapped:
        raise AssertionError(f"Cannot process unmapped mate {r2.query_name}/2")
    if r1.query_name != r2.query_name:
        raise AssertionError("Attempting to pair reads with different qnames.")

    for r in (r1, r2):
        r.is_paired = True
        r.is_proper_pair = proper_pair

    r1.is_read1 = True
    r1.is_read2 = False
    r2.is_read2 = True
    r2.is_read1 = False

    # Copy each read's own alignment details into the mate fields of the other read.
    for src, dest in ((r1, r2), (r2, r1)):
        dest.next_reference_id = src.reference_id
        dest.next_reference_start = src.reference_start
        dest.mate_is_reverse = src.is_reverse
        dest.mate_is_unmapped = False
        dest.set_tag("MC", src.cigarstring)

    # The two reads carry the same insert size with opposite signs.
    insert_size = isize(r1, r2)
    r1.template_length = insert_size
    r2.template_length = -insert_size
def _make_read_unmapped(rec: AlignedSegment) -> None:
    """Removes mapping information from a read.

    If the read is flagged as reverse strand, its bases are reverse complemented and its
    qualities reversed, and the reverse flag is cleared.  The reference id/position, cigar,
    mapping quality, template length and the duplicate, secondary, supplementary and
    proper-pair flags are then reset, and the read is flagged as unmapped.

    Args:
        rec: the record to modify in place
    """
    if rec.is_reverse:
        # Grab the qualities before touching the sequence: pysam documents that assigning
        # query_sequence invalidates the stored qualities.
        quals = rec.query_qualities
        bases = rec.query_sequence

        # Either value may be absent on a record; only flip what is actually present rather
        # than failing on None.
        if bases is not None:
            rec.query_sequence = dnautils.reverse_complement(bases)
        if quals is not None:
            quals.reverse()
            rec.query_qualities = quals

        rec.is_reverse = False

    rec.reference_id = sam.NO_REF_INDEX
    rec.reference_start = sam.NO_REF_POS
    rec.cigar = None
    rec.mapping_quality = 0
    rec.template_length = 0
    rec.is_duplicate = False
    rec.is_secondary = False
    rec.is_supplementary = False
    rec.is_proper_pair = False
    rec.is_unmapped = True
def _set_mate_info(self, r1: pysam.AlignedSegment, r2: pysam.AlignedSegment) -> None:
    """Populates the mate fields on the two records of a pair.

    Covers all three situations: neither read mapped, exactly one read mapped, and both
    reads mapped.  Template length and the proper-pair flag are cleared up front and are
    only re-derived when both reads are mapped to the same reference.

    Args:
        r1: the first read in the pair
        r2: the second read in the pair
    """
    both_orders = ((r1, r2), (r2, r1))

    for read in (r1, r2):
        read.template_length = 0
        read.is_proper_pair = False

    # Neither read is mapped: blank out every positional field on both records.
    if r1.is_unmapped and r2.is_unmapped:
        for read, mate in both_orders:
            read.reference_id = sam.NO_REF_INDEX
            read.reference_start = sam.NO_REF_POS
            read.next_reference_id = sam.NO_REF_INDEX
            read.next_reference_start = sam.NO_REF_POS
            read.is_unmapped = True
            read.mate_is_unmapped = True
            read.is_proper_pair = False
            read.mate_is_reverse = mate.is_reverse
        return

    # Exactly one read is mapped: the unmapped read is placed at its mate's position.
    if r1.is_unmapped or r2.is_unmapped:
        if r2.is_unmapped:
            mapped, unmapped = r1, r2
        else:
            mapped, unmapped = r2, r1

        unmapped.reference_id = mapped.reference_id
        unmapped.reference_start = mapped.reference_start
        unmapped.next_reference_id = mapped.reference_id
        unmapped.next_reference_start = mapped.reference_start
        unmapped.mate_is_reverse = mapped.is_reverse
        unmapped.mate_is_unmapped = False
        unmapped.set_tag("MC", mapped.cigarstring)

        # These read back the position just copied onto the unmapped read above.
        mapped.next_reference_id = unmapped.reference_id
        mapped.next_reference_start = unmapped.reference_start
        mapped.mate_is_reverse = unmapped.is_reverse
        mapped.mate_is_unmapped = True
        return

    # Both reads are mapped: cross-copy the alignment details.
    for read, mate in both_orders:
        read.next_reference_id = mate.reference_id
        read.next_reference_start = mate.reference_start
        read.mate_is_reverse = mate.is_reverse
        read.mate_is_unmapped = False
        read.set_tag("MC", mate.cigarstring)

    # Reads on different references keep a zero template length and are never proper.
    if r1.reference_id != r2.reference_id:
        return

    # Measure from reference_end for a reverse-strand read and from reference_start otherwise.
    r1_pos = r1.reference_end if r1.is_reverse else r1.reference_start
    r2_pos = r2.reference_end if r2.is_reverse else r2.reference_start
    r1.template_length = r2_pos - r1_pos
    r2.template_length = r1_pos - r2_pos

    # Arbitrarily flag the pair as proper when it is an FR pair with isize <= 1000.
    if r1.is_reverse != r2.is_reverse and abs(r1.template_length) <= 1000:
        if r1.is_reverse:
            forward_pos, reverse_pos = r2_pos, r1_pos
        else:
            forward_pos, reverse_pos = r1_pos, r2_pos

        if forward_pos < reverse_pos:
            r1.is_proper_pair = True
            r2.is_proper_pair = True