def is_snp_called_correctly(record: pysam.AlignedSegment, snp_ref_pos: int = 100) -> bool:
    """Report whether the read agrees with the reference at the SNP position.

    The aligned pairs are requested with ``with_seq=True``; per the pysam
    documentation the reference base is then reported in lower case where the
    read carries a substitution, so a lower-case base is treated as an
    incorrect call.

    Args:
        record: the aligned read to inspect (an MD tag is required by pysam
            for ``with_seq=True``).
        snp_ref_pos: 0-based reference position of the SNP. Defaults to 100,
            the value that was previously hard-coded.

    Returns:
        True if the reference base reported at ``snp_ref_pos`` is not lower
        case, False if it is lower case or if the read has no aligned pair at
        that reference position.
    """
    for _query_pos, ref_pos, ref_base in record.get_aligned_pairs(with_seq=True):
        if ref_pos == snp_ref_pos:
            return not ref_base.islower()
    # The read does not span the SNP position: return an explicit bool
    # instead of implicitly falling through with None.
    return False
def record_contains_expected_snp(record: pysam.AlignedSegment) -> bool:
    """Check the reference base paired with the read's SNP offset.

    The expected base is taken from the last character of the read name. The
    aligned pairs (with reference sequence) are scanned for the first pair
    whose query position equals ``REF_PANEL_FLANK_WIDTH``; the reference base
    of that pair is compared with the expected base.

    Args:
        record: the aligned read to inspect.

    Returns:
        True if the reference base at that query position equals the expected
        base; False if it differs or if no pair has that query position.
    """
    snp_base = record.query_name[-1]
    aligned = record.get_aligned_pairs(with_seq=True)
    for read_idx, _ref_idx, reference_base in aligned:
        if read_idx != REF_PANEL_FLANK_WIDTH:
            continue
        # First pair at the SNP offset decides the outcome.
        return snp_base == reference_base
    return False
def select_snps_from_single_read(
    read: pysam.AlignedSegment, snp_positions: Set[int], region_start: int
) -> Tuple[List[int], List[str]]:
    """Collect the bases a read carries at the requested SNP positions.

    Only matched (aligned) read/reference pairs are considered. Each reference
    position is shifted by ``region_start`` before it is looked up in
    ``snp_positions``, so the set is expected to hold region-relative offsets.

    Args:
        read: the aligned read to scan.
        snp_positions: region-relative positions of interest.
        region_start: value subtracted from every reference position.

    Returns:
        Two parallel lists: the region-relative positions that were hit, and
        the corresponding read bases after ``SNP.process_nucl``.
    """
    # TODO use indels
    bases = read.query_sequence
    offsets: List[int] = []
    calls: List[str] = []
    for query_idx, ref_idx in read.get_aligned_pairs(matches_only=True):
        offset = ref_idx - region_start
        if offset not in snp_positions:
            continue
        offsets.append(offset)
        calls.append(SNP.process_nucl(bases[query_idx]))
    return offsets, calls
def _read_pos_at_ref_pos(rec: AlignedSegment,
                         ref_pos: int,
                         previous: Optional[bool] = None) -> Optional[int]:
    """
    Returns the read or query position at the reference position.

    If the reference position is not within the span of reference positions to which the
    read is aligned an exception will be raised.  If the reference position is within the span
    but is not aligned (i.e. it is deleted in the read) behavior is controlled by the
    "previous" argument.

    Args:
        rec: the AlignedSegment within which to find the read position
        ref_pos: the reference position to be found
        previous: Controls behavior when the reference position is not aligned to any
            read position. True indicates to return the previous read position, False
            indicates to return the next read position and None indicates to return None.

    Returns:
        The read position at the reference position, or None.

    Raises:
        ValueError: if ``ref_pos`` is outside ``[reference_start, reference_end)``, or if
            the record has no reference span at all (``reference_end`` is None).
    """
    ref_end = rec.reference_end
    # A record without an alignment reports reference_end as None; comparing against it
    # would raise an incidental TypeError, so treat it as "not within the span".
    if ref_end is None or ref_pos < rec.reference_start or ref_pos >= ref_end:
        raise ValueError(
            f"{ref_pos} is not within the reference span for read {rec.query_name}"
        )

    pairs = rec.get_aligned_pairs()
    # If no pair carries ref_pos the index ends up one past the last pair, exactly as a
    # manual counter incremented on every non-matching pair would.
    index = len(pairs)
    read_pos = None
    for i, (read, ref) in enumerate(pairs):
        if ref == ref_pos:
            index, read_pos = i, read
            break

    # Compare against None explicitly: read position 0 is a valid answer.
    if read_pos is not None or previous is None:
        return read_pos

    if previous:
        # Walk backwards to the closest earlier pair that has a read position.
        while read_pos is None and index > 0:
            index -= 1
            read_pos = pairs[index][0]
    else:
        # Walk forwards to the closest later pair that has a read position.
        while read_pos is None and index < len(pairs):
            read_pos = pairs[index][0]
            index += 1

    return read_pos
def _ref_pos2seq_pos(alignment: pysam.AlignedSegment, ref_pos: int) -> int:
    """
    Look up the query-sequence index paired with a reference position.

    The alignment and ref_pos are assumed to refer to the same reference.

    :param alignment: pysam.AlignedSegment
    :param ref_pos: reference position of the base
    :return: index into AlignedSegment.query_sequence for the first aligned pair whose
             reference position equals ref_pos. None is returned if no pair matches, or
             if the matching pair has no query position.
    """
    # TODO optimize: (try matches_only=True)
    # TODO optimize: case when alignment is fully matched based on CIGAR (e.g. 30M)
    pairs = alignment.get_aligned_pairs(matches_only=False, with_seq=False)
    # The first pair sitting on the requested reference position wins.
    return next(
        (query_idx for query_idx, reference_idx in pairs if reference_idx == ref_pos),
        None,
    )