Пример #1
0
def is_snp_called_correctly(record: pysam.AlignedSegment) -> bool:
    """Report whether the reference base paired with reference position 100 is not lower-case.

    Walks the aligned pairs of ``record`` (requested with ``with_seq=True``)
    and inspects the reference base reported for reference position 100.

    Args:
        record: Aligned read to inspect.

    Returns:
        True if the reference base at position 100 is not lower-case; False if
        it is lower-case, or if no aligned pair carries reference position 100.
    """
    for _query_pos, ref_pos, ref_base in record.get_aligned_pairs(with_seq=True):
        if ref_pos == 100:
            # NOTE(review): pysam is documented to report substituted
            # (mismatching) reference bases in lower case - confirm this is the
            # signal the caller relies on.
            return not ref_base.islower()

    # The position is not covered by this alignment. Return an explicit bool
    # instead of falling off the end and yielding None.
    return False
Пример #2
0
def record_contains_expected_snp(record: pysam.AlignedSegment) -> bool:
    """Check the reference base at the SNP query position against the read name.

    The expected base is taken from the last character of the read's query
    name. The reference base compared against it is the one paired with query
    position ``REF_PANEL_FLANK_WIDTH`` in the aligned pairs (requested with
    ``with_seq=True``).

    Args:
        record: Aligned read whose query name ends with the expected base.

    Returns:
        True if the first aligned pair at query position
        ``REF_PANEL_FLANK_WIDTH`` carries a reference base equal to the
        expected base; False otherwise, including when no such pair exists.
    """
    base_from_name = record.query_name[-1]
    aligned = record.get_aligned_pairs(with_seq=True)

    # First reference base paired with the SNP query position. A missing pair
    # and a pair whose reference base is None both end up as None here, and
    # neither can equal the one-character string taken from the name.
    base_at_snp = next(
        (base for qpos, _ref_pos, base in aligned if qpos == REF_PANEL_FLANK_WIDTH),
        None,
    )
    return base_from_name == base_at_snp
Пример #3
0
 def select_snps_from_single_read(
         read: pysam.AlignedSegment,
         snp_positions: Set[int],
         region_start: int
 ) -> Tuple[List[int], List[str]]:
     """Collect the bases a read carries at the requested SNP positions.

     Only aligned pairs returned with ``matches_only=True`` are considered.
     Each reference position is shifted by ``region_start`` before it is
     looked up in ``snp_positions``, so that set is expected to hold
     region-relative offsets.

     Args:
         read: Aligned read to scan.
         snp_positions: Region-relative positions of interest.
         region_start: Reference coordinate subtracted from every aligned
             reference position.

     Returns:
         Two parallel lists: the region-relative positions that were hit, and
         the read base at each of them after ``SNP.process_nucl``.
     """
     # TODO use indels
     bases = read.query_sequence
     hit_offsets: List[int] = []
     hit_nucls: List[str] = []
     for query_idx, ref_idx in read.get_aligned_pairs(matches_only=True):
         offset = ref_idx - region_start
         if offset not in snp_positions:
             continue
         hit_offsets.append(offset)
         hit_nucls.append(SNP.process_nucl(bases[query_idx]))
     return hit_offsets, hit_nucls
Пример #4
0
def _read_pos_at_ref_pos(rec: AlignedSegment,
                         ref_pos: int,
                         previous: Optional[bool] = None) -> Optional[int]:
    """
    Returns the read or query position at the reference position.

    If the reference position is not within the span of reference positions to which the
    read is aligned an exception will be raised.  If the reference position is within the span
    but is not aligned (i.e. it is deleted in the read) behavior is controlled by the
    "previous" argument.

    Args:
        rec: the AlignedSegment within which to find the read position
        ref_pos: the reference position to be found
        previous: Controls behavior when the reference position is not aligned to any
            read position.  True indicates to return the previous read position, False
            indicates to return the next read position and None indicates to return None.

    Returns:
        The read position at the reference position, or None.
    """
    if ref_pos < rec.reference_start or ref_pos >= rec.reference_end:
        raise ValueError(
            f"{ref_pos} is not within the reference span for read {rec.query_name}"
        )

    pairs = rec.get_aligned_pairs()
    index = 0
    read_pos = None
    for read, ref in pairs:
        if ref == ref_pos:
            read_pos = read
            break
        else:
            index += 1

    if not read_pos and previous is not None:
        if previous:
            while read_pos is None and index > 0:
                index -= 1
                read_pos = pairs[index][0]
        else:
            while read_pos is None and index < len(pairs):
                read_pos = pairs[index][0]
                index += 1

    return read_pos
Пример #5
0
 def _ref_pos2seq_pos(alignment: pysam.AlignedSegment, ref_pos: int) -> int:
     """
     Retrieve base position in sequence string at refence position.
     Alignment and ref_pos are assumed to be of the same reference.
     :param alignment: pysam.AlignedSegment
     :param ref_pos: reference position of base
     :return: AlignedSegment.query_sequence position matched to ref_pos.
     None is returned if matching position is not found.
     """
     # TODO optimalize: (try matches_only=True)
     # TODO optimalize: case when alignment is full matched based on CIGAR (e.g. 30M)
     
     seq_pos = None
     for current_seq_pos, current_ref_pos in alignment.get_aligned_pairs(matches_only=False, with_seq=False):
         # search for base in snv position
         if current_ref_pos == ref_pos:
             seq_pos = current_seq_pos
             break
     
     return seq_pos