def lowercase_below_qual_threshold(seq, qual, threshold):
    ''' Returns seq with characters made lowercase at any position for which
    qual is at or below threshold.
    '''
    seq = list(seq)
    qual = fastq.decode_sanger(qual)
    for p, (s, q) in enumerate(zip(seq, qual)):
        if q <= threshold:
            seq[p] = s.lower()
    return ''.join(seq)
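A minimal usage sketch (not from the original module): assuming fastq.decode_sanger performs standard Phred+33 decoding, '!' decodes to quality 0 and 'I' to 40, so a threshold of 20 lowercases only the low-quality positions.

# Hypothetical illustration of the intended behavior.
seq = 'ACGTACGT'
qual = 'IIII!!II'                     # two low-quality calls in the middle
masked = lowercase_below_qual_threshold(seq, qual, 20)
# masked == 'ACGTacGT'  (positions with quality <= 20 are lowercased)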
Example No. 3
def align_reads(
    target_fasta_fn,
    reads,
    bam_fn,
    min_path_length=15,
    error_fn='/dev/null',
    alignment_type='overlap',
):
    ''' Aligns reads to targets in target_fasta_fn by Smith-Waterman, storing
    alignments in bam_fn and yielding unaligned reads.
    '''
    targets = {r.name: r.seq for r in fasta.reads(target_fasta_fn)}

    target_names = sorted(targets)
    target_lengths = [len(targets[n]) for n in target_names]
    alignment_sorter = sam.AlignmentSorter(
        target_names,
        target_lengths,
        bam_fn,
    )
    statistics = Counter()

    with alignment_sorter:
        for original_read in reads:
            statistics['input'] += 1

            alignments = []

            rc_read = fastq.Read(
                original_read.name,
                utilities.reverse_complement(original_read.seq),
                original_read.qual[::-1],
            )

            for read, is_reverse in ([original_read, False], [rc_read, True]):
                qual = fastq.decode_sanger(read.qual)
                for target_name, target_seq in targets.items():
                    alignment = generate_alignments(read.seq, target_seq,
                                                    alignment_type)[0]
                    path = alignment['path']
                    if len(path) >= min_path_length and alignment['score'] / (
                            2. * len(path)) > 0.8:
                        aligned_segment = pysam.AlignedSegment()
                        aligned_segment.seq = read.seq
                        aligned_segment.query_qualities = qual
                        aligned_segment.is_reverse = is_reverse

                        char_pairs = make_char_pairs(path, read.seq,
                                                     target_seq)

                        cigar = sam.aligned_pairs_to_cigar(char_pairs)
                        clip_from_start = first_query_index(path)
                        if clip_from_start > 0:
                            cigar = [(sam.BAM_CSOFT_CLIP, clip_from_start)
                                     ] + cigar
                        clip_from_end = len(
                            read.seq) - 1 - last_query_index(path)
                        if clip_from_end > 0:
                            cigar = cigar + [
                                (sam.BAM_CSOFT_CLIP, clip_from_end)
                            ]
                        aligned_segment.cigar = cigar

                        read_aligned, ref_aligned = zip(*char_pairs)
                        md = sam.alignment_to_MD_string(
                            ref_aligned, read_aligned)
                        aligned_segment.set_tag('MD', md)

                        aligned_segment.set_tag('AS', alignment['score'])
                        aligned_segment.tid = alignment_sorter.get_tid(
                            target_name)
                        aligned_segment.query_name = read.name
                        aligned_segment.next_reference_id = -1
                        aligned_segment.reference_start = first_target_index(
                            path)

                        alignments.append(aligned_segment)

            if alignments:
                statistics['aligned'] += 1

                sorted_alignments = sorted(alignments,
                                           key=lambda m: m.get_tag('AS'),
                                           reverse=True)
                grouped = utilities.group_by(sorted_alignments,
                                             key=lambda m: m.get_tag('AS'))
                _, highest_group = next(grouped)
                primary_already_assigned = False
                for alignment in highest_group:
                    if len(highest_group) == 1:
                        alignment.mapping_quality = 2
                    else:
                        alignment.mapping_quality = 1

                    if not primary_already_assigned:
                        primary_already_assigned = True
                    else:
                        alignment.is_secondary = True

                    alignment_sorter.write(alignment)
            else:
                statistics['unaligned'] += 1

                # Yield the read in its original (forward) orientation.
                yield original_read

        with open(error_fn, 'w') as error_fh:
            for key in ['input', 'aligned', 'unaligned']:
                error_fh.write('{0}: {1:,}\n'.format(key, statistics[key]))
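Because align_reads is a generator, nothing is aligned until it is iterated. A hedged driver sketch follows; the fastq.reads helper and the file names are assumptions rather than part of the original code.

# Hypothetical driver: write alignments to a BAM and collect the reads
# that could not be aligned to any target.
reads = fastq.reads('sample.fastq')   # assumed reader from the same package
unaligned = list(align_reads('targets.fasta', reads, 'aligned.bam',
                             error_fn='alignment_stats.txt'))
print('{0:,} reads did not align'.format(len(unaligned)))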
Example No. 4
def trim_mismatches_from_start(mapping, region_fetcher, type_counts):
    ''' Remove all consecutive Q30+ mismatches from the beginning of alignments,
        under the assumption that these represent untemplated additions during
        reverse transcription.
        Characterize the mismatches into type_counts.
    '''
    if sam.contains_indel_pysam(mapping) or mapping.is_unmapped:
        set_nongenomic_length(mapping, 0)
        return mapping

    if mapping.is_reverse:
        aligned_pairs = mapping.aligned_pairs[::-1]
        index_lookup = utilities.base_to_complement_index
    else:
        aligned_pairs = mapping.aligned_pairs
        index_lookup = utilities.base_to_index

    decoded_qual = fastq.decode_sanger(mapping.qual)

    bases_to_trim = 0
    found_trim_point = False
    first_ref_index = None
    for read_index, ref_index in aligned_pairs:
        if read_index == None:
            # This shouldn't be able to be triggered since alignments
            # containing indels are ruled out above.
            continue

        if mapping.is_reverse:
            corrected_read_index = mapping.qlen - 1 - read_index
        else:
            corrected_read_index = read_index

        ref_base = region_fetcher(mapping.tid, ref_index, ref_index + 1)
        read_base = mapping.seq[read_index]
        read_qual = decoded_qual[read_index]
        coords = (
            mapping.qlen,
            corrected_read_index,
            read_qual,
            index_lookup[ref_base],
            index_lookup[read_base],
        )
        type_counts[coords] += 1

        if not found_trim_point:
            if read_base != ref_base and read_qual >= 30:
                bases_to_trim += 1
            else:
                first_ref_index = ref_index
                found_trim_point = True

    if first_ref_index == None:
        raise ValueError('first_ref_index not set')

    if bases_to_trim == 0:
        trimmed_mapping = mapping
    else:
        trimmed_mapping = pysam.AlignedRead()
        trimmed_mapping.qname = mapping.qname
        trimmed_mapping.tid = mapping.tid

        # first_ref_index has been set above to be the index of the
        # reference base aligned to the first non-trimmed base in the
        # read. If the mapping is forward, this will be the new pos.
        # If the mapping is reverse, the pos won't change.
        if mapping.is_reverse:
            first_ref_index = mapping.pos
        trimmed_mapping.pos = first_ref_index

        trimmed_mapping.is_reverse = mapping.is_reverse
        trimmed_mapping.is_secondary = mapping.is_secondary
        trimmed_mapping.mapq = mapping.mapq

        if mapping.is_reverse:
            # bases_to_trim is never zero here, so there is no danger
            # of minus zero
            trimmed_slice = slice(None, -bases_to_trim)
        else:
            trimmed_slice = slice(bases_to_trim, None)

        trimmed_mapping.seq = mapping.seq[trimmed_slice]
        trimmed_mapping.qual = mapping.qual[trimmed_slice]
        trimmed_mapping.rnext = -1
        trimmed_mapping.pnext = -1

        trimmed_length = len(mapping.seq) - bases_to_trim
        if mapping.is_reverse:
            # Remove blocks from the end
            trimmed_cigar = sam.truncate_cigar_blocks_up_to(
                mapping.cigar, trimmed_length)
        else:
            # Remove blocks from the beginning
            trimmed_cigar = sam.truncate_cigar_blocks_from_beginning(
                mapping.cigar, trimmed_length)

        trimmed_mapping.cigar = trimmed_cigar

    return trimmed_mapping
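trim_mismatches_from_start needs a region_fetcher(tid, start, end) callable that returns reference sequence plus a Counter to accumulate mismatch types. A rough sketch of one way to wire that up with pysam (the file names and the FastaFile-based lookup are assumptions):

from collections import Counter

import pysam

bam = pysam.AlignmentFile('input.bam', 'rb')   # assumed input BAM
genome = pysam.FastaFile('genome.fa')          # assumed indexed reference FASTA

def region_fetcher(tid, start, end):
    # Map the BAM tid back to a reference name and fetch the base(s).
    return genome.fetch(bam.get_reference_name(tid), start, end)

type_counts = Counter()
trimmed = [trim_mismatches_from_start(m, region_fetcher, type_counts) for m in bam]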
Example No. 5
def combine_paired_mappings(R1_mapping, R2_mapping, verbose=False):
    ''' Takes two pysam mappings representing opposite ends of a fragment and
    combines them into one mapping, (ab)using BAM_CREF_SKIP to bridge the gap
    (if any) between them.
    '''
    R1_strand = sam.get_strand(R1_mapping)

    if R1_strand == '+':
        left_mapping, right_mapping = R1_mapping, R2_mapping
    elif R1_strand == '-':
        left_mapping, right_mapping = R2_mapping, R1_mapping

    left_md = dict(left_mapping.tags)['MD']
    right_md = dict(right_mapping.tags)['MD']

    right_aligned_pairs = sam.cigar_to_aligned_pairs(
        right_mapping.cigar, right_mapping.reference_start)

    right_after_overlap_pair_index = len(right_aligned_pairs)
    for i, (read, ref) in enumerate(right_aligned_pairs):
        if ref != None and ref >= left_mapping.aend:
            right_after_overlap_pair_index = i
            break

    right_overlap_pairs = right_aligned_pairs[:right_after_overlap_pair_index]
    right_after_overlap_pairs = right_aligned_pairs[
        right_after_overlap_pair_index:]

    right_reads_after = [
        read for read, ref in right_after_overlap_pairs
        if read != None and read != 'N'
    ]
    right_refs_after = [
        ref for read, ref in right_after_overlap_pairs if ref != None
    ]

    right_overlap_cigar = sam.aligned_pairs_to_cigar(right_overlap_pairs)
    right_after_overlap_cigar = sam.aligned_pairs_to_cigar(
        right_after_overlap_pairs)
    right_after_overlap_md = sam.truncate_md_string_from_beginning(
        right_md, len(right_refs_after))

    right_after_overlap_read_start = len(
        right_mapping.seq) - len(right_reads_after)

    right_overlap_seq = right_mapping.seq[:right_after_overlap_read_start]
    right_overlap_qual = right_mapping.qual[:right_after_overlap_read_start]

    right_after_overlap_seq = right_mapping.seq[
        right_after_overlap_read_start:]
    right_after_overlap_qual = right_mapping.qual[
        right_after_overlap_read_start:]

    left_aligned_pairs = sam.cigar_to_aligned_pairs(
        left_mapping.cigar, left_mapping.reference_start)

    left_before_overlap_pair_index = -1
    for i, (read, ref) in list(enumerate(left_aligned_pairs))[::-1]:
        if ref != None and ref < right_mapping.pos:
            left_before_overlap_pair_index = i
            break

    left_overlap_pairs = left_aligned_pairs[left_before_overlap_pair_index +
                                            1:]
    left_before_overlap_pairs = left_aligned_pairs[:
                                                   left_before_overlap_pair_index
                                                   + 1]

    left_reads_before = [
        read for read, ref in left_before_overlap_pairs
        if read != None and read != 'N'
    ]
    left_refs_before = [
        ref for read, ref in left_before_overlap_pairs if ref != None
    ]

    left_overlap_cigar = sam.aligned_pairs_to_cigar(left_overlap_pairs)
    left_before_overlap_cigar = sam.aligned_pairs_to_cigar(
        left_before_overlap_pairs)
    left_before_overlap_md = sam.truncate_md_string_up_to(
        left_md, len(left_refs_before))

    left_overlap_read_start = len(left_reads_before)
    left_overlap_seq = left_mapping.seq[left_overlap_read_start:]
    left_overlap_qual = left_mapping.qual[left_overlap_read_start:]

    left_before_overlap_seq = left_mapping.seq[:left_overlap_read_start]
    left_before_overlap_qual = left_mapping.qual[:left_overlap_read_start]

    if left_overlap_pairs or right_overlap_pairs:
        gap_length = 0

        left_has_splicing = sam.contains_splicing(left_mapping)
        right_has_splicing = sam.contains_splicing(right_mapping)

        if left_overlap_cigar == right_overlap_cigar:
            # If the two mappings agree about the location of indels in their overlap,
            # use the seq from the mapping with the higher average quality in the
            # overlap.
            left_mean_qual = np.mean(fastq.decode_sanger(left_overlap_qual))
            right_mean_qual = np.mean(fastq.decode_sanger(right_overlap_qual))

            if left_mean_qual > right_mean_qual:
                use_overlap_from = 'left'
            else:
                use_overlap_from = 'right'
        elif left_has_splicing != right_has_splicing:
            # A temporary(?) heuristic - if one read has splicing and the other
            # doesn't, use the overlap from the one with splicing under the
            # assumption that the other just has a few bases overhanging the
            # splice junction.
            if left_has_splicing:
                use_overlap_from = 'left'
            else:
                use_overlap_from = 'right'
        else:
            # If the two mappings disagree about the location of indels in their overlap,
            # we need a heuristic for picking which mapping we believe reflects the
            # true structure of the input fragment. The most innocuous explanation
            # is that a 'true' indel happened to lie close to the edge of one of the
            # mappings. A more problematic situation is a 'false' indel (that is,
            # produced during cluster generation or sequencing-by-synthesis, NOT
            # template production). Our strategy is: realign the overlapping part of
            # left mapping starting from the left edge of the overlap according to the
            # cigar of the right mapping and realign the overlapping part of the right
            # mapping starting from the right edge of the overlap according to the cigar
            # of the left mapping. Count the number of mismatches produced by each.
            # If the left overlap can accommodate the right cigar with fewer mismatches,
            # use the right cigar and seq. If the right overlap can accommodate the left
            # cigar with fewer mismatches, use the left cigar and seq.

            # The leftmost aligned_pair from the right mapping is guaranteed by the
            # mapping process to not involve a gap.
            _, overlap_ref_start = right_overlap_pairs[0]
            # Similarly, the rightmost aligned_pair from the left mapping can't be a
            # gap.
            _, overlap_ref_end = left_overlap_pairs[-1]

            realigned_left_cigar = sam.truncate_cigar_blocks_up_to(
                right_mapping.cigar, len(left_overlap_seq))
            realigned_right_cigar = sam.truncate_cigar_blocks_from_beginning(
                left_mapping.cigar, len(right_overlap_seq))

            ref_dict = sam.merge_ref_dicts(
                sam.ref_dict_from_mapping(left_mapping),
                sam.ref_dict_from_mapping(right_mapping),
            )

            try:
                left_using_right_mismatches = realigned_mismatches(
                    left_overlap_seq, overlap_ref_start, realigned_left_cigar,
                    ref_dict)
                right_using_left_mismatches = realigned_mismatches_backwards(
                    right_overlap_seq, overlap_ref_end, realigned_right_cigar,
                    ref_dict)
            except ValueError:
                print(left_mapping)
                print(right_mapping)
                raise

            if verbose:
                logging.info('disagreements in {0}'.format(left_mapping.qname))
                logging.info('left overlap cigar is  {0}'.format(
                    str(left_overlap_cigar)))
                logging.info('right overlap cigar is {0}'.format(
                    str(right_overlap_cigar)))
                logging.info('left_using_right_mismatches - {0}'.format(
                    len(left_using_right_mismatches)))
                logging.info('right_using_left_mismatches - {0}'.format(
                    len(right_using_left_mismatches)))

            if len(left_using_right_mismatches) < len(
                    right_using_left_mismatches):
                use_overlap_from = 'right'
            elif len(right_using_left_mismatches) < len(
                    left_using_right_mismatches):
                use_overlap_from = 'left'
            else:
                logging.info('disagreements in {0}'.format(left_mapping.qname))
                logging.info('left overlap cigar is  {0}'.format(
                    str(left_overlap_cigar)))
                logging.info('right overlap cigar is {0}'.format(
                    str(right_overlap_cigar)))
                logging.info('left_using_right_mismatches - {0}'.format(
                    len(left_using_right_mismatches)))
                logging.info('right_using_left_mismatches - {0}'.format(
                    len(right_using_left_mismatches)))
                logging.info('ambiguous disagreement')
                return False

    else:
        gap_length = right_mapping.pos - left_mapping.aend
        # It doesn't matter what use_overlap_from is set to; there is no overlap
        use_overlap_from = 'left'

    combined_mapping = pysam.AlignedRead()
    combined_mapping.qname = left_mapping.qname
    combined_mapping.tid = left_mapping.tid
    combined_mapping.mapq = min(left_mapping.mapq, right_mapping.mapq)
    combined_mapping.rnext = -1
    combined_mapping.pnext = -1
    combined_mapping.pos = left_mapping.pos

    if R1_strand == '-':
        combined_mapping.is_reverse = True

    gap_cigar = [(sam.BAM_CREF_SKIP, gap_length)]

    if use_overlap_from == 'left':
        combined_mapping.seq = left_mapping.seq + right_after_overlap_seq
        combined_mapping.qual = left_mapping.qual + right_after_overlap_qual
        combined_mapping.cigar = left_mapping.cigar + gap_cigar + right_after_overlap_cigar

        combined_md = sam.combine_md_strings(left_md, right_after_overlap_md)
        combined_mapping.setTag('MD', combined_md)

        overlap_seq_tag = right_overlap_seq
        overlap_qual_tag = right_overlap_qual

    elif use_overlap_from == 'right':
        combined_mapping.seq = left_before_overlap_seq + right_mapping.seq
        combined_mapping.qual = left_before_overlap_qual + right_mapping.qual
        combined_mapping.cigar = left_before_overlap_cigar + gap_cigar + right_mapping.cigar

        combined_md = sam.combine_md_strings(left_before_overlap_md, right_md)
        combined_mapping.setTag('MD', combined_md)

        overlap_seq_tag = left_overlap_seq
        overlap_qual_tag = left_overlap_qual

    if len(overlap_seq_tag) > 0:
        # Having empty tags causes problems, so don't create them.
        combined_mapping.setTag('Xs', overlap_seq_tag)
        combined_mapping.setTag('Xq', overlap_qual_tag)
        combined_mapping.setTag('Xw', use_overlap_from)

    return combined_mapping
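A hedged sketch of feeding the function mate pairs from a name-sorted BAM; pairing consecutive records and the file name are assumptions about how the mappings would be obtained.

import pysam

combined_mappings = []
with pysam.AlignmentFile('name_sorted.bam', 'rb') as bam:
    records = iter(bam)
    # After name-sorting, R1 and R2 of a pair are consecutive records.
    for R1, R2 in zip(records, records):
        combined = combine_paired_mappings(R1, R2)
        if combined:  # False signals an ambiguous overlap disagreement
            combined_mappings.append(combined)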
Example No. 6
def collapseUMIs(reads, readThres, outfile):
    # collapse reads assuming cellBC and UMI are true
    # identifies consensus sequence and reports as sequence for each cellBC-UMI combination

    numReadsQualFilt = 0

    UMIGrps = {}
    for r in reads:  # itertools.islice(reads,10000):
        avgQ50 = np.mean(fastq.decode_sanger(r.qual[0:50]))
        if avgQ50 < 20:
            numReadsQualFilt = numReadsQualFilt + 1
            continue
        n = r.name.split('_')
        cellGroup = n[1] + "_" + n[2]
        readcount = int(n[3])

        if cellGroup in UMIGrps:
            [seqs, counts] = UMIGrps[cellGroup]
            seqs.append(r.seq)
            counts.append(readcount)
        else:
            UMIGrps[cellGroup] = [[r.seq], [readcount]]

    print("# of cell-UMI groups: " + str(len(UMIGrps)) + " (includes <" +
          str(readThres) + ")")

    read_dist = []
    for u in UMIGrps:
        [seqs, counts] = UMIGrps[u]
        read_dist.append(sum(counts))

    h = plt.figure(figsize=(14, 10))
    ax = plt.hist(read_dist, log=True)
    plt.ylabel("Frequency")
    plt.xlabel("Number of Reads")
    plt.title("Reads Per UMI")
    plt.savefig("collapsedUMIs_reads_per_umi.init.png")
    plt.close()

    readThresh = np.percentile(read_dist, 99) / 10
    print("Filtering out UMIs with less than " + str(readThresh) + " reads")

    fh = open(outfile, 'w')
    fh.write("cellBC\tUMI\treadCount\tconsensusSeq\n")

    numBelowReadThres = 0
    numMaj = 0
    numCon = 0
    numSingles = 0
    counter = 1
    for k in UMIGrps:  # each UMI group consists of reads from the same molecule

        [seqs, counts] = UMIGrps[k]
        grpSize = sum(counts)
        if grpSize < readThres:  # too few reads to include
            numBelowReadThres = numBelowReadThres + 1
            continue

        n = k.split("_")
        if len(seqs) == 1:  # trivial case added 9/11/2017
            numSingles = numSingles + 1
            fh.write("\t".join([str(n[0]),
                                str(n[1]),
                                str(counts[0]), seqs[0]]) + "\n")
        else:
            #
            # Update 9/1/2017: try to improve speeds by increasing the number of same reads to feed
            #	into majority instead of consensus finding
            #   trim to the length of the (read-count-weighted) 30th percentile read, ranked by length
            #
            s1 = pd.DataFrame({"seq": seqs, "readCount": counts})
            s1["seqLen"] = s1["seq"].str.len()
            s1 = s1.sort_values("seqLen").reset_index(
                drop=True)  # sorts reads by length in ascending
            totalReads = s1["readCount"].sum()
            cReads = s1["readCount"].cumsum()  # cumulative
            rPctile = 0.3 * totalReads  # 30th percentile
            rPctileIndex = cReads[cReads >= rPctile].index[
                0]  # index of seq length
            sLen = s1.loc[rPctileIndex, "seqLen"]
            s1["seq"] = s1["seq"].str[0:sLen]
            s2 = s1.groupby(["seq"]).agg({
                "readCount": np.sum
            }).sort_values("readCount", ascending=False)  # indexed by seq

            grpProp = s2.loc[s2.index[0], "readCount"] / float(totalReads)

            if grpProp > .50:
                consensusSeq = s2.index[0]
                numMaj = numMaj + 1
            else:
                consensusSeq = get_consensus(s2.index.tolist(),
                                             s2["readCount"].tolist())
                numCon = numCon + 1

            # print Entry
            fh.write("\t".join([
                str(n[0]), str(n[1]),
                str(totalReads), consensusSeq
            ]) + "\n")

        counter = counter + 1
        if counter % 1000 == 0:
            print(str(counter) + " groups processed...")

    fh.close()

    print("# of cell-UMI groups = " + str(len(UMIGrps)))
    print("# reads qual <20 (filtered) = " + str(numReadsQualFilt))
    print("# grps w/ reads<" + str(readThres) + " = " + str(numBelowReadThres))
    print("# grps singles = " + str(numSingles))
    print("# grps >0.5 = " + str(numMaj))
    print("# grps concensus = " + str(numCon))