示例#1
0
    def __shift_cigar(align, new_start=None, new_end=None):
        """Trim the cs tag stored in ``align.cigar`` to a new contig boundary.

        The operations yielded by ``parse_cs_tag`` are walked while tracking
        the contig position, which starts at ``align.s2`` and moves towards
        ``align.e2``. Each operation is kept, shortened or dropped depending
        on ``new_start`` / ``new_end``; the rebuilt tag (prefixed with
        ``cs:Z:``) is then written back to ``align.cigar``.

        :param align: object providing ``s2``, ``e2`` and ``cigar``. ``cigar``
            is overwritten unless it was empty.
        :param new_start: lowest contig coordinate to keep. Tested by
            truthiness, so ``None`` and ``0`` both mean "not set".
        :param new_end: highest contig coordinate to keep, same convention.
        :return: number of insertion bases trimmed away minus the number of
            deletion bases trimmed away (0 when ``align.cigar`` is empty).
        """
        length_delta = 0
        if not align.cigar:
            return length_delta

        step = 1 if align.s2 < align.e2 else -1
        pos = align.s2
        pieces = ['cs:Z:']

        for op in parse_cs_tag(align.cigar):
            if op.startswith('*'):
                # A substitution occupies exactly one contig base.
                if (new_start and pos >= new_start) or \
                        (new_end and pos <= new_end):
                    pieces.append(op)
                pos += step
                continue

            is_match = op.startswith(':')
            is_insertion = op.startswith('+')
            is_deletion = op.startswith('-')
            op_len = int(op[1:]) if is_match else len(op) - 1
            span_end = pos + op_len * step

            # Number of bases of this operation that stay inside the bounds.
            kept_len = op_len
            if new_end and (span_end > new_end or pos > new_end):
                kept_len = new_end - pos + (op_len if step == -1 else 1)
            elif new_start and (pos < new_start or span_end < new_start):
                kept_len = pos + (op_len if step == 1 else 1) - new_start

            if kept_len < 1:
                # Nothing of the operation is kept. Deletions do not move
                # the contig position; everything else does.
                if is_deletion:
                    length_delta -= op_len
                else:
                    pos += op_len * step
                    if is_insertion:
                        length_delta += op_len
                continue

            if is_insertion or is_deletion:
                trimmed = op_len - kept_len
                if is_insertion:
                    pos += op_len * step
                    length_delta += trimmed
                else:
                    length_delta -= trimmed
                # Indels are only re-emitted when a bound was supplied.
                if new_start:
                    # NOTE(review): the slice starts at 1 - trimmed, which
                    # looks unusual for "keep the last kept_len bases" --
                    # confirm against the intended trimming behaviour.
                    pieces.append(op[0] + op[1 - trimmed:])
                elif new_end:
                    pieces.append(op[:kept_len + 1])
            elif is_match:
                pos += op_len * step
                pieces.append(':' + str(kept_len))

        align.cigar = ''.join(pieces)
        return length_delta
示例#2
0
def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes, used_snps_fpath):
    """Count covered reference bases and collect mismatch/indel statistics.

    Every alignment's cs tag (``align.cigar``) is walked operation by
    operation. Substitutions, insertions and deletions are accumulated in an
    ``IndelsInfo`` object and, when ``qconfig.show_snps`` is set, written as
    tab-separated rows to ``used_snps_fpath``. Reference positions spanned by
    an alignment are marked as covered; positions listed in
    ``ns_by_chromosomes`` are then unmarked again.

    :param ref_aligns: mapping of a name to a list of alignments; each
        alignment provides ``s1``, ``e1``, ``s2``, ``e2``, ``ref``,
        ``contig`` and ``cigar``.
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of chromosome name to the positions
        that must never count as covered.
    :param used_snps_fpath: path of the SNP report; always (re)created.
    :return: tuple ``(covered_bases, indels_info)``.
    :raises KeyError: if an alignment refers to a reference that is missing
        from ``reference_chromosomes`` or ``ns_by_chromosomes``.
    """
    indels_info = IndelsInfo()
    # One slot per position plus an extra slot 0, so positions index directly.
    genome_mapping = {}
    for chr_name, chr_len in reference_chromosomes.items():
        genome_mapping[chr_name] = [0] * (chr_len + 1)

    snp_row = '%s\t%s\t%d\t%s\t%s\t%d\n'
    aligned_refs = set()
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            for align in aligns:
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        # An insertion consumes contig bases only.
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        # A deletion consumes reference bases only.
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                covered = genome_mapping[align.ref]
                aligned_refs.add(align.ref)
                if align.s1 < align.e1:
                    for pos in range(align.s1, align.e1 + 1):
                        covered[pos] = 1
                else:
                    # Start is not before end: the span runs to the end of
                    # the chromosome and continues from position 1.
                    for pos in range(align.s1, len(covered)):
                        covered[pos] = 1
                    for pos in range(1, align.e1 + 1):
                        covered[pos] = 1

    # Unmark N positions once all alignments are processed. This used to run
    # inside the outer loop with the leaked ``align`` loop variable, which is
    # unbound when the first group is empty and stale otherwise.
    for ref in aligned_refs:
        covered = genome_mapping[ref]
        for i in ns_by_chromosomes[ref]:
            covered[i] = 0

    covered_bases = sum(sum(covered) for covered in genome_mapping.values())
    return covered_bases, indels_info
示例#3
0
def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes, used_snps_fpath):
    """Count covered reference bases and collect mismatch/indel statistics.

    Every alignment's cs tag (``align.cigar``) is walked operation by
    operation. Substitutions, insertions and deletions are accumulated in an
    ``IndelsInfo`` object and, when ``qconfig.show_snps`` is set, written as
    tab-separated rows to ``used_snps_fpath``. Reference positions spanned by
    an alignment are marked as covered; positions listed in
    ``ns_by_chromosomes`` are then unmarked again.

    :param ref_aligns: mapping of a name to a list of alignments; each
        alignment provides ``s1``, ``e1``, ``s2``, ``e2``, ``ref``,
        ``contig`` and ``cigar``.
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of chromosome name to the positions
        that must never count as covered.
    :param used_snps_fpath: path of the SNP report; always (re)created.
    :return: tuple ``(covered_bases, indels_info)``.
    :raises KeyError: if an alignment refers to a reference that is missing
        from ``reference_chromosomes`` or ``ns_by_chromosomes``.
    """
    indels_info = IndelsInfo()
    # One slot per position plus an extra slot 0, so positions index directly.
    genome_mapping = {}
    for chr_name, chr_len in reference_chromosomes.items():
        genome_mapping[chr_name] = [0] * (chr_len + 1)

    snp_row = '%s\t%s\t%d\t%s\t%s\t%d\n'
    aligned_refs = set()
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            for align in aligns:
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        # An insertion consumes contig bases only.
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_row % (chr_name, align.contig, ref_pos, ref_nucl, ctg_nucl, ctg_pos))
                        # A deletion consumes reference bases only.
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                covered = genome_mapping[align.ref]
                aligned_refs.add(align.ref)
                if align.s1 < align.e1:
                    for pos in range(align.s1, align.e1 + 1):
                        covered[pos] = 1
                else:
                    # Start is not before end: the span runs to the end of
                    # the chromosome and continues from position 1.
                    for pos in range(align.s1, len(covered)):
                        covered[pos] = 1
                    for pos in range(1, align.e1 + 1):
                        covered[pos] = 1

    # Unmark N positions once all alignments are processed. This used to run
    # inside the outer loop with the leaked ``align`` loop variable, which is
    # unbound when the first group is empty and stale otherwise.
    for ref in aligned_refs:
        covered = genome_mapping[ref]
        for i in ns_by_chromosomes[ref]:
            covered[i] = 0

    covered_bases = sum(sum(covered) for covered in genome_mapping.values())
    return covered_bases, indels_info
示例#4
0
def split_align(coords_file, align_start, strand_direction, ref_start,
                ref_name, contig, cs):
    """Cut one alignment at every indel and write the indel-free pieces.

    The cs tag ``cs`` is walked with ``parse_cs_tag``. Operations starting
    with ``+`` or ``-`` end the current piece; everything else extends it.
    A piece is written to ``coords_file`` (one ``Mapping.coords_str()`` line)
    only if it spans at least ``qconfig.min_alignment`` contig bases, has a
    non-empty cs string and reference span, and its identity (matched bases
    over reference span, rounded to two decimals) reaches ``qconfig.min_IDY``.

    :param coords_file: writable text file object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: multiplier applied to contig offsets
        (presumably 1 or -1 -- confirm with callers).
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference name stored in each written ``Mapping``.
    :param contig: contig name stored in each written ``Mapping``.
    :param cs: cs tag of the alignment to split.
    """
    def _emit(ref_from, ctg_from, ref_n, ctg_n, matched, piece):
        # Too short, empty, or nothing on the reference: nothing to write.
        if ctg_n < qconfig.min_alignment or not ref_n or not piece:
            return
        identity = '%.2f' % (matched * 100.0 / ref_n)
        if float(identity) >= qconfig.min_IDY:
            fragment = Mapping(s1=ref_from,
                               e1=ref_from + ref_n - 1,
                               s2=ctg_from,
                               e2=ctg_from + (ctg_n - 1) * strand_direction,
                               len1=ref_n,
                               len2=ctg_n,
                               idy=identity,
                               ref=ref_name,
                               contig=contig,
                               cigar=piece)
            coords_file.write(fragment.coords_str() + '\n')

    ref_pos, ctg_pos = ref_start, align_start
    ref_span = ctg_span = n_matched = 0
    piece_cs = ''
    for op in parse_cs_tag(cs):
        kind = op[:1]
        if kind == '*':
            # Substitution: one base on both sequences, not counted as match.
            piece_cs += op
            ref_span += 1
            ctg_span += 1
        elif kind in ('+', '-'):
            gap = len(op) - 1
            _emit(ref_pos, ctg_pos, ref_span, ctg_span, n_matched, piece_cs)
            # Skip the piece just closed plus the gap on the affected side.
            ctg_pos += (ctg_span + (gap if kind == '+' else 0)) * strand_direction
            ref_pos += ref_span + (gap if kind == '-' else 0)
            ref_span = ctg_span = n_matched = 0
            piece_cs = ''
        else:
            block_len = int(op[1:]) if kind == ':' else len(op) - 1
            piece_cs += op
            ref_span += block_len
            ctg_span += block_len
            n_matched += block_len
    _emit(ref_pos, ctg_pos, ref_span, ctg_span, n_matched, piece_cs)
示例#5
0
文件: circos.py 项目: trocialba/quast
def create_mismatches_plot(assembly, window_size, ref_len, root_dir,
                           output_dir):
    """Write per-window mismatch counts for one assembly.

    The filtered coords file of the assembly is read line by line; the cs tag
    in the last column is walked to find the reference position of every
    substitution (``*`` operation), and the count of the window containing
    that position is incremented. The result is written to
    ``<output_dir>/<assembly_label>.mismatches.txt`` as tab-separated rows
    ``chrom, start, end, count``; consecutive empty windows are merged into a
    single row with count ``0``. Only chromosomes with at least one mismatch
    appear in the output.

    :param assembly: object whose ``fpath`` identifies the assembly.
    :param window_size: window width in reference bases.
    :param ref_len: length used to size the per-chromosome window list.
    :param root_dir: directory whose parent holds the detailed contig reports.
    :param output_dir: directory receiving the mismatches file.
    :return: path of the written file, or ``None`` when the filtered coords
        file is missing or ``qconfig.show_snps`` is off.
    """
    assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
    aligner_dirpath = join(root_dir, '..',
                           qconfig.detailed_contigs_reports_dirname)
    coords_basename = join(create_minimap_output_dir(aligner_dirpath),
                           assembly_label)
    _, coords_filtered_fpath, _, _ = get_aux_out_fpaths(coords_basename)
    if not exists(coords_filtered_fpath) or not qconfig.show_snps:
        return None

    mismatches_fpath = join(output_dir, assembly_label + '.mismatches.txt')
    n_windows = ref_len // window_size + 1
    mismatch_density_by_chrom = defaultdict(lambda: [0] * n_windows)
    with open(coords_filtered_fpath) as coords_file:
        for line in coords_file:
            fields = line.split()
            ref_pos = int(line.split('|')[0].split()[0])
            chrom = fields[11]
            cigar = fields[-1]
            for op in parse_cs_tag(cigar):
                if op.startswith('*'):
                    mismatch_density_by_chrom[chrom][ref_pos //
                                                     window_size] += 1
                    ref_pos += 1
                elif op.startswith(':'):
                    # ':N' is a run of N identical bases: advance by N, not
                    # by the number of digits used to write N.
                    ref_pos += int(op[1:])
                elif not op.startswith('+'):
                    # Insertions consume no reference bases; other
                    # operations carry one character per reference base.
                    ref_pos += len(op) - 1

    with open(mismatches_fpath, 'w') as out_f:
        for chrom, density_list in mismatch_density_by_chrom.items():
            # [start, end) is the pending run of empty windows; a falsy
            # ``end`` means no run is pending.
            start, end = 0, 0
            for i, density in enumerate(density_list):
                window_end = (i + 1) * window_size
                if density == 0:
                    end = window_end
                    continue
                if end:
                    out_f.write(
                        '\t'.join([chrom, str(start),
                                   str(end), '0']) + '\n')
                out_f.write('\t'.join([
                    chrom,
                    str(i * window_size),
                    str(window_end),
                    str(density)
                ]) + '\n')
                start = window_end
                end = None
            if end:
                out_f.write('\t'.join([chrom, str(start),
                                       str(end), '0']) + '\n')
    return mismatches_fpath
示例#6
0
def split_align(coords_file, align_start, strand_direction, ref_start, ref_name, contig, cs):
    """Cut one alignment at long indels and write the resulting pieces.

    The cs tag ``cs`` is walked with ``parse_cs_tag``. An insertion or
    deletion longer than ``SHORT_INDEL_THRESHOLD`` ends the current piece;
    shorter indels stay inside it. A piece is written to ``coords_file`` (one
    ``Mapping.coords_str()`` line) only if it spans at least
    ``qconfig.min_alignment`` contig bases, has a non-empty cs string and
    reference span, and its identity (matched bases over reference span,
    rounded to two decimals) reaches ``qconfig.min_IDY``.

    :param coords_file: writable text file object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: multiplier applied to contig offsets
        (presumably 1 or -1 -- confirm with callers).
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference name stored in each written ``Mapping``.
    :param contig: contig name stored in each written ``Mapping``.
    :param cs: cs tag of the alignment to split.
    """
    def _emit(ref_from, ctg_from, ref_n, ctg_n, matched, piece):
        # Too short, empty, or nothing on the reference: nothing to write.
        if ctg_n < qconfig.min_alignment or not ref_n or not piece:
            return
        identity = '%.2f' % (matched * 100.0 / ref_n)
        if float(identity) >= qconfig.min_IDY:
            fragment = Mapping(s1=ref_from, e1=ref_from + ref_n - 1,
                               s2=ctg_from, e2=ctg_from + (ctg_n - 1) * strand_direction,
                               len1=ref_n, len2=ctg_n, idy=identity,
                               ref=ref_name, contig=contig, cigar=piece)
            coords_file.write(fragment.coords_str() + '\n')

    ref_pos, ctg_pos = ref_start, align_start
    ref_span = ctg_span = n_matched = 0
    piece_cs = ''
    for op in parse_cs_tag(cs):
        kind = op[:1]
        if kind == '*':
            # Substitution: one base on both sequences, not counted as match.
            piece_cs += op
            ref_span += 1
            ctg_span += 1
        elif kind in ('+', '-'):
            gap = len(op) - 1
            # An insertion occupies contig bases, a deletion reference bases.
            ctg_gap = gap if kind == '+' else 0
            ref_gap = gap if kind == '-' else 0
            if gap > SHORT_INDEL_THRESHOLD:
                # Splitting at long indels yields shorter pieces with higher
                # identity.
                _emit(ref_pos, ctg_pos, ref_span, ctg_span, n_matched, piece_cs)
                ctg_pos += (ctg_span + ctg_gap) * strand_direction
                ref_pos += ref_span + ref_gap
                ref_span = ctg_span = n_matched = 0
                piece_cs = ''
            else:
                piece_cs += op
                ctg_span += ctg_gap
                ref_span += ref_gap
        else:
            block_len = int(op[1:]) if kind == ':' else len(op) - 1
            piece_cs += op
            ref_span += block_len
            ctg_span += block_len
            n_matched += block_len
    _emit(ref_pos, ctg_pos, ref_span, ctg_span, n_matched, piece_cs)
示例#7
0
    def __shift_cigar(align, new_start=None, new_end=None):
        """Trim the cs tag stored in ``align.cigar`` to a new contig boundary.

        The operations yielded by ``parse_cs_tag`` are walked while tracking
        the contig position, which starts at ``align.s2`` and moves towards
        ``align.e2``. Each operation is kept, shortened or dropped depending
        on ``new_start`` / ``new_end``; the rebuilt tag (prefixed with
        ``cs:Z:``) is then written back to ``align.cigar``.

        :param align: object providing ``s2``, ``e2`` and ``cigar``. ``cigar``
            is overwritten unless it was empty.
        :param new_start: lowest contig coordinate to keep. Tested by
            truthiness, so ``None`` and ``0`` both mean "not set".
        :param new_end: highest contig coordinate to keep, same convention.
        :return: number of insertion bases trimmed away minus the number of
            deletion bases trimmed away (0 when ``align.cigar`` is empty).
        """
        length_delta = 0
        if not align.cigar:
            return length_delta

        step = 1 if align.s2 < align.e2 else -1
        pos = align.s2
        pieces = ['cs:Z:']

        for op in parse_cs_tag(align.cigar):
            if op.startswith('*'):
                # A substitution occupies exactly one contig base.
                if (new_start and pos >= new_start) or (new_end and pos <= new_end):
                    pieces.append(op)
                pos += step
                continue

            is_match = op.startswith(':')
            is_insertion = op.startswith('+')
            is_deletion = op.startswith('-')
            op_len = int(op[1:]) if is_match else len(op) - 1
            span_end = pos + op_len * step

            # Number of bases of this operation that stay inside the bounds.
            kept_len = op_len
            if new_end and (span_end > new_end or pos > new_end):
                kept_len = new_end - pos + (op_len if step == -1 else 1)
            elif new_start and (pos < new_start or span_end < new_start):
                kept_len = pos + (op_len if step == 1 else 1) - new_start

            if kept_len < 1:
                # Nothing of the operation is kept. Deletions do not move
                # the contig position; everything else does.
                if is_deletion:
                    length_delta -= op_len
                else:
                    pos += op_len * step
                    if is_insertion:
                        length_delta += op_len
                continue

            if is_insertion or is_deletion:
                trimmed = op_len - kept_len
                if is_insertion:
                    pos += op_len * step
                    length_delta += trimmed
                else:
                    length_delta -= trimmed
                # Indels are only re-emitted when a bound was supplied.
                if new_start:
                    # NOTE(review): the slice starts at 1 - trimmed, which
                    # looks unusual for "keep the last kept_len bases" --
                    # confirm against the intended trimming behaviour.
                    pieces.append(op[0] + op[1 - trimmed:])
                elif new_end:
                    pieces.append(op[:kept_len + 1])
            elif is_match:
                pos += op_len * step
                pieces.append(':' + str(kept_len))

        align.cigar = ''.join(pieces)
        return length_delta
示例#8
0
文件: circos.py 项目: ablab/quast
def create_mismatches_plot(assembly, window_size, ref_len, root_dir, output_dir):
    """Write per-window mismatch counts for one assembly.

    The filtered coords file of the assembly is read line by line; the cs tag
    in the last column is walked to find the reference position of every
    substitution (``*`` operation), and the count of the window containing
    that position is incremented. The result is written to
    ``<output_dir>/<assembly_label>.mismatches.txt`` as tab-separated rows
    ``chrom, start, end, count``; consecutive empty windows are merged into a
    single row with count ``0``. Only chromosomes with at least one mismatch
    appear in the output.

    :param assembly: object whose ``fpath`` identifies the assembly.
    :param window_size: window width in reference bases.
    :param ref_len: length used to size the per-chromosome window list.
    :param root_dir: directory whose parent holds ``contigs_reports``.
    :param output_dir: directory receiving the mismatches file.
    :return: path of the written file, or ``None`` when the filtered coords
        file is missing or ``qconfig.show_snps`` is off.
    """
    assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
    aligner_dirpath = join(root_dir, '..', 'contigs_reports')
    coords_basename = join(create_minimap_output_dir(aligner_dirpath), assembly_label)
    _, coords_filtered_fpath, _, _ = get_aux_out_fpaths(coords_basename)
    if not exists(coords_filtered_fpath) or not qconfig.show_snps:
        return None

    mismatches_fpath = join(output_dir, assembly_label + '.mismatches.txt')
    n_windows = ref_len // window_size + 1
    mismatch_density_by_chrom = defaultdict(lambda: [0] * n_windows)
    with open(coords_filtered_fpath) as coords_file:
        for line in coords_file:
            fields = line.split()
            ref_pos = int(line.split('|')[0].split()[0])
            chrom = fields[11]
            cigar = fields[-1]
            for op in parse_cs_tag(cigar):
                if op.startswith('*'):
                    mismatch_density_by_chrom[chrom][ref_pos // window_size] += 1
                    ref_pos += 1
                elif op.startswith(':'):
                    # ':N' is a run of N identical bases: advance by N, not
                    # by the number of digits used to write N.
                    ref_pos += int(op[1:])
                elif not op.startswith('+'):
                    # Insertions consume no reference bases; other
                    # operations carry one character per reference base.
                    ref_pos += len(op) - 1

    with open(mismatches_fpath, 'w') as out_f:
        for chrom, density_list in mismatch_density_by_chrom.items():
            # [start, end) is the pending run of empty windows; a falsy
            # ``end`` means no run is pending.
            start, end = 0, 0
            for i, density in enumerate(density_list):
                window_end = (i + 1) * window_size
                if density == 0:
                    end = window_end
                    continue
                if end:
                    out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
                out_f.write('\t'.join([chrom, str(i * window_size), str(window_end), str(density)]) + '\n')
                start = window_end
                end = None
            if end:
                out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
    return mismatches_fpath
示例#9
0
def split_align(coords_file, align_start, strand_direction, ref_start,
                ref_name, contig, cs):
    """Cut one alignment at long indels or mismatch runs and write the pieces.

    A single ``Mapping`` is reused as the "current piece". The cs tag ``cs``
    is walked with ``parse_cs_tag``: match blocks extend the piece, while an
    insertion, a deletion or a run of consecutive substitutions either
    extends it or -- when longer than ``SPLIT_ALIGN_THRESHOLD`` -- closes it
    and starts a new piece right after the skipped bases. A closed piece is
    written to ``coords_file`` only if it spans at least
    ``qconfig.min_alignment`` contig bases, has a non-empty cs string and
    reference span, and its identity (matched bases over the longer of the
    two spans, rounded to two decimals) reaches ``qconfig.min_IDY``.

    :param coords_file: writable text file object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: multiplier applied to contig offsets
        (presumably 1 or -1 -- confirm with callers).
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference name stored in the ``Mapping``.
    :param contig: contig name stored in the ``Mapping``.
    :param cs: cs tag of the alignment to split.
    """
    piece = Mapping(s1=ref_start,
                    e1=ref_start,
                    s2=align_start,
                    e2=align_start,
                    len1=0,
                    len2=0,
                    ref=ref_name,
                    contig=contig,
                    cigar='')

    def _flush(n_matched):
        # Finalise end coordinates and identity, then write if it qualifies.
        if piece.len2 < qconfig.min_alignment or not piece.len1 or not piece.cigar:
            return
        piece.e1 = piece.s1 + piece.len1 - 1
        piece.e2 = piece.s2 + (piece.len2 - 1) * strand_direction
        piece.idy = '%.2f' % (n_matched * 100.0 /
                              max(piece.len1, piece.len2))
        if float(piece.idy) >= qconfig.min_IDY:
            coords_file.write(piece.coords_str() + '\n')

    def _split_or_extend(n_matched, ops_text, ref_n=0, ctg_n=0):
        # Returns the matched-base count to carry on with: 0 after a split,
        # unchanged otherwise.
        if max(ref_n, ctg_n) <= SPLIT_ALIGN_THRESHOLD:
            piece.len1 += ref_n
            piece.len2 += ctg_n
            piece.cigar += ops_text
            return n_matched
        # Splitting here yields shorter pieces with higher identity.
        _flush(n_matched)
        piece.s1 += piece.len1 + ref_n
        piece.s2 += (piece.len2 + ctg_n) * strand_direction
        piece.len1, piece.len2 = 0, 0
        piece.cigar = ''
        return 0

    matched = 0
    pending_mismatches = ''
    for op in parse_cs_tag(cs):
        if op.startswith('*'):
            # Collect consecutive substitutions; they are handled as one run.
            pending_mismatches += op
            continue
        if pending_mismatches:
            run_len = pending_mismatches.count('*')
            matched = _split_or_extend(matched, pending_mismatches,
                                       run_len, run_len)
        pending_mismatches = ''

        kind = op[:1]
        if kind == ':':
            block_len = int(op[1:])
            piece.cigar += op
            piece.len1 += block_len
            piece.len2 += block_len
            matched += block_len
        elif kind == '+':
            matched = _split_or_extend(matched, op, ctg_n=len(op) - 1)
        elif kind == '-':
            matched = _split_or_extend(matched, op, ref_n=len(op) - 1)
    _flush(matched)
示例#10
0
def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes,
                     used_snps_fpath):
    """Compute reference coverage, SNP/indel statistics and EAxmax metrics.

    Every alignment's cs tag (``align.cigar``) is walked operation by
    operation. Substitutions, insertions and deletions are accumulated in an
    ``IndelsInfo`` object and, when ``qconfig.show_snps`` is set, written as
    tab-separated rows to ``used_snps_fpath``. For each reference position
    spanned by an alignment the function records that it is covered and the
    largest alignment size seen there, once using
    ``len2_excluding_local_misassemblies`` and once using
    ``len2_including_local_misassemblies`` ("strict"). Positions listed in
    ``ns_by_chromosomes`` are reset to uncovered / size 0 afterwards.

    :param ref_aligns: mapping of a name to a list of alignments.
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of chromosome name to the positions
        that must never count as covered.
    :param used_snps_fpath: path of the SNP report; always (re)created.
    :return: tuple ``(covered_bases, indels_info, ea_x_max, strict_ea_x_max,
        ea_mean_max, strict_ea_mean_max, p5k, p10k, p15k, p20k, strict_p5k,
        strict_p10k, strict_p15k, strict_p20k)``.
    :raises KeyError: if an alignment refers to a reference that is missing
        from ``reference_chromosomes`` or ``ns_by_chromosomes``.
    """
    logger.info("    Enter analyze_coverage")
    indels_info = IndelsInfo()
    maximum_contig_align_size_per_ref_base = {}
    strict_maximum_contig_align_size_per_ref_base = {}
    genome_mapping = {}
    genome_length = 0
    # One slot per position plus an extra slot 0, so positions index directly.
    for chr_name, chr_len in reference_chromosomes.items():
        genome_mapping[chr_name] = [0] * (chr_len + 1)
        maximum_contig_align_size_per_ref_base[chr_name] = [0] * (chr_len + 1)
        strict_maximum_contig_align_size_per_ref_base[chr_name] = [0] * (
            chr_len + 1)
        genome_length += chr_len
    logger.info("      Genome length: " + str(genome_length))

    def _mark_covered(ref, positions, align_size, strict_align_size):
        # Flag the positions as covered and keep the largest alignment size
        # seen so far at each of them.
        covered = genome_mapping[ref]
        best = maximum_contig_align_size_per_ref_base[ref]
        strict_best = strict_maximum_contig_align_size_per_ref_base[ref]
        for pos in positions:
            covered[pos] = 1
            best[pos] = max(align_size, best[pos])
            strict_best[pos] = max(strict_align_size, strict_best[pos])

    snp_row = '%s\t%s\t%d\t%s\t%s\t%d\n'
    alignment_total_length = 0
    aligned_refs = set()
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            for align in aligns:
                # Vars with 1 are on the reference, vars with 2 are on the contig
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(
                                    snp_row %
                                    (chr_name, align.contig, ref_pos, ref_nucl,
                                     ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_row %
                                              (chr_name, align.contig, ref_pos,
                                               ref_nucl, ctg_nucl, ctg_pos))
                        # An insertion consumes contig bases only.
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_row %
                                              (chr_name, align.contig, ref_pos,
                                               ref_nucl, ctg_nucl, ctg_pos))
                        # A deletion consumes reference bases only.
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                alignment_total_length += align.len2_excluding_local_misassemblies
                # Use the same lengths that are used to compute (strict) NGAx.
                align_size = align.len2_excluding_local_misassemblies
                strict_align_size = align.len2_including_local_misassemblies
                aligned_refs.add(align.ref)
                if align.s1 < align.e1:
                    _mark_covered(align.ref, range(align.s1, align.e1 + 1),
                                  align_size, strict_align_size)
                else:
                    # Start is not before end: the span runs to the end of
                    # the chromosome and continues from position 1.
                    _mark_covered(
                        align.ref,
                        range(align.s1, len(genome_mapping[align.ref])),
                        align_size, strict_align_size)
                    _mark_covered(align.ref, range(1, align.e1 + 1),
                                  align_size, strict_align_size)

    # Reset N positions once all alignments are processed. This used to run
    # inside the outer loop with the leaked ``align`` loop variable (unbound
    # when the first group is empty, stale otherwise) and zeroed the size
    # arrays at the leftover index ``pos`` instead of the N position ``i``.
    for ref in aligned_refs:
        covered = genome_mapping[ref]
        best = maximum_contig_align_size_per_ref_base[ref]
        strict_best = strict_maximum_contig_align_size_per_ref_base[ref]
        for i in ns_by_chromosomes[ref]:
            covered[i] = 0
            best[i] = 0
            strict_best[i] = 0

    covered_bases = sum(sum(covered) for covered in genome_mapping.values())

    ea_x_max, ea_mean_max, p5k, p10k, p15k, p20k = _summarize_align_sizes(
        maximum_contig_align_size_per_ref_base)
    (strict_ea_x_max, strict_ea_mean_max, strict_p5k, strict_p10k,
     strict_p15k, strict_p20k) = _summarize_align_sizes(
         strict_maximum_contig_align_size_per_ref_base)

    # Logging must not abort the analysis when nothing aligned.
    if covered_bases:
        duplication_ratio = alignment_total_length / covered_bases
    else:
        duplication_ratio = float('nan')
    logger.info("      Duplication ratio = %.2f = %d/%d" %
                (duplication_ratio, alignment_total_length, covered_bases))
    logger.info("      EA50max = {}".format(ea_x_max[50]))
    logger.info("      Strict EA50max = {}".format(strict_ea_x_max[50]))
    logger.info("      len2 NGA50 = {}".format(
        N50.NG50_and_LG50(
            [align.len2 for aligns in ref_aligns.values() for align in aligns],
            genome_length,
            need_sort=True)[0]))
    logger.info("      len2_excluding_local_misassemblies NGA50 = {}".format(
        N50.NG50_and_LG50([
            align.len2_excluding_local_misassemblies
            for aligns in ref_aligns.values() for align in aligns
        ],
                          genome_length,
                          need_sort=True)[0]))

    return covered_bases, indels_info, ea_x_max, strict_ea_x_max, ea_mean_max, strict_ea_mean_max, p5k, p10k, p15k, p20k, strict_p5k, strict_p10k, strict_p15k, strict_p20k


def _summarize_align_sizes(size_per_ref_base):
    """Summarise per-position maximum alignment sizes over all chromosomes.

    :param size_per_ref_base: mapping of chromosome name to a list holding
        one size per position (including the extra slot 0).
    :return: tuple ``(ea_x, ea_mean, p5k, p10k, p15k, p20k)`` where
        ``ea_x[i]`` is the size found ``i`` percent into the descending
        list (``ea_x[100]`` is the smallest size), ``ea_mean`` is the
        truncated mean, and the ``p*`` values are ``P(sizes, threshold)``
        for thresholds 5000, 10000, 15000 and 20000.
    :raises IndexError: if there are no positions at all.
    """
    sizes = [
        size
        for per_chrom in size_per_ref_base.values()
        for size in per_chrom
    ]
    sizes.sort(reverse=True)
    ea_x = [0] * 101
    for i in range(100):
        ea_x[i] = sizes[(len(sizes) * i) // 100]
    ea_x[100] = sizes[-1]
    ea_mean = int(sum(sizes) / len(sizes))
    return (ea_x, ea_mean, P(sizes, 5000), P(sizes, 10000), P(sizes, 15000),
            P(sizes, 20000))