def __shift_cigar(align, new_start=None, new_end=None):
    """Rebuild ``align.cigar`` (a cs tag) trimmed to a new contig bound.

    The operations yielded by ``parse_cs_tag`` are walked while tracking the
    contig position, which starts at ``align.s2`` and moves by +1 per base when
    ``align.s2 < align.e2`` and by -1 otherwise. The rebuilt tag is prefixed
    with ``cs:Z:`` and stored back into ``align.cigar``.

    :param align: object exposing ``s2``, ``e2`` and ``cigar``.
    :param new_start: contig coordinate to trim to on the start side; a falsy
        value disables start trimming.
    :param new_end: contig coordinate to trim to on the end side; a falsy
        value disables end trimming.
    :return: accumulated length difference: for every insertion the number of
        trimmed bases is added, for every deletion it is subtracted. Returns 0
        (and leaves ``align.cigar`` untouched) when the cigar is empty.
    """
    length_shift = 0
    if not align.cigar:
        return length_shift

    direction = 1 if align.s2 < align.e2 else -1
    cursor = align.s2
    pieces = ['cs:Z:']

    for op in parse_cs_tag(align.cigar):
        kind = op[:1]

        if kind == '*':
            # A substitution spans one contig base; it is kept only when a
            # bound was given and the current position satisfies it.
            if (new_start and cursor >= new_start) or (new_end and cursor <= new_end):
                pieces.append(op)
            cursor += direction
            continue

        # ':N' carries its length as a number, other ops as a base string.
        n_bases = int(op[1:]) if kind == ':' else len(op) - 1
        span = n_bases * direction

        # Number of bases of this operation that survive the trimming.
        kept = n_bases
        if new_end and (cursor + span > new_end or cursor > new_end):
            kept = new_end - cursor + (n_bases if direction == -1 else 1)
        elif new_start and (cursor < new_start or cursor + span < new_start):
            kept = cursor + (n_bases if direction == 1 else 1) - new_start

        if kept < 1:
            # The whole operation is dropped. Deletions never move the contig
            # cursor; everything else does.
            if kind == '-':
                length_shift -= n_bases
            else:
                cursor += span
                if kind == '+':
                    length_shift += n_bases
            continue

        if kind == '+':
            cursor += span
            length_shift += n_bases - kept
            if new_start:
                # NOTE(review): when kept < n_bases the slice start
                # 1 + (kept - n_bases) is <= 0, i.e. it is not an offset past
                # the sign character -- confirm this is the intended suffix.
                pieces.append('+' + op[1 + (kept - n_bases):])
            elif new_end:
                pieces.append(op[:kept + 1])
        elif kind == '-':
            length_shift -= n_bases - kept
            if new_start:
                # NOTE(review): same slice-start caveat as for insertions.
                pieces.append('-' + op[1 + (kept - n_bases):])
            elif new_end:
                pieces.append(op[:kept + 1])
        elif kind == ':':
            cursor += span
            pieces.append(':' + str(kept))

    align.cigar = ''.join(pieces)
    return length_shift
def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes, used_snps_fpath):
    """Compute reference coverage and mismatch/indel statistics.

    Every alignment's cs tag (``align.cigar``) is walked to count mismatches,
    insertions and deletions, and the reference interval ``s1..e1`` of the
    alignment is marked as covered. Positions listed in ``ns_by_chromosomes``
    are then unmarked.

    :param ref_aligns: mapping of chromosome name to an iterable of alignments
        (objects with ``s1``, ``e1``, ``s2``, ``e2``, ``ref``, ``contig`` and
        ``cigar``).
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of reference name to positions that must
        not count as covered.
    :param used_snps_fpath: path of the file that is (re)created; when
        ``qconfig.show_snps`` is set it receives one tab-separated line per
        reported variant: chromosome, contig, reference position, reference
        allele, contig allele, contig position.
    :return: ``(covered_bases, indels_info)``.
    """
    indels_info = IndelsInfo()
    # One flag per position; lists hold chr_len + 1 slots (presumably so that
    # 1-based coordinates can be used as indices directly).
    genome_mapping = {}
    for chr_name, chr_len in reference_chromosomes.items():
        genome_mapping[chr_name] = [0] * (chr_len + 1)

    snp_line = '%s\t%s\t%d\t%s\t%s\t%d\n'
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            touched_refs = set()
            for align in aligns:
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                              ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        # Insertions consume contig bases only.
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        # Deletions consume reference bases only.
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                ref_map = genome_mapping[align.ref]
                if align.s1 < align.e1:
                    for pos in range(align.s1, align.e1 + 1):
                        ref_map[pos] = 1
                else:
                    # s1 >= e1: the interval wraps past the end of the
                    # reference and continues from position 1.
                    for pos in range(align.s1, len(ref_map)):
                        ref_map[pos] = 1
                    for pos in range(1, align.e1 + 1):
                        ref_map[pos] = 1
                touched_refs.add(align.ref)

            # Unmark the N positions once per reference that was touched,
            # instead of relying on the alignment loop variable: this is a
            # no-op for an empty alignment list and gives the same final
            # state as masking after every alignment.
            for ref in touched_refs:
                ref_map = genome_mapping[ref]
                for i in ns_by_chromosomes[ref]:
                    ref_map[i] = 0

    covered_bases = sum(sum(ref_map) for ref_map in genome_mapping.values())
    return covered_bases, indels_info
def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes, used_snps_fpath):
    """Compute reference coverage and mismatch/indel statistics.

    Every alignment's cs tag (``align.cigar``) is walked to count mismatches,
    insertions and deletions, and the reference interval ``s1..e1`` of the
    alignment is marked as covered. Positions listed in ``ns_by_chromosomes``
    are then unmarked.

    :param ref_aligns: mapping of chromosome name to an iterable of alignments
        (objects with ``s1``, ``e1``, ``s2``, ``e2``, ``ref``, ``contig`` and
        ``cigar``).
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of reference name to positions that must
        not count as covered.
    :param used_snps_fpath: path of the file that is (re)created; when
        ``qconfig.show_snps`` is set it receives one tab-separated line per
        reported variant: chromosome, contig, reference position, reference
        allele, contig allele, contig position.
    :return: ``(covered_bases, indels_info)``.
    """
    indels_info = IndelsInfo()
    # One flag per position; lists hold chr_len + 1 slots (presumably so that
    # 1-based coordinates can be used as indices directly).
    genome_mapping = {}
    for chr_name, chr_len in reference_chromosomes.items():
        genome_mapping[chr_name] = [0] * (chr_len + 1)

    snp_line = '%s\t%s\t%d\t%s\t%s\t%d\n'
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            touched_refs = set()
            for align in aligns:
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                              ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        # Insertions consume contig bases only.
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        # Deletions consume reference bases only.
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                ref_map = genome_mapping[align.ref]
                if align.s1 < align.e1:
                    for pos in range(align.s1, align.e1 + 1):
                        ref_map[pos] = 1
                else:
                    # s1 >= e1: the interval wraps past the end of the
                    # reference and continues from position 1.
                    for pos in range(align.s1, len(ref_map)):
                        ref_map[pos] = 1
                    for pos in range(1, align.e1 + 1):
                        ref_map[pos] = 1
                touched_refs.add(align.ref)

            # Unmark the N positions once per reference that was touched,
            # instead of relying on the alignment loop variable: this is a
            # no-op for an empty alignment list and gives the same final
            # state as masking after every alignment.
            for ref in touched_refs:
                ref_map = genome_mapping[ref]
                for i in ns_by_chromosomes[ref]:
                    ref_map[i] = 0

    covered_bases = sum(sum(ref_map) for ref_map in genome_mapping.values())
    return covered_bases, indels_info
def split_align(coords_file, align_start, strand_direction, ref_start, ref_name, contig, cs):
    """Split one alignment at every indel and write the gap-free fragments.

    The cs string is walked operation by operation. Matches (``:N``) and
    substitutions (``*xy``) extend the current fragment; every insertion
    (``+``) or deletion (``-``) closes it, emits it, and starts a new fragment
    just past the indel.

    A fragment is written to ``coords_file`` (as ``Mapping.coords_str()`` plus
    a newline) only if its contig length is at least ``qconfig.min_alignment``,
    it covers at least one reference base, its cs string is non-empty and its
    identity (matched bases over reference length, rounded to two decimals)
    is at least ``qconfig.min_IDY``.

    :param coords_file: writable file-like object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: +1 or -1 step applied to contig coordinates.
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference sequence name stored in each fragment.
    :param contig: contig name stored in each fragment.
    :param cs: cs tag string of the alignment.
    """
    def _emit_fragment():
        # Reads the fragment state of the enclosing call at invocation time.
        if ctg_span < qconfig.min_alignment or not ref_span or not fragment_cs:
            return
        identity = '%.2f' % (n_matched * 100.0 / ref_span)
        # The threshold is applied to the rounded value, as it is written out.
        if float(identity) >= qconfig.min_IDY:
            fragment = Mapping(s1=ref_start,
                               e1=ref_start + ref_span - 1,
                               s2=align_start,
                               e2=align_start + (ctg_span - 1) * strand_direction,
                               len1=ref_span, len2=ctg_span, idy=identity,
                               ref=ref_name, contig=contig, cigar=fragment_cs)
            coords_file.write(fragment.coords_str() + '\n')

    ref_span = 0
    ctg_span = 0
    n_matched = 0
    fragment_cs = ''

    for op in parse_cs_tag(cs):
        kind = op[:1]
        n_bases = int(op[1:]) if kind == ':' else len(op) - 1

        if kind == '*':
            fragment_cs += op
            ref_span += 1
            ctg_span += 1
        elif kind == '+' or kind == '-':
            _emit_fragment()
            if kind == '+':
                # Inserted bases exist on the contig only.
                align_start += (ctg_span + n_bases) * strand_direction
                ref_start += ref_span
            else:
                # Deleted bases exist on the reference only.
                align_start += ctg_span * strand_direction
                ref_start += ref_span + n_bases
            ref_span, ctg_span, n_matched = 0, 0, 0
            fragment_cs = ''
        else:
            fragment_cs += op
            ref_span += n_bases
            ctg_span += n_bases
            n_matched += n_bases

    _emit_fragment()
def create_mismatches_plot(assembly, window_size, ref_len, root_dir, output_dir):
    """Write per-window mismatch densities for one assembly.

    Reads the filtered coords file of the assembly, walks the cs tag found in
    the last column of every line and counts substitutions (``*`` operations)
    per ``window_size``-sized reference window of each chromosome.

    The output file ``<label>.mismatches.txt`` has tab-separated rows
    ``chrom, start, end, density``; consecutive windows without mismatches are
    merged into a single row with density ``0``.

    :param assembly: object whose ``fpath`` identifies the assembly.
    :param window_size: window width in reference bases.
    :param ref_len: length used to size the per-chromosome window list.
    :param root_dir: directory from which the aligner output is located.
    :param output_dir: directory receiving the mismatches file.
    :return: path of the written file, or ``None`` when the filtered coords
        file does not exist or ``qconfig.show_snps`` is disabled.
    """
    assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
    aligner_dirpath = join(root_dir, '..', qconfig.detailed_contigs_reports_dirname)
    coords_basename = join(create_minimap_output_dir(aligner_dirpath), assembly_label)
    _, coords_filtered_fpath, _, _ = get_aux_out_fpaths(coords_basename)
    if not exists(coords_filtered_fpath) or not qconfig.show_snps:
        return None

    mismatches_fpath = join(output_dir, assembly_label + '.mismatches.txt')
    mismatch_density_by_chrom = defaultdict(lambda: [0] * (ref_len // window_size + 1))
    with open(coords_filtered_fpath) as coords_file:
        for line in coords_file:
            fields = line.split()
            ref_pos = int(line.split('|')[0].split()[0])
            chrom = fields[11]
            cigar = fields[-1]
            for op in parse_cs_tag(cigar):
                if op.startswith('*'):
                    mismatch_density_by_chrom[chrom][ref_pos // window_size] += 1
                    ref_pos += 1
                elif op.startswith(':'):
                    # A match op is ':<count>': the length is the number
                    # itself, not the number of digits in it.
                    ref_pos += int(op[1:])
                elif not op.startswith('+'):
                    # Remaining ops carry their bases literally; insertions
                    # do not consume reference positions.
                    ref_pos += len(op) - 1

    with open(mismatches_fpath, 'w') as out_f:
        for chrom, density_list in mismatch_density_by_chrom.items():
            # [start, end) is the pending run of empty windows; `end` is
            # falsy while no such run is open.
            start, end = 0, 0
            for i, density in enumerate(density_list):
                if density == 0:
                    end = (i + 1) * window_size
                else:
                    if end:
                        out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
                    out_f.write('\t'.join([chrom, str(i * window_size),
                                           str((i + 1) * window_size), str(density)]) + '\n')
                    start = (i + 1) * window_size
                    end = None
            if end:
                out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
    return mismatches_fpath
def split_align(coords_file, align_start, strand_direction, ref_start, ref_name, contig, cs):
    """Split one alignment at long indels and write the resulting fragments.

    The cs string is walked operation by operation. Matches (``:N``),
    substitutions (``*xy``) and indels of at most ``SHORT_INDEL_THRESHOLD``
    bases extend the current fragment. A longer insertion (``+``) or deletion
    (``-``) closes the fragment, emits it, and starts a new one just past the
    indel, which yields smaller alignments with higher identity.

    A fragment is written to ``coords_file`` (as ``Mapping.coords_str()`` plus
    a newline) only if its contig length is at least ``qconfig.min_alignment``,
    it covers at least one reference base, its cs string is non-empty and its
    identity (matched bases over reference length, rounded to two decimals)
    is at least ``qconfig.min_IDY``.

    :param coords_file: writable file-like object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: +1 or -1 step applied to contig coordinates.
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference sequence name stored in each fragment.
    :param contig: contig name stored in each fragment.
    :param cs: cs tag string of the alignment.
    """
    def _emit_fragment():
        # Reads the fragment state of the enclosing call at invocation time.
        if ctg_span < qconfig.min_alignment or not ref_span or not fragment_cs:
            return
        identity = '%.2f' % (n_matched * 100.0 / ref_span)
        # The threshold is applied to the rounded value, as it is written out.
        if float(identity) >= qconfig.min_IDY:
            fragment = Mapping(s1=ref_start,
                               e1=ref_start + ref_span - 1,
                               s2=align_start,
                               e2=align_start + (ctg_span - 1) * strand_direction,
                               len1=ref_span, len2=ctg_span, idy=identity,
                               ref=ref_name, contig=contig, cigar=fragment_cs)
            coords_file.write(fragment.coords_str() + '\n')

    ref_span = 0
    ctg_span = 0
    n_matched = 0
    fragment_cs = ''

    for op in parse_cs_tag(cs):
        kind = op[:1]
        n_bases = int(op[1:]) if kind == ':' else len(op) - 1

        if kind == '*':
            fragment_cs += op
            ref_span += 1
            ctg_span += 1
        elif kind == '+' or kind == '-':
            # An insertion adds bases on the contig only, a deletion on the
            # reference only.
            ctg_gap = n_bases if kind == '+' else 0
            ref_gap = n_bases if kind == '-' else 0
            if n_bases > SHORT_INDEL_THRESHOLD:
                _emit_fragment()
                align_start += (ctg_span + ctg_gap) * strand_direction
                ref_start += ref_span + ref_gap
                ref_span, ctg_span, n_matched = 0, 0, 0
                fragment_cs = ''
            else:
                fragment_cs += op
                ctg_span += ctg_gap
                ref_span += ref_gap
        else:
            fragment_cs += op
            ref_span += n_bases
            ctg_span += n_bases
            n_matched += n_bases

    _emit_fragment()
def __shift_cigar(align, new_start=None, new_end=None):
    """Rebuild ``align.cigar`` (a cs tag) trimmed to a new contig bound.

    The operations yielded by ``parse_cs_tag`` are walked while tracking the
    contig position, which starts at ``align.s2`` and moves by +1 per base when
    ``align.s2 < align.e2`` and by -1 otherwise. The rebuilt tag is prefixed
    with ``cs:Z:`` and stored back into ``align.cigar``.

    :param align: object exposing ``s2``, ``e2`` and ``cigar``.
    :param new_start: contig coordinate to trim to on the start side; a falsy
        value disables start trimming.
    :param new_end: contig coordinate to trim to on the end side; a falsy
        value disables end trimming.
    :return: accumulated length difference: for every insertion the number of
        trimmed bases is added, for every deletion it is subtracted. Returns 0
        (and leaves ``align.cigar`` untouched) when the cigar is empty.
    """
    length_shift = 0
    if not align.cigar:
        return length_shift

    direction = 1 if align.s2 < align.e2 else -1
    cursor = align.s2
    pieces = ['cs:Z:']

    for op in parse_cs_tag(align.cigar):
        kind = op[:1]

        if kind == '*':
            # A substitution spans one contig base; it is kept only when a
            # bound was given and the current position satisfies it.
            if (new_start and cursor >= new_start) or (new_end and cursor <= new_end):
                pieces.append(op)
            cursor += direction
            continue

        # ':N' carries its length as a number, other ops as a base string.
        n_bases = int(op[1:]) if kind == ':' else len(op) - 1
        span = n_bases * direction

        # Number of bases of this operation that survive the trimming.
        kept = n_bases
        if new_end and (cursor + span > new_end or cursor > new_end):
            kept = new_end - cursor + (n_bases if direction == -1 else 1)
        elif new_start and (cursor < new_start or cursor + span < new_start):
            kept = cursor + (n_bases if direction == 1 else 1) - new_start

        if kept < 1:
            # The whole operation is dropped. Deletions never move the contig
            # cursor; everything else does.
            if kind == '-':
                length_shift -= n_bases
            else:
                cursor += span
                if kind == '+':
                    length_shift += n_bases
            continue

        if kind == '+':
            cursor += span
            length_shift += n_bases - kept
            if new_start:
                # NOTE(review): when kept < n_bases the slice start
                # 1 + (kept - n_bases) is <= 0, i.e. it is not an offset past
                # the sign character -- confirm this is the intended suffix.
                pieces.append('+' + op[1 + (kept - n_bases):])
            elif new_end:
                pieces.append(op[:kept + 1])
        elif kind == '-':
            length_shift -= n_bases - kept
            if new_start:
                # NOTE(review): same slice-start caveat as for insertions.
                pieces.append('-' + op[1 + (kept - n_bases):])
            elif new_end:
                pieces.append(op[:kept + 1])
        elif kind == ':':
            cursor += span
            pieces.append(':' + str(kept))

    align.cigar = ''.join(pieces)
    return length_shift
def create_mismatches_plot(assembly, window_size, ref_len, root_dir, output_dir):
    """Write per-window mismatch densities for one assembly.

    Reads the filtered coords file of the assembly, walks the cs tag found in
    the last column of every line and counts substitutions (``*`` operations)
    per ``window_size``-sized reference window of each chromosome.

    The output file ``<label>.mismatches.txt`` has tab-separated rows
    ``chrom, start, end, density``; consecutive windows without mismatches are
    merged into a single row with density ``0``.

    :param assembly: object whose ``fpath`` identifies the assembly.
    :param window_size: window width in reference bases.
    :param ref_len: length used to size the per-chromosome window list.
    :param root_dir: directory from which the aligner output is located.
    :param output_dir: directory receiving the mismatches file.
    :return: path of the written file, or ``None`` when the filtered coords
        file does not exist or ``qconfig.show_snps`` is disabled.
    """
    assembly_label = qutils.label_from_fpath_for_fname(assembly.fpath)
    aligner_dirpath = join(root_dir, '..', 'contigs_reports')
    coords_basename = join(create_minimap_output_dir(aligner_dirpath), assembly_label)
    _, coords_filtered_fpath, _, _ = get_aux_out_fpaths(coords_basename)
    if not exists(coords_filtered_fpath) or not qconfig.show_snps:
        return None

    mismatches_fpath = join(output_dir, assembly_label + '.mismatches.txt')
    mismatch_density_by_chrom = defaultdict(lambda: [0] * (ref_len // window_size + 1))
    with open(coords_filtered_fpath) as coords_file:
        for line in coords_file:
            fields = line.split()
            ref_pos = int(line.split('|')[0].split()[0])
            chrom = fields[11]
            cigar = fields[-1]
            for op in parse_cs_tag(cigar):
                if op.startswith('*'):
                    mismatch_density_by_chrom[chrom][ref_pos // window_size] += 1
                    ref_pos += 1
                elif op.startswith(':'):
                    # A match op is ':<count>': the length is the number
                    # itself, not the number of digits in it.
                    ref_pos += int(op[1:])
                elif not op.startswith('+'):
                    # Remaining ops carry their bases literally; insertions
                    # do not consume reference positions.
                    ref_pos += len(op) - 1

    with open(mismatches_fpath, 'w') as out_f:
        for chrom, density_list in mismatch_density_by_chrom.items():
            # [start, end) is the pending run of empty windows; `end` is
            # falsy while no such run is open.
            start, end = 0, 0
            for i, density in enumerate(density_list):
                if density == 0:
                    end = (i + 1) * window_size
                else:
                    if end:
                        out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
                    out_f.write('\t'.join([chrom, str(i * window_size),
                                           str((i + 1) * window_size), str(density)]) + '\n')
                    start = (i + 1) * window_size
                    end = None
            if end:
                out_f.write('\t'.join([chrom, str(start), str(end), '0']) + '\n')
    return mismatches_fpath
def split_align(coords_file, align_start, strand_direction, ref_start, ref_name, contig, cs):
    """Split one alignment at long indels/mismatch runs and write the pieces.

    The cs string is walked operation by operation while a single ``Mapping``
    object is grown in place. Consecutive substitutions (``*xy``) are gathered
    into one run. A run, insertion (``+``) or deletion (``-``) longer than
    ``SPLIT_ALIGN_THRESHOLD`` closes the current fragment, emits it and
    restarts the mapping just past the event; shorter ones are absorbed into
    the fragment.

    A fragment is written to ``coords_file`` (as ``align.coords_str()`` plus a
    newline) only if its contig length is at least ``qconfig.min_alignment``,
    it covers at least one reference base, its cs string is non-empty and its
    identity (matched bases over the longer of the two lengths, rounded to two
    decimals) is at least ``qconfig.min_IDY``.

    :param coords_file: writable file-like object.
    :param align_start: contig coordinate of the first aligned base.
    :param strand_direction: +1 or -1 step applied to contig coordinates.
    :param ref_start: reference coordinate of the first aligned base.
    :param ref_name: reference sequence name stored in the mapping.
    :param contig: contig name stored in the mapping.
    :param cs: cs tag string of the alignment.
    """
    def _write_align():
        # Reads `align` and `matched_bases` of the enclosing call.
        if align.len2 < qconfig.min_alignment or not align.len1 or not align.cigar:
            return
        align.e1 = align.s1 + align.len1 - 1
        align.e2 = align.s2 + (align.len2 - 1) * strand_direction
        align.idy = '%.2f' % (matched_bases * 100.0 / max(align.len1, align.len2))
        if float(align.idy) >= qconfig.min_IDY:
            coords_file.write(align.coords_str() + '\n')

    def _try_split(matched_bases, prev_op, n_refbases=0, n_alignbases=0):
        # Split the alignment at long indels or long stretches of mismatches
        # to get smaller alignments with higher identity. Returns the matched
        # base count the caller must continue with (0 after a split).
        if n_alignbases > SPLIT_ALIGN_THRESHOLD or n_refbases > SPLIT_ALIGN_THRESHOLD:
            _write_align()
            align.s1 += align.len1 + n_refbases
            align.s2 += (align.len2 + n_alignbases) * strand_direction
            align.len1, align.len2 = 0, 0
            align.cigar = ''
            matched_bases = 0
        else:
            align.len1 += n_refbases
            align.len2 += n_alignbases
            align.cigar += prev_op
        return matched_bases

    matched_bases = 0
    align = Mapping(s1=ref_start, e1=ref_start, s2=align_start, e2=align_start, len1=0, len2=0,
                    ref=ref_name, contig=contig, cigar='')
    cur_mismatch_stretch = ''
    for op in parse_cs_tag(cs):
        if op.startswith('*'):
            cur_mismatch_stretch += op
            continue
        if cur_mismatch_stretch:
            # Each substitution op contributes exactly one '*', i.e. one base
            # on both the reference and the contig.
            n_bases = cur_mismatch_stretch.count('*')
            matched_bases = _try_split(matched_bases, cur_mismatch_stretch, n_bases, n_bases)
            cur_mismatch_stretch = ''
        if op.startswith(':'):
            n_bases = int(op[1:])
            align.cigar += op
            align.len1 += n_bases
            align.len2 += n_bases
            matched_bases += n_bases
        else:
            n_bases = len(op) - 1
            if op.startswith('+'):
                matched_bases = _try_split(matched_bases, op, n_alignbases=n_bases)
            elif op.startswith('-'):
                matched_bases = _try_split(matched_bases, op, n_refbases=n_bases)

    # A mismatch run that ends the cs string is still pending here; account
    # for it so trailing substitutions are not silently dropped from the
    # lengths, end coordinates and cigar of the last fragment.
    if cur_mismatch_stretch:
        n_bases = cur_mismatch_stretch.count('*')
        matched_bases = _try_split(matched_bases, cur_mismatch_stretch, n_bases, n_bases)
    _write_align()
def _ea_stats(sizes_by_chrom):
    """Flatten per-base alignment sizes and derive EAx, mean and P values.

    :param sizes_by_chrom: mapping of chromosome name to a list with one
        alignment size per reference position.
    :return: ``(ea_x, ea_mean, (p5k, p10k, p15k, p20k))`` where ``ea_x`` has
        101 entries taken from the sizes sorted in descending order
        (``ea_x[i]`` is the element at ``i`` percent, ``ea_x[100]`` the
        smallest), ``ea_mean`` is the truncated mean, and the ``p*`` values
        are whatever ``P(sizes, threshold)`` returns.
    """
    sizes = [size for per_base in sizes_by_chrom.values() for size in per_base]
    sizes.sort(reverse=True)
    count = len(sizes)
    ea_x = [sizes[(count * i) // 100] for i in range(100)]
    ea_x.append(sizes[-1])
    ea_mean = int(sum(sizes) / count)
    thresholds = tuple(P(sizes, limit) for limit in (5000, 10000, 15000, 20000))
    return ea_x, ea_mean, thresholds


def analyze_coverage(ref_aligns, reference_chromosomes, ns_by_chromosomes, used_snps_fpath):
    """Compute coverage, mismatch/indel statistics and EA metrics.

    Every alignment's cs tag (``align.cigar``) is walked to count mismatches,
    insertions and deletions. The reference interval ``s1..e1`` of each
    alignment is marked as covered, and for every covered position the largest
    ``len2_excluding_local_misassemblies`` (and, for the strict variant,
    ``len2_including_local_misassemblies``) of any alignment covering it is
    recorded. Positions listed in ``ns_by_chromosomes`` are reset to 0 in all
    three per-position arrays.

    :param ref_aligns: mapping of chromosome name to an iterable of alignments.
    :param reference_chromosomes: mapping of chromosome name to its length.
    :param ns_by_chromosomes: mapping of reference name to positions that must
        not count as covered.
    :param used_snps_fpath: path of the file that is (re)created; when
        ``qconfig.show_snps`` is set it receives one tab-separated line per
        reported variant: chromosome, contig, reference position, reference
        allele, contig allele, contig position.
    :return: ``(covered_bases, indels_info, ea_x_max, strict_ea_x_max,
        ea_mean_max, strict_ea_mean_max, p5k, p10k, p15k, p20k, strict_p5k,
        strict_p10k, strict_p15k, strict_p20k)``.
    """
    logger.info(" Enter analyze_coverage")
    indels_info = IndelsInfo()
    genome_mapping = {}
    max_align_size = {}
    strict_max_align_size = {}
    genome_length = 0
    for chr_name, chr_len in reference_chromosomes.items():
        # chr_len + 1 slots per chromosome (presumably so that 1-based
        # coordinates can be used as indices directly).
        genome_mapping[chr_name] = [0] * (chr_len + 1)
        max_align_size[chr_name] = [0] * (chr_len + 1)
        strict_max_align_size[chr_name] = [0] * (chr_len + 1)
        genome_length += chr_len
    logger.info(" Genome length: " + str(genome_length))

    snp_line = '%s\t%s\t%d\t%s\t%s\t%d\n'
    alignment_total_length = 0
    with open(used_snps_fpath, 'w') as used_snps_f:
        for chr_name, aligns in ref_aligns.items():
            touched_refs = set()
            for align in aligns:
                # Names ending in 1 refer to the reference, in 2 to the contig.
                ref_pos, ctg_pos = align.s1, align.s2
                strand_direction = 1 if align.s2 < align.e2 else -1
                for op in parse_cs_tag(align.cigar):
                    if op.startswith(':'):
                        n_bases = int(op[1:])
                    else:
                        n_bases = len(op) - 1
                    if op.startswith('*'):
                        ref_nucl, ctg_nucl = op[1].upper(), op[2].upper()
                        # Substitutions involving N are not counted.
                        if ctg_nucl != 'N' and ref_nucl != 'N':
                            indels_info.mismatches += 1
                            if qconfig.show_snps:
                                used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                              ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += 1
                        ctg_pos += 1 * strand_direction
                    elif op.startswith('+'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.insertions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = '.', op[1:].upper()
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        ctg_pos += n_bases * strand_direction
                    elif op.startswith('-'):
                        indels_info.indels_list.append(n_bases)
                        indels_info.deletions += n_bases
                        if qconfig.show_snps and n_bases < qconfig.MAX_INDEL_LENGTH:
                            ref_nucl, ctg_nucl = op[1:].upper(), '.'
                            used_snps_f.write(snp_line % (chr_name, align.contig, ref_pos,
                                                          ref_nucl, ctg_nucl, ctg_pos))
                        ref_pos += n_bases
                    else:
                        ref_pos += n_bases
                        ctg_pos += n_bases * strand_direction

                alignment_total_length += align.len2_excluding_local_misassemblies
                # Same lengths as used for the (strict) NGAx computation.
                align_size = align.len2_excluding_local_misassemblies
                strict_align_size = align.len2_including_local_misassemblies

                covered = genome_mapping[align.ref]
                sizes = max_align_size[align.ref]
                strict_sizes = strict_max_align_size[align.ref]
                if align.s1 < align.e1:
                    spans = (range(align.s1, align.e1 + 1),)
                else:
                    # s1 >= e1: the interval wraps past the end of the
                    # reference and continues from position 1.
                    spans = (range(align.s1, len(covered)), range(1, align.e1 + 1))
                for span in spans:
                    for pos in span:
                        covered[pos] = 1
                        sizes[pos] = max(align_size, sizes[pos])
                        strict_sizes[pos] = max(strict_align_size, strict_sizes[pos])
                touched_refs.add(align.ref)

            # Reset the N positions themselves in all three arrays, once per
            # touched reference. Indexing by the N position (not by a
            # leftover coverage-loop index) is what keeps N bases out of the
            # EA statistics.
            for ref in touched_refs:
                covered = genome_mapping[ref]
                sizes = max_align_size[ref]
                strict_sizes = strict_max_align_size[ref]
                for i in ns_by_chromosomes[ref]:
                    covered[i] = 0
                    sizes[i] = 0
                    strict_sizes[i] = 0

    covered_bases = sum(sum(flags) for flags in genome_mapping.values())

    ea_x_max, ea_mean_max, (p5k, p10k, p15k, p20k) = _ea_stats(max_align_size)
    strict_ea_x_max, strict_ea_mean_max, (strict_p5k, strict_p10k, strict_p15k, strict_p20k) = \
        _ea_stats(strict_max_align_size)

    # Nothing covered must not crash the log statement with a division by zero.
    duplication_ratio = (alignment_total_length / covered_bases) if covered_bases else 0.0
    logger.info(" Duplication ratio = %.2f = %d/%d" %
                (duplication_ratio, alignment_total_length, covered_bases))
    logger.info(" EA50max = {}".format(ea_x_max[50]))
    logger.info(" Strict EA50max = {}".format(strict_ea_x_max[50]))
    logger.info(" len2 NGA50 = {}".format(
        N50.NG50_and_LG50(
            [align.len2 for aligns in ref_aligns.values() for align in aligns],
            genome_length, need_sort=True)[0]))
    logger.info(" len2_excluding_local_misassemblies NGA50 = {}".format(
        N50.NG50_and_LG50(
            [align.len2_excluding_local_misassemblies
             for aligns in ref_aligns.values() for align in aligns],
            genome_length, need_sort=True)[0]))

    return (covered_bases, indels_info, ea_x_max, strict_ea_x_max, ea_mean_max,
            strict_ea_mean_max, p5k, p10k, p15k, p20k,
            strict_p5k, strict_p10k, strict_p15k, strict_p20k)