def main(args, outs):
    # Pull genome, chemistry, and cell-count metadata from the filtered matrices HDF5
    genomes = cr_matrix.GeneBCMatrices.load_genomes_from_h5(args.filtered_matrices)
    chemistry = cr_matrix.GeneBCMatrices.load_chemistry_from_h5(args.filtered_matrices)
    total_cells = cr_matrix.GeneBCMatrices.count_cells_from_h5(args.filtered_matrices)

    summary = {
        'chemistry_description': chemistry,
        'filtered_bcs_transcriptome_union': total_cells,
    }
    with open(outs.summary, 'w') as f:
        json.dump(summary, f, indent=4, sort_keys=True)

    sample_properties = ReanalyzeSampleProperties(
        sample_id=args.analysis_id,
        sample_desc=args.analysis_desc,
        genomes=genomes,
        version=martian.get_pipelines_version())
    sample_properties = dict(sample_properties._asdict())

    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        analysis_path=args.analysis,
    )

    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)
    cr_webshim.build_web_summary_html(outs.web_summary, sample_properties,
                                      sample_data, PIPELINE_REANALYZE)
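# ---------------------------------------------------------------------------
# These stage functions come from separate Martian stage modules, each with
# its own imports. A minimal sketch of the shared imports they assume; the
# module paths follow the open-source Cell Ranger layout and may differ
# between releases, so treat them as assumptions. The SampleProperties
# namedtuples and the PIPELINE_* constants are deliberately omitted because
# their location varies by release.
import json
import martian
import numpy as np
import tenkit.stats as tk_stats
import cellranger.constants as cr_constants
import cellranger.matrix as cr_matrix
import cellranger.molecule_counter as cr_mol_counter
import cellranger.report as cr_report
import cellranger.utils as cr_utils
import cellranger.webshim.common as cr_webshim
import cellranger.webshim.data as cr_webshim_data
# ---------------------------------------------------------------------------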
def main(args, outs):
    # Merge the per-stage metric JSONs into a single metrics summary
    cr_report.merge_jsons(args.summaries, outs.metrics_summary_json)

    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.metrics_summary_json,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
        filtered_barcodes_path=args.filtered_barcodes,
    )

    genomes = cr_utils.get_reference_genomes(args.reference_path)
    sample_properties = CountSampleProperties(
        sample_id=args.sample_id,
        sample_desc=args.sample_desc,
        genomes=genomes,
        version=martian.get_pipelines_version())
    sample_properties = dict(sample_properties._asdict())

    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)
    cr_webshim.build_web_summary_html(outs.web_summary, sample_properties, sample_data,
                                      PIPELINE_COUNT, alerts_output_filename=outs.alerts)
    cr_webshim.build_metrics_summary_csv(outs.metrics_summary_csv, sample_properties,
                                         sample_data, PIPELINE_COUNT)
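# The stages above convert their SampleProperties namedtuple with
# dict(nt._asdict()) so the webshim code can use plain-dict lookups.
# A self-contained illustration of that pattern; FakeProperties is a
# hypothetical stand-in, not a Cell Ranger type.
from collections import namedtuple

FakeProperties = namedtuple('FakeProperties', ['sample_id', 'genomes'])
props = FakeProperties(sample_id='s1', genomes=['GRCh38'])
props = dict(props._asdict())       # OrderedDict -> plain dict
assert props['sample_id'] == 's1'   # plain-dict access now works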
def join(args, outs, chunk_defs, chunk_outs):
    summary_files = [
        args.reads_summary,
        args.filter_umis_summary,
        args.filter_barcodes_summary,
        args.trim_reads_summary,
        args.filter_reads_summary,
        args.filter_contigs_summary,
        args.report_contigs_summary,
        args.report_contig_alignments_summary,
        args.raw_consensus_summary,
        args.group_clonotypes_summary,
    ]

    # Some upstream stages are optional, so drop missing summaries
    summary_files = [sum_file for sum_file in summary_files if sum_file is not None]

    cr_report.merge_jsons(summary_files, outs.metrics_summary_json)

    # Copy barcode summary h5
    if args.barcode_summary:
        cr_utils.copy(args.barcode_summary, outs.barcode_summary)

    # Copy cell barcodes
    if args.cell_barcodes:
        cr_utils.copy(args.cell_barcodes, outs.cell_barcodes)

    # Copy barcode support
    if args.barcode_support:
        cr_utils.copy(args.barcode_support, outs.barcode_support)

    # Copy barcode umi summary
    if args.barcode_umi_summary:
        cr_utils.copy(args.barcode_umi_summary, outs.barcode_umi_summary)

    # Copy umi info
    if args.umi_info:
        cr_utils.copy(args.umi_info, outs.umi_info)

    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.metrics_summary_json,
        barcode_summary_path=args.barcode_summary,
        vdj_clonotype_summary_path=args.clonotype_summary,
        vdj_barcode_support_path=args.barcode_support,
    )

    # Per-genome metrics are not applicable to V(D)J, so pass an empty genome list
    sample_properties = cr_webshim.get_sample_properties(
        args.sample_id, args.sample_desc, [],
        version=martian.get_pipelines_version())
    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)

    # Only build the web summary when a barcode whitelist was used
    if args.barcode_whitelist is not None:
        cr_webshim.build_web_summary_html(outs.web_summary, sample_properties, sample_data,
                                          PIPELINE_VDJ, alerts_output_filename=outs.alerts)
        cr_webshim.build_metrics_summary_csv(outs.metrics_summary_csv, sample_properties,
                                             sample_data, PIPELINE_VDJ)
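# The five copy-if-present blocks in join() follow one pattern; a sketch of
# an equivalent table-driven version (same behavior, assuming the same
# args/outs field names as above).
def copy_optional_outputs(args, outs):
    optional_copies = [
        (args.barcode_summary, outs.barcode_summary),
        (args.cell_barcodes, outs.cell_barcodes),
        (args.barcode_support, outs.barcode_support),
        (args.barcode_umi_summary, outs.barcode_umi_summary),
        (args.umi_info, outs.umi_info),
    ]
    for src, dst in optional_copies:
        if src:
            cr_utils.copy(src, dst)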
def main(args, outs):
    summary = {}

    filtered_mat = cr_matrix.CountMatrix.load_h5_file(args.filtered_matrices_h5)
    genomes = filtered_mat.get_genomes()

    # get metrics from other summaries
    if args.analyze_matrices_summary:
        with open(args.analyze_matrices_summary) as reader:
            analysis_summary = json.load(reader)
        summary.update(analysis_summary)

    with open(args.normalize_depth_summary, 'r') as reader:
        summary.update(json.load(reader))
        agg_batches = summary['batches']

    with open(outs.summary, 'w') as f:
        json.dump(summary, f, indent=4, sort_keys=True)

    # build web summary
    sample_properties = AggrSampleProperties(
        sample_id=args.sample_id,
        sample_desc=args.sample_desc,
        genomes=genomes,
        version=martian.get_pipelines_version(),
        agg_batches=agg_batches)
    sample_properties = dict(sample_properties._asdict())

    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
    )

    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)
    cr_webshim.build_web_summary_html(outs.web_summary, sample_properties,
                                      sample_data, PIPELINE_AGGR)
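# The stage above requires args.normalize_depth_summary to carry a 'batches'
# key alongside whatever depth-normalization metrics it merges verbatim into
# the output summary. A hypothetical example of the shape it reads; the
# values, and every key other than 'batches', are invented for illustration.
example_normalize_depth_summary = {
    'batches': ['lib_A', 'lib_B'],      # consumed as agg_batches
    'some_normalization_metric': 0.87,  # merged into the summary as-is
}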
def main(args, outs):
    summary = {}

    # add stats from matrices
    filtered_mats = cr_matrix.GeneBCMatrices.load_h5(args.filtered_matrices_h5)
    genomes = filtered_mats.get_genomes()
    cells_per_genome = {}
    for genome in genomes:
        matrix = filtered_mats.matrices[genome]
        cells_per_genome[genome] = matrix.bcs_dim
        # Per-barcode medians: genes detected (counts above threshold) and total UMIs
        median_gene_counts = np.median(matrix._sum(
            matrix.m >= cr_constants.MIN_READS_PER_GENE, axis=0))
        median_umi_counts = np.median(matrix._sum(matrix.m, axis=0))
        summary.update({
            '%s_filtered_bcs' % genome: cells_per_genome[genome],
            '%s_filtered_bcs_median_counts' % genome: median_umi_counts,
            '%s_filtered_bcs_median_unique_genes_detected' % genome: median_gene_counts,
        })
    del filtered_mats

    # get metrics from other summaries
    if args.analyze_matrices_summary:
        with open(args.analyze_matrices_summary) as reader:
            analysis_summary = json.load(reader)
        summary.update(analysis_summary)

    with open(args.normalize_depth_summary, 'r') as reader:
        data = json.load(reader)
        raw_conf_mapped_per_genome = data['raw_conf_mapped_per_genome']
        downsample_map = data['downsample_info']
        mol_counter_metrics = data['mol_counter_metrics']

    with open(args.count_genes_summary, 'r') as reader:
        data = json.load(reader)
        flt_conf_mapped_per_genome = data['flt_conf_mapped_per_genome']

    # Fraction of confidently-mapped reads that came from cell-associated barcodes
    for genome in flt_conf_mapped_per_genome:
        frac_reads_in_cells = tk_stats.robust_divide(
            flt_conf_mapped_per_genome[genome],
            raw_conf_mapped_per_genome[genome])
        summary['%s_filtered_bcs_conf_mapped_barcoded_reads_cum_frac' % genome] = frac_reads_in_cells

    # Pass chemistry metrics through to output
    summary.update({
        k: v for k, v in mol_counter_metrics.iteritems()
        if k.startswith('chemistry_')
    })

    # Molecule counter metrics
    gem_groups = []
    total_reads_per_gem_group = []
    downsampled_reads_per_gem_group = []
    for (gg, submetrics) in mol_counter_metrics[
            cr_mol_counter.GEM_GROUPS_METRIC].iteritems():
        gem_groups.append(gg)
        total_reads = submetrics[cr_mol_counter.GG_TOTAL_READS_METRIC]
        total_reads_per_gem_group.append(total_reads)
        # If metric is missing, assume no downsampling was done
        downsampled = submetrics.get(
            cr_mol_counter.GG_DOWNSAMPLED_READS_METRIC, total_reads)
        downsampled_reads_per_gem_group.append(downsampled)

    total_reads = sum(total_reads_per_gem_group)
    downsampled_reads = sum(downsampled_reads_per_gem_group)
    total_cells = sum(cells_per_genome.values())
    mean_reads_per_cell = tk_stats.robust_divide(total_reads, total_cells)
    downsampled_mean_reads_per_cell = tk_stats.robust_divide(
        downsampled_reads, total_cells)

    summary.update({
        'pre_normalization_total_reads': total_reads,
        'post_normalization_total_reads': downsampled_reads,
        'filtered_bcs_transcriptome_union': total_cells,
        'pre_normalization_multi_transcriptome_total_raw_reads_per_filtered_bc': mean_reads_per_cell,
        'post_normalization_multi_transcriptome_total_raw_reads_per_filtered_bc': downsampled_mean_reads_per_cell,
    })

    # Downsampling metrics
    gem_group_index = args.gem_group_index
    agg_batches = []
    lowest_frac_reads_kept = 1.0
    for (gg, rpg) in zip(gem_groups, total_reads_per_gem_group):
        dinfo = downsample_map[str(gg)]
        (library_id, old_gg) = gem_group_index[str(gg)]
        # Label each batch by library, disambiguating repeated gem groups
        batch = library_id + ('-%d' % old_gg if old_gg > 1 else '')
        agg_batches.append(batch)

        # calc summary metrics
        frac_reads_kept = dinfo['frac_reads_kept']
        lowest_frac_reads_kept = min(lowest_frac_reads_kept, frac_reads_kept)
        summary['%s_frac_reads_kept' % batch] = frac_reads_kept
        summary['%s_pre_normalization_raw_reads_per_filtered_bc' % batch] = \
            tk_stats.robust_divide(dinfo['total_reads'], dinfo['cells'])
        summary['%s_pre_normalization_cmb_reads_per_filtered_bc' % batch] = \
            tk_stats.robust_divide(dinfo['cmb_reads'], dinfo['cells'])
        # this is an internal metric, so keep using gem group instead of batch
        summary['%s_total_reads_per_gem_group' % gg] = frac_reads_kept * rpg

    summary['lowest_frac_reads_kept'] = lowest_frac_reads_kept

    with open(outs.summary, 'w') as f:
        json.dump(summary, f, indent=4, sort_keys=True)

    # build web summary
    sample_properties = cr_webshim.get_sample_properties(
        args.aggregation_id, args.aggregation_desc, genomes,
        version=martian.get_pipelines_version(), agg_batches=agg_batches)
    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
    )

    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)
    cr_webshim.build_web_summary_html(outs.web_summary, sample_properties,
                                      sample_data, PIPELINE_AGGR)
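# Worked example of the batch-labeling rule in the loop above: each gem group
# maps back to (library_id, original_gem_group), and the original gem group
# number is appended only when it is greater than 1. The gem_group_index
# contents here are invented for illustration.
gem_group_index = {'1': ('lib_A', 1), '2': ('lib_A', 2), '3': ('lib_B', 1)}
batches = []
for gg in sorted(gem_group_index):
    library_id, old_gg = gem_group_index[gg]
    batches.append(library_id + ('-%d' % old_gg if old_gg > 1 else ''))
assert batches == ['lib_A', 'lib_A-2', 'lib_B']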