示例#1
0
def main(args, outs):
    """Write the reanalysis summary JSON and build the web summary page.

    Genomes, chemistry and the cell count are read from the file at
    ``args.filtered_matrices``. The chemistry and cell count are dumped to
    ``outs.summary``; the genomes feed the sample properties used to build
    ``outs.web_summary`` for PIPELINE_REANALYZE.
    """
    matrices_path = args.filtered_matrices
    matrices_cls = cr_matrix.GeneBCMatrices

    genomes = matrices_cls.load_genomes_from_h5(matrices_path)

    # Only these two metrics go into the summary file.
    metrics = {}
    metrics['chemistry_description'] = matrices_cls.load_chemistry_from_h5(
        matrices_path)
    metrics['filtered_bcs_transcriptome_union'] = (
        matrices_cls.count_cells_from_h5(matrices_path))

    with open(outs.summary, 'w') as handle:
        json.dump(metrics, handle, indent=4, sort_keys=True)

    # The webshim helpers are handed a plain dict, not the namedtuple.
    properties = dict(
        ReanalyzeSampleProperties(
            sample_id=args.analysis_id,
            sample_desc=args.analysis_desc,
            genomes=genomes,
            version=martian.get_pipelines_version(),
        )._asdict())

    data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        analysis_path=args.analysis,
    )

    loaded = cr_webshim.load_sample_data(properties, data_paths)
    cr_webshim.build_web_summary_html(
        outs.web_summary, properties, loaded, PIPELINE_REANALYZE)
示例#2
0
def main(args, outs):
    """Merge the metric summaries and emit the count pipeline reports.

    ``args.summaries`` are merged into ``outs.metrics_summary_json``. That
    merged file, together with the barcode summary, analysis and filtered
    barcodes paths from ``args``, is loaded as sample data and used to build
    ``outs.web_summary`` (with alerts written to ``outs.alerts``) and
    ``outs.metrics_summary_csv`` for PIPELINE_COUNT.
    """
    merged_json = outs.metrics_summary_json
    cr_report.merge_jsons(args.summaries, merged_json)

    data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=merged_json,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
        filtered_barcodes_path=args.filtered_barcodes,
    )

    # The webshim helpers are handed a plain dict, not the namedtuple.
    properties = dict(
        CountSampleProperties(
            sample_id=args.sample_id,
            sample_desc=args.sample_desc,
            genomes=cr_utils.get_reference_genomes(args.reference_path),
            version=martian.get_pipelines_version(),
        )._asdict())

    loaded = cr_webshim.load_sample_data(properties, data_paths)

    cr_webshim.build_web_summary_html(
        outs.web_summary,
        properties,
        loaded,
        PIPELINE_COUNT,
        alerts_output_filename=outs.alerts,
    )
    cr_webshim.build_metrics_summary_csv(
        outs.metrics_summary_csv, properties, loaded, PIPELINE_COUNT)
示例#3
0
def join(args, outs, chunk_defs, chunk_outs):
    """Merge the stage summaries, copy optional outputs and build reports.

    Every non-None summary path in ``args`` is merged into
    ``outs.metrics_summary_json``. Each optional input file that is set is
    copied to the output of the same name. The web summary, alerts and
    metrics CSV are only built when ``args.barcode_whitelist`` is not None.
    ``chunk_defs`` and ``chunk_outs`` are not used.
    """
    candidate_summaries = (
        args.reads_summary,
        args.filter_umis_summary,
        args.filter_barcodes_summary,
        args.trim_reads_summary,
        args.filter_reads_summary,
        args.filter_contigs_summary,
        args.report_contigs_summary,
        args.report_contig_alignments_summary,
        args.raw_consensus_summary,
        args.group_clonotypes_summary,
    )
    present_summaries = [
        path for path in candidate_summaries if path is not None
    ]
    cr_report.merge_jsons(present_summaries, outs.metrics_summary_json)

    # Pass-through files: each is copied from args to the same-named out,
    # in this order, and skipped when the input is unset/empty.
    passthrough_fields = (
        'barcode_summary',      # barcode summary h5
        'cell_barcodes',
        'barcode_support',
        'barcode_umi_summary',
        'umi_info',
    )
    for field in passthrough_fields:
        source = getattr(args, field)
        if source:
            cr_utils.copy(source, getattr(outs, field))

    data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.metrics_summary_json,
        barcode_summary_path=args.barcode_summary,
        vdj_clonotype_summary_path=args.clonotype_summary,
        vdj_barcode_support_path=args.barcode_support,
    )

    properties = cr_webshim.get_sample_properties(
        args.sample_id,
        args.sample_desc,
        [],
        version=martian.get_pipelines_version(),
    )

    loaded = cr_webshim.load_sample_data(properties, data_paths)

    if args.barcode_whitelist is None:
        return

    cr_webshim.build_web_summary_html(
        outs.web_summary,
        properties,
        loaded,
        PIPELINE_VDJ,
        alerts_output_filename=outs.alerts,
    )
    cr_webshim.build_metrics_summary_csv(
        outs.metrics_summary_csv, properties, loaded, PIPELINE_VDJ)
示例#4
0
def main(args, outs):
    """Combine upstream summaries into one JSON and build the aggr page.

    The optional ``args.analyze_matrices_summary`` and the required
    ``args.normalize_depth_summary`` JSON files are merged (in that order,
    so the latter wins on key clashes) and written to ``outs.summary``.
    The merged data must contain a ``'batches'`` entry, which is passed on
    as ``agg_batches`` in the sample properties for the PIPELINE_AGGR web
    summary written to ``outs.web_summary``.
    """
    genomes = cr_matrix.CountMatrix.load_h5_file(
        args.filtered_matrices_h5).get_genomes()

    metrics = {}

    # Metrics from the secondary analysis are optional.
    if args.analyze_matrices_summary:
        with open(args.analyze_matrices_summary) as source:
            metrics.update(json.load(source))

    with open(args.normalize_depth_summary, 'r') as source:
        metrics.update(json.load(source))
    batches = metrics['batches']

    with open(outs.summary, 'w') as sink:
        json.dump(metrics, sink, indent=4, sort_keys=True)

    # Web summary inputs; the webshim helpers are handed a plain dict.
    properties = dict(
        AggrSampleProperties(
            sample_id=args.sample_id,
            sample_desc=args.sample_desc,
            genomes=genomes,
            version=martian.get_pipelines_version(),
            agg_batches=batches,
        )._asdict())

    data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
    )

    loaded = cr_webshim.load_sample_data(properties, data_paths)
    cr_webshim.build_web_summary_html(
        outs.web_summary, properties, loaded, PIPELINE_AGGR)
示例#5
0
def main(args, outs):
    """Assemble the aggregation metrics summary and build its web summary.

    Metrics are gathered, in this order, from the filtered matrices
    (per-genome cell counts and medians), the optional
    ``args.analyze_matrices_summary`` JSON, the normalize-depth and
    count-genes summaries, and the molecule counter / downsampling info.
    Later writes overwrite earlier ones on key clashes. The result is
    dumped to ``outs.summary`` and used to build ``outs.web_summary`` for
    PIPELINE_AGGR.
    """
    metrics = {}

    # --- Per-genome stats from the filtered matrices ---
    matrices = cr_matrix.GeneBCMatrices.load_h5(args.filtered_matrices_h5)
    genomes = matrices.get_genomes()
    cell_counts = {}
    for genome in genomes:
        mat = matrices.matrices[genome]
        cell_counts[genome] = mat.bcs_dim
        genes_median = np.median(
            mat._sum(mat.m >= cr_constants.MIN_READS_PER_GENE, axis=0))
        umis_median = np.median(mat._sum(mat.m, axis=0))
        metrics['%s_filtered_bcs' % genome] = cell_counts[genome]
        metrics['%s_filtered_bcs_median_counts' % genome] = umis_median
        metrics[
            '%s_filtered_bcs_median_unique_genes_detected' % genome
        ] = genes_median
    # Drop the reference to the loaded matrices container once the
    # per-genome stats have been extracted.
    del matrices

    # --- Metrics from other summaries ---
    if args.analyze_matrices_summary:
        with open(args.analyze_matrices_summary) as source:
            metrics.update(json.load(source))

    with open(args.normalize_depth_summary, 'r') as source:
        depth_data = json.load(source)
    raw_mapped = depth_data['raw_conf_mapped_per_genome']
    downsample_info = depth_data['downsample_info']
    counter_metrics = depth_data['mol_counter_metrics']

    with open(args.count_genes_summary, 'r') as source:
        genes_data = json.load(source)
    filtered_mapped = genes_data['flt_conf_mapped_per_genome']

    # Filtered reads divided by raw reads, per genome.
    for genome in filtered_mapped:
        in_cells_frac = tk_stats.robust_divide(
            filtered_mapped[genome], raw_mapped[genome])
        metrics[
            '%s_filtered_bcs_conf_mapped_barcoded_reads_cum_frac' % genome
        ] = in_cells_frac

    # Chemistry metrics are passed through to the output unchanged.
    for name, value in counter_metrics.iteritems():
        if name.startswith('chemistry_'):
            metrics[name] = value

    # --- Molecule counter metrics ---
    gem_groups = []
    reads_by_group = []
    downsampled_by_group = []
    per_group = counter_metrics[cr_mol_counter.GEM_GROUPS_METRIC]
    for gg, group_metrics in per_group.iteritems():
        group_total = group_metrics[cr_mol_counter.GG_TOTAL_READS_METRIC]
        gem_groups.append(gg)
        reads_by_group.append(group_total)
        # A missing downsampled metric means no downsampling was done,
        # so fall back to the total.
        downsampled_by_group.append(
            group_metrics.get(
                cr_mol_counter.GG_DOWNSAMPLED_READS_METRIC, group_total))

    total_reads = sum(reads_by_group)
    downsampled_reads = sum(downsampled_by_group)
    total_cells = sum(cell_counts.values())

    metrics['pre_normalization_total_reads'] = total_reads
    metrics['post_normalization_total_reads'] = downsampled_reads
    metrics['filtered_bcs_transcriptome_union'] = total_cells
    metrics[
        'pre_normalization_multi_transcriptome_total_raw_reads_per_filtered_bc'
    ] = tk_stats.robust_divide(total_reads, total_cells)
    metrics[
        'post_normalization_multi_transcriptome_total_raw_reads_per_filtered_bc'
    ] = tk_stats.robust_divide(downsampled_reads, total_cells)

    # --- Downsampling metrics ---
    group_index = args.gem_group_index
    batches = []
    min_frac_kept = 1.0
    for gg, group_reads in zip(gem_groups, reads_by_group):
        info = downsample_info[str(gg)]
        library_id, original_gg = group_index[str(gg)]
        # Only original gem groups above 1 get a numeric suffix.
        suffix = ''
        if original_gg > 1:
            suffix = '-%d' % original_gg
        batch = library_id + suffix
        batches.append(batch)

        frac_kept = info['frac_reads_kept']
        min_frac_kept = min(min_frac_kept, frac_kept)
        metrics['%s_frac_reads_kept' % batch] = frac_kept

        raw_per_cell = tk_stats.robust_divide(
            info['total_reads'], info['cells'])
        metrics[
            '%s_pre_normalization_raw_reads_per_filtered_bc' % batch
        ] = raw_per_cell

        cmb_per_cell = tk_stats.robust_divide(
            info['cmb_reads'], info['cells'])
        metrics[
            '%s_pre_normalization_cmb_reads_per_filtered_bc' % batch
        ] = cmb_per_cell

        # Internal metric: keyed by gem group rather than batch name.
        metrics['%s_total_reads_per_gem_group' % gg] = frac_kept * group_reads
    metrics['lowest_frac_reads_kept'] = min_frac_kept

    with open(outs.summary, 'w') as sink:
        json.dump(metrics, sink, indent=4, sort_keys=True)

    # --- Web summary ---
    properties = cr_webshim.get_sample_properties(
        args.aggregation_id,
        args.aggregation_desc,
        genomes,
        version=martian.get_pipelines_version(),
        agg_batches=batches,
    )

    data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.summary,
        barcode_summary_path=args.barcode_summary_h5,
        analysis_path=args.analysis,
    )

    loaded = cr_webshim.load_sample_data(properties, data_paths)
    cr_webshim.build_web_summary_html(
        outs.web_summary, properties, loaded, PIPELINE_AGGR)