示例#1
0
def join(args, outs, chunk_defs, chunk_outs):
    """Concatenate the chunk molecule files and record barcode index ranges.

    The ``molecule_h5`` file of every chunk is merged into
    ``outs.merged_molecules`` together with the naively concatenated metrics,
    which are flagged as aggregated.  Afterwards
    ``outs.gem_group_barcode_ranges`` maps ``str(gem_group)`` to
    ``[barcode_idx_offset, barcode_idx_end]`` of the chunk that reported the
    gem group in its ``new_gem_groups``.

    Args:
        args: Stage arguments (not used by this function).
        outs: Stage outputs; ``merged_molecules`` is passed to the
            concatenation and ``gem_group_barcode_ranges`` is assigned here.
        chunk_defs: Chunk definitions providing ``barcode_idx_offset`` and
            ``barcode_idx_end``.
        chunk_outs: Chunk outputs providing ``molecule_h5`` and
            ``new_gem_groups``; paired positionally with ``chunk_defs``.
    """
    chunk_h5_paths = []
    for result in chunk_outs:
        chunk_h5_paths.append(result.molecule_h5)

    combined_metrics = MoleculeCounter.naive_concatenate_metrics(chunk_h5_paths)
    combined_metrics[cr_mol_counter.IS_AGGREGATED_METRIC] = True
    MoleculeCounter.concatenate(
        outs.merged_molecules, chunk_h5_paths, metrics=combined_metrics)

    # Record, for each gem group, the range of barcode indices it can contain.
    outs.gem_group_barcode_ranges = {}
    for definition, result in zip(chunk_defs, chunk_outs):
        # A fresh [offset, end] list is built for every gem group so that no
        # two keys share the same list object.
        outs.gem_group_barcode_ranges.update(
            (str(gem_group),
             [definition.barcode_idx_offset, definition.barcode_idx_end])
            for gem_group in result.new_gem_groups)
示例#2
0
def join(args, outs, chunk_defs, chunk_outs):
    """Concatenate the chunk HDF5 outputs into ``outs.output`` with metrics.

    The metrics are derived from the merged upstream summary JSONs, the
    reference metadata, and the library info read from the first input BAM.
    The usable-reads metric summed over the chunk outputs is then added to
    each library's entry before the chunk files are concatenated.

    Args:
        args: Stage arguments; reads ``extract_reads_summary``,
            ``attach_bcs_and_umis_summary``, ``mark_duplicates_summary``,
            ``reference_path``, ``inputs`` (only the first element),
            ``recovered_cells`` and ``force_cells``.
        outs: Stage outputs; ``outs.output`` is the concatenation target.
        chunk_defs: Chunk definitions (not used by this function).
        chunk_outs: Chunk outputs; each ``output`` is one input file of the
            concatenation.
    """
    summary = cr_utils.merge_jsons_as_dict([
        args.extract_reads_summary,
        args.attach_bcs_and_umis_summary,
        args.mark_duplicates_summary,
    ])

    # Hack for getting reference metadata -
    # this used to be computed in prior stages.
    # This is needed for storage in the molecule_info HDF5.
    tmp_reporter = cr_report.Reporter()
    tmp_reporter.store_reference_metadata(args.reference_path,
                                          cr_constants.REFERENCE_TYPE,
                                          cr_constants.REFERENCE_METRIC_PREFIX)
    ref_metadata = tmp_reporter.report(cr_constants.DEFAULT_REPORT_TYPE)
    summary.update(ref_metadata)

    # Load library info from BAM.  The handle is only needed for this lookup,
    # so release it right away instead of leaking it for the rest of the
    # stage.  NOTE(review): assumes the returned handle exposes close(), as
    # pysam alignment files do - confirm against tk_bam.create_bam_infile.
    in_bam = tk_bam.create_bam_infile(args.inputs[0])
    try:
        library_info = rna_library.get_bam_library_info(in_bam)
    finally:
        in_bam.close()

    metrics = MoleculeCounter.get_metrics_from_summary(summary, library_info,
                                                       args.recovered_cells,
                                                       args.force_cells)

    input_h5_filenames = [chunk_out.output for chunk_out in chunk_outs]
    # update with metrics that were computed in the chunks
    chunk_metric = cr_mol_counter.USABLE_READS_METRIC
    summed_lib_metrics = MoleculeCounter.sum_library_metric(
        input_h5_filenames, chunk_metric)
    # items() rather than iteritems(): identical on Python 2 and still
    # available on Python 3.
    for lib_key, value in summed_lib_metrics.items():
        metrics[cr_mol_counter.LIBRARIES_METRIC][lib_key][chunk_metric] = value

    MoleculeCounter.concatenate(outs.output,
                                input_h5_filenames,
                                metrics=metrics)