def join(args, outs, chunk_defs, chunk_outs):
    """Concatenate the per-chunk molecule files and record barcode ranges.

    The ``molecule_h5`` of every chunk output is concatenated into
    ``outs.merged_molecules``; the metrics stored with it are the naive
    concatenation of the chunk metrics, flagged as aggregated.

    ``outs.gem_group_barcode_ranges`` is set to a dict keyed by ``str(gem
    group)`` whose values are ``[barcode_idx_offset, barcode_idx_end]`` taken
    from the chunk definition that introduced that gem group.

    Args:
        args: Stage arguments (not read here).
        outs: Stage outputs; ``merged_molecules`` is read as the destination
            and ``gem_group_barcode_ranges`` is assigned.
        chunk_defs: Per-chunk definitions providing ``barcode_idx_offset`` and
            ``barcode_idx_end``.
        chunk_outs: Per-chunk outputs providing ``molecule_h5`` and
            ``new_gem_groups``; paired positionally with ``chunk_defs``.
    """
    chunk_molecule_files = []
    for single_out in chunk_outs:
        chunk_molecule_files.append(single_out.molecule_h5)

    combined_metrics = MoleculeCounter.naive_concatenate_metrics(
        chunk_molecule_files)
    combined_metrics[cr_mol_counter.IS_AGGREGATED_METRIC] = True

    MoleculeCounter.concatenate(
        outs.merged_molecules,
        chunk_molecule_files,
        metrics=combined_metrics,
    )

    # Map each gem group to the range of barcode indices it can contain.
    barcode_ranges = {}
    outs.gem_group_barcode_ranges = barcode_ranges
    for definition, single_out in zip(chunk_defs, chunk_outs):
        for gem_group in single_out.new_gem_groups:
            index_range = [
                definition.barcode_idx_offset,
                definition.barcode_idx_end,
            ]
            barcode_ranges[str(gem_group)] = index_range
def join(args, outs, chunk_defs, chunk_outs):
    """Concatenate the chunk molecule files into ``outs.output`` with metrics.

    The metrics attached to the merged file are built from:
      * the merged upstream summary JSONs (extract reads, attach barcodes and
        UMIs, mark duplicates),
      * reference metadata reported for ``args.reference_path``,
      * library info read from the first input BAM,
      * the per-library usable-reads metric, summed over all chunk outputs.

    Args:
        args: Stage arguments; reads ``extract_reads_summary``,
            ``attach_bcs_and_umis_summary``, ``mark_duplicates_summary``,
            ``reference_path``, ``inputs``, ``recovered_cells`` and
            ``force_cells``.
        outs: Stage outputs; ``output`` is the destination of the merged file.
        chunk_defs: Per-chunk definitions (not read here).
        chunk_outs: Per-chunk outputs; each ``output`` is a molecule file to
            concatenate.
    """
    summary = cr_utils.merge_jsons_as_dict([
        args.extract_reads_summary,
        args.attach_bcs_and_umis_summary,
        args.mark_duplicates_summary,
    ])

    # Hack for getting reference metadata -
    # this used to be computed in prior stages.
    # This is needed for storage in the molecule_info HDF5.
    tmp_reporter = cr_report.Reporter()
    tmp_reporter.store_reference_metadata(args.reference_path,
                                          cr_constants.REFERENCE_TYPE,
                                          cr_constants.REFERENCE_METRIC_PREFIX)
    ref_metadata = tmp_reporter.report(cr_constants.DEFAULT_REPORT_TYPE)
    summary.update(ref_metadata)

    # Load library info from BAM. The handle is only needed for this lookup,
    # so release it right away instead of leaking it until process exit.
    # NOTE(review): assumes the object returned by create_bam_infile exposes
    # close() (pysam-style file handle) - confirm.
    in_bam = tk_bam.create_bam_infile(args.inputs[0])
    try:
        library_info = rna_library.get_bam_library_info(in_bam)
    finally:
        in_bam.close()

    metrics = MoleculeCounter.get_metrics_from_summary(summary,
                                                       library_info,
                                                       args.recovered_cells,
                                                       args.force_cells)

    input_h5_filenames = [chunk_out.output for chunk_out in chunk_outs]

    # Update with metrics that were computed in the chunks.
    chunk_metric = cr_mol_counter.USABLE_READS_METRIC
    summed_lib_metrics = MoleculeCounter.sum_library_metric(
        input_h5_filenames, chunk_metric)
    # .items() rather than .iteritems() so this runs on Python 2 and 3 alike.
    for lib_key, value in summed_lib_metrics.items():
        metrics[cr_mol_counter.LIBRARIES_METRIC][lib_key][chunk_metric] = value

    MoleculeCounter.concatenate(outs.output,
                                input_h5_filenames,
                                metrics=metrics)