def join(args, outs, chunk_defs, chunk_outs):
    """Build the .cloupe file for this pipestance by shelling out to crconverter.

    Writes contig metadata to a JSON sidecar file, assembles the crconverter
    argument list from the stage args, and runs it via tk_subproc. On failure
    the .cloupe out is nulled and the stage throws with crconverter's output.
    """
    if do_not_make_cloupe(args):
        outs.output_for_cloupe = None
        return

    reference = ReferenceManager(args.reference_path)

    # Dump contig metadata to a JSON file that crconverter can ingest.
    contig_info_fn = martian.make_path("contig_info.json")
    with open(contig_info_fn, 'w') as outfile:
        contig_info = get_contig_info(args.reference_path)
        json.dump(contig_info, outfile)

    gem_group_index_json = get_gem_group_index_json(args, outs)

    call = [
        "crconverter",
        args.sample_id,
        args.pipestance_type,
        "--matrix", args.feature_barcode_matrix,
        "--analysis", args.analysis,
        "--output", outs.output_for_cloupe,
        # BUGFIX: args are passed as an argv list (no shell), so wrapping the
        # description in literal '"' characters put quotes INTO the description
        # itself. Pass the raw value, as the sibling converter stages do.
        "--description", args.sample_desc,
        "--peaks", args.peaks,
        "--fragmentsindex", args.fragments_index,
        "--geneannotations", reference.genes,
        "--contiginfo", contig_info_fn,
    ]
    if args.metrics_json is not None:
        call.extend(["--metrics", args.metrics_json])
    if args.aggregation_csv is not None:
        call.extend(["--aggregation", args.aggregation_csv])
    if gem_group_index_json is not None:
        call.extend(["--gemgroups", gem_group_index_json])

    transcript_gene_types = get_annotation_gene_types(args)
    if transcript_gene_types is not None:
        call.extend(["--geneannotationtypes", ",".join(transcript_gene_types)])

    # The sample desc may be unicode, so send the whole set of args
    # utf-8 encoded to check_output.
    unicode_call = [arg.encode('utf-8') for arg in call]

    # But keep the arg 'call' here because log_info inherently attempts to
    # encode the message. (TODO: should log_info figure out the encoding of
    # the input string?)
    martian.log_info("Running crconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("crconverter output: %s" % results)
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
def main(args, outs):
    """Build the .cloupe file for a count/aggr pipestance via crconverter.

    Skips work entirely (nulling the out) when do_not_make_cloupe says so.
    On crconverter failure the out is nulled and the stage throws.
    """
    if do_not_make_cloupe(args):
        outs.output_for_cloupe = None
        return

    gem_group_index_json = get_gem_group_index_json(args, outs)

    call = [
        "crconverter",
        args.sample_id,
        args.pipestance_type,
        "--matrix", args.filtered_gene_bc_matrices_h5,
        "--analysis", get_analysis_h5_path(args),
        "--output", outs.output_for_cloupe,
        "--description", args.sample_desc,
    ]
    if args.metrics_json:
        call.extend(["--metrics", args.metrics_json])
    if args.aggregation_csv:
        call.extend(["--aggregation", args.aggregation_csv])
    if gem_group_index_json:
        call.extend(["--gemgroups", gem_group_index_json])

    # The sample desc may be unicode, so send the whole set of args
    # utf-8 encoded to check_output.
    unicode_call = [arg.encode('utf-8') for arg in call]

    # But keep the arg 'call' here because log_info inherently attempts to
    # encode the message. (TODO: should log_info figure out the encoding of
    # the input string?)
    martian.log_info("Running crconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("crconverter output: %s" % results)
    # FIX: modern `except ... as e` syntax (PEP 3110), consistent with the
    # other stages in this file; the old comma form is removed in Python 3.
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
def record_package_versions():
    """Print the version of each configured external package to stdout.

    Iterates the static PACKAGE_VERSION_CMDS table and runs each entry's
    version command. shell=True is acceptable here because the commands come
    from a trusted in-repo constants table, not user input.
    """
    for package in cr_constants.PACKAGE_VERSION_CMDS:
        name = package['name']
        cmd = package['cmd']
        version = tk_subproc.check_output(cmd, shell=True)
        # Parenthesized single-argument form prints identically under
        # Python 2 and Python 3 (the old bare `print` statement is Py2-only).
        print('%s: %s' % (name, version))
def join(args, outs, chunk_defs, chunk_outs):
    """Build the .dloupe file by shelling out to dlconverter.

    Writes contig info to the stage's JSON out, assembles the dlconverter
    argument list, and optionally adds gene annotations when the reference
    provides them. On failure the .dloupe out is nulled and the stage throws.
    """
    contig_info = get_contig_info(args)
    with open(outs.contig_info_json, 'w') as outfile:
        json.dump(contig_info, outfile)

    call = [
        "dlconverter",
        args.sample_id,
        "--output", outs.output_for_dloupe,
        "--description", args.sample_desc,
        "--node-profile-h5", args.normalized_node_profiles,
        "--contig-info-json", outs.contig_info_json,
        "--merged-bed", args.node_cnv_calls,
        "--tree-data", args.tree_data,
        "--tracks", args.tracks,
        "--per-cell-summary", args.per_cell_summary_metrics,
    ]

    # Gene annotations are optional; only pass them if the reference has them.
    gene_annotation_path = tk_ref.get_loupe_genes(args.reference_path)
    if os.path.exists(gene_annotation_path):
        call.extend(["--gene-annotations", gene_annotation_path])

    # The sample desc may be unicode, so send the whole set of args
    # utf-8 encoded to check_output.
    unicode_call = [arg.encode('utf-8') for arg in call]

    martian.log_info("Running dlconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("dlconverter output: %s" % results)
    # FIX: modern `except ... as e` syntax (PEP 3110), consistent with the
    # other stages in this file; the old comma form is removed in Python 3.
    except subprocess.CalledProcessError as e:
        outs.output_for_dloupe = None
        martian.throw("Could not generate .dloupe file: \n%s" % e.output)
def main(args, outs):
    """Build a .cloupe from an existing count/aggr pipestance directory.

    Validates the pipestance type and path, checks that a single-species
    analysis HDF5 exists (multi-species runs get no .cloupe), rejects
    pre-1.2 pipestances, then shells out to crconverter.
    """
    if args.pipestance_type != "count" and args.pipestance_type != "aggr":
        martian.exit("The type argument must be one of: count, aggr")

    # Exactly one of the two remains possible after the check above.
    if args.pipestance_type == "count":
        pname = "SC_RNA_COUNTER_CS"
    else:
        pname = "SC_RNA_AGGREGATOR_CS"

    if not os.path.exists(args.pipestance_path):
        martian.exit("Invalid pipestance path: %s" % args.pipestance_path)

    # Check to see if an analysis file exists. If it doesn't, then this is
    # likely a barnyard sample, and we cannot generate a .loupe file
    # (CELLRANGER-773).
    analysis_h5_path = os.path.join(args.pipestance_path, "outs/analysis/analysis.h5")
    # 1.2.0 location only
    internal_count_h5_path = os.path.join(
        args.pipestance_path,
        "SC_RNA_COUNTER_CS/SC_RNA_COUNTER/SC_RNA_ANALYZER/SUMMARIZE_ANALYSIS/fork0/files/analysis/analysis.h5"
    )
    internal_aggr_h5_path = os.path.join(
        args.pipestance_path,
        "SC_RNA_AGGREGATOR_CS/SC_RNA_AGGREGATOR/SC_RNA_ANALYZER/SUMMARIZE_ANALYSIS/fork0/files/analysis/analysis.h5"
    )
    if not os.path.exists(analysis_h5_path) \
            and not os.path.exists(internal_count_h5_path) \
            and not os.path.exists(internal_aggr_h5_path):
        martian.exit("Could not find single-species analysis HDF5 file. " +
                     "Loupe Cell Browser files are not generated for multi-species experiments.")

    # Has to be 1.2 or higher.
    cellranger_pd_before_1_2_path = os.path.join(args.pipestance_path, "CELLRANGER_PD")
    cellranger_cs_before_1_2_path = os.path.join(args.pipestance_path, "CELLRANGER_CS")
    if os.path.exists(cellranger_pd_before_1_2_path) or os.path.exists(cellranger_cs_before_1_2_path):
        martian.exit("mkloupe is only supported for Cell Ranger 1.2 and later.")

    call = ["crconverter",
            args.sample_id,
            pname,
            "--pipestance", args.pipestance_path,
            "--output", outs.output_for_cloupe]

    martian.log_info("Running crconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(call)
        martian.log_info("crconverter output: %s" % results)
    # FIX: modern `except ... as e` syntax (PEP 3110), consistent with the
    # other stages in this file; the old comma form is removed in Python 3.
    except subprocess.CalledProcessError as e:
        outs.output_for_cloupe = None
        martian.throw("Could not generate .cloupe file: \n%s" % e.output)
def get_version():
    """Return the product version string, stripped of whitespace.

    Prefers the checked-in ``.version`` file next to the bin directory;
    falls back to ``git describe`` run from the bin directory when the
    file is absent.

    NOTE: this makes assumptions about the repository's directory layout.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(here, '..', '..', '..', 'bin')
    version_file = os.path.join(bin_dir, '..', '.version')

    if os.path.exists(version_file):
        with open(version_file, 'r') as fh:
            raw = fh.read()
    else:
        raw = tk_subproc.check_output(
            ['git', 'describe', '--tags', '--always', '--dirty'],
            cwd=bin_dir)
    return raw.strip()
def main(args, outs):
    """Run the vlconverter executable with inputs that should be available in
    the outs folder at the end of the pipeline run.

    This will generate "output_for_vloupe.vloupe" in the stage folder. Memory
    usage is not expected to be excessive (thus no custom split/join as of
    yet); it will need to load a few full files (bam.bai, fasta.fai) into
    memory.
    """
    # Bail out quietly (no .vloupe) if any required BAM input is missing.
    if args.concat_ref_bam is None or not os.path.isfile(args.concat_ref_bam) or \
       args.consensus_bam is None or not os.path.isfile(args.consensus_bam) or \
       args.contig_bam_bai is None or not os.path.isfile(args.contig_bam_bai):
        martian.log_info('One or more bam files missing - cannot make vloupe file')
        return

    call = [
        "vlconverter",
        args.sample_id,
        args.pipestance_type,
        "--output", outs.output_for_vloupe,
        "--reference-bam", args.concat_ref_bam,
        "--reference-bam-index", args.concat_ref_bam_bai,
        "--reference-fasta", args.concat_ref_fasta,
        "--reference-fasta-index", args.concat_ref_fasta_fai,
        "--reference-annotations", args.concat_ref_annotations_json,
        "--clonotypes", args.clonotypes_csv,
        "--consensus-bam", args.consensus_bam,
        "--consensus-bam-index", args.consensus_bam_bai,
        "--consensus-annotations", args.consensus_annotations_json,
        "--consensus-fasta", args.consensus_fasta,
        "--consensus-fasta-index", args.consensus_fasta_fai,
        "--contig-bam-relative-path", args.contig_bam_relative_path,
        "--contig-bam-index", args.contig_bam_bai,
        "--contig-annotations", args.contig_annotations_json,
        "--contig-bed", args.contig_annotations_bed,
        "--contig-fasta", args.contig_fasta,
        "--contig-fasta-index", args.contig_fasta_fai,
        "--description", args.sample_desc,
    ]

    # The sample desc may be unicode, so send the whole set of args
    # utf-8 encoded to check_output.
    unicode_call = [arg.encode('utf-8') for arg in call]

    # But keep the arg 'call' here because log_info inherently attempts to
    # encode the message. (TODO: should log_info figure out the encoding of
    # the input string?)
    martian.log_info("Running vlconverter: %s" % " ".join(call))
    try:
        results = tk_subproc.check_output(unicode_call)
        martian.log_info("vlconverter output: %s" % results)
    # FIX: modern `except ... as e` syntax (PEP 3110), consistent with the
    # other stages in this file; the old comma form is removed in Python 3.
    except subprocess.CalledProcessError as e:
        outs.output_for_vloupe = None
        martian.throw("Could not generate .vloupe file: \n%s" % e.output)
def get_unmapped_read_count_from_indexed_bam(bam_file_name):
    """ Get number of unmapped reads from an indexed BAM file.
    Args:
        bam_file_name (str): Name of indexed BAM file.
    Returns:
        int: number of unmapped reads in the BAM
    Note:
        BAM must be indexed for lookup using samtools.
    """
    # FIX: pass an argv list instead of an interpolated shell=True string so
    # filenames containing spaces or shell metacharacters can neither break
    # the command nor inject into a shell.
    index_output = tk_subproc.check_output(['samtools', 'idxstats', bam_file_name])
    # samtools idxstats prints one line per contig plus a final "*" line;
    # the unmapped-read count is the last field of that last line.
    return int(index_output.strip().split('\n')[-1].split()[-1])