def call_consensus(samples):
    """Call consensus peaks on the narrowPeak/broadPeak files from a set of
    ChIP/ATAC samples.
    """
    data = samples[0][0]
    new_samples = []
    consensusdir = os.path.join(dd.get_work_dir(data), "consensus")
    utils.safe_makedir(consensusdir)
    peakfiles = []
    for data in dd.sample_data_iterator(samples):
        if dd.get_chip_method(data) == "chip":
            for fn in tz.get_in(("peaks_files", "macs2"), data, []):
                if "narrowPeak" in fn:
                    peakfiles.append(fn)
                    break
                elif "broadPeak" in fn:
                    peakfiles.append(fn)
                    break
        elif dd.get_chip_method(data) == "atac":
            for fn in tz.get_in(("peaks_files", "NF", "macs2"), data, []):
                if "narrowPeak" in fn:
                    peakfiles.append(fn)
    consensusfile = os.path.join(consensusdir, "consensus.bed")
    if not peakfiles:
        logger.info("No suitable peak files found, skipping consensus peak calling.")
        return samples
    consensusfile = consensus(peakfiles, consensusfile, data)
    for data in dd.sample_data_iterator(samples):
        new_samples.append([tz.assoc_in(data, ("peaks_files", "consensus"),
                                        {"main": consensusfile})])
    return new_samples
def calling(data):
    """Main function to parallelize peak calling."""
    method = dd.get_chip_method(data)
    caller_fn = get_callers()[data["peak_fn"]]
    if method == "chip":
        chip_bam = data.get("work_bam")
        input_bam = data.get("work_bam_input", None)
        name = dd.get_sample_name(data)
        out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                                  data["peak_fn"], name))
        out_files = caller_fn(name, chip_bam, input_bam,
                              dd.get_genome_build(data), out_dir,
                              dd.get_chip_method(data), data["resources"], data)
        greylistdir = greylisting(data)
        data.update({"peaks_files": out_files})
        if greylistdir:
            data["greylist"] = greylistdir
    if method == "atac":
        MIN_READS_TO_CALL = 1000
        fractions = list(ATACRanges.keys()) + ["full"]
        for fraction in fractions:
            chip_bam = tz.get_in(("atac", "align", fraction), data)
            if not bam.has_nalignments(chip_bam, MIN_READS_TO_CALL, data):
                logger.warn(f"{chip_bam} has fewer than {MIN_READS_TO_CALL} reads, "
                            "peak calling will fail so skip this fraction.")
                continue
            logger.info(f"Running peak calling with {data['peak_fn']} on the "
                        f"{fraction} fraction of {chip_bam}.")
            name = dd.get_sample_name(data) + f"-{fraction}"
            out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                                      data["peak_fn"], name))
            out_files = caller_fn(name, chip_bam, None,
                                  dd.get_genome_build(data), out_dir,
                                  dd.get_chip_method(data), data["resources"], data)
            data = tz.assoc_in(data, ("peaks_files", fraction), out_files)
    return [[data]]
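# For context: ATACRanges (used above) maps ATAC fragment fractions to
# insert-size ranges -- nucleosome-free (NF), mono- (MN), di- (DN) and
# tri-nucleosome (TN). The boundaries below are the commonly cited values
# and are an assumption about the actual definition, shown as a plain dict
# sketch rather than the pipeline's real data structure.
ATACRANGES_SKETCH = {"NF": (0, 100),    # nucleosome-free
                     "MN": (180, 247),  # mono-nucleosome
                     "DN": (315, 473),  # di-nucleosome
                     "TN": (558, 615)}  # tri-nucleosome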
def calling(data):
    """Main function to parallelize peak calling."""
    method = dd.get_chip_method(data)
    caller_fn = get_callers()[data["peak_fn"]]
    if method == "chip":
        chip_bam = data.get("work_bam")
        input_bam = data.get("work_bam_input", None)
        name = dd.get_sample_name(data)
        out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                                  data["peak_fn"], name))
        out_files = caller_fn(name, chip_bam, input_bam,
                              dd.get_genome_build(data), out_dir,
                              dd.get_chip_method(data), data["resources"], data)
        greylistdir = greylisting(data)
        data.update({"peaks_files": out_files})
        if greylistdir:
            data["greylist"] = greylistdir
    if method == "atac":
        for fraction in atac.ATACRanges.keys():
            chip_bam = tz.get_in(("atac", "align", fraction), data)
            logger.info(f"Running peak calling with {data['peak_fn']} on the "
                        f"{fraction} fraction of {chip_bam}.")
            name = dd.get_sample_name(data) + f"-{fraction}"
            out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                                      data["peak_fn"], name))
            out_files = caller_fn(name, chip_bam, None,
                                  dd.get_genome_build(data), out_dir,
                                  dd.get_chip_method(data), data["resources"], data)
            data = tz.assoc_in(data, ("peaks_files", fraction), out_files)
    return [[data]]
def create_peaktable(samples):
    """Create a table of peak counts per sample to use with differential
    peak calling.
    """
    data = dd.get_data_from_sample(samples[0])
    peakcounts = []
    out_dir = os.path.join(dd.get_work_dir(data), "consensus")
    out_file = os.path.join(out_dir, "consensus-counts.tsv")
    if dd.get_chip_method(data) == "chip":
        for data in dd.sample_data_iterator(samples):
            peakcounts.append(tz.get_in(("peak_counts",), data))
    elif dd.get_chip_method(data) == "atac":
        for data in dd.sample_data_iterator(samples):
            if bam.is_paired(dd.get_work_bam(data)):
                peakcounts.append(tz.get_in(("peak_counts", "NF"), data))
            else:
                logger.info(f"Creating peak table from full BAM file because "
                            f"{dd.get_work_bam(data)} is single-ended.")
                peakcounts.append(tz.get_in(("peak_counts", "full"), data))
    combined_peaks = count.combine_count_files(peakcounts, out_file, ext=".counts")
    new_data = []
    for data in dd.sample_data_iterator(samples):
        data = tz.assoc_in(data, ("peak_counts", "peaktable"), combined_peaks)
        new_data.append(data)
    new_samples = dd.get_samples_from_datalist(new_data)
    return new_samples
def _maybe_add_peaks(algorithm, sample, out):
    out_dir = sample.get("peaks_files", {})
    if dd.get_chip_method(sample) == "atac":
        for files in out_dir.values():
            for caller in files:
                if caller == "main":
                    continue
                for fn in files[caller]:
                    if os.path.exists(fn):
                        out.append({"path": fn,
                                    "dir": caller,
                                    "ext": utils.splitext_plus(fn)[1]})
    else:
        for caller in out_dir:
            if caller == "main":
                continue
            for fn in out_dir[caller]:
                if os.path.exists(fn):
                    out.append({"path": fn,
                                "dir": caller,
                                "ext": utils.splitext_plus(fn)[1]})
    return out
def run_ataqv(data):
    if not dd.get_chip_method(data) == "atac":
        return None
    work_dir = dd.get_work_dir(data)
    sample_name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(work_dir, "qc", sample_name, "ataqv"))
    peak_file = get_full_peaks(data)
    bam_file = get_unfiltered_bam(data)
    out_file = os.path.join(out_dir, sample_name + ".ataqv.json.gz")
    if not peak_file:
        logger.info(f"Full peak file for {sample_name} not found, skipping ataqv.")
        return None
    if not bam_file:
        logger.info(f"Unfiltered BAM file for {sample_name} not found, skipping ataqv.")
        return None
    if utils.file_exists(out_file):
        return out_file
    tss_bed_file = os.path.join(out_dir, "TSS.bed")
    tss_bed_file = gtf.get_tss_bed(dd.get_gtf_file(data), tss_bed_file, data,
                                   padding=1000)
    autosomal_reference = os.path.join(out_dir, "autosomal.txt")
    autosomal_reference = _make_autosomal_reference_file(autosomal_reference, data)
    ataqv = config_utils.get_program("ataqv", data)
    mitoname = chromhacks.get_mitochondrial_chroms(data)[0]
    if not ataqv:
        logger.info("ataqv executable not found, skipping running ataqv.")
        return None
    with file_transaction(out_file) as tx_out_file:
        cmd = (f"{ataqv} --peak-file {peak_file} --name {sample_name} "
               f"--metrics-file {tx_out_file} --tss-file {tss_bed_file} "
               f"--autosomal-reference-file {autosomal_reference} "
               f"--ignore-read-groups --mitochondrial-reference-name {mitoname} "
               f"None {bam_file}")
        message = f"Running ataqv on {sample_name}."
        do.run(cmd, message)
    return out_file
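# The literal "None" in the ataqv command above appears to fill ataqv's
# organism positional argument. Below is a minimal sketch of the
# _make_autosomal_reference_file helper it relies on -- an assumption about
# its behavior, not the actual implementation: write non-mitochondrial
# contig names, one per line, for ataqv's --autosomal-reference-file option.
import pysam

def _make_autosomal_reference_file_sketch(out_file, fasta_file, mito_names):
    with open(out_file, "w") as out_handle:
        with pysam.FastaFile(fasta_file) as fasta:
            for chrom in fasta.references:
                if chrom not in set(mito_names):
                    out_handle.write(chrom + "\n")
    return out_file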
def calling(data):
    """Main function to parallelize peak calling."""
    chip_bam = dd.get_work_bam(data)
    input_bam = data.get("work_bam_input", None)
    caller_fn = get_callers()[data["peak_fn"]]
    name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                              data["peak_fn"], name))
    encode_bed = tz.get_in(["genome_resources", "variation", "encode_blacklist"],
                           data)
    # lcr_bed = utils.get_in(data, ("genome_resources", "variation", "lcr"))
    if encode_bed:
        chip_bam = _prepare_bam(chip_bam, encode_bed, data['config'])
        data["work_bam_filter"] = chip_bam
        input_bam = _prepare_bam(input_bam, encode_bed, data['config'])
        data["input_bam_filter"] = input_bam
    out_files = caller_fn(name, chip_bam, input_bam, dd.get_genome_build(data),
                          out_dir, dd.get_chip_method(data), data["resources"],
                          data["config"])
    greylistdir = greylisting(data)
    data.update({"peaks_files": out_files})
    if greylistdir:
        data["greylist"] = greylistdir
    return [[data]]
def clean_chipseq_alignment(data):
    # lcr_bed = utils.get_in(data, ("genome_resources", "variation", "lcr"))
    method = dd.get_chip_method(data)
    if method == "atac":
        data = clean_ATAC(data)
    # for ATAC-seq, this will be the NF BAM
    work_bam = dd.get_work_bam(data)
    work_bam = bam.sort(work_bam, dd.get_config(data))
    bam.index(work_bam, dd.get_config(data))
    clean_bam = remove_nonassembled_chrom(work_bam, data)
    clean_bam = remove_mitochondrial_reads(clean_bam, data)
    data = atac.calculate_complexity_metrics(clean_bam, data)
    if not dd.get_keep_multimapped(data):
        clean_bam = remove_multimappers(clean_bam, data)
    if not dd.get_keep_duplicates(data):
        clean_bam = bam.remove_duplicates(clean_bam, data)
    data["work_bam"] = clean_bam
    encode_bed = tz.get_in(["genome_resources", "variation", "encode_blacklist"],
                           data)
    if encode_bed:
        data["work_bam"] = remove_blacklist_regions(dd.get_work_bam(data),
                                                    encode_bed, data['config'])
        bam.index(data["work_bam"], data['config'])
    try:
        data["bigwig"] = _normalized_bam_coverage(dd.get_sample_name(data),
                                                  dd.get_work_bam(data), data)
    except subprocess.CalledProcessError:
        logger.warning(f"{dd.get_work_bam(data)} was too sparse to normalize, "
                       "falling back to non-normalized coverage.")
        data["bigwig"] = _bam_coverage(dd.get_sample_name(data),
                                       dd.get_work_bam(data), data)
    return [[data]]
def _normalized_bam_coverage(name, bam_input, data):
    """Run bamCoverage from deeptools, producing normalized (CPM) bigWig files."""
    cmd = ("{bam_coverage} --bam {bam_input} --outFileName {bw_output} "
           "--binSize 20 --effectiveGenomeSize {size} "
           "--smoothLength 60 --extendReads 150 --centerReads -p {cores} ")
    size = bam.fasta.total_sequence_length(dd.get_ref_file(data))
    cores = dd.get_num_cores(data)
    try:
        bam_coverage = config_utils.get_program("bamCoverage", data)
    except config_utils.CmdNotFound:
        logger.info("No bamCoverage found, skipping bamCoverage.")
        return None
    method = dd.get_chip_method(data)
    cmd += "--normalizeUsing CPM "
    toignore = get_mitochondrial_chroms(data)
    if toignore:
        ignorenormflag = f"--ignoreForNormalization {' '.join(toignore)} "
        cmd += ignorenormflag
    resources = config_utils.get_resources("bamCoverage", data["config"])
    if resources:
        options = resources.get("options")
        if options:
            cmd += " %s" % " ".join([str(x) for x in options])
    bw_output = os.path.join(os.path.dirname(bam_input), "%s.bw" % name)
    if utils.file_exists(bw_output):
        return bw_output
    with file_transaction(bw_output) as out_tx:
        do.run(cmd.format(**locals()), "Run bamCoverage in %s" % name)
    return bw_output
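# For reference, with hypothetical values (sample "rep1", 4 cores, a 3.1 Gb
# genome, chrM ignored for normalization), cmd.format(**locals()) above
# expands to roughly:
#
#   bamCoverage --bam rep1.bam --outFileName rep1.bw --binSize 20 \
#       --effectiveGenomeSize 3100000000 --smoothLength 60 --extendReads 150 \
#       --centerReads -p 4 --normalizeUsing CPM --ignoreForNormalization chrM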
def call_consensus(samples):
    """Call consensus peaks on the narrowPeak files from a set of
    ChIP/ATAC samples.
    """
    data = samples[0][0]
    new_samples = []
    consensusdir = os.path.join(dd.get_work_dir(data), "consensus")
    utils.safe_makedir(consensusdir)
    peakfiles = []
    for data in dd.sample_data_iterator(samples):
        if dd.get_chip_method(data) == "chip":
            for fn in tz.get_in(("peaks_files", "macs2"), data, []):
                if "narrowPeak" in fn:
                    peakfiles.append(fn)
                elif "broadPeak" in fn:
                    peakfiles.append(fn)
        elif dd.get_chip_method(data) == "atac":
            if bam.is_paired(dd.get_work_bam(data)):
                for fn in tz.get_in(("peaks_files", "NF", "macs2"), data, []):
                    if "narrowPeak" in fn:
                        peakfiles.append(fn)
            else:
                logger.info(f"Using peaks from full fraction since "
                            f"{dd.get_work_bam(data)} is single-ended.")
                for fn in tz.get_in(("peaks_files", "full", "macs2"), data, []):
                    if "narrowPeak" in fn:
                        peakfiles.append(fn)
    consensusfile = os.path.join(consensusdir, "consensus.bed")
    if not peakfiles:
        logger.info("No suitable peak files found, skipping consensus peak calling.")
        return samples
    consensusfile = consensus(peakfiles, consensusfile, data)
    if not utils.file_exists(consensusfile):
        logger.warning("No consensus peaks found.")
        return samples
    saffile = consensus_to_saf(consensusfile,
                               os.path.splitext(consensusfile)[0] + ".saf")
    for data in dd.sample_data_iterator(samples):
        data = tz.assoc_in(data, ("peaks_files", "consensus"),
                           {"main": consensusfile})
        new_samples.append([data])
    return new_samples
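# A minimal sketch of consensus_to_saf, assuming the consensus file is BED
# (chrom, start, end[, name]) and the target is featureCounts' SAF format
# (GeneID, Chr, Start, End, Strand; tab-separated, 1-based inclusive
# coordinates). The real implementation may differ in naming and strand
# handling.
def consensus_to_saf_sketch(bed_file, saf_file):
    with open(bed_file) as in_handle, open(saf_file, "w") as out_handle:
        out_handle.write("GeneID\tChr\tStart\tEnd\tStrand\n")
        for i, line in enumerate(in_handle):
            fields = line.strip().split("\t")
            chrom, start, end = fields[0], int(fields[1]), int(fields[2])
            peak_id = fields[3] if len(fields) > 3 else f"peak_{i + 1}"
            # BED is 0-based half-open; SAF is 1-based inclusive.
            out_handle.write(f"{peak_id}\t{chrom}\t{start + 1}\t{end}\t.\n")
    return saf_file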
def _check(sample, data):
    """Get input sample for each chip bam file."""
    if dd.get_chip_method(sample).lower() == "atac":
        return [sample]
    if dd.get_phenotype(sample) == "input":
        return None
    for origin in data:
        if (dd.get_batch(sample) in dd.get_batch(origin[0])
                and dd.get_phenotype(origin[0]) == "input"):
            sample["work_bam_input"] = dd.get_work_bam(origin[0])
            return [sample]
    return [sample]
def _check(sample, data):
    """Get input sample for each chip bam file."""
    if dd.get_chip_method(sample).lower() == "atac":
        return [sample]
    if dd.get_phenotype(sample) == "input":
        return None
    for origin in data:
        if (dd.get_batch(sample) in (dd.get_batches(origin[0]) or [])
                and dd.get_phenotype(origin[0]) == "input"):
            sample["work_bam_input"] = origin[0].get("work_bam")
            return [sample]
    return [sample]
def chipseq_count(data):
    """Count reads mapping to ChIP/ATAC consensus peaks with featureCounts."""
    method = dd.get_chip_method(data)
    if method == "chip":
        in_bam = dd.get_work_bam(data)
    elif method == "atac":
        in_bam = tz.get_in(("atac", "align", "NF"), data)
    out_dir = os.path.join(dd.get_work_dir(data), "align",
                           dd.get_sample_name(data))
    sorted_bam = bam.sort(in_bam, dd.get_config(data), order="queryname",
                          out_dir=safe_makedir(out_dir))
    consensus_file = tz.get_in(("peaks_files", "consensus", "main"), data)
    saf_file = os.path.splitext(consensus_file)[0] + ".saf"
    work_dir = dd.get_work_dir(data)
    out_dir = os.path.join(work_dir, "consensus")
    safe_makedir(out_dir)
    count_file = os.path.join(out_dir, dd.get_sample_name(data)) + ".counts"
    summary_file = os.path.join(out_dir, dd.get_sample_name(data)) + ".counts.summary"
    if file_exists(count_file) and _is_fixed_count_file(count_file):
        if method == "atac":
            data = tz.assoc_in(data, ("peak_counts", "NF"), count_file)
        elif method == "chip":
            data = tz.assoc_in(data, ("peak_counts",), count_file)
        return [[data]]
    featureCounts = config_utils.get_program("featureCounts", dd.get_config(data))
    paired_flag = _paired_flag(in_bam)
    strand_flag = _strand_flag(data)
    cmd = ("{featureCounts} -F SAF -a {saf_file} -o {tx_count_file} "
           "-s {strand_flag} {paired_flag} {sorted_bam}")
    message = ("Count reads in {sorted_bam} overlapping {saf_file} using "
               "featureCounts.")
    with file_transaction(data, [count_file, summary_file]) as tx_files:
        tx_count_file, tx_summary_file = tx_files
        do.run(cmd.format(**locals()), message.format(**locals()))
    fixed_count_file = _format_count_file(count_file, data)
    fixed_summary_file = _change_sample_name(summary_file,
                                             dd.get_sample_name(data), data=data)
    shutil.move(fixed_count_file, count_file)
    shutil.move(fixed_summary_file, summary_file)
    if method == "atac":
        data = tz.assoc_in(data, ("peak_counts", "NF"), count_file)
    elif method == "chip":
        data = tz.assoc_in(data, ("peak_counts",), count_file)
    return [[data]]
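# A minimal sketch of the _paired_flag helper used above -- an assumption,
# not the actual code: featureCounts takes -p to count fragments for
# paired-end data (newer featureCounts versions also want --countReadPairs).
def _paired_flag_sketch(bam_file):
    return "-p" if bam.is_paired(bam_file) else ""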
def get_qc_tools(data):
    """Retrieve a list of QC tools to use based on configuration and analysis type.

    Uses defaults if previously set.
    """
    if dd.get_algorithm_qc(data):
        return dd.get_algorithm_qc(data)
    analysis = data["analysis"].lower()
    to_run = []
    if tz.get_in(["config", "algorithm", "kraken"], data):
        to_run.append("kraken")
    if "fastqc" not in dd.get_tools_off(data):
        to_run.append("fastqc")
    if any([tool in dd.get_tools_on(data)
            for tool in ["qualimap", "qualimap_full"]]):
        to_run.append("qualimap")
    if analysis.startswith("rna-seq") or analysis == "smallrna-seq":
        if "qualimap" not in dd.get_tools_off(data):
            if gtf.is_qualimap_compatible(dd.get_gtf_file(data)):
                to_run.append("qualimap_rnaseq")
            else:
                logger.debug("GTF not compatible with Qualimap, skipping.")
    if analysis.startswith("chip-seq"):
        to_run.append("chipqc")
        if dd.get_chip_method(data) == "atac":
            to_run.append("ataqv")
    if analysis.startswith("smallrna-seq"):
        to_run.append("small-rna")
        to_run.append("atropos")
    if "coverage_qc" not in dd.get_tools_off(data):
        to_run.append("samtools")
    if dd.has_variantcalls(data):
        if "coverage_qc" not in dd.get_tools_off(data):
            to_run += ["coverage", "picard"]
        to_run += ["qsignature", "variants"]
        if vcfanno.is_human(data):
            to_run += ["peddy"]
            if "contamination" not in dd.get_tools_off(data):
                to_run += ["contamination"]
        if vcfutils.get_paired_phenotype(data):
            if "viral" not in dd.get_tools_off(data):
                to_run += ["viral"]
        if damage.should_filter([data]):
            to_run += ["damage"]
    if dd.get_umi_consensus(data):
        to_run += ["umi"]
    if tz.get_in(["config", "algorithm", "preseq"], data):
        to_run.append("preseq")
    to_run = [tool for tool in to_run if tool not in dd.get_tools_off(data)]
    to_run.sort()
    return to_run
def _macs2_cmd(data):
    """Main command for the macs2 tool."""
    method = dd.get_chip_method(data)
    if method.lower() == "chip":
        cmd = ("{macs2} callpeak -t {chip_bam} -c {input_bam} {paired} "
               "{genome_size} -n {name} --bdg {options} ")
    elif method.lower() == "atac":
        cmd = ("{macs2} callpeak -t {chip_bam} --nomodel "
               "{paired} {genome_size} -n {name} --bdg {options} "
               "--nolambda --keep-dup all")
    else:
        raise ValueError("chip_method should be chip or atac.")
    return cmd
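# Example (hypothetical values): the ATAC template above, after the caller
# fills it with cmd.format(**locals()). The {paired} and {genome_size}
# expansions shown here ("-f BAMPE", "-g 2.7e9") are assumptions for
# illustration, not necessarily what the pipeline produces.
_EXAMPLE_ATAC_CMD = ("macs2 callpeak -t rep1-NF.bam --nomodel -f BAMPE "
                     "-g 2.7e9 -n rep1-NF --bdg --nolambda --keep-dup all")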
def create_peaktable(samples):
    """Create a table of peak counts per sample to use with differential
    peak calling.
    """
    data = dd.get_data_from_sample(samples[0])
    peakcounts = []
    out_dir = os.path.join(dd.get_work_dir(data), "consensus")
    out_file = os.path.join(out_dir, "consensus-counts.tsv")
    if dd.get_chip_method(data) == "chip":
        for data in dd.sample_data_iterator(samples):
            peakcounts.append(tz.get_in(("peak_counts",), data))
    elif dd.get_chip_method(data) == "atac":
        for data in dd.sample_data_iterator(samples):
            peakcounts.append(tz.get_in(("peak_counts", "NF"), data))
    combined_peaks = count.combine_count_files(peakcounts, out_file, ext=".counts")
    new_data = []
    for data in dd.sample_data_iterator(samples):
        data = tz.assoc_in(data, ("peak_counts", "peaktable"), combined_peaks)
        new_data.append(data)
    new_samples = dd.get_samples_from_datalist(new_data)
    return new_samples
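# A minimal pandas stand-in for count.combine_count_files -- a sketch under
# the assumption that each per-sample .counts file is two tab-separated
# columns (peak id, count) with the sample name recoverable from the
# filename; the real implementation may differ.
import os
import pandas as pd

def combine_count_files_sketch(count_files, out_file):
    frames = []
    for fn in count_files:
        sample = os.path.basename(fn).replace(".counts", "")
        frames.append(pd.read_csv(fn, sep="\t", index_col=0, header=None,
                                  names=["id", sample]))
    pd.concat(frames, axis=1).to_csv(out_file, sep="\t")
    return out_file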
def calling(data):
    """Main function to parallelize peak calling."""
    chip_bam = dd.get_work_bam(data)
    input_bam = data.get("work_bam_input", None)
    caller_fn = get_callers()[data["peak_fn"]]
    name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                              data["peak_fn"], name))
    # chip_bam = _prepare_bam(chip_bam, dd.get_variant_regions(data), data['config'])
    # input_bam = _prepare_bam(input_bam, dd.get_variant_regions(data), data['config'])
    out_file = caller_fn(name, chip_bam, input_bam, dd.get_genome_build(data),
                         out_dir, dd.get_chip_method(data), data["config"])
    data["peaks_file"] = out_file
    return [[data]]
def _get_multiplier(samples):
    """Get a multiplier to get jobs only for samples that have input."""
    to_process = 1.0
    to_skip = 0
    for sample in samples:
        if dd.get_phenotype(sample[0]) == "chip":
            to_process += 1.0
        elif dd.get_chip_method(sample[0]).lower() == "atac":
            to_process += 1.0
        else:
            to_skip += 1.0
    return (to_process - to_skip) / len(samples)
def calling(data):
    """Main function to parallelize peak calling."""
    chip_bam = data.get("work_bam")
    input_bam = data.get("work_bam_input", None)
    caller_fn = get_callers()[data["peak_fn"]]
    name = dd.get_sample_name(data)
    out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                              data["peak_fn"], name))
    out_files = caller_fn(name, chip_bam, input_bam, dd.get_genome_build(data),
                          out_dir, dd.get_chip_method(data), data["resources"],
                          data)
    greylistdir = greylisting(data)
    data.update({"peaks_files": out_files})
    # data["input_bam_filter"] = input_bam
    if greylistdir:
        data["greylist"] = greylistdir
    return [[data]]
def _get_multiplier(samples):
    """Get a multiplier to get jobs only for samples that have input."""
    to_process = 1.0
    to_skip = 0
    for sample in samples:
        if dd.get_phenotype(sample[0]) == "chip":
            to_process += 1.0
        elif dd.get_chip_method(sample[0]).lower() == "atac":
            to_process += 1.0
        else:
            to_skip += 1.0
    mult = (to_process - to_skip) / len(samples)
    if mult <= 0:
        mult = 1 / len(samples)
    return max(mult, 1)
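# Worked example of the arithmetic above (hypothetical batch): with 4
# samples -- 2 with phenotype "chip", 1 ATAC and 1 input -- to_process
# ends at 1.0 + 3.0 = 4.0 and to_skip at 1.0, so
# mult = (4.0 - 1.0) / 4 = 0.75. Since 0.75 > 0 the fallback is skipped,
# and max(0.75, 1) returns 1: the final max() floors the multiplier at one.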
def calculate_encode_complexity_metrics(data):
    metrics_file = tz.get_in(['atac', 'complexity_metrics_file'], data, None)
    if not metrics_file:
        return {}
    with open(metrics_file) as in_handle:
        header = next(in_handle).strip().split(",")
        values = next(in_handle).strip().split(",")
    raw_metrics = {h: int(v) for h, v in zip(header, values)}
    metrics = {"PBC1": raw_metrics["m1"] / raw_metrics["m0"],
               "NRF": raw_metrics["m0"] / raw_metrics["mt"]}
    if raw_metrics["m2"] == 0:
        PBC2 = 0
    else:
        PBC2 = raw_metrics["m1"] / raw_metrics["m2"]
    metrics["PBC2"] = PBC2
    if dd.get_chip_method(data) == "atac":
        metrics["bottlenecking"] = get_atac_bottlenecking_flag(metrics["PBC1"],
                                                               metrics["PBC2"])
        metrics["complexity"] = get_atac_complexity_flag(metrics["NRF"])
    else:
        metrics["bottlenecking"] = get_chip_bottlenecking_flag(metrics["PBC1"],
                                                               metrics["PBC2"])
        metrics["complexity"] = get_chip_complexity_flag(metrics["NRF"])
    return metrics
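# Worked example with made-up numbers: if mt (total reads) = 1_000_000,
# m0 (distinct positions) = 900_000, m1 (positions with exactly one read)
# = 850_000 and m2 (positions with exactly two reads) = 40_000, then
# NRF = m0/mt = 0.90, PBC1 = m1/m0 ~= 0.94 and PBC2 = m1/m2 = 21.25,
# which fall in ENCODE's recommended (no-bottlenecking) ranges.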
def clean_chipseq_alignment(data):
    # lcr_bed = utils.get_in(data, ("genome_resources", "variation", "lcr"))
    method = dd.get_chip_method(data)
    if method == "atac":
        data = shift_ATAC(data)
    work_bam = dd.get_work_bam(data)
    work_bam = bam.sort(work_bam, dd.get_config(data))
    bam.index(work_bam, dd.get_config(data))
    # an unfiltered BAM file is useful for calculating some metrics later
    data = tz.assoc_in(data, ['chipseq', 'align', "unfiltered"], work_bam)
    clean_bam = remove_nonassembled_chrom(work_bam, data)
    clean_bam = remove_mitochondrial_reads(clean_bam, data)
    data = atac.calculate_complexity_metrics(clean_bam, data)
    if not dd.get_keep_multimapped(data):
        clean_bam = remove_multimappers(clean_bam, data)
    if not dd.get_keep_duplicates(data):
        clean_bam = bam.remove_duplicates(clean_bam, data)
    data["work_bam"] = clean_bam
    # for ATAC-seq, break alignments into NF, mono/di/tri nucleosome BAM files
    if method == "atac":
        data = atac.split_ATAC(data)
    encode_bed = tz.get_in(["genome_resources", "variation", "encode_blacklist"],
                           data)
    if encode_bed:
        data["work_bam"] = remove_blacklist_regions(dd.get_work_bam(data),
                                                    encode_bed, data['config'])
        bam.index(data["work_bam"], data['config'])
    try:
        data["bigwig"] = _normalized_bam_coverage(dd.get_sample_name(data),
                                                  dd.get_work_bam(data), data)
    except subprocess.CalledProcessError:
        logger.warning(f"{dd.get_work_bam(data)} was too sparse to normalize, "
                       "falling back to non-normalized coverage.")
        data["bigwig"] = _bam_coverage(dd.get_sample_name(data),
                                       dd.get_work_bam(data), data)
    return [[data]]
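# A simplified sketch of the Tn5 shift that shift_ATAC performs, assuming
# the standard ATAC-seq convention (+4 bp on the forward strand, -5 bp on
# the reverse strand). This only moves the alignment start; a real
# implementation must also keep mate coordinates and CIGARs consistent,
# so treat it as illustrative, not as the pipeline's actual code.
import pysam

def shift_ATAC_sketch(in_bam, out_bam):
    with pysam.AlignmentFile(in_bam, "rb") as in_handle, \
         pysam.AlignmentFile(out_bam, "wb", template=in_handle) as out_handle:
        for read in in_handle:
            if read.is_unmapped:
                continue
            if read.is_reverse:
                read.reference_start = max(read.reference_start - 5, 0)
            else:
                read.reference_start += 4
            out_handle.write(read)
    return out_bam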