def _piped_input_cl(data, region, tmp_dir, out_base_file, prep_params):
    """Build the input file and command line that feed the preparation step.

    With Picard duplicate marking, the region's reads are first written to an
    intermediate "-select" file, since MarkDuplicates makes multiple passes
    over its input, and the returned command runs MarkDuplicates on that file
    with output sent to /dev/stdout. With duplicate marking disabled, the
    read-extraction command is returned together with ``data["work_bam"]``.

    Returns a tuple of (input BAM path, command line joined into one string).
    The input BAM is indexed before returning.

    Raises ValueError for any other ``prep_params["dup"]`` setting.
    """
    broad_runner = broad.runner_from_config(data["config"])
    reads_cl = _gatk_extract_reads_cl(data, region, prep_params, tmp_dir)
    dup_method = prep_params["dup"]

    # Only "picard" or a false value (no duplicate marking) are handled here.
    if dup_method and dup_method != "picard":
        raise ValueError("Duplication approach not supported with GATK: %s" % prep_params["dup"])

    if not dup_method:
        sel_file = data["work_bam"]
        stream_cl = reads_cl
    else:
        base, ext = os.path.splitext(out_base_file)
        sel_file = "%s-select%s" % (base, ext)
        # Write the selected reads once; skip when the file already exists.
        if not utils.file_exists(sel_file):
            with file_transaction(sel_file) as tx_out_file:
                reads_cl += ["-o", tx_out_file]
                do.run_memory_retry(reads_cl, "GATK: PrintReads", data, region=region)
        dup_metrics = "%s-dup.dup_metrics" % base
        if prep_params["realign"] == "gatk":
            compression = "5"
        else:
            compression = "0"
        picard_opts = [
            ("INPUT", sel_file),
            ("OUTPUT", "/dev/stdout"),
            ("METRICS_FILE", dup_metrics),
            ("PROGRAM_RECORD_ID", "null"),
            ("COMPRESSION_LEVEL", compression),
            ("TMP_DIR", tmp_dir),
        ]
        stream_cl = broad_runner.cl_picard("MarkDuplicates", picard_opts)

    bam.index(sel_file, data["config"])
    return sel_file, " ".join(stream_cl)
def run_gatk(self, params, tmp_dir=None, log_error=True, memory_retry=False,
             data=None, region=None):
    """Build a GATK command line from ``params`` and execute it.

    A temporary directory from ``curdir_tmpdir`` is used when ``tmp_dir`` is
    not supplied. The value following ``-T`` (or ``--analysis_type`` when
    ``-T`` is absent) in the generated command is used to label the run as
    "GATK: <value>".

    When ``memory_retry`` is true the command goes through
    ``do.run_memory_retry``; otherwise it goes through ``do.run``, which also
    receives ``log_error``.
    """
    with curdir_tmpdir() as fallback_dir:
        if tmp_dir is None:
            tmp_dir = fallback_dir
        command = self.cl_gatk(params, tmp_dir)
        # Prefer the short flag; fall back to the long form of the option.
        if "-T" in command:
            flag_pos = command.index("-T")
        else:
            flag_pos = command.index("--analysis_type")
        description = "GATK: {0}".format(command[flag_pos + 1])
        if not memory_retry:
            do.run(command, description, data, region=region, log_error=log_error)
        else:
            do.run_memory_retry(command, description, data, region=region)
def run_gatk(self, params, tmp_dir=None, log_error=True, memory_retry=False,
             data=None, region=None):
    """Run GATK with the given parameters.

    The command line comes from ``self.cl_gatk(params, tmp_dir)``; if no
    ``tmp_dir`` is passed, the directory yielded by ``curdir_tmpdir`` is used
    instead. The argument after ``-T`` (or after ``--analysis_type`` if
    ``-T`` is not in the command) is used in the "GATK: ..." run description.

    ``memory_retry`` selects ``do.run_memory_retry`` over ``do.run``;
    ``log_error`` is forwarded only to ``do.run``.
    """
    with curdir_tmpdir() as local_tmp_dir:
        work_dir = local_tmp_dir if tmp_dir is None else tmp_dir
        gatk_cl = self.cl_gatk(params, work_dir)
        # Locate whichever spelling of the analysis-type option is present.
        type_flag = "-T" if "-T" in gatk_cl else "--analysis_type"
        tool = gatk_cl[gatk_cl.index(type_flag) + 1]
        label = "GATK: {0}".format(tool)
        if memory_retry:
            do.run_memory_retry(gatk_cl, label, data, region=region)
        else:
            do.run(gatk_cl, label, data, region=region, log_error=log_error)
def _piped_bamprep_region_fullpipe(data, region, prep_params, out_file, tmp_dir):
    """Perform fully piped BAM preparation using non-GATK/Picard tools.

    Joins the extract/recalibration, deduplication and realignment command
    fragments with single spaces, redirects the result into the transactional
    output file for ``out_file``, and runs it via ``do.run_memory_retry``.
    """
    with file_transaction(out_file) as tx_out_file:
        # Fragments are built in pipeline order: extract, dedup, realign.
        extract_stage = _piped_extract_recal_cmd(data, region, prep_params, tmp_dir)
        dedup_stage = _piped_dedup_recal_cmd(data, prep_params, tmp_dir, out_file)
        realign_stage = _piped_realign_cmd(data, prep_params, tmp_dir)
        pipeline = "{extract} {dedup} {realign} > {target}".format(
            extract=extract_stage,
            dedup=dedup_stage,
            realign=realign_stage,
            target=tx_out_file,
        )
        description = "Piped post-alignment bamprep {0}".format(region)
        do.run_memory_retry(pipeline, description, data, region=region)