示例#1
0
def _piped_input_cl(data, region, tmp_dir, out_base_file, prep_params):
    """Build the command line that streams input into the preparation step.

    When duplicates are marked with Picard, the extracted reads are first
    written to an intermediate ``-select`` file (skipped if it already
    exists), because MarkDuplicates makes multiple passes over its input.
    The returned command is then a MarkDuplicates call that reads that file
    and writes to ``/dev/stdout``. With no duplicate marking, the input file
    is ``data["work_bam"]`` and the returned command is the GATK
    read-extraction command itself.

    Returns a tuple of (input file, command line joined with spaces).
    Raises ValueError for any other truthy ``prep_params["dup"]`` value.
    """
    broad_runner = broad.runner_from_config(data["config"])
    cl = _gatk_extract_reads_cl(data, region, prep_params, tmp_dir)
    dup_method = prep_params["dup"]
    if dup_method == "picard":
        base, ext = os.path.splitext(out_base_file)
        sel_file = "%s-select%s" % (base, ext)
        if not utils.file_exists(sel_file):
            with file_transaction(sel_file) as tx_out_file:
                # Send the extracted reads to the transactional output file.
                cl += ["-o", tx_out_file]
                do.run_memory_retry(cl, "GATK: PrintReads", data, region=region)
        dup_metrics = "%s-dup.dup_metrics" % base
        if prep_params["realign"] == "gatk":
            compression = "5"
        else:
            compression = "0"
        picard_opts = [
            ("INPUT", sel_file),
            ("OUTPUT", "/dev/stdout"),
            ("METRICS_FILE", dup_metrics),
            ("PROGRAM_RECORD_ID", "null"),
            ("COMPRESSION_LEVEL", compression),
            ("TMP_DIR", tmp_dir),
        ]
        # From here on the command is the MarkDuplicates call, not PrintReads.
        cl = broad_runner.cl_picard("MarkDuplicates", picard_opts)
    elif not dup_method:
        sel_file = data["work_bam"]
    else:
        raise ValueError("Duplication approach not supported with GATK: %s" %
                         dup_method)
    bam.index(sel_file, data["config"])
    return sel_file, " ".join(cl)
示例#2
0
def _piped_input_cl(data, region, tmp_dir, out_base_file, prep_params):
    """Return the input file and command line feeding the preparation step.

    Three cases, selected by ``prep_params["dup"]``:

    - falsy: the input is ``data["work_bam"]`` and the command is the GATK
      read-extraction command.
    - ``"picard"``: the extracted reads are written to an intermediate
      ``-select`` file (unless it already exists), since MarkDuplicates makes
      multiple passes over its input; the command becomes a MarkDuplicates
      call writing to ``/dev/stdout``.
    - anything else: ValueError.

    ``bam.index`` is called on the selected input file before returning
    ``(sel_file, command string)``.
    """
    broad_runner = broad.runner_from_config(data["config"])
    cl = _gatk_extract_reads_cl(data, region, prep_params, tmp_dir)
    dup_approach = prep_params["dup"]

    # Reject unsupported duplicate-marking approaches up front.
    if dup_approach != "picard" and dup_approach:
        raise ValueError("Duplication approach not supported with GATK: %s" % dup_approach)

    if dup_approach == "picard":
        out_root, out_ext = os.path.splitext(out_base_file)
        sel_file = "%s-select%s" % (out_root, out_ext)
        if not utils.file_exists(sel_file):
            with file_transaction(sel_file) as tx_out_file:
                cl += ["-o", tx_out_file]
                do.run_memory_retry(cl, "GATK: PrintReads", data, region=region)
        dup_metrics = "%s-dup.dup_metrics" % out_root
        compression = "5" if prep_params["realign"] == "gatk" else "0"
        markdup_args = [("INPUT", sel_file),
                        ("OUTPUT", "/dev/stdout"),
                        ("METRICS_FILE", dup_metrics),
                        ("PROGRAM_RECORD_ID", "null"),
                        ("COMPRESSION_LEVEL", compression),
                        ("TMP_DIR", tmp_dir)]
        cl = broad_runner.cl_picard("MarkDuplicates", markdup_args)
    else:
        # No duplicate marking: stream straight from the working BAM.
        sel_file = data["work_bam"]

    bam.index(sel_file, data["config"])
    return sel_file, " ".join(cl)
示例#3
0
 def run_gatk(self, params, tmp_dir=None, log_error=True,
              memory_retry=False, data=None, region=None):
     """Run the GATK command line built from ``params``.

     A temporary directory from ``curdir_tmpdir`` is used when ``tmp_dir``
     is not supplied. The run description is ``"GATK: <name>"``, where the
     name is the argument following ``-T`` (or ``--analysis_type`` when
     ``-T`` is absent) in the generated command line.

     With ``memory_retry`` set, the command goes through
     ``do.run_memory_retry``; otherwise through ``do.run``, which also
     receives ``log_error``.
     """
     with curdir_tmpdir() as local_tmp_dir:
         work_dir = local_tmp_dir if tmp_dir is None else tmp_dir
         cl = self.cl_gatk(params, work_dir)
         # Find which spelling of the analysis-type flag is in use.
         atype_flag = "-T" if "-T" in cl else "--analysis_type"
         prog = cl[cl.index(atype_flag) + 1]
         descr = "GATK: {0}".format(prog)
         if not memory_retry:
             do.run(cl, descr, data, region=region, log_error=log_error)
         else:
             do.run_memory_retry(cl, descr, data, region=region)
示例#4
0
 def run_gatk(self, params, tmp_dir=None, log_error=True, memory_retry=False,
              data=None, region=None):
     """Build a GATK command line from ``params`` and execute it.

     Falls back to a directory from ``curdir_tmpdir`` when no ``tmp_dir``
     is given. The analysis name used in the run description is the value
     following ``-T``, or following ``--analysis_type`` if ``-T`` does not
     appear in the command line.

     ``memory_retry`` selects ``do.run_memory_retry`` over ``do.run``;
     ``log_error`` is only forwarded to ``do.run``.
     """
     with curdir_tmpdir() as local_tmp_dir:
         if tmp_dir is None:
             tmp_dir = local_tmp_dir
         cl = self.cl_gatk(params, tmp_dir)
         if "-T" in cl:
             atype_index = cl.index("-T")
         else:
             atype_index = cl.index("--analysis_type")
         label = "GATK: {0}".format(cl[atype_index + 1])
         # Pick the runner and the keyword arguments specific to it.
         if memory_retry:
             runner, extra = do.run_memory_retry, {}
         else:
             runner, extra = do.run, {"log_error": log_error}
         runner(cl, label, data, region=region, **extra)
示例#5
0
def _piped_bamprep_region_fullpipe(data, region, prep_params, out_file, tmp_dir):
    """Run fully piped BAM preparation using non-GATK/Picard tools.

    Builds the extract/recalibrate, dedup and realign command fragments,
    joins them into one command that redirects into the transactional
    output file for ``out_file``, and runs it via ``do.run_memory_retry``.
    """
    with file_transaction(out_file) as tx_out_file:
        extract_part = _piped_extract_recal_cmd(data, region, prep_params, tmp_dir)
        # The dedup step receives the final out_file, not the transactional path.
        dedup_part = _piped_dedup_recal_cmd(data, prep_params, tmp_dir, out_file)
        realign_part = _piped_realign_cmd(data, prep_params, tmp_dir)
        cmd = "{extract_recal_cmd} {dedup_cmd} {realign_cmd}  > {tx_out_file}".format(
            extract_recal_cmd=extract_part,
            dedup_cmd=dedup_part,
            realign_cmd=realign_part,
            tx_out_file=tx_out_file)
        descr = "Piped post-alignment bamprep {0}".format(region)
        do.run_memory_retry(cmd, descr, data, region=region)
示例#6
0
def _piped_bamprep_region_fullpipe(data, region, prep_params, out_file,
                                   tmp_dir):
    """Perform fully piped BAM preparation using non-GATK/Picard tools.

    The extract/recalibrate, dedup and realign command fragments are
    substituted into a single command whose output is redirected to the
    transactional file for ``out_file``; the result is executed with
    ``do.run_memory_retry``.
    """
    template = "{extract_recal_cmd} {dedup_cmd} {realign_cmd}  > {tx_out_file}"
    with file_transaction(out_file) as tx_out_file:
        parts = {"tx_out_file": tx_out_file}
        parts["extract_recal_cmd"] = _piped_extract_recal_cmd(
            data, region, prep_params, tmp_dir)
        # The dedup step is given out_file itself rather than tx_out_file.
        parts["dedup_cmd"] = _piped_dedup_recal_cmd(
            data, prep_params, tmp_dir, out_file)
        parts["realign_cmd"] = _piped_realign_cmd(data, prep_params, tmp_dir)
        do.run_memory_retry(template.format(**parts),
                            "Piped post-alignment bamprep {0}".format(region),
                            data, region=region)