def _square_batch_bcbio_variation(data, region, bam_files, vrn_files, out_file,
                                  todo="square"):
    """Run squaring or merging analysis using bcbio.variation.recall.

    Writes the unique, sorted variant files (followed by the unique, sorted
    BAM files when squaring) to ``<out_file without extension>-inputs.txt``
    and runs ``bcbio-variation-recall <todo>`` on that input list.

    Args:
        data: sample dictionary; read for the reference FASTA, the number of
            cores, the program resources and, when squaring, the joint caller.
        region: region converted with ``bamprep.region_to_gatk`` for ``-r``.
        bam_files: BAM file paths; only written to the inputs when squaring.
        vrn_files: variant file paths.
        out_file: output path handed to the command line.
        todo: sub-command to run; "square" also adds the BAMs and ``--caller``.

    Returns:
        ``out_file``.
    """
    ref_file = tz.get_in(("reference", "fasta", "base"), data)
    cores = tz.get_in(("config", "algorithm", "num_cores"), data, 1)
    resources = config_utils.get_resources("bcbio-variation-recall", data["config"])
    # adjust memory by cores but leave room for run program memory
    memcores = int(math.ceil(float(cores) / 5.0))
    jvm_opts = config_utils.adjust_opts(
        resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"]),
        {"algorithm": {"memory_adjust": {"direction": "increase",
                                         "magnitude": memcores}}})
    # Write unique VCFs and BAMs to input file
    input_file = "%s-inputs.txt" % os.path.splitext(out_file)[0]
    with open(input_file, "w") as out_handle:
        out_handle.write("\n".join(sorted(set(vrn_files))) + "\n")
        if todo == "square":
            out_handle.write("\n".join(sorted(set(bam_files))) + "\n")
    gatk_region = bamprep.region_to_gatk(region)
    cmd = ["bcbio-variation-recall", todo] + jvm_opts + broad.get_default_jvm_opts() + \
          ["-c", cores, "-r", gatk_region]
    if todo == "square":
        # Only resolve the joint caller when it is used: the lookup yields
        # None when the key is absent, and calling .replace on that would
        # fail for runs that never pass --caller.
        variantcaller = tz.get_in(("config", "algorithm", "jointcaller"), data).replace("-joint", "")
        cmd += ["--caller", variantcaller]
    cmd += [out_file, ref_file, input_file]
    bcbio_env = utils.get_bcbio_env()
    # Items such as the core count are not strings, so stringify while joining.
    cmd = " ".join(str(x) for x in cmd)
    do.run(cmd, "%s in region: %s" % (cmd, gatk_region), env=bcbio_env)
    return out_file
def _square_batch_bcbio_variation(data, region, bam_files, vrn_files, out_file,
                                  todo="square"):
    """Run squaring or merging analysis using bcbio.variation.recall.

    Writes the unique, sorted variant files (followed by the unique, sorted
    BAM files when squaring) to ``<out_file without extension>-inputs.txt``
    and runs ``bcbio-variation-recall <todo>`` on that input list. The
    command is handed to ``do.run`` as a list of arguments.

    Args:
        data: sample dictionary; read for the reference FASTA, the number of
            cores, the program resources and, when squaring, the joint caller.
        region: region converted with ``bamprep.region_to_gatk`` for ``-r``.
        bam_files: BAM file paths; only written to the inputs when squaring.
        vrn_files: variant file paths.
        out_file: output path handed to the command line.
        todo: sub-command to run; "square" also adds the BAMs and ``--caller``.

    Returns:
        ``out_file``.
    """
    ref_file = tz.get_in(("reference", "fasta", "base"), data)
    cores = tz.get_in(("config", "algorithm", "num_cores"), data, 1)
    resources = config_utils.get_resources("bcbio-variation-recall", data["config"])
    # adjust memory by cores but leave room for run program memory
    memcores = int(math.ceil(float(cores) / 5.0))
    jvm_opts = config_utils.adjust_opts(
        resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"]),
        {"algorithm": {"memory_adjust": {"direction": "increase",
                                         "magnitude": memcores}}})
    # Write unique VCFs and BAMs to input file
    input_file = "%s-inputs.txt" % os.path.splitext(out_file)[0]
    with open(input_file, "w") as out_handle:
        out_handle.write("\n".join(sorted(set(vrn_files))) + "\n")
        if todo == "square":
            out_handle.write("\n".join(sorted(set(bam_files))) + "\n")
    gatk_region = bamprep.region_to_gatk(region)
    cmd = ["bcbio-variation-recall", todo] + jvm_opts + broad.get_default_jvm_opts() + \
          ["-c", cores, "-r", gatk_region]
    if todo == "square":
        # Only resolve the joint caller when it is used: the lookup yields
        # None when the key is absent, and calling .replace on that would
        # fail for runs that never pass --caller.
        variantcaller = tz.get_in(("config", "algorithm", "jointcaller"), data).replace("-joint", "")
        cmd += ["--caller", variantcaller]
    cmd += [out_file, ref_file, input_file]
    do.run(cmd, "%s in region: %s" % (cmd, gatk_region))
    return out_file
def _do_smash_calldiff(truth_vcf, eval_vcf, ref_file, out_dir, config):
    """Compare a truth VCF against an evaluation VCF with SMaSH.

    The truth file is passed as ``--lhs_vcf`` and the evaluation file as
    ``--rhs_vcf``, together with the reference FASTA and ``--presorted``.
    JVM options come from the "smash" resources in ``config``.

    ``out_dir`` is accepted but not used by this function.
    """
    smash_resources = config_utils.get_resources("smash", config)
    java_opts = smash_resources.get("jvm_opts", ["-Xms250m", "-Xmx2g"])
    program = ["smash"] + java_opts + broad.get_default_jvm_opts()
    compare_args = [
        "--lhs_vcf", truth_vcf,
        "--rhs_vcf", eval_vcf,
        "--reference_fasta", ref_file,
        "--presorted",
    ]
    do.run(program + compare_args, "Compare files with SMaSH calldiff")
def bcbio_variation_comparison(config_file, base_dir, data):
    """Run a bcbio.variation ``variant-compare`` for an input configuration.

    The value returned by ``utils.safe_makedir`` for ``<base_dir>/tmp`` is
    handed to ``broad.get_default_jvm_opts``; JVM options come from the
    "bcbio_variation" resources in ``data["config"]``.
    """
    work_tmp = utils.safe_makedir(os.path.join(base_dir, "tmp"))
    tool_resources = config_utils.get_resources("bcbio_variation", data["config"])
    java_opts = tool_resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
    launcher = ["bcbio-variation"] + java_opts + broad.get_default_jvm_opts(work_tmp)
    compare_cmd = launcher + ["variant-compare", config_file]
    do.run(compare_cmd, "Comparing variant calls using bcbio.variation", data)
def _get_jvm_opts(data, out_file):
    """Retrieve JVM options when running the Java version of VarDict.

    Returns a shell prefix of the form ``export VAR_DICT_OPTS='...' && ``
    when ``get_vardict_command(data)`` is "vardict-java", otherwise an empty
    string. The directory of ``out_file`` is passed to
    ``broad.get_default_jvm_opts``.
    """
    if get_vardict_command(data) == "vardict-java":
        resources = config_utils.get_resources("vardict", data["config"])
        # Copy before extending: += on the list fetched from resources would
        # grow the configured jvm_opts in place on every call.
        jvm_opts = list(resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"]))
        jvm_opts += broad.get_default_jvm_opts(os.path.dirname(out_file))
        return "export VAR_DICT_OPTS='%s' && " % " ".join(jvm_opts)
    else:
        return ""
def _get_jvm_opts(data, out_file):
    """Retrieve JVM options when running the Java version of VarDict.

    Returns a shell prefix of the form ``export VAR_DICT_OPTS='...' && ``
    when the variant caller from ``dd.get_variantcaller(data)`` ends with
    "-java", otherwise an empty string. The directory of ``out_file`` is
    passed to ``broad.get_default_jvm_opts``.
    """
    if dd.get_variantcaller(data).endswith("-java"):
        resources = config_utils.get_resources("vardict", data["config"])
        # Copy before extending: += on the list fetched from resources would
        # grow the configured jvm_opts in place on every call.
        jvm_opts = list(resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"]))
        jvm_opts += broad.get_default_jvm_opts(os.path.dirname(out_file))
        return "export VAR_DICT_OPTS='%s' && " % " ".join(jvm_opts)
    else:
        return ""
def _get_jvm_opts(out_file, data):
    """Build the Java options for purple, scaling memory by available cores.

    Starts from the "purple" resource ``jvm_opts`` (or a built-in default),
    requests an "increase" memory adjustment with the core count as magnitude
    and a "30000M" maximum, then appends the default JVM options for the
    directory containing ``out_file``.
    """
    purple_resources = config_utils.get_resources("purple", data["config"])
    base_opts = purple_resources.get("jvm_opts", ["-Xms750m", "-Xmx3500m"])
    memory_adjust = {
        "direction": "increase",
        "maximum": "30000M",
        "magnitude": dd.get_cores(data),
    }
    adjusted = config_utils.adjust_opts(
        base_opts, {"algorithm": {"memory_adjust": memory_adjust}})
    out_dir = os.path.dirname(out_file)
    adjusted += broad.get_default_jvm_opts(out_dir)
    return adjusted
def _get_fgbio_jvm_opts(data, tmpdir, scale_factor=None):
    """Build the fgbio JVM option string, ending with ``--tmp-dir <tmpdir>``.

    Starts from the "fgbio" resource ``jvm_opts`` (or a built-in default).
    When ``scale_factor`` is set and the core count exceeds it, an "increase"
    memory adjustment with magnitude ``cores // scale_factor`` is applied.

    Args:
        data: sample dictionary; read for cores and ``data["config"]``.
        tmpdir: directory appended as ``--tmp-dir``.
        scale_factor: optional divisor of the core count for memory scaling.

    Returns:
        A single space-joined option string.
    """
    cores, _mem = _get_cores_memory(data)
    resources = config_utils.get_resources("fgbio", data["config"])
    # Copy before extending: without scaling, += on the list fetched from
    # resources would grow the configured jvm_opts in place on every call.
    jvm_opts = list(resources.get("jvm_opts", ["-Xms750m", "-Xmx4g"]))
    if scale_factor and cores > scale_factor:
        jvm_opts = config_utils.adjust_opts(
            jvm_opts,
            {"algorithm": {"memory_adjust": {"direction": "increase",
                                             "magnitude": cores // scale_factor}}})
    jvm_opts = jvm_opts + broad.get_default_jvm_opts()
    return " ".join(jvm_opts) + " --tmp-dir %s" % tmpdir
def _get_varscan_opts(config, tmp_dir):
    """Retrieve common options for running VarScan.

    Handles jvm_opts, setting user language and country to English (US) to
    avoid issues with different locales producing non-compliant VCF.

    Args:
        config: configuration holding the "varscan" resources.
        tmp_dir: directory passed to ``broad.get_default_jvm_opts``.

    Returns:
        A single space-joined string of JVM options.
    """
    resources = config_utils.get_resources("varscan", config)
    # The default pairs an initial heap (-Xms) with a maximum heap (-Xmx);
    # it previously listed -Xmx twice, leaving the initial heap unset.
    jvm_opts = resources.get("jvm_opts", ["-Xms750m", "-Xmx2g"])
    jvm_opts = config_utils.adjust_opts(
        jvm_opts,
        {"algorithm": {"memory_adjust": {"magnitude": 1.1,
                                         "direction": "decrease"}}})
    jvm_opts += ["-Duser.language=en", "-Duser.country=US"]
    jvm_opts += broad.get_default_jvm_opts(tmp_dir)
    return " ".join(jvm_opts)
def _get_jvm_opts(out_file, data):
    """Assemble purple's Java options with core-based memory scaling.

    The "purple" resource ``jvm_opts`` (or a built-in default) are passed
    through ``config_utils.adjust_opts`` with an "increase" adjustment whose
    magnitude is the core count and whose maximum is "30000M"; the default
    JVM options for ``out_file``'s directory are appended to the result.
    """
    configured = config_utils.get_resources("purple", data["config"]).get(
        "jvm_opts", ["-Xms750m", "-Xmx3500m"])
    adjust_spec = {"algorithm": {"memory_adjust": {}}}
    scaling = adjust_spec["algorithm"]["memory_adjust"]
    scaling["direction"] = "increase"
    scaling["maximum"] = "30000M"
    scaling["magnitude"] = dd.get_cores(data)
    java_opts = config_utils.adjust_opts(configured, adjust_spec)
    java_opts += broad.get_default_jvm_opts(os.path.dirname(out_file))
    return java_opts