def _variant_checkpoints(samples):
    """Check sample configuration to identify required steps in analysis.

    Args:
        samples: collection of per-sample data items accepted by the ``dd``
            accessor functions. It is iterated once per checkpoint, so it
            must be re-iterable (e.g. a list).

    Returns:
        dict mapping checkpoint name to a bool:
            ``vc``  -- ``dd.get_variantcaller`` is truthy for at least one sample.
            ``hla`` -- ``dd.get_hlacaller`` is truthy for at least one sample.
    """
    # Generator expressions let any() stop at the first truthy sample instead
    # of materializing a full list of results first.
    return {
        "vc": any(dd.get_variantcaller(d) for d in samples),
        "hla": any(dd.get_hlacaller(d) for d in samples),
    }
def _variant_checkpoints(samples):
    """Check sample configuration to identify required steps in analysis.

    Args:
        samples: collection of per-sample data items accepted by the ``dd``
            accessor functions. It is iterated once per checkpoint, so it
            must be re-iterable (e.g. a list).

    Returns:
        dict mapping checkpoint name to a bool, each true when at least one
        sample satisfies the condition:
            ``vc``      -- ``dd.get_variantcaller`` is truthy.
            ``sv``      -- ``dd.get_svcaller`` is truthy.
            ``jointvc`` -- ``dd.get_jointcaller`` is truthy or ``"gvcf"`` is
                           in ``dd.get_tools_on``.
            ``hla``     -- ``dd.get_hlacaller`` is truthy.
    """
    # Generator expressions let any() stop at the first truthy sample instead
    # of materializing a full list of results first.
    return {
        "vc": any(dd.get_variantcaller(d) for d in samples),
        "sv": any(dd.get_svcaller(d) for d in samples),
        "jointvc": any(dd.get_jointcaller(d) or "gvcf" in dd.get_tools_on(d)
                       for d in samples),
        "hla": any(dd.get_hlacaller(d) for d in samples),
    }
def _variant_checkpoints(samples):
    """Check sample configuration to identify required steps in analysis.

    Args:
        samples: collection of per-sample data items accepted by the ``dd``
            accessor functions. It is iterated once per checkpoint, so it
            must be re-iterable (e.g. a list).

    Returns:
        dict mapping checkpoint name to a bool, each true when at least one
        sample satisfies the condition:
            ``vc``          -- ``dd.get_variantcaller`` is truthy.
            ``sv``          -- ``dd.get_svcaller`` is truthy.
            ``jointvc``     -- (``dd.get_jointcaller`` is truthy or ``"gvcf"``
                               is in ``dd.get_tools_on``) and ``dd.get_batch``
                               is truthy.
            ``hla``         -- ``dd.get_hlacaller`` is truthy.
            ``align``       -- ``dd.get_aligner`` or ``dd.get_bam_clean`` is
                               truthy.
            ``align_split`` -- ``dd.get_align_split_size`` is not ``False``
                               and ``dd.get_aligner`` is truthy.
    """
    # Generator expressions let any() stop at the first truthy sample instead
    # of materializing a full list of results first.
    return {
        "vc": any(dd.get_variantcaller(d) for d in samples),
        "sv": any(dd.get_svcaller(d) for d in samples),
        "jointvc": any((dd.get_jointcaller(d) or "gvcf" in dd.get_tools_on(d))
                       and dd.get_batch(d)
                       for d in samples),
        "hla": any(dd.get_hlacaller(d) for d in samples),
        "align": any(dd.get_aligner(d) or dd.get_bam_clean(d) for d in samples),
        # Positive form of `not all(split_size is False or not aligner)`:
        # some sample has an aligner and has not explicitly disabled splitting.
        "align_split": any(dd.get_align_split_size(d) is not False and dd.get_aligner(d)
                           for d in samples),
    }
def _variant_checkpoints(samples):
    """Check sample configuration to identify required steps in analysis.

    Args:
        samples: collection of per-sample data dicts accepted by the ``dd``
            accessor functions. It is iterated once per checkpoint, so it
            must be re-iterable (e.g. a list).

    Returns:
        dict mapping checkpoint name to a bool, each true when at least one
        sample satisfies the condition:
            ``vc``          -- ``dd.get_variantcaller`` is truthy or the
                               sample has a truthy ``"vrn_file"`` entry.
            ``sv``          -- ``dd.get_svcaller`` is truthy.
            ``jointvc``     -- ``dd.get_jointcaller`` is truthy or ``"gvcf"``
                               is in ``dd.get_tools_on``.
            ``hla``         -- ``dd.get_hlacaller`` is truthy.
            ``align``       -- ``dd.get_aligner`` or ``dd.get_bam_clean`` is
                               truthy.
            ``align_split`` -- ``dd.get_align_split_size`` is not ``False``
                               and ``dd.get_aligner`` is truthy.
            ``umi``         -- ``dd.get_umi_consensus`` is truthy.
            ``ensemble``    -- ``dd.get_ensemble`` is truthy.
            ``cancer``      -- ``dd.get_phenotype`` equals ``"tumor"``.
    """
    # Generator expressions throughout (previously only "cancer" used one):
    # any() can stop at the first truthy sample instead of materializing a
    # full list of results first.
    return {
        "vc": any(dd.get_variantcaller(d) or d.get("vrn_file") for d in samples),
        "sv": any(dd.get_svcaller(d) for d in samples),
        "jointvc": any(dd.get_jointcaller(d) or "gvcf" in dd.get_tools_on(d)
                       for d in samples),
        "hla": any(dd.get_hlacaller(d) for d in samples),
        "align": any(dd.get_aligner(d) or dd.get_bam_clean(d) for d in samples),
        # Positive form of `not all(split_size is False or not aligner)`:
        # some sample has an aligner and has not explicitly disabled splitting.
        "align_split": any(dd.get_align_split_size(d) is not False and dd.get_aligner(d)
                           for d in samples),
        "umi": any(dd.get_umi_consensus(d) for d in samples),
        "ensemble": any(dd.get_ensemble(d) for d in samples),
        "cancer": any(dd.get_phenotype(d) == "tumor" for d in samples),
    }
def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2=""):
    """Build the piped bwa mem command, optionally with alt-allele/HLA post-processing.

    When ``<ref_file>.alt`` exists and an HLA caller is configured for the
    sample, the bwa output is additionally piped through bwakit's
    ``bwa-postalt.js`` using a prefix inside an ``hla`` directory that is
    created next to ``out_file``.

    Commands for HLA post-processing:
        base=TEST
        run-HLA $base.hla > $base.hla.top
        cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all
        rm -f $base.hla.HLA*gt
        rm -f $base.hla.HLA*gz

    Args:
        data: sample dictionary; ``data["config"]`` and ``data["rgnames"]``
            are read, and the presence of an ``"umi_bam"`` key is checked.
        out_file: alignment output path; only its directory and base name
            are used, to derive the HLA output prefix.
        ref_file: reference path handed to bwa; ``ref_file + ".alt"`` is
            probed for the alt-allele definitions.
        fastq1: first input read file.
        fastq2: optional second input read file; when empty, ``-p`` is
            passed to bwa mem.

    Returns:
        The command line as a string. Nothing is executed here.
    """
    alt_file = ref_file + ".alt"
    alt_cmd = ""
    if utils.file_exists(alt_file) and dd.get_hlacaller(data):
        # k8 and bwa-postalt.js are looked up in the directory holding run-bwamem.
        bwakit_dir = os.path.dirname(os.path.realpath(utils.which("run-bwamem")))
        hla_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla"))
        hla_base = os.path.join(hla_dir, os.path.basename(out_file) + ".hla")
        alt_cmd = " | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}".format(
            bwakit_dir=bwakit_dir, hla_base=hla_base, alt_file=alt_file)

    use_sentieon = dd.get_aligner(data) == "sentieon-bwa"
    bwa_exe = "sentieon-bwa" if use_sentieon else "bwa"
    exports = sentieon.license_export(data) if use_sentieon else ""
    bwa = config_utils.get_program(bwa_exe, data["config"])
    num_cores = data["config"]["algorithm"].get("num_cores", 1)

    # Extra user-supplied options from the "bwa" resources section, if any.
    extra_options = config_utils.get_resources("bwa", data["config"]).get("options", [])
    bwa_params = " ".join(str(opt) for opt in extra_options)
    rg_info = novoalign.get_rg_info(data["rgnames"])

    # For UMI runs, pass along consensus tags
    c_tags = "-C" if "umi_bam" in data else ""
    # No second FASTQ supplied: hand bwa the -p flag for the single input.
    pairing = "" if fastq2 else "-p"
    # Restrict seed occurrences to 1/2 of default, manage memory usage for centromere repeats in hg38
    # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/
    # http://ehc.ac/p/bio-bwa/mailman/message/32268544/
    mem_usage = "-c 250"

    bwa_tmpl = ("{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' "
                "-v 1 {ref_file} {fastq1} {fastq2} ")
    bwa_cmd = bwa_tmpl.format(
        exports=exports, bwa=bwa, pairing=pairing, c_tags=c_tags,
        mem_usage=mem_usage, num_cores=num_cores, bwa_params=bwa_params,
        rg_info=rg_info, ref_file=ref_file, fastq1=fastq1, fastq2=fastq2)
    return bwa_cmd + alt_cmd
def _variant_checkpoints(samples):
    """Check sample configuration to identify required steps in analysis.

    Args:
        samples: collection of per-sample data dicts accepted by the ``dd``
            accessor functions. It is iterated once per checkpoint, so it
            must be re-iterable (e.g. a list).

    Returns:
        dict mapping checkpoint name to a bool, each true when at least one
        sample satisfies the condition:
            ``vc``          -- ``dd.get_variantcaller`` is truthy or the
                               sample has a truthy ``"vrn_file"`` entry.
            ``sv``          -- ``dd.get_svcaller`` is truthy.
            ``jointvc``     -- ``dd.get_jointcaller`` is truthy or ``"gvcf"``
                               is in ``dd.get_tools_on``.
            ``hla``         -- ``dd.get_hlacaller`` is truthy.
            ``align``       -- ``dd.get_aligner`` or ``dd.get_bam_clean`` is
                               truthy.
            ``align_split`` -- ``dd.get_align_split_size`` is not ``False``
                               and ``dd.get_aligner`` is truthy.
            ``archive``     -- ``dd.get_archive`` is truthy.
            ``umi``         -- ``dd.get_umi_consensus`` is truthy.
            ``ensemble``    -- ``dd.get_ensemble`` is truthy.
            ``cancer``      -- ``dd.get_phenotype`` equals ``"tumor"``.
    """
    # Generator expressions throughout (previously only "cancer" used one):
    # any() can stop at the first truthy sample instead of materializing a
    # full list of results first.
    return {
        "vc": any(dd.get_variantcaller(d) or d.get("vrn_file") for d in samples),
        "sv": any(dd.get_svcaller(d) for d in samples),
        "jointvc": any(dd.get_jointcaller(d) or "gvcf" in dd.get_tools_on(d)
                       for d in samples),
        "hla": any(dd.get_hlacaller(d) for d in samples),
        "align": any(dd.get_aligner(d) or dd.get_bam_clean(d) for d in samples),
        # Positive form of `not all(split_size is False or not aligner)`:
        # some sample has an aligner and has not explicitly disabled splitting.
        "align_split": any(dd.get_align_split_size(d) is not False and dd.get_aligner(d)
                           for d in samples),
        "archive": any(dd.get_archive(d) for d in samples),
        "umi": any(dd.get_umi_consensus(d) for d in samples),
        "ensemble": any(dd.get_ensemble(d) for d in samples),
        "cancer": any(dd.get_phenotype(d) == "tumor" for d in samples),
    }
def _get_bwa_mem_cmd(data, out_file, ref_file, fastq1, fastq2=""):
    """Build the piped bwa mem command, optionally with alt-allele/HLA post-processing.

    When ``<ref_file>.alt`` exists and an HLA caller is configured for the
    sample, the bwa output is additionally piped through bwakit's
    ``bwa-postalt.js`` using a prefix inside an ``hla`` directory that is
    created next to ``out_file``.

    Commands for HLA post-processing:
        base=TEST
        run-HLA $base.hla > $base.hla.top
        cat $base.hla.HLA*.gt | grep ^GT | cut -f2- > $base.hla.all
        rm -f $base.hla.HLA*gt
        rm -f $base.hla.HLA*gz

    Args:
        data: sample dictionary; ``data["config"]`` and ``data["rgnames"]``
            are read, and the presence of an ``"umi_bam"`` key is checked.
        out_file: alignment output path; only its directory and base name
            are used, to derive the HLA output prefix.
        ref_file: reference path handed to bwa; ``ref_file + ".alt"`` is
            probed for the alt-allele definitions.
        fastq1: first input read file.
        fastq2: optional second input read file; when empty, ``-p`` is
            passed to bwa mem.

    Returns:
        The command line as a string. Nothing is executed here.
    """
    alt_file = ref_file + ".alt"
    alt_cmd = ""
    if utils.file_exists(alt_file) and dd.get_hlacaller(data):
        # k8 and bwa-postalt.js are looked up in the directory holding run-bwamem.
        bwakit_dir = os.path.dirname(os.path.realpath(utils.which("run-bwamem")))
        hla_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "hla"))
        hla_base = os.path.join(hla_dir, os.path.basename(out_file) + ".hla")
        alt_cmd = " | {bwakit_dir}/k8 {bwakit_dir}/bwa-postalt.js -p {hla_base} {alt_file}".format(
            bwakit_dir=bwakit_dir, hla_base=hla_base, alt_file=alt_file)

    use_sentieon = dd.get_aligner(data) == "sentieon-bwa"
    bwa_exe = "sentieon-bwa" if use_sentieon else "bwa"
    exports = sentieon.license_export(data) if use_sentieon else ""
    bwa = config_utils.get_program(bwa_exe, data["config"])
    num_cores = data["config"]["algorithm"].get("num_cores", 1)

    # Extra user-supplied options from the "bwa" resources section, if any.
    extra_options = config_utils.get_resources("bwa", data["config"]).get("options", [])
    bwa_params = " ".join(str(opt) for opt in extra_options)
    rg_info = novoalign.get_rg_info(data["rgnames"])

    # For UMI runs, pass along consensus tags
    c_tags = "-C" if "umi_bam" in data else ""
    # No second FASTQ supplied: hand bwa the -p flag for the single input.
    pairing = "" if fastq2 else "-p"
    # Restrict seed occurrences to 1/2 of default, manage memory usage for centromere repeats in hg38
    # https://sourceforge.net/p/bio-bwa/mailman/message/31514937/
    # http://ehc.ac/p/bio-bwa/mailman/message/32268544/
    mem_usage = "-c 250"

    bwa_tmpl = ("{exports}{bwa} mem {pairing} {c_tags} {mem_usage} -M -t {num_cores} {bwa_params} -R '{rg_info}' "
                "-v 1 {ref_file} {fastq1} {fastq2} ")
    bwa_cmd = bwa_tmpl.format(
        exports=exports, bwa=bwa, pairing=pairing, c_tags=c_tags,
        mem_usage=mem_usage, num_cores=num_cores, bwa_params=bwa_params,
        rg_info=rg_info, ref_file=ref_file, fastq1=fastq1, fastq2=fastq2)
    return bwa_cmd + alt_cmd
def hla_on(data):
    """Report whether HLA typing applies to this sample.

    Returns the falsy result of ``has_hla(data)`` when that check fails;
    otherwise returns whatever ``dd.get_hlacaller(data)`` yields, so the
    result is truthy only when both conditions hold.
    """
    hla_available = has_hla(data)
    if not hla_available:
        return hla_available
    return dd.get_hlacaller(data)