Пример #1
0
def external_script(_command):
    """Run ``_command`` through ``shell`` with selected caller variables exported as JSON.

    The variables named in ``VARS_TO_PASS`` are read from the locals of the
    *calling* frame, written to a temporary JSON file, and the path of that
    file is published in the environment variable named by
    ``PARAMETER_PASSING_ENVVAR`` while the command runs.

    ``os``, ``shell`` and ``PARAMETER_PASSING_ENVVAR`` are not defined in this
    function and are expected to exist at module level.

    Parameters
    ----------
    _command : str
        Command handed unchanged to ``shell``.

    Raises
    ------
    KeyError
        If a name in ``VARS_TO_PASS`` is not a local variable of the caller.
    """
    import inspect, json, tempfile

    # Names of the caller's local variables to hand over.
    VARS_TO_PASS = '******'.split()

    # f_back is the frame of whoever called external_script().
    callerlocal = inspect.currentframe().f_back.f_locals
    callerglobal = inspect.currentframe().f_back.f_globals
    packed = {}
    for var in VARS_TO_PASS:
        # Integers are stored as-is; every other value must provide
        # allitems(), whose result is materialised as a list for json.
        if isinstance(callerlocal[var], int):
            packed[var] = callerlocal[var]
        else:
            packed[var] = list(callerlocal[var].allitems())

    with tempfile.NamedTemporaryFile(mode='wt') as tmpfile:
        json.dump(packed, tmpfile)
        # Flush so the JSON is on disk before the command opens the file by name.
        tmpfile.flush()
        os.environ[PARAMETER_PASSING_ENVVAR] = tmpfile.name

        try:
            # Copy the caller's namespace into this frame's locals mapping,
            # presumably so shell() can resolve {placeholders} in _command
            # against the caller's names.
            # NOTE(review): writing through locals() is implementation-
            # dependent; confirm it still has an effect on the targeted
            # Python version.
            locals().update(callerglobal)
            locals().update(callerlocal)
            shell(_command)
        finally:
            # Always withdraw the variable, even if the command failed.
            del os.environ[PARAMETER_PASSING_ENVVAR]
Пример #2
0
def get_cuda_arch():
    '''
    Return the GPU architectures of the NVIDIA cards found on this machine.

    The output of ``lspci -nn`` is searched for the PCI device IDs listed in
    the per-architecture ``*_DEVICES`` files (one tab-separated
    ``model<TAB>pciid`` entry per line, blank lines ignored). Every
    architecture with at least one matching ID is reported, so cubins can be
    built for all GPUs detected.

    If your GPU is not detected correctly, update the *_DEVICES files by
    referring to https://pci-ids.ucw.cz/v2.2/pci.ids and make a pull request!

    Returns a list of architecture names ordered as in ``supported_archs``;
    the list is empty when nothing matched.
    '''
    supported_archs = ['MAXWELL', 'KEPLER', 'FERMI']
    lspci_output = str(shell('lspci -nn', read=True))
    detected = set()
    for arch in supported_archs:
        with open(arch + '_DEVICES', 'r') as device_file:
            for raw in device_file:
                entry = raw.strip()
                if entry:
                    model, pciid = entry.split('\t')
                    # IDs are listed as 0x....; lspci prints them without the prefix.
                    if pciid.replace('0x', '') in lspci_output:
                        detected.add(arch)
    # Sorting an empty set already yields [], so no special case is needed.
    return sorted(detected, key=supported_archs.index)
Пример #3
0
def fasta_postprocess(origfn, newfn):
    """
    Concatenate the FASTA files of a tarball into one gzipped FASTA.

    The fasta from UCSC comes as a tarball of fastas. So we extract them all to
    a temp directory (``<tarball>.tmp``) and then cat them all together, in
    sorted filename order, into the final fa.gz file. The temp directory is
    removed afterwards, also when extraction or writing fails.

    Parameters
    ----------
    origfn : list of str
        Single-element list holding the path of the tarball.

    newfn : str
        Path of the gzipped FASTA to write.

    Raises
    ------
    AssertionError
        If `origfn` is not a list with exactly one element.
    """
    import os
    import shutil

    assert (
        (isinstance(origfn, list)) and (len(origfn) == 1)
    ), 'unexpected input: %s' % origfn
    origfn = origfn[0]
    tmpdir = origfn + '.tmp'
    # Library calls instead of `mkdir -p` / `rm -r` through the shell, so paths
    # with spaces or shell metacharacters are handled correctly.
    os.makedirs(tmpdir, exist_ok=True)
    try:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this on tarballs from a trusted source.
        with tarfile.open(origfn) as t:
            t.extractall(tmpdir)
        with gzip.open(newfn, 'wt') as fout:
            for fa in sorted(glob.glob(tmpdir + '/*.fa')):
                print(fa)
                with open(fa) as fin:
                    # Stream the file instead of loading a whole chromosome
                    # into memory at once.
                    shutil.copyfileobj(fin, fout)
    finally:
        shutil.rmtree(tmpdir)
Пример #4
0
def rscript(string, scriptname, log=None):
    """
    Write an R script to disk and execute it with ``Rscript``.

    Parameters
    ----------
    string : str
        Complete text of the R script (an already filled-in template).

    scriptname : str
        Path the script is written to before it is run.

    log : str
        Optional file receiving both stdout and stderr of the run. When it is
        None (or empty) the output is not redirected.
    """
    # `scriptname` and `_log` are referenced by name in the command template
    # below, so these two names must not change.
    _log = '> {0} 2>&1'.format(log) if log else ""
    with open(scriptname, 'w') as script_file:
        script_file.write(string)
    shell('Rscript {scriptname} {_log}')
Пример #5
0
from snakemake.shell import shell
from os import path

# Log redirection fragment (stdout and stderr requested); it is appended to
# the shell command at the end of this script.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

def inputCmd(subsets):
    """Build the ``-a ... -b ...`` argument string from the first two entries of *subsets*.

    The string is also printed to stdout before it is returned. Entries past
    the second one are ignored.
    """
    first, second = subsets[0], subsets[1]
    arguments = "-a {} -b {}".format(first, second)
    print(arguments)
    return arguments

# Collect the rule parameters that are substituted into the command below.
quant_subsets = snakemake.params.quant_subsets
input_cmd = inputCmd(quant_subsets)
output_path = snakemake.params.output_path
min_cov = snakemake.params.min_cov
min_sam = snakemake.params.min_sam

# Run whippet-delta. The adjacent string literals are concatenated into one
# command line; {log} comes last so the redirection covers the whole call.
shell("whippet-delta.jl "
"{input_cmd} "
"-o {output_path} "
"-r {min_cov} "
"-s {min_sam} "
"{log}")
Пример #6
0
# list to a string, here we detect single-end reads by checking if input.fastq
# is a string.
# A string means a single FASTQ (passed with -U); otherwise exactly two files
# are required and passed as mates with -1/-2.
if isinstance(snakemake.input.fastq, str):
    fastqs = '-U {0} '.format(snakemake.input.fastq)
else:
    assert len(snakemake.input.fastq) == 2
    fastqs = '-1 {0} -2 {1} '.format(*snakemake.input.fastq)

# Figure out the prefix based on the input index, which has the format
#
#   prefix.N.ht2
#
# where N is [1-8]. We strip off the .N.ht2 and ensure the remaining prefixes
# are the same.
#
prefixes = list(set(map(lambda x: '.'.join(x.split('.')[:-2]), snakemake.input.index)))
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.input.index)
prefix = prefixes[0]

# Align with hisat2, writing SAM to "<output>.sam". `extra` and `log` are not
# defined in this excerpt and are expected to be set earlier in the script.
shell(
    "hisat2 "
    "-x {prefix} "
    "{fastqs} "
    "--threads {snakemake.threads} "
    "{extra} "
    "-S {snakemake.output}.sam "
    "{log}"
)

# Convert the intermediate SAM to BAM at the declared output path; the SAM is
# deleted only when the conversion succeeded (&&).
shell("samtools view -Sb {snakemake.output}.sam > {snakemake.output} && rm {snakemake.output}.sam")
Пример #7
0
__author__ = "Jusitn fear"
__copyright__ = "Copyright 2016, Justin Fear"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Optional extra command-line arguments; empty when the rule defines none.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the log file when the rule declares one.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# Mark duplicates in the input BAM, writing the processed BAM and the
# metrics file to the rule's named outputs.
shell(
    "picard MarkDuplicates "
    "I={snakemake.input.bam} "
    "O={snakemake.output.bam} "
    "{extra} "
    "M={snakemake.output.metrics} "
    "{log}"
)
Пример #8
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-14"
__version__ = 0.1

from snakemake.shell import shell

# Call peaks with the macs2 executable found in params.macs2_dir: ChIP sample
# (-t) against its input control (-c), named after the `sample` wildcard.
# The trailing backslashes are Python line continuations inside the string
# literal, so the command reaches the shell as a single line.
shell("""
            {snakemake.params.macs2_dir}/macs2 callpeak -B \
                                              -t {snakemake.input.chip} \
                                              -c {snakemake.input.input} \
                                              -n {snakemake.wildcards.sample} \
                                              -f {snakemake.params.format} \
                                              --nomodel \
                                              --extsize {snakemake.params.extsize} \
                                              --outdir {snakemake.output}
      """)
Пример #9
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Index the first input file with samtools; the rule's params are passed
# through as options and the first output is given as the index file name.
shell("samtools index {snakemake.params} {snakemake.input[0]} {snakemake.output[0]}")
Пример #10
0
# Directories of the first log file and of the first output file.
# NOTE(review): output_dir is not referenced by the command below.
log_dir = os.path.dirname(snakemake.log[0])
output_dir = os.path.dirname(snakemake.output[0])

# sample basename
basename = os.path.splitext(os.path.basename(snakemake.input.bam[0]))[0]

# Space-separated task ids 0 .. threads-1, fed to GNU parallel after ":::".
split_inputs = " ".join(str(x) for x in range(0, int(snakemake.threads)))

# Three commands run inside one subshell, separated by newlines:
#   1. make_examples, one task per id via `parallel` ("{{}}" is an escaped
#      "{}" that parallel replaces with the task id),
#   2. dv_call_variants.py on the examples written to the temp directory,
#   3. postprocess_variants, producing the VCF and gVCF outputs.
# Intermediate files live in tmp_dir, which is removed when the with-block
# exits. `log` is expected to be defined earlier in the script.
with tempfile.TemporaryDirectory() as tmp_dir:
    shell(
        "(BIN_DIR=$(ls -d $CONDA_DEFAULT_ENV/share/deepvariant*/binaries/Deepvariant/*/DeepVariant*) \n"
        "parallel --eta --halt 2 --joblog {log_dir}/log --res {log_dir} "
        "python $BIN_DIR/make_examples.zip "
        "--mode calling --ref {snakemake.input.ref} --reads {snakemake.input.bam} "
        "--examples {tmp_dir}/{basename}.tfrecord@{snakemake.threads}.gz "
        "--gvcf {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--task {{}} "
        "::: {split_inputs} \n"
        "dv_call_variants.py "
        "--cores {snakemake.threads} "
        "--outfile {tmp_dir}/{basename}.tmp "
        "--sample {basename} "
        "--examples {tmp_dir} "
        "--model {snakemake.params.model} \n"
        "python $BIN_DIR/postprocess_variants.zip "
        "--ref {snakemake.input.ref} "
        "--infile {tmp_dir}/{basename}.tmp "
        "--outfile {snakemake.output.vcf} "
        "--nonvariant_site_tfrecord_path {tmp_dir}/{basename}.gvcf.tfrecord@{snakemake.threads}.gz "
        "--gvcf_outfile {snakemake.output.gvcf} ) {log}")
Пример #11
0
from snakemake.shell import shell
# Three log fragments: the default one, and two requested with append=True
# that carry only stdout (stderr=False) or only stderr (stdout=False).
initial_log = snakemake.get_log()
stdout_log = snakemake.get_log(stderr=False, append=True)
stderr_log = snakemake.get_log(stdout=False, append=True)
# Copy input to output, then write to the log several times. Going by the
# echoed text, the second use of {initial_log} is expected to truncate what
# the first one wrote, while the append fragments add to the file.
shell('''
      cat {snakemake.input} > {snakemake.output}
      echo "should not appear since next line truncates" {initial_log}
      echo "first line" {initial_log}
      (>&2 echo "a stderr message") {stderr_log}
      (echo "a stdout message") {stdout_log}
      ''')
Пример #12
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Log redirection fragment (stdout and stderr requested), appended to the
# command below.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Samtools takes additional threads through its option -@
# One thread for samtools index
# Other threads are *additional* threads passed to the '-@' argument
threads = "" if snakemake.threads <= 1 else " -@ {} ".format(
    snakemake.threads - 1)

# Index the first input, naming the first output as the index file.
shell(
    "samtools index {threads} {snakemake.params} {snakemake.input[0]} {snakemake.output[0]} {log}"
)
Пример #13
0
__author__ = "David Laehnemann, Victoria Sack"
__copyright__ = "Copyright 2018, David Laehnemann, Victoria Sack"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Convert the first input BAM to FASTQ with `samtools bam2fq`. The FASTQ is
# written to stdout and redirected into the first output file; the rule's
# params are passed through as options and the rule's thread count is given
# to -@.
shell("samtools bam2fq {snakemake.params} "
      " -@ {snakemake.threads} "
      " {snakemake.input[0]}"
      " >{snakemake.output[0]} ")
Пример #14
0
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.1"

# Imports
from snakemake.shell import shell

# Shortcuts
# Shortcuts
# `opt` holds optional extra arguments and defaults to an empty string.
opt = snakemake.params.get("opt", "")
bam_input = snakemake.input.bam
bam_output = snakemake.output.bam

# Run shell command
# "&>" sends both stdout and stderr of the tool to the rule's log.
shell(
    "pyBioTools Alignment Split {opt} -i {bam_input} -l {bam_output} --verbose &> {snakemake.log}"
)
Пример #15
0
"""Snakemake wrapper for ProSolo single-cell-bulk calling"""

__author__ = "David Lähnemann"
__copyright__ = "Copyright 2020, David Lähnemann"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Log redirection fragment (stdout and stderr requested).
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Run prosolo inside a subshell so that {log} applies to the whole command.
# The three positional arguments are the single-cell, bulk and reference
# inputs, in that order.
shell(
    "( prosolo single-cell-bulk "
    "--omit-indels "
    " {snakemake.params.extra} "
    "--candidates {snakemake.input.candidates} "
    "--output {snakemake.output} "
    "{snakemake.input.single_cell} "
    "{snakemake.input.bulk} "
    "{snakemake.input.ref} ) "
    "{log} "
)
Пример #16
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Pipe `samtools mpileup` (uncompressed BCF) straight into `bcftools call`,
# which writes the first output file. The pipeline runs in a subshell so the
# stderr of both tools is captured in the rule's log.
shell(
    "(samtools mpileup {snakemake.params.mpileup} {snakemake.input.samples} "
    "--fasta-ref {snakemake.input.ref} --BCF --uncompressed | "
    "bcftools call -m {snakemake.params.call} -o {snakemake.output[0]} -v -) 2> {snakemake.log}")
Пример #17
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Log redirection fragment (stdout and stderr requested).
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Optional user arguments (empty by default) and the java options computed by
# the helper from the snakemake object.
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
# Accept a single path as well as a collection, then turn every input into
# its own "INPUT=<path>" argument.
bams = snakemake.input
if isinstance(bams, str):
    bams = [bams]
bams = list(map("INPUT={}".format, bams))

shell("picard MarkDuplicates "  # Tool and its subcommand
      "{java_opts} "  # Automatic java option
      "{extra} "  # User defined parameters
      "{bams} "  # Input bam(s)
      "OUTPUT={snakemake.output.bam} "  # Output bam
      "METRICS_FILE={snakemake.output.metrics} "  # Output metrics
      "{log}"  # Logging
      )
Пример #18
0

# Log redirection fragment (stdout and stderr requested); it is not used in
# the part of the script handled here.
log=snakemake.log_fmt_shell(stdout=True,stderr=True)
# Description of the region(s) to extract; 'catg' selects the branch.
target=snakemake.params.target


if target['catg']=="expr":
	# Single region: cut ref:start-end out of the BAM, sort it into the first
	# output file and index that file.
	ref=target['ref']
	start=target['start']
	end=target['end']
	bamfile=snakemake.input.bam

	# NamedTemporaryFile creates the scratch file itself (no name-guessing
	# race as with mktemp) and removes it when the block exits, also when one
	# of the commands fails.
	with tempfile.NamedTemporaryFile(suffix=".bam") as tmp:
		tmpfile=tmp.name
		shell(
			"samtools view -b {bamfile} {ref}:{start}-{end} > {tmpfile} ;"
			"samtools sort {tmpfile} >  {snakemake.output[0]} ;"
			"samtools index {snakemake.output[0]} "
		)


if target['catg']=="fusion":
	# Fusion targets carry two regions, 'r1' and 'r2', each with its own
	# ref/start/end; reads come from the chimeric input file.
	ref1=target['r1']['ref']
	start1=target['r1']['start']
	end1=target['r1']['end']
	bamfile=snakemake.input.chimeric

	ref2=target['r2']['ref']
	start2=target['r2']['start']
	end2=target['r2']['end']

	# NOTE(review): tempfile.mktemp() only returns a name and is documented
	# as unsafe; the code that uses and cleans up this file is not visible
	# in this excerpt.
	tmpfile=tempfile.mktemp()
Пример #19
0
# Accept a single path as well as a list of paths. `fastq`,
# `fragment_length`, `standard_deviation`, `extra` and `log` are expected to
# be defined earlier in the script (not visible in this excerpt).
fastq = [fastq] if isinstance(fastq, str) else fastq
if len(fastq) > 2:
    raise RuleInputException(
        'Your sequencing read should be single-read or paired-end.')

# Anything other than exactly two files is run in single-end mode, which is
# rejected when either of the two option strings is empty.
single_flag = '' if len(fastq) == 2 else '--single'
if single_flag and (fragment_length == '' or standard_deviation == ''):
    raise RuleParameterException(
        'Please provide fragment length(-l) and standard deviation(-s) parameter for single-end reads.'
    )
fastq = ' '.join(fastq)

index = snakemake.input.index
threads = snakemake.threads

# kallisto writes into a directory, so use the one holding the first output.
output_directory = path.dirname(snakemake.output[0])

# Execute shell command.
# The parentheses run kallisto in a subshell so {log} covers the whole call.
shell("("
      "kallisto quant "
      "-i {index} "
      "-o {output_directory} "
      "-t {threads} "
      "{fragment_length} "
      "{standard_deviation} "
      "{single_flag} "
      "{extra} "
      "{fastq}"
      ")"
      "{log}")
Пример #20
0
__author__ = "Max Cummins"
__copyright__ = "Copyright 2021, Max Cummins"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from os import path

# Only stderr is requested for the log fragment: stdout is redirected into
# the output file by the command itself.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Run assembly-stats on the assembly input and store its stdout as the
# assembly_stats output.
shell("assembly-stats"
      " {snakemake.params.extra}"
      " {snakemake.input.assembly}"
      " > {snakemake.output.assembly_stats}"
      " {log}")
Пример #21
0
__author__ = "Jack Zhu"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Load the STAR module and run STAR-Fusion on Chimeric.out.junction in the
# working directory. "${{SLURM_JOBID}}" uses doubled braces so the command
# template leaves a literal ${SLURM_JOBID} for the shell to expand.
shell("""
    module load STAR/2.5.1b
    STAR-Fusion --genome_lib_dir {snakemake.params.genome_lib_dir} \
                 -J Chimeric.out.junction \
                 --output_dir {snakemake.params.star_fusion_outdir} \
                 --tmpdir /lscratch/${{SLURM_JOBID}}
""")

Пример #22
0
# Extract parameters.
extra = snakemake.params.get('extra', '')

# Extract required inputs.
input_file = snakemake.input[0]
input_command = '-i %s' % input_file

# Extract optional inputs.
output_file = snakemake.output[0]
output_command = '-o %s' % output_file

# Extract user parameters.
user_parameters = []
user_parameters.append(optionify_params('gsize', '--gsize'))
user_parameters.append(optionify_params('tsize', '--tsize'))
user_parameters.append(optionify_params('pvalue', '--pvalue'))
user_parameters.append(optionify_params('keep_dup', '--keep_dup'))
user_parameters.append(optionify_params('verbose', '--verbose'))
user_parameters = ' '.join([p for p in user_parameters if not p != ''])

# Execute shell command.
shell("("
      "macs2 filterdup "
      "{input_command} "
      "{output_command} "
      "{user_parameters} "
      "{extra} "
      ") "
      "{log}")
Пример #23
0
# Optional extra command-line arguments; empty when the rule defines none.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the log file when the rule declares one.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# The index files to be produced are named
#
#   prefix.N.bt2
#
# with N in [1-4]. Dropping the last two dot-separated fields (.N.bt2) from
# every declared output must leave exactly one common prefix, which is what
# bowtie2-build expects as its target.
#
prefixes = list({'.'.join(name.split('.')[:-2]) for name in snakemake.output})
assert len(prefixes) == 1, 'Multiple prefixes detected from "{0}"'.format(snakemake.output)
prefix = prefixes[0]

shell(
    "bowtie2-build "
    "--threads {snakemake.threads} "
    "{extra} "
    "{snakemake.input} "
    "{prefix} "
    "{log}"
)
Пример #24
0
__author__ = "Adrien Leger"
__copyright__ = "Copyright 2019, Adrien Leger"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "0.0.3"

# Imports
from snakemake.shell import shell
import os

# Shortcuts
# `opt` holds optional extra arguments and defaults to an empty string.
opt = snakemake.params.get("opt", "")
ref = snakemake.input.ref
index_dir = snakemake.output.index_dir
# Make sure the index directory exists before salmon writes into it.
os.makedirs(index_dir, exist_ok=True)

# Run shell command
# "&>" sends both stdout and stderr of salmon to the rule's log.
shell(
    "salmon index {opt} -p {snakemake.threads} -t {ref} -i {index_dir} &> {snakemake.log}"
)
Пример #25
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


import os
from snakemake.shell import shell


# First output path without its extension, used as the prefix for samtools'
# temporary files (-T).
prefix = os.path.splitext(snakemake.output[0])[0]

# Sort the first input into the first output; the rule's params are passed
# through as options and the rule's thread count is given to -@.
shell(
    "samtools sort {snakemake.params} -@ {snakemake.threads} -o {snakemake.output[0]} "
    "-T {prefix} {snakemake.input[0]}")
Пример #26
0
"""Snakemake wrapper for trimming paired-end reads using cutadapt."""

__author__ = "Julian de Ruiter"
__copyright__ = "Copyright 2017, Julian de Ruiter"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Paired-end trimming needs exactly two input files.
n = len(snakemake.input)
assert n == 2, "Input must contain 2 (paired-end) elements."

# Only stderr is requested for the log fragment: stdout is redirected into
# the qc output by the command itself.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Trim both mates (-o / -p) and store cutadapt's stdout as the qc output.
shell("cutadapt"
      " {snakemake.params.adapters}"
      " {snakemake.params.others}"
      " -o {snakemake.output.fastq1}"
      " -p {snakemake.output.fastq2}"
      " -j {snakemake.threads}"
      " {snakemake.input}"
      " > {snakemake.output.qc} {log}")
Пример #27
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell



# Load the fastqc module and run fastqc in the same subshell, so stderr of
# both steps ends up in the rule's log. Reports are written to
# params.outdir and extracted (--extract).
shell("""
(module load fastqc; \
fastqc --extract -t {snakemake.threads} -o {snakemake.params.outdir} {snakemake.input} ) 2> {snakemake.log}""")


Пример #28
0
# Imports
from snakemake.shell import shell
from pyfaidx import Fasta
from math import log2
import os

# Wrapper info
# Wrapper info
# These names are referenced by the command template below.
# NOTE(review): `license` shadows the builtin of the same name.
wrapper_name = "star_index"
wrapper_version = "0.0.4"
author = "Adrien Leger"
license = "MIT"
# Start the log file with a one-line banner (">" truncates the file).
shell(
    "echo 'Wrapper {wrapper_name} v{wrapper_version} / {author} / Licence {license}' > {snakemake.log}"
)

# Shortcuts
opt = snakemake.params.get("opt", "")
ref = snakemake.input.ref
annotation = snakemake.input.annotation
# Absolute index directory with a trailing slash, created if missing.
index_dir = os.path.abspath(snakemake.output.index_dir) + "/"
os.makedirs(index_dir, exist_ok=True)

# Compute index base depending on genome length
# genome_len is the summed length of all sequences in the reference; the
# result is capped at 14.
# NOTE(review): log2() raises ValueError when the FASTA holds no sequence
# (genome_len == 0).
genome_len = 0
with Fasta(ref) as fa:
    for seq in fa:
        genome_len += len(seq)
indexNbases = min(14, int(log2(genome_len) / 2) - 1)

# Run shell command
shell("STAR {opt} \
Пример #29
0
__author__ = "Johannes Köster"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Merge all inputs into the first output with samtools; the rule's params
# are passed through as options.
shell("samtools merge --threads {snakemake.threads} {snakemake.params} "
      "{snakemake.output[0]} {snakemake.input}")
Пример #30
0
    cmds.append("--dir {output_dir:q}")

# `cmds`, `html_file` and `answer2bool` are expected to be defined earlier in
# the script (not visible in this excerpt).
if html_file:
    # Only the file name is passed as --output.
    html_file_name = os.path.basename(html_file)
    cmds.append("--output {html_file_name:q}")

# reports
reports = [
    "alignment_report",
    "dedup_report",
    "splitting_report",
    "mbias_report",
    "nucleotide_report",
]
skip_optional_reports = answer2bool(
    snakemake.params.get("skip_optional_reports", False))
for report_name in reports:
    path = snakemake.input.get(report_name, "")
    if path:
        # Define a variable named after the report so that the placeholder
        # appended next (e.g. "{alignment_report:q}") can be resolved by name
        # when the command is formatted.
        locals()[report_name] = path
        cmds.append("--{0} {{{1}:q}}".format(report_name, report_name))
    elif skip_optional_reports:
        # Report not supplied: pass 'none' for it explicitly.
        cmds.append("--{0} 'none'".format(report_name))

# log
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
cmds.append("{log}")

# run shell command:
shell(" ".join(cmds))
Пример #31
0
# the veqtl-mapper swarming mechanism is really silly...
# we need to count the number of lines in the input file
with open(snakemake.input['pheno']) as f:
    n_features = sum(1 for line in f)
n_features -= 1  # drop header

# get the number of features we want to use per run
# (rounded up, so all jobs together cover every feature)
n_features_per_job = int(
    np.ceil(n_features / int(snakemake.wildcards['j_total'])))

# do we need the final job? -- could do this modulo
# total_n is the number of features handled by all jobs except the last one.
total_n = n_features_per_job * (int(snakemake.wildcards['j_total']) - 1)
final_job = snakemake.wildcards['j_cur'] == snakemake.wildcards['j_total']
if total_n >= n_features and final_job:
    # we don't need the final job. example: 608 gene and 30 jobs
    # create an empty output so the rule still produces its file
    shell('touch %s' % snakemake.output[0])
else:
    # start to build the command
    cmd = 'veqtl-mapper --vcf %s' % (snakemake.input['geno'])
    cmd = '%s --bed %s' % (cmd, snakemake.input['pheno'])
    cmd = '%s --genes %d' % (cmd, n_features_per_job)
    cmd = '%s --job-number %s' % (cmd, snakemake.wildcards['j_cur'])
    cmd = '%s --out %s' % (cmd, snakemake.output[0])

    params = dict(snakemake.params.items())

    window = params.get("window", "1000000")  # cis window default = 1Mb
    cmd = '%s --window %s' % (cmd, window)

    # free-form extra settings are appended verbatim
    if 'other_settings' in params:
        cmd = '%s %s' % (cmd, snakemake.params['other_settings'])
Пример #32
0
from snakemake.shell import shell
# Log fragment requested with stdout=False and append=True.
log = snakemake.get_log(stdout=False, append=True)
# Copy input to output, then emit one message on stderr and one on stdout,
# each followed by the same log fragment.
shell('''
      cat {snakemake.input} > {snakemake.output}
      (>&2 echo "a stderr message") {log}
      (echo "a stdout message") {log}
      ''')
Пример #33
0
"""Snakemake wrapper for Salmon Index."""

__author__ = "Tessa Pierce"
__copyright__ = "Copyright 2018, Tessa Pierce"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Log redirection fragment (stdout and stderr requested) and optional extra
# arguments (empty by default).
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
extra = snakemake.params.get("extra", "")

# Build the salmon index from the input (-t) at the output location (-i).
shell("salmon index -t {snakemake.input} -i {snakemake.output} "
      " --threads {snakemake.threads} {extra} {log}")
Пример #34
0
def plus_lncrna_fasta_postprocess(tmpfiles, outfile):
    """Decompress the gzipped ``tmpfiles`` and write the result to ``outfile``.

    Runs ``gunzip -c`` through ``shell``; the ``{tmpfiles}`` and ``{outfile}``
    placeholders in the command are named after this function's parameters,
    so the parameter names must stay in sync with the template.
    """
    shell('gunzip -c {tmpfiles} > {outfile}')
Пример #35
0
import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

# Optional user arguments (empty by default), the java options computed by
# the helper from the snakemake object, and the log redirection fragment.
extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# One "--filter-name NAME --filter-expression 'EXPR'" pair per configured
# filter. A single quote cannot be backslash-escaped inside a single-quoted
# shell string, so each one is written as '\'' : close the quoted string,
# emit an escaped quote, reopen the quoted string.
filters = [
    "--filter-name {} --filter-expression '{}'".format(
        name, expr.replace("'", "'\\''"))
    for name, expr in snakemake.params.filters.items()
]

# Intervals may be given as an input or, failing that, as a parameter; the
# option is omitted entirely when neither is set.
intervals = snakemake.input.get("intervals", "")
if not intervals:
    intervals = snakemake.params.get("intervals", "")
if intervals:
    intervals = "--intervals {}".format(intervals)

# GATK gets a private temp directory that is removed when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    shell("gatk --java-options '{java_opts}' VariantFiltration"
          " --variant {snakemake.input.vcf}"
          " --reference {snakemake.input.ref}"
          " {filters}"
          " {intervals}"
          " {extra}"
          " --tmp-dir {tmpdir}"
          " --output {snakemake.output.vcf}"
          " {log}")
Пример #36
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell



# Four GATK steps run one after another in a single shell script:
#   1. RealignerTargetCreator -> realignment.intervals
#   2. IndelRealigner         -> lr.bam
#   3. BaseRecalibrator       -> recalibration.matrix.txt
#   4. PrintReads with -BQSR  -> the rule's bam output
# Intermediate files are kept under /lscratch/$SLURM_JOBID. The first step
# truncates the log (">"), the later ones append (">>"). Doubled braces such
# as ${{MEM}} leave a literal ${MEM} for the shell to expand.
shell("""
#######################
MEM="48"
module load GATK
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T RealignerTargetCreator -R {snakemake.input.ref} -nt ${{SLURM_CPUS_ON_NODE}} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} -o /lscratch/${{SLURM_JOBID}}/realignment.intervals > {snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T IndelRealigner -R {snakemake.input.ref} -known {snakemake.input.phase1} -known {snakemake.input.mills} \
    -I {snakemake.input.bam} --targetIntervals /lscratch/${{SLURM_JOBID}}/realignment.intervals \
    -o /lscratch/${{SLURM_JOBID}}/lr.bam >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T BaseRecalibrator -R {snakemake.input.ref} -knownSites {snakemake.input.phase1} -knownSites {snakemake.input.mills} \
    -I /lscratch/${{SLURM_JOBID}}/lr.bam -nct ${{SLURM_CPUS_ON_NODE}} \
    -o /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
java -Xmx${{MEM}}g -Djava.io.tmpdir=/lscratch/${{SLURM_JOBID}} -jar $GATK_JAR \
    -T PrintReads -R {snakemake.input.ref} -I /lscratch/${{SLURM_JOBID}}/lr.bam \
    -nct ${{SLURM_CPUS_ON_NODE}} \
    -o {snakemake.output.bam} -BQSR /lscratch/${{SLURM_JOBID}}/recalibration.matrix.txt >>{snakemake.log} 2>&1
######################
""")
Пример #37
0
    counts = np.fromstring(inpf.read(), np.uint32).reshape((cycles, bins))
    return counts.astype(np.uint64)

def write_sig_dists(counts, filename):
    """Write a 2-D count matrix to a gzip-compressed binary file.

    The file starts with three little-endian unsigned 32-bit integers: the
    constant 4 (presumably the element size in bytes; confirm against the
    reader), the number of rows and the number of columns. The counts follow,
    converted to ``uint32``.

    Parameters
    ----------
    counts : numpy.ndarray
        Two-dimensional array of counts.

    filename : str
        Path of the gzip file to create.
    """
    with gzip.open(filename, 'wb') as outf:
        outf.write(struct.pack('<III', 4, counts.shape[0], counts.shape[1]))
        # tobytes() is the supported spelling; the old tostring() alias is
        # deprecated and no longer available in recent NumPy releases.
        outf.write(counts.astype(np.uint32).tobytes())

# Running element-wise sum of the signal distributions of all inputs.
counts_aggr = None

for signals, taginfo, out in zip(input.signals, input.taginfo,
                                 output.taginfo):
    # Build the command line for one (signals, taginfo) pair; its stdout is
    # compressed with BGZIP_CMD into the matching taginfo output.
    # NOTE(review): `format` is called with keyword arguments, so it is
    # presumably a template helper provided by the workflow environment, not
    # the builtin; confirm.
    cmd = format('{BINDIR}/tailseq-polya-ruler {wildcards.tile} {signals} \
        {input.score_cutoffs} {CONF[polyA_finder][signal_analysis_trigger]} \
        {CONF[polyA_ruler][downhill_extension_weight]} \
        {taginfo} {CONF[polyA_seeder][dist_sampling_bins]} \
        {CONF[polyA_ruler][signal_resampling_gap]} \
        {output.sigdists} | {BGZIP_CMD} -c > {out}', wildcards=wildcards,
        input=input, output=output)
    shell(cmd)

    # Every run is given the same output.sigdists path, so load it right
    # away and fold it into the total before the next iteration.
    counts_new = load_sig_dists(output.sigdists)
    if counts_aggr is None:
        counts_aggr = counts_new
    else:
        counts_aggr += counts_new

# Replace the last per-input file with the aggregated distribution.
write_sig_dists(counts_aggr, output.sigdists)

Пример #38
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Load the igvtools module and run `igvtools count` on the input, writing to
# the output, with the genome given in params.
shell("""
module load igvtools
igvtools count {snakemake.input} {snakemake.output} {snakemake.params.genome}
""")
Пример #39
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# assumes that bams are coming in as a list

# Stream the BAM as SAM (with header), delete the tab-separated BI:Z:<value>
# tag from every line with sed, and convert back to BAM. The pipeline runs in
# a subshell whose stdout and stderr go to the log (">&").
# NOTE(review): both sed stages remove the same BI:Z tag; presumably one of
# them was meant for a different tag (e.g. BD:Z). Confirm.
shell("""
module load samtools
( samtools view -h {snakemake.input} | \
  sed 's/\tBI\:Z\:[^\t]*//' | \
  sed 's/\tBI\:Z\:[^\t]*//' | samtools view -bS - > {snakemake.output} ) >& {snakemake.log}
""")
Пример #40
0
# `n_reads`, `n_fastqc_html`, `n_fastqc_zip` and `log` are expected to be
# defined earlier in the script (not visible in this excerpt).
assert n_reads == 1, "Input must contain 1 fastq files. Given: %r." % [
    n_reads, snakemake.input.reads
]
assert n_fastqc_html == 1, "Input must contain 1 fastqc html reports. Given: %r." % n_fastqc_html
assert n_fastqc_zip == 1, "Input must contain 1 fastqc .zip files. Given: %r." % n_fastqc_zip

# Don't run with `--fastqc` flag
if "--fastqc" in snakemake.params.get("extra", ""):
    raise ValueError("The trim_galore Snakemake wrapper cannot "
                     "be run with the `--fastqc` flag. Please "
                     "remove the flag from extra params. "
                     "You can use the fastqc Snakemake wrapper on "
                     "the input and output files instead.")

# Check that two output files were supplied
m = len(snakemake.output)
assert m == 2, "Output must contain 2 files. Given: %r." % m

# Check that all output files are in the same directory
out_dir = os.path.dirname(snakemake.output[0])
for file_path in snakemake.output[1:]:
    assert out_dir == os.path.dirname(file_path), \
        "trim_galore can only output files to a single directory." \
        " Please indicate only one directory for the output files."

# Run trim_galore in a subshell so {log} covers the whole call; results are
# written into the common output directory.
shell("(trim_galore"
      " {snakemake.params.extra}"
      " -o {out_dir}"
      " {snakemake.input.reads})"
      " {log}")
Пример #41
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# assumes that "extra" files are available. These should be included in the
# input specification for the rule

# the awk line replaces spaces in the info field (not allowed) with "_".
# This was necessary because Kaviar has spaces in the database names

# Annotate the VCF with vcfanno using the given config, then let awk replace
# every space in column 8 with "_" before writing the output. Doubled braces
# ("{{ ... }}", "${{...}}") are escapes that leave single braces in the
# command handed to the shell.
shell("""
module load vcfanno
vcfanno -p ${{SLURM_CPUS_ON_NODE}} {snakemake.input.config} {snakemake.input.vcf} \
  | awk -F'\t' -vOFS='\t' '{{ gsub(" ", "_", $8) ; print }}' > {snakemake.output}
""")
Пример #42
0
######## Snakemake header ########
import sys
sys.path.insert(
    0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages")
import pickle
snakemake = pickle.loads(
    b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00inputq\x03csnakemake.io\nInputFiles\nq\x04)\x81q\x05(X\x19\x00\x00\x00dedup/sjl_kc_input_R1.bamq\x06X\x18\x00\x00\x00dedup/sjl_kc_shep_R1.bamq\x07e}q\x08(X\x03\x00\x00\x00inpq\th\x06X\x02\x00\x00\x00ipq\nh\x07X\x06\x00\x00\x00_namesq\x0b}q\x0c(h\tK\x00N\x86q\rh\nK\x01N\x86q\x0euubX\x06\x00\x00\x00outputq\x0fcsnakemake.io\nOutputFiles\nq\x10)\x81q\x11(X=\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1_peaks.narrowPeakq\x12X0\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1/sjl_kc_shep_R1.bedq\x13e}q\x14(X\n\x00\x00\x00narrowPeakq\x15h\x12X\x03\x00\x00\x00bedq\x16h\x13h\x0b}q\x17(h\x15K\x00N\x86q\x18h\x16K\x01N\x86q\x19uubX\t\x00\x00\x00wildcardsq\x1acsnakemake.io\nWildcards\nq\x1b)\x81q\x1cX\x0e\x00\x00\x00sjl_kc_shep_R1q\x1da}q\x1e(X\x06\x00\x00\x00sampleq\x1fh\x1dh\x0b}q X\x06\x00\x00\x00sampleq!K\x00N\x86q"subX\x03\x00\x00\x00logq#csnakemake.io\nLog\nq$)\x81q%}q&h\x0b}q\'sbX\x07\x00\x00\x00threadsq(K\x01X\x04\x00\x00\x00ruleq)X\x05\x00\x00\x00macs2q*X\x06\x00\x00\x00paramsq+csnakemake.io\nParams\nq,)\x81q-X\x1d\x00\x00\x00peak_out/macs2/sjl_kc_shep_R1q.a}q/(X\x06\x00\x00\x00prefixq0h.h\x0b}q1h0K\x00N\x86q2subX\x06\x00\x00\x00configq3}q4X\t\x00\x00\x00resourcesq5csnakemake.io\nResources\nq6)\x81q7(K\x01K\x01e}q8(X\x06\x00\x00\x00_coresq9K\x01X\x06\x00\x00\x00_nodesq:K\x01h\x0b}q;(h9K\x00N\x86q<h:K\x01N\x86q=uubub.'
)
######## Original script #########
from snakemake.shell import shell

# Call peaks for the IP sample (-t) against the input control (-c); results
# are named after the `sample` wildcard and written to params.prefix.
shell('macs2 '
      'callpeak '
      '-c {snakemake.input.inp} '
      '-t {snakemake.input.ip} '
      '--bdg --SPMR '
      '-n {snakemake.wildcards.sample} '
      '--outdir {snakemake.params.prefix}')
# Run the R script expected at "<prefix>_model.r".
# NOTE(review): macs2 is told to name its files after the sample inside the
# prefix directory, so this path may not match the script it generates;
# confirm.
shell('Rscript {snakemake.params.prefix}_model.r')
Пример #43
0
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments; empty when the rule defines none.
# NOTE(review): `extra` is not referenced by the command below.
try:
    extra = snakemake.params.extra
except AttributeError:
    extra = ""

# Take column 3 of the SAM, keep values starting with "ch", count identical
# values (sort | uniq -c), split them on "_", ":", " " and "-" into
# tab-separated fields, and write fields 2, 3, 4 followed by the count.
shell("""
awk '{{print $3}}' {snakemake.input.sam} \
        | grep "^ch" | sort | uniq -c | sed -e 's/_\|:\| \|-/\\t/g' \
        |  awk '{{OFS="\t";print $2,$3,$4,$1}}' > \
        {snakemake.output}"""
)
Пример #44
0
__copyright__ = "Copyright 2018, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Run the command with bash.
shell.executable("bash")

# Only stderr is requested for the log fragment.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# Optional extra arguments, empty by default.
extra_params = snakemake.params.get("extra", "")

bam_input = snakemake.input[0]

# Reject rules that declare more than one input file.
if not isinstance(bam_input, str) and len(snakemake.input) != 1:
    raise ValueError("Input bam should be one bam file: " + str(bam_input) + "!")

output_file = snakemake.output[0]

# Reject rules that declare more than one output file.
if not isinstance(output_file, str) and len(snakemake.output) != 1:
    raise ValueError("Output should be one bam file: " + str(output_file) + "!")

shell(
    "fgbio SetMateInformation"
    " -i {bam_input}"
    " -o {output_file}"
    " {extra_params}"
    " {log}"
)
Пример #45
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Assumes that SDST is installed:
# "easy_install git+https://github.com/seandavi/SDST.git"

# input: vcf
# input: bam the RNA-seq bam file
# output: vcf

# Run `seqtool vcf rnacount` with the input VCF (-f), the output VCF (-o) and
# the BAM as positional argument; stderr goes to the rule's log.
shell("""
seqtool vcf rnacount -f {snakemake.input.vcf} -o {snakemake.output.vcf} {snakemake.input.bam} 2> {snakemake.log}
""")
Пример #46
0
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# Log redirection fragment built with the function's default settings.
log = snakemake.log_fmt_shell()

# Run wgsim on the reference, writing the two read files; the subshell makes
# {log} cover the whole call.
shell(
    "(wgsim {snakemake.params} {snakemake.input.ref}"
    " {snakemake.output.read1} {snakemake.output.read2}) {log}"
)
Пример #47
0
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Runs Picard CalculateHsMetrics on the input BAM.
#
# Steps performed by the shell script below:
#   1. load the samtools and picard environment modules;
#   2. build two interval files next to the input by concatenating the BAM
#      header (`samtools view -H`) with five tab-separated columns derived by
#      gawk from params.BAIT_INTERVALS / params.TARGET_INTERVALS:
#      $1, $2+1, $3, "+", "interval_<line number>";
#   3. run CalculateHsMetrics with a Java heap of SLURM_MEM_PER_NODE / 1024
#      (integer division), sending stderr to the rule's log;
#   4. delete the two generated interval files.
#
# Doubled braces are literal braces after formatting. The `\t` escapes sit
# in a regular (non-raw) Python string, so gawk receives real tab characters.
shell("""
    module load samtools/1.3 picard/1.139

    ## Generate intervals for hsmetrics
    cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.BAIT_INTERVALS} )> {snakemake.input}.BAIT_INTERVALS
    cat <(samtools view -H {snakemake.input} ) <(gawk '{{print $1 "\t" $2+1 "\t" $3 "\t+\tinterval_" NR}}' {snakemake.params.TARGET_INTERVALS} )> {snakemake.input}.TARGET_INTERVALS
    
    ## CalculateHsMetrics
    MEM=$((SLURM_MEM_PER_NODE / 1024))
    #MEM="22"
    java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar \
    CalculateHsMetrics \
    BAIT_INTERVALS={snakemake.input}.BAIT_INTERVALS \
    TARGET_INTERVALS={snakemake.input}.TARGET_INTERVALS \
    I={snakemake.input} \
    O={snakemake.output} \
    AS=true \
    VALIDATION_STRINGENCY=SILENT \
    2> {snakemake.log}
    
    rm -f {snakemake.input}.BAIT_INTERVALS {snakemake.input}.TARGET_INTERVALS
""")

Пример #48
0
__author__ = "Per Unneberg"
__copyright__ = "Copyright 2020, Per Unneberg"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional genecovr command-line options; empty when the rule sets none.
options = snakemake.params.get("options", "")
# Log redirection fragment covering both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Results are written below results/genecovr/<analysis wildcard>.
analysis = snakemake.wildcards.analysis
outdir = os.path.join("results", "genecovr", analysis)

# Interpolate the defaulted `options` variable instead of
# `snakemake.params.options`: the latter fails during formatting when the
# rule does not define the parameter, which made the default above useless.
shell("genecovr -p {snakemake.threads} "
      "{options} "
      "-d {outdir} "
      "{snakemake.input.csv} "
      "{log}")
Пример #49
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-09-07"
__version__ = 0.1

from snakemake.shell import shell

# Converts input.bam_file into two FASTQ files with `bedtools bamtofastq`:
# `-fq` receives output.read1 and `-fq2` receives output.read2.
# The trailing backslashes are consumed by Python (line continuation inside
# a non-raw string), so the shell sees the command on a single line.
shell("""
        bedtools bamtofastq -i {snakemake.input.bam_file} \
                            -fq {snakemake.output.read1} \
                            -fq2 {snakemake.output.read2}
      """)
Пример #50
0
# Log redirection fragment (stderr only) appended to the fastqc command below.
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

def basename_without_ext(file_path):
    """Return the basename of *file_path* without its file extension.

    A trailing ``.gz`` is removed first, then at most one further extension,
    so ``reads/sample.fastq.gz`` and ``reads/sample.fastq`` both give
    ``sample``.  A name with nothing left to strip (``sample`` or
    ``sample.gz``) is returned as ``sample`` instead of collapsing to an
    empty string, which would yield a bogus ``_fastqc.html`` path.
    """
    base = path.basename(file_path)

    # Drop the compression suffix before looking at the real extension.
    if base.endswith(".gz"):
        base = base[: -len(".gz")]

    # splitext leaves extension-less names untouched.
    stem, _ext = path.splitext(base)

    return stem


# FastQC writes into a private scratch directory: several jobs sharing one
# FastQC output directory could otherwise race with each other.
with TemporaryDirectory() as tempdir:
    shell(
        "fastqc {snakemake.params} --quiet --outdir {tempdir} "
        "{snakemake.input[0]} {log}"
    )

    # Report names expected inside the scratch directory, derived from the
    # input file's basename.
    report_stem = basename_without_ext(snakemake.input[0])
    html_path = path.join(tempdir, report_stem + "_fastqc.html")
    zip_path = path.join(tempdir, report_stem + "_fastqc.zip")

    # Relocate each report unless it already sits at the declared output.
    if snakemake.output.html != html_path:
        shell("mv {html_path} {snakemake.output.html}")
    if snakemake.output.zip != zip_path:
        shell("mv {zip_path} {snakemake.output.zip}")
Пример #51
0
__author__ = "Ryan Dale"
__copyright__ = "Copyright 2016, Ryan Dale"
__email__ = "*****@*****.**"
__license__ = "MIT"

import os
from snakemake.shell import shell

# Optional extra arguments; empty when the rule defines no `extra` param.
extra = getattr(snakemake.params, "extra", "")

# Send stdout and stderr to the rule's log when one is declared.
log = "> {} 2>&1".format(snakemake.log) if snakemake.log else ""

# NOTE(review): `outdir` is computed but not referenced by the command
# below, which passes `{snakemake.output}` to `-o` -- confirm the intent.
outdir = os.path.dirname(snakemake.output[0])

shell(
    "kallisto quant --index {snakemake.input.index} -o {snakemake.output} "
    "--threads {snakemake.threads} {extra} {snakemake.input.fastq} {log} "
)
Пример #52
0
__author__ = "Ali Ghaffaari"
__copyright__ = "Copyright 2018, Ali Ghaffaari"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell

# stdout is excluded from the log fragment because the command itself
# redirects stdout into the reads output file.
log = snakemake.log_fmt_shell(stdout=False)

# The subshell groups `vg sim` with its stdout redirection so `{log}`
# applies to the grouped command.
shell(
    "(vg sim {snakemake.params} "
    "--xg-name {snakemake.input.xg} "
    "--threads {snakemake.threads} "
    "> {snakemake.output.reads}) "
    "{log}"
)
Пример #53
0
__author__ = "Sean Davis"
__email__ = "*****@*****.**"
__license__ = "MIT"


from snakemake.shell import shell


# Runs Picard MarkDuplicates on the input and writes the result to the
# output, after loading the `picard` environment module. The Java heap is
# fixed at 8 GB via MEM; stderr goes to the rule's log. Doubled braces are
# literal braces after formatting, so the shell sees ${MEM}.
# NOTE(review): no METRICS_FILE (M=) argument is passed -- confirm that the
# Picard version in use accepts MarkDuplicates without one.
shell("""
MEM="8"
        module load picard
java -Xmx${{MEM}}g -jar $PICARDJARPATH/picard.jar MarkDuplicates VALIDATION_STRINGENCY=SILENT I={snakemake.input} O={snakemake.output} AS=true 2> {snakemake.log}""")

Пример #54
0
"""Snakemake wrapper for running samtools depth."""

__author__ = "Dayne L Filer"
__copyright__ = "Copyright 2020, Dayne L Filer"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Extra command-line arguments for `samtools depth`; empty by default.
params = snakemake.params.get("extra", "")

# An optional BED input becomes a `-b <file>` argument; when it is absent
# the placeholder simply renders as an empty value.
bed = snakemake.input.get("bed", "")
bed = "-b " + bed if bed else bed

shell(
    "samtools depth {params} {bed} -o {snakemake.output[0]} "
    "{snakemake.input.bams}"
)
Пример #55
0
# specify output: vcf=
#
# Annotates input.vcf with Ensembl VEP (variant_effect_predictor.pl) in
# offline/cache mode and writes output.vcf.
#
# The script copies the VEP cache and the assembly FASTA from /fdb/VEP to
# the job's /lscratch directory first, then runs VEP against that copy with
# the CADD, ExAC, CSN and Carol plugins. Doubled braces are literal braces
# after formatting, so the shell sees ${VEP_VERSION} etc.
#
# `{buffer_size}` must resolve to a variable visible to `shell`; it is not
# defined in the visible part of this script.
#
# Fix: the SIFT option was written with an em-dash ("—sift"), which VEP does
# not parse as an option; it is now the regular double-hyphen "--sift".
shell("""
echo {buffer_size}
VEP_VERSION="83"
VEP_ASSEMBLY="GRCh37"
module load VEP/${{VEP_VERSION}}
module load samtools
mkdir -p /lscratch/$SLURM_JOBID/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}}
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/homo_sapiens/${{VEP_VERSION}}_${{VEP_ASSEMBLY}} /lscratch/${{SLURM_JOBID}}/homo_sapiens
cp -r /fdb/VEP/${{VEP_VERSION}}/cache/${{VEP_ASSEMBLY}}.fa* /lscratch/${{SLURM_JOBID}}/
export CACHE_DIR=/lscratch/${{SLURM_JOBID}}/
export CADD_DIR=/data/CCRBioinfo/public/CADD
export EXAC_DIR=/fdb/exac/release0.3
export CACHE_DIR=/lscratch/${{SLURM_JOB_ID}}
variant_effect_predictor.pl \
  -i {snakemake.input.vcf} --offline --cache   \
  --dir_cache $CACHE_DIR --fasta $CACHE_DIR/${{VEP_ASSEMBLY}}.fa  \
  --output {snakemake.output.vcf} --fork ${{SLURM_CPUS_ON_NODE}} \
  --sift s --polyphen s --vcf --pick   \
  --symbol --buffer_size {buffer_size} --biotype --hgvs --assembly ${{VEP_ASSEMBLY}} \
  --gene_phenotype --gmaf --check_existing \
  --pubmed  --force_overwrite   \
  --maf_1kg --maf_esp --regulatory --domains --numbers   \
  --uniprot --xref_refseq \
  --plugin CADD,$CADD_DIR/whole_genome_SNVs.tsv.gz   \
  --plugin ExAC,$EXAC_DIR/ExAC.r0.3.sites.vep.vcf.gz   \
  --plugin CSN,1   \
  --plugin Carol
""")

Пример #56
0
__author__ = "Patrik Smeds"
__copyright__ = "Copyright 2021, Patrik Smeds"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell
from snakemake_wrapper_utils.bcftools import get_bcftools_opts

# Options derived from the rule by the shared bcftools helper.
bcftools_opts = get_bcftools_opts(snakemake)
# Log redirection fragment (stderr only).
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

# The command writes exactly one file, so reject multi-output rules early.
if len(snakemake.output) > 1:
    raise Exception("Only one output file expected, got: " +
                    str(len(snakemake.output)))

# Filter expression/options and free-form extra arguments from the rule.
# The local name avoids shadowing the `filter` builtin; the params key
# stays "filter".
filter_opts = snakemake.params.get("filter", "")
extra = snakemake.params.get("extra", "")

# `extra` used to be read but never passed on, so user-supplied arguments
# were silently dropped; it is now part of the command line.
shell("bcftools filter {filter_opts} {extra} {snakemake.input[0]} "
      "{bcftools_opts} "
      "-o {snakemake.output[0]} "
      "{log}")
Пример #57
0
__author__ = "Behram Radmanesh"
__copyright__ = "Copyright 2016, Behram Radmanesh"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments from the rule's params.
# NOTE(review): `extra` is still not interpolated into the command; where it
# belongs depends on how the R script parses its arguments -- confirm.
try:
    extra = snakemake.params.extra
except AttributeError:
    extra = ""

# Send stdout and stderr to the rule's log when one is declared.
if snakemake.log:
    log = "> {} 2>&1".format(snakemake.log)
else:
    log = ""

# Runs the R script given as input.dupRScript on input.dupBAM, passing the
# remaining settings as key=value arguments.
# The redirection built above was never appended, so a declared log file
# stayed empty; `{log}` now terminates the command.
shell(
    "Rscript {snakemake.input.dupRScript} "
    "{snakemake.input.dupBAM} "
    "gtf={snakemake.input.GTF} "
    "stranded={snakemake.params.stranded} "
    "paired={snakemake.params.paired} "
    "outfile={snakemake.output[0]} "
    "threads={snakemake.threads} "
    "{log}"
)
Пример #58
0
__author__ = "William Rowell"
__copyright__ = "Copyright 2020, William Rowell"
__email__ = "*****@*****.**"
__license__ = "MIT"

from snakemake.shell import shell

# Optional extra arguments for `whatshap phase`; empty when the rule sets none.
extra = snakemake.params.get("extra", "")
# Log redirection fragment covering both stdout and stderr.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Runs `whatshap phase` for the chromosome named by the `chromosome`
# wildcard, with input.reference as reference, input.vcf as the variants and
# input.phaseinput as the final positional argument; the result goes to the
# rule's output. The subshell groups the call so `{log}` applies to it as a
# whole. The trailing backslashes are consumed by Python (non-raw string).
shell("""
    (whatshap phase \
        {extra} \
        --chromosome {snakemake.wildcards.chromosome} \
        --output {snakemake.output} \
        --reference {snakemake.input.reference} \
        {snakemake.input.vcf} \
        {snakemake.input.phaseinput}) {log}
    """)
Пример #59
0
__author__ = "Sebastian Kurscheid"
__copyright__ = "Copyright 2016, Sebastian Kurscheid"
__email__ = "*****@*****.**"
__license__ = "MIT"
__date__ = "2016-08-08"
__version__ = 0.2

from snakemake.shell import shell

# Runs `kallisto quant` on two read files (input.read1, input.read2) against
# the index input.ki, using the rule's thread count and params.bootstraps
# bootstrap samples; the rule's output is passed as the output directory.
# The trailing backslashes are consumed by Python (non-raw string), so the
# shell receives the command on a single line.
shell("""
            kallisto quant --index={snakemake.input.ki} \
                           --output-dir={snakemake.output} \
                           --threads={snakemake.threads} \
                           --bootstrap-samples={snakemake.params.bootstraps} \
                           {snakemake.input.read1} {snakemake.input.read2}
      """)
Пример #60
0
######## Snakemake header ########
# Preamble (presumably emitted by Snakemake's script runner -- the banner
# suggests so): it puts one specific environment's site-packages first on
# sys.path, then the statement that follows rebuilds the `snakemake` job
# object from an embedded pickle.
# NOTE(review): the absolute path below is machine-specific.
import sys
sys.path.insert(
    0, "/home/athersh/miniconda3/envs/snakemake/lib/python3.5/site-packages")
import pickle
snakemake = pickle.loads(
    b'\x80\x03csnakemake.script\nSnakemake\nq\x00)\x81q\x01}q\x02(X\t\x00\x00\x00resourcesq\x03csnakemake.io\nResources\nq\x04)\x81q\x05(K\x01K\x01e}q\x06(X\x06\x00\x00\x00_namesq\x07}q\x08(X\x06\x00\x00\x00_nodesq\tK\x00N\x86q\nX\x06\x00\x00\x00_coresq\x0bK\x01N\x86q\x0cuh\tK\x01h\x0bK\x01ubX\t\x00\x00\x00wildcardsq\rcsnakemake.io\nWildcards\nq\x0e)\x81q\x0fX\x10\x00\x00\x00sjl_cl8_cp190_R1q\x10a}q\x11(h\x07}q\x12X\x06\x00\x00\x00sampleq\x13K\x00N\x86q\x14sX\x06\x00\x00\x00sampleq\x15h\x10ubX\x06\x00\x00\x00paramsq\x16csnakemake.io\nParams\nq\x17)\x81q\x18(X\x1f\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1q\x19h\x10e}q\x1a(h\x07}q\x1b(X\x06\x00\x00\x00prefixq\x1cK\x00N\x86q\x1dX\x0e\x00\x00\x00wrapper_sampleq\x1eK\x01N\x86q\x1fuh\x1ch\x19h\x1eh\x10ubX\x04\x00\x00\x00ruleq X\x05\x00\x00\x00macs2q!X\x07\x00\x00\x00threadsq"K\x01X\x05\x00\x00\x00inputq#csnakemake.io\nInputFiles\nq$)\x81q%(X\x1a\x00\x00\x00dedup/sjl_cl8_input_R1.bamq&X\x1a\x00\x00\x00dedup/sjl_cl8_cp190_R1.bamq\'e}q((h\x07}q)(X\x03\x00\x00\x00inpq*K\x00N\x86q+X\x02\x00\x00\x00ipq,K\x01N\x86q-uh*h&h,h\'ubX\x06\x00\x00\x00outputq.csnakemake.io\nOutputFiles\nq/)\x81q0(XA\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1_peaks.narrowPeakq1X4\x00\x00\x00peak_out/macs2/sjl_cl8_cp190_R1/sjl_cl8_cp190_R1.bedq2e}q3(h\x07}q4(X\n\x00\x00\x00narrowPeakq5K\x00N\x86q6X\x03\x00\x00\x00bedq7K\x01N\x86q8uh5h1h7h2ubX\x03\x00\x00\x00logq9csnakemake.io\nLog\nq:)\x81q;}q<h\x07}q=sbX\x06\x00\x00\x00configq>}q?ub.'
)
######## Original script #########
from snakemake.shell import shell

import os

# Call peaks with macs2: input.inp is the control (-c), input.ip the
# treatment (-t); results are named after the `sample` wildcard and written
# to params.prefix.
shell('macs2 '
      'callpeak '
      '-c {snakemake.input.inp} '
      '-t {snakemake.input.ip} '
      '--bdg --SPMR '
      '-n {snakemake.wildcards.sample} '
      '--outdir {snakemake.params.prefix}')

# Run the model script found in params.prefix, redirecting its stdout to
# the matching .pdf path.
shell(
    'Rscript {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.r > {snakemake.params.prefix}/{snakemake.params.wrapper_sample}_model.pdf'
)

# Expose the narrowPeak file under the .bed output name via a symlink.
# A symlink's relative target is resolved from the directory holding the
# link, not from the working directory, so passing the workflow-relative
# narrowPeak path straight to `ln -s` produced a dangling link whenever the
# outputs live in a subdirectory. The target is therefore expressed
# relative to the link's own directory.
link_dir = os.path.dirname(snakemake.output.bed) or os.curdir
link_target = os.path.relpath(snakemake.output.narrowPeak, link_dir)
shell("ln -sf {link_target} {snakemake.output.bed}")