Exemplo n.º 1
0
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
# <http://www.gnu.org/licenses/>.

import glob, os, sys, inspect, json, shutil
from collections import defaultdict
import traceback as tb
#import snakemake
from snakemake import load_configfile
from snakemake.utils import validate, min_version
import argparse
import subprocess
import re
#import logging
min_version("5.8.2")

from lib.Collection import *
from lib.Logger import *
scriptname = os.path.basename(__file__)


def parseargs():
    parser = argparse.ArgumentParser(
        description=
        'Wrapper around snakemake to run config based jobs automatically')
    parser.add_argument("-c",
                        "--configfile",
                        type=str,
                        help='Configuration json to read')
    parser.add_argument("-g",
Exemplo n.º 2
0
import os
import argparse
import datetime
import time
from snakemake.utils import min_version
from ..configuration.daijin_configurator import create_daijin_config
import pkg_resources
try:
    from yaml import CSafeLoader as yLoader
except ImportError:
    from yaml import SafeLoader as yLoader

# Path of the hpc.yaml file located in the "daijin" directory of the
# installed "Mikado" package.
system_hpc_yaml = pkg_resources.resource_filename(
    "Mikado", os.path.join("daijin", "hpc.yaml"))

# Refuse to continue with a Snakemake release older than 3.6.
min_version("3.6")

# Moment this module was loaded: as a raw epoch timestamp, and as a
# YYYY-MM-DD_HH:MM:SS string derived from that same timestamp.
TIME_START = time.time()
NOW = datetime.datetime.fromtimestamp(TIME_START).strftime('%Y-%m-%d_%H:%M:%S')

# Location of the "daijin" resource directory inside the Mikado package.
DAIJIN_DIR = pkg_resources.resource_filename("Mikado", "daijin")
# NOTE(review): this existence check is an `assert`, so it is skipped when
# Python runs with -O.
assert pkg_resources.resource_exists("Mikado", "daijin")


# noinspection PyPep8Naming
def get_sub_commands(SCHEDULER, prefix, additional):
    res_cmd = ""
    sub_cmd = ""

    if SCHEDULER == "LSF":
        sub_cmd = "bsub"
Exemplo n.º 3
0
import random
import math
import itertools as iter
import os 
import datetime
from snakemake.utils import validate, min_version
min_version("5.3.0")

# Working directory at the time this file is evaluated.
SRC_PATH = os.getcwd()

# Snakemake directive: load config.yml into the global `config` mapping.
configfile: "config.yml"

# Reference genome files. Each path is built by plain string concatenation,
# so `ref_genome_dir` presumably has to end with a path separator -- TODO confirm.
REF_GENOME_DIR = config['ref_genome_dir']
GENOME_FASTA = REF_GENOME_DIR + config['genome_fasta']
GENOME_2BIT = REF_GENOME_DIR + config['genome_2bit']
GENOME_GTF = REF_GENOME_DIR + config['genome_gtf']
GENOME_SIZES = REF_GENOME_DIR + config['genome_sizes']
ANNOTATION_GTF = REF_GENOME_DIR + config['annotation_gtf']

# Analysis settings copied unchanged from the config. A missing key raises
# KeyError here; the accepted values are not visible in this part of the file.
PROTOCOL = config["protocol"]
paired = config["paired"]
control = config["control"]
demultiplexed = config["demultiplexed"]
peakcaller = config["peakcaller"]
mapper = config["mapper"]

###############
## FUNCTIONS ##
###############

def list_all_values_of_dict(dictionary):
Exemplo n.º 4
0
import random
import math
import itertools as iter
import os 
import datetime
from snakemake.utils import validate, min_version
min_version("5.3.0")

# Working directory at the time this file is evaluated.
SRC_PATH = os.getcwd()

# Snakemake directive: load config.yml into the global `config` mapping.
configfile: "config.yml"

# Reference genome files. Each path is built by plain string concatenation,
# so `ref_genome_dir` presumably has to end with a path separator -- TODO confirm.
REF_GENOME_DIR = config['ref_genome_dir']
GENOME_FASTA = REF_GENOME_DIR + config['genome_fasta']
GENOME_2BIT = REF_GENOME_DIR + config['genome_2bit']
GENOME_GTF = REF_GENOME_DIR + config['genome_gtf']
GENOME_SIZES = REF_GENOME_DIR + config['genome_sizes']
ANNOTATION_GTF = REF_GENOME_DIR + config['annotation_gtf']

# Analysis settings copied unchanged from the config. A missing key raises
# KeyError here; the accepted values are not visible in this part of the file.
PROTOCOL = config["protocol"]
paired = config["paired"]
control = config["control"]
demultiplexed = config["demultiplexed"]
peakcaller = config["peakcaller"]
mapper = config["mapper"]

###############
## FUNCTIONS ##
###############

def list_all_values_of_dict(dictionary):
import pandas as pd
import os
from snakemake.exceptions import MissingInputException
from snakemake.utils import validate, min_version
# set minimum snakemake version #
min_version("5.1.2")

__author__ = "Sebastian Kurscheid ([email protected])"
__license__ = "MIT"
__date__ = "2018-05-22"

rule:
    version: 0.1

localrules:
    all

# Machine-specific locations, all rooted at the user's HOME directory.
# An unset HOME environment variable raises KeyError on the first line.
wrapper_dir = os.environ['HOME'] + "/Development/snakemake-wrappers/bio"
include_prefix= os.environ['HOME'] + "/Development/JCSMR-Tremethick-Lab/Breast/snakemake/rules/"
config_dir = os.environ['HOME'] + "/Development/JCSMR-Tremethick-Lab/Breast/snakemake/configs/"
home = os.environ['HOME']

# Snakemake directive: load the JSON workflow configuration from config_dir.
configfile: config_dir + "config_RNA-Seq.json"
# validate(config, schema="schemas/config.schema.yaml")

# Sample sheet indexed by its "sample" column; drop=False keeps that column
# available as regular data as well.
samples = pd.read_table(config_dir + "PRNJA350495_samples.tsv").set_index("sample", drop=False)
#validate(samples, schema="schemas/samples.schema.yaml")

# Unit sheet, read with every column as str and indexed by the
# ("sample", "unit") pair; both columns also stay in the frame.
units = pd.read_table(config_dir + "PRNJA350495_units.tsv", dtype=str).set_index(["sample", "unit"], drop=False)
units.index = units.index.set_levels([i.astype(str) for i in units.index.levels])  # enforce str in index
#validate(units, schema="schemas/units.schema.yaml")
Exemplo n.º 6
0
#!/usr/bin/env python3

import snakemake
import sys
import os
import argparse
import datetime
import time
import yaml

from snakemake.utils import min_version

min_version("3.5")

# Launch time, captured once: `ts` is the raw epoch timestamp and `now` is
# that same instant formatted as YYYY-MM-DD_HH:MM:SS.
ts = time.time()
now = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H:%M:%S')

# Directory holding this script (symlinks resolved); the default hpc.json is
# looked up next to it.
snakey_dir = os.path.dirname(os.path.realpath(__file__))

# The first positional parameter of ArgumentParser is `prog`, not
# `description`, so the descriptive sentence must be passed by keyword or it
# replaces the program name in the usage line.
parser = argparse.ArgumentParser(
    description="A general purpose script for running snakemake in a scheduled environment.")
parser.add_argument("snakefile",
                    help="Snakefile to use.")
parser.add_argument("config",
                    help="Configuration file to use.")
parser.add_argument("-c", "--hpc_conf", default=os.path.join(snakey_dir, "hpc.json"),
                    help="Configuration file that allows the user to override resource requests for each rule when running under a scheduler in a HPC environment.")
# Integer options carry integer defaults, so the declared default already has
# the declared type instead of relying on argparse converting a string.
parser.add_argument("-N", "--max_nodes", type=int, default=10,
                    help="Maximum number of nodes to use concurrently")
parser.add_argument("-n", "--max_cores", type=int, default=1000,
                    help="Maximum number of cores to use concurrently")
parser.add_argument("-d", "--no_drmaa", action='store_true', default=False,
Exemplo n.º 7
0
from snakemake.utils import min_version

min_version("3.5.4")

configfile: "config.json"

# Sample sheet -> {sample name: [files for read 1, files for read 2]}.
# Each data line holds three tab-separated fields: sample name, read number
# (1 or 2) and file name.
fileInfo = {}
# The context manager closes the sample sheet as soon as parsing finishes
# instead of leaving the handle open until garbage collection.
with open(config["sampleFile"]) as sample_sheet:
    for line in sample_sheet:
        # Comment lines are ignored; blank lines are skipped too, so a
        # trailing empty line no longer breaks the three-field unpacking.
        if line.startswith("#") or not line.strip():
            continue
        sName, rNr, fileName = line.strip("\n").split("\t")
        # The read number is 1-based in the file and 0-based in the list pair.
        fileInfo.setdefault(sName, [[], []])[int(rNr) - 1].append(fileName)

samples = list(fileInfo)

# Target rule: it declares no output and no command of its own, only the files
# listed below as inputs, so requesting `all` requests every one of them.
rule all:
    input: "krona/All.krona.html", "krona/5_8s.krona.html", "krona/ITS2.krona.html", "taxonomy/all.compareClass.tsv", "otu_table.tsv", "All.rarefactions.pdf", "readNumbers/readNumbers.pdf"

### generate reference data bases
include: "prepDatabases.snakemake.py"

### read processing
include: "readProcessing.snakemake.py"

### analyze 5.8S
include: "r58S.snakemake.py"

### analyze ITS2
Exemplo n.º 8
0
import time
import json
import yaml
import snakemake
from snakemake.utils import min_version
from ..utilities.log_utils import create_default_logger
from ..configuration.daijin_configurator import create_daijin_config, check_config
import shutil
import pkg_resources

# import logging
# import logging.handlers

# Path of the hpc.yaml file located in the "daijin" directory of the
# installed "Mikado" package.
system_hpc_yaml = pkg_resources.resource_filename("Mikado", os.path.join("daijin", "hpc.yaml"))

# Refuse to continue with a Snakemake release older than 3.5.
min_version("3.5")

# Moment this module was loaded: as a raw epoch timestamp, and as a
# YYYY-MM-DD_HH:MM:SS string derived from that same timestamp.
TIME_START = time.time()
NOW = datetime.datetime.fromtimestamp(TIME_START).strftime('%Y-%m-%d_%H:%M:%S')

# Location of the "daijin" resource directory inside the Mikado package.
DAIJIN_DIR = pkg_resources.resource_filename("Mikado", "daijin")
# NOTE(review): this existence check is an `assert`, so it is skipped when
# Python runs with -O.
assert pkg_resources.resource_exists("Mikado", "daijin")


# noinspection PyPep8Naming
def get_sub_commands(SCHEDULER, prefix):
    res_cmd = ""
    sub_cmd = ""

    if SCHEDULER == "LSF":
        sub_cmd = "bsub"
Exemplo n.º 9
0
#!/usr/bin/env python3

import os
from os.path import join, dirname, basename
import shutil
from argparse import ArgumentParser, RawDescriptionHelpFormatter as RDHF
from collections import defaultdict, namedtuple

import yaml
from snakemake.utils import min_version

from .snakemake_helper import *
from . import DEFAULT_CONFIG_FILE, DEFAULT_HPC_CONFIG_FILE, __version__, QAA_Runner

min_version("4.0")

# https://www.ascii-art.net/content.php?id=disney
kaa = [
    "                                     ..:::::;'|",
    "             __   ___             / \:::::;'  ;",
    "           ,::::`'::,`.          :   ___     /",
    "          :_ `,`.::::)|          | ,'SSt`.  /",
    "          |(` :\)):::`;          |:::::::| :",
    "          : \ ,`'`:::::`.        |:::::::| |",
    "           \ \  ,' `:::::`.      :\::::::; |",
    "            \ `.  ,' ` ,--.)     : `----'  |",
    "             :  `-.._,'__.'      :   ____  |",
    "             |     |              :,'::::\ |",
    "             :  _  |              :::::::::|",
    "             ;     :              |:::::::||",
    "            :      |              |:::::::;|",
Exemplo n.º 10
0
import os
import sys
from glob import glob
import subprocess

#Request a minimum version of snakemake

from snakemake.utils import min_version

min_version("5.4.0")

cwd = os.getcwd()
# print(cwd)

########## --- 1) Get the genome information
# x = -1
# if x < 0:
# 	raise Exception("Sorry no numbers below zero")

# x = "hello"

# if not type(x) is int:
#   raise TypeError("Only integers are allowed")

# print("module load STAR/2.5.2a-foss-2016a".split(" "))

# Scratch example: build a small car record, echo it to stdout, and keep its
# model name in `x`.
thisdict = dict(brand="Ford", model="Mustang", year=1964)
print(thisdict)
x = thisdict["model"]
Exemplo n.º 11
0
    from collections.abc import Mapping
import re
import yaml

# Prefer PyYAML's libyaml-backed loader and fall back to the pure-Python one
# when the C extension is unavailable. Only ImportError is caught, so
# unrelated failures (KeyboardInterrupt, a broken install, ...) are no longer
# swallowed by a bare `except:`.
# NOTE(review): both names are the full, unrestricted loaders; they must only
# be used on trusted YAML. Switching to CSafeLoader/SafeLoader would change
# which documents load, so it is flagged here rather than done.
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader
from datetime import datetime


from snakemake.logging import logger
from snakemake.workflow import srcdir
from snakemake.utils import update_config, min_version

min_version("5.10.0")

## setup paths used in workflows
TMPDIR = os.environ.get("TMPDIR", "/tmp")
# config paths will override environment variables; an empty or missing config
# entry falls through to the GCF_* environment variable, then to the default
INTERIM_DIR = config.get("interim_dir") or environ.get("GCF_INTERIM", "data/tmp")
makedirs(INTERIM_DIR, exist_ok=True)
EXT_DIR = config.get("ext_dir") or environ.get("GCF_EXT", "data/ext")
makedirs(EXT_DIR, exist_ok=True)
EXT_CACHE = join(EXT_DIR, '.cache')
# create the cache directory itself; EXT_DIR was already created just above,
# so repeating it here left EXT_CACHE pointing at a directory that never existed
makedirs(EXT_CACHE, exist_ok=True)

FASTQ_DIR = config.get("fastq_dir") or environ.get("GCF_FASTQ", "data/raw/fastq")
# drop every trailing path separator in one call
FASTQ_DIR = FASTQ_DIR.rstrip(os.path.sep)
makedirs(FASTQ_DIR, exist_ok=True)