Example #1
import sys

import yaml
import ruffus.cmdline as cmdline

# (assumes an argparse parser created earlier in the script, e.g. via
# parser = cmdline.get_argparse(...))
# parser.add_argument('--pipeline', "-p",
#                     type=str,
#                     choices=['sim_pure', 'sim_contam', 'real_pure', 'test_pipe'],
#                     help="Defining which pipeline to run")

parser.add_argument('--config_file', "-cf",
                    type=str,
                    # metavar="config_file",
                    help="YAML file with pipeline parameters")

options = parser.parse_args()



## standard Python logger that can be synchronised across concurrent Ruffus tasks;
## the logging output file is set with --log_file <log_file_name>
logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file, options.verbose)


# only run the pipeline for real when we are not just printing,
# drawing a flowchart, or touching files
if  not options.just_print and \
    not options.flowchart and \
    not options.touch_files_only:

    with open(options.config_file, 'r') as config_file:
        config = yaml.safe_load(config_file)
    
    pipeline1a = make_sipp(org_list = config['org_list'], config = config)
    cmdline.run (options, logger = logger)
    sys.exit()
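# For reference: a minimal sketch of using the (logger, logger_mutex) pair
# from cmdline.setup_logging inside a task, so concurrent jobs do not
# interleave log lines. The task and file names are hypothetical.
from ruffus import transform, suffix

@transform(["demo.txt"], suffix(".txt"), ".copy")
def demo_task(input_file, output_file):
    with open(input_file) as src, open(output_file, "w") as dst:
        dst.write(src.read())
    # serialise writes to the shared log across concurrent jobs
    with logger_mutex:
        logger.info("demo_task: %s -> %s" % (input_file, output_file))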
Example #2
annotations = parser.add_argument_group('annotations')
annotations.add_argument('--genome_fasta', type=str, help='genome FASTA file')
annotations.add_argument('--gtf', type=str, help='GTF annotation file')
annotations.add_argument("--suppl_annot", type=str, nargs="+", help="supplementary annotation file(s) for checking novel splice events")

args = parser.parse_args()
params = get_params(args, args.params)
check_params(args, params)

logs_dir = args.outdir + '/logs'
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)

log_file = '%s/log.%s.txt' % (logs_dir, datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
logger, logging_mutex = cmdline.setup_logging(__name__,
                                              log_file,
                                              args.verbose)
print('log_file:', log_file)

cmdline.run(args)

read_pairs = []
if args.fq:
    read_pairs = format_read_pairs(fqs=args.fq)
elif args.fq_list:
    read_pairs = format_read_pairs(list_file=args.fq_list)

history_file = '%s/.ruffus_history.sqlite' % args.outdir
bbt_outdir = '%s/bbt_%s' % (args.outdir, get_version('bbt'))
assembly_outdir = '%s/rnabloom_%s' % (args.outdir, get_version('rnabloom'))
pv_outdir = '%s/pv_%s' % (args.outdir, get_version('pv'))
Example #3
### start of pipeline

# setting the main working directory
home_dir = os.getcwd()
root_dir = "%s/%s" % (home_dir, args.name)
try:
    os.mkdir(root_dir)
except FileExistsError:
    pass

### setting up logging
if not args.log_file:
    logfile = "%s/%s" % (root_dir, "log_file.txt")
else:
    logfile = "%s/%s" % (root_dir, args.log_file)
logger, logger_mutex = cmdline.setup_logging("CRAC_Pipeline_SE", logfile, 10)

### setting the starting files

startingfiles = [os.path.abspath(i) for i in args.forwardreads]

logger.info("analysing the following files:\n%s\n" % "\n".join(startingfiles))

### start of pipeline definition

pipeline = Pipeline(
    name="Single-end data CRAC pipeline for multiplexed libraries")

logger.info("Trimming the reads")
pipeline.transform(
    task_func=runFlexBar,
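# The call above is truncated. For reference, a minimal complete
# Pipeline.transform call in the Ruffus 2.6 object syntax looks roughly
# like this; the suffix filter, output pattern, and task body are
# illustrative assumptions, not the original pipeline's.
from ruffus import Pipeline, suffix

def trim_reads(input_file, output_file):
    open(output_file, "w").close()  # placeholder; the real runFlexBar shells out to FlexBar

demo = Pipeline(name="demo")
demo.transform(task_func=trim_reads,
               input=["sample.fastq"],
               filter=suffix(".fastq"),
               output="_trimmed.fastq")
# the pipeline would then be executed with cmdline.run(options), as in the
# examples above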
Example #4
# reference paths; the literal $HOME is assumed to be expanded later by
# the shell commands that consume these strings
hisat_genome_index = "$HOME/Scratch/reference/GRcm38/hisat2/grcm38_snp_tran"
genome = "$HOME/Scratch/reference/GRcm38/GRCm38/Mus_musculus.GRCm38"
genome_name = "Mus_musculus.GRCm38.dna.primary_assembly.fa"
mask = "$HOME/Scratch/reference/GRcm38/gtf/Mus_musculus.GRCm38.84.ribo.mito.mask.gtf"

if hisat_check:
    print("hisat check: " + str(hisat_check))
    print(str(hisat_genome_index))
if stringtie_check:
    print("stringtie check: " + str(stringtie_check))
print("STAR check: " + str(star_check))
print(gtf)
print("Basedir: " + basedir)

# standard Python logger which can be synchronised across concurrent Ruffus tasks
logger, logger_mutex = cmdline.setup_logging("Chela", options.log_file, options.verbose)

if not aligner:
    raise Exception("Aligner not selected with --aligner")
files_list = options.input
if not files_list:
    raise Exception("No matching samples specified with --input.")

input_files = []
with open(files_list, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        # assumed sample layout, following the commented-out glob in the
        # original: <basedir>/<sample>/replicate*/fastq_raw/*gz
        input_files.extend(glob.glob(basedir + line + "/replicate*/fastq_raw/*gz"))
Example #5
import ruffus as rf
import ruffus.cmdline as cmdline
import pandas as pd
import numpy as np
import os
import sys
import errno
import yaml
# import logging
from trio import triodb
# import warnings
# warnings.filterwarnings("ignore", message=".*deprecation.*")

# Configuration and command line options
parser = cmdline.get_argparse(description='Pipeline using the TPC-H example.')
parser.add_argument("--config")
options = parser.parse_args()
lg, lm = cmdline.setup_logging(__name__, options.log_file, options.verbose)
# lg.setLevel(logging.INFO)
if options.config is None:
    print("No config supplied.")
    parser.print_help()
    sys.exit(1)
with open(options.config, 'r') as f:
    cfg = yaml.safe_load(f)
lg.info('pipeline:: ::config %s' % str(cfg))

SQL_PRINT_MAX = 1000

# =============================================================
#                    functions
# =============================================================
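# A sketch of a task in this style: parameters come from cfg and messages go
# through the synchronised (lg, lm) pair returned by setup_logging. The
# config key, output file, and task are hypothetical.
@rf.originate(cfg.get("seed_files", ["lineitem.csv"]))
def make_seed_files(output_file):
    with open(output_file, "w") as out:
        out.write("")  # placeholder; a real task would query triodb here
    with lm:
        lg.info("pipeline:: ::wrote %s" % output_file)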
Example #6
pipeline2.set_input(input=[pipeline1a, pipeline1b, pipeline1c])

import ruffus
import ruffus.cmdline as cmdline
import shutil

parser = cmdline.get_argparse(
    description='Demonstrates the new Ruffus syntax in version 2.6')

parser.add_argument('--cleanup',
                    "-C",
                    action="store_true",
                    help="Cleanup before and after.")

options = parser.parse_args()

#  standard python logger which can be synchronised across concurrent Ruffus tasks
logger, logger_mutex = cmdline.setup_logging(__file__, options.log_file,
                                             options.verbose)

logger.debug("\tRuffus Version = " + ruffus.__version__)

if options.cleanup:
    try:
        shutil.rmtree(tempdir)
    except OSError:
        pass

correct = False
# if we are not printing but running
if  not options.just_print and \
    not options.flowchart and \
    not options.touch_files_only:
    # assumed continuation (the snippet is truncated here): execute the pipeline
    cmdline.run(options, logger=logger)
Example #7
### start of pipeline

# setting the main working directory
home_dir = os.getcwd()
root_dir = "%s/%s" % (home_dir, args.name)
try:
    os.mkdir(root_dir)
except FileExistsError:
    pass

### setting up logging
if not args.log_file:
    logfile = "%s/%s" % (root_dir, "log_file.txt")
else:
    logfile = "%s/%s" % (root_dir, args.log_file)
logger, logger_mutex = cmdline.setup_logging("ChemModSeqPipeline", logfile, 10)

### starting the pipeline commands

pipeline = Pipeline(name="ChemModSeqPipeline")

args.forwardreads = [os.path.abspath(i) for i in args.forwardreads]
args.reversereads = [os.path.abspath(i) for i in args.reversereads]
startingfiles = list(zip(args.forwardreads, args.reversereads))  # list() so the pairs can be reused under Python 3
# print(startingfiles)

if args.adapter:
    pipeline.transform(
        task_func=runFlexBar,
        input=startingfiles,
        filter=formatter(r"^.+/([^/]+)_1\.(san)?fastq$",
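# Hypothetical completion of the truncated call above, for a paired-end
# layout: formatter takes one regex per input file, and {path[N]}/{1[N]}
# in the output refer to the directory and first regex group of input N.
# The output names are assumptions.
if args.adapter:
    pipeline.transform(
        task_func=runFlexBar,
        input=startingfiles,
        filter=formatter(r"^.+/([^/]+)_1\.(san)?fastq$",
                         r"^.+/([^/]+)_2\.(san)?fastq$"),
        output=["{path[0]}/{1[0]}_trimmed_1.fastq",
                "{path[1]}/{1[1]}_trimmed_2.fastq"])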
Example #8
    def __init__(self, prog_name, log_file, verbosity):
        proxy, mutex = cmdline.setup_logging(__name__, log_file, verbosity)
        self.proxy = proxy
        self.mutex = mutex
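# Fleshed out, the wrapper might look like this. The class name and log()
# helper are hypothetical; only the __init__ body comes from the snippet
# (which passes __name__ rather than prog_name to setup_logging).
import ruffus.cmdline as cmdline

class SynchronisedLogger(object):
    def __init__(self, prog_name, log_file, verbosity):
        proxy, mutex = cmdline.setup_logging(prog_name, log_file, verbosity)
        self.proxy = proxy
        self.mutex = mutex

    def log(self, message):
        # serialise writes across concurrent Ruffus jobs
        with self.mutex:
            self.proxy.info(message)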