def solid(args):
    '''Create the SOLiD assembly call and write it to solid_assembly.sh.

    The read layout is taken from the first non-empty attribute among
    args.se, args.pe and args.mp (checked in that order).

    args -- namespace providing se, pe, mp, rf, n, ins_length,
            ins_length_sd, add_solid and sfile. pe/mp must hold four
            entries: entries 0 and 1 are passed positionally, entries 2 and 3
            via -f5/-f5qv (pe) or -r3/-r3qv (mp).

    Returns ['sh solid_assembly.sh'], the call that runs the written script.
    Raises ValueError if none of se, pe or mp is given.
    '''

    import mlst_modules
    paths = mlst_modules.setSystem()

    def paired_arg(tag, reads):
        '''Argument string for paired input; tag is the second-read option name'''
        return ' %s %s %i -%s %s -%sqv %s -ins_length %i -ins_length_sd %i -numcores %i ' % (
            reads[0], reads[1], args.rf, tag, reads[2], tag, reads[3],
            args.ins_length, args.ins_length_sd, args.n)

    cmd = '%sassemble.pl' % paths['solid_home']
    if args.se:
        arg = ' %s %i -numcores %i' % (' '.join(args.se), args.rf, args.n)
    elif args.pe:
        arg = paired_arg('f5', args.pe)
    elif args.mp:
        arg = paired_arg('r3', args.mp)
    else:
        raise ValueError('Input must be given by --se, --pe or --mp')

    # add extra commands
    if args.add_solid:
        arg = arg + ' ' + args.add_solid

    script_lines = [cmd + arg]

    ## write semaphore (the string 'None' means "no semaphore file")
    if args.sfile and args.sfile != 'None':
        script_lines.append('echo "done" > %s' % args.sfile)

    # write in bash script; `with` closes the file even if a write fails
    with open('solid_assembly.sh', 'w') as fh:
        fh.write('#!/bin/sh\n\n')
        for line in script_lines:
            fh.write(line + '\n')

    # return command (NB. add env. variable to run)
    return ['sh solid_assembly.sh']
def newbler_stats(args):
    '''Create the newbler statistics calls and write them to newbler_stats.sh.

    For each newbler fasta file under <args.outpath>/assembly/ a perl
    one-liner extracts the `length=` value of every header into a
    *.lengths file; an R script is then run on those files. The scaffold
    file is only processed when args.pe is set, otherwise 'NA' is passed to
    R in its place.

    args -- namespace providing outpath, pe and sfile.

    Returns ['sh newbler_stats.sh'], the call that runs the written script.
    '''

    import mlst_modules
    paths = mlst_modules.setSystem()

    assembly_path = '%s/assembly/' % args.outpath

    # Backslashes are doubled so that perl receives \d and \n literally
    # (a bare "\d" is an invalid escape sequence in a Python string).
    extract = '''perl -ne 'if ($_ =~ m/^>.+length=(\\d+)/) { print $1, "\\n"}' %s%s.fna > %s.lengths '''

    names = ['454AllContigs', '454LargeContigs']
    if args.pe:
        names.append('454Scaffolds')
        scaffold_lengths = '454Scaffolds.lengths'
    else:
        scaffold_lengths = 'NA'

    cmds = [extract % (assembly_path, name, name) for name in names]
    cmds.append('%sR --vanilla 454AllContigs.lengths 454LargeContigs.lengths %s assembly.stats.txt < %smlst_denovo_newbler_stats.R ' % (
        paths['R_home'], scaffold_lengths, paths['mlst_home']))

    ## write semaphore (the string 'None' means "no semaphore file")
    if args.sfile and args.sfile != 'None':
        cmds.append('echo "done" > %s' % args.sfile)

    # write in bash script; `with` closes the file even if a write fails
    with open('newbler_stats.sh', 'w') as fh:
        fh.write('#!/bin/sh\n\n')
        for line in cmds:
            fh.write(line + '\n')

    # return command (NB. add env. variable to run)
    return ['sh newbler_stats.sh']
def submit_xmsub(self, depends, logger):
    '''Submit every call in self.calls through xmsub and return the job ids.

    depends -- per-call dependency strings; depends[i] is only read when
               self.depend is true.
    logger  -- logger receiving each xmsub command line, or a false value
               to disable logging.

    Reads self.calls, self.runname, self.hold, self.env, self.depend,
    self.cpu, self.partition, self.queue and self.host. When self.host is
    set the command is submitted through self.ssh_submit, otherwise it is run
    locally. A failed submission is retried once after one minute; a second
    failure propagates to the caller.

    Returns the list of job ids in call order.
    '''

    import re
    import subprocess
    import time
    import os
    import mlst_modules

    home = os.getcwd()
    paths = mlst_modules.setSystem()

    # 'program infile > outfile' has to be passed to xmsub as -O instead of >.
    # The first group is greedy, so the split is made at the last '>'.
    redirect = re.compile(r'(^.+)>\s(.+)$')

    ids = []
    for i, call in enumerate(self.calls):
        stdout = '%s/log/%s%i.o' % (home, self.runname, i)
        stderr = '%s/log/%s%i.e' % (home, self.runname, i)

        match = redirect.search(call)
        if match:
            call = match.group(1)
            stdout = '%s/%s' % (home, match.group(2))

        # create xmsub commands
        cmd = paths['mlst_home'] + 'xmsub'

        # toggle if job should be on hold or env variable should be added
        if self.hold:
            cmd = '%s -h ' % cmd
        if self.env:
            cmd = cmd + ' -v %s' % self.env

        resources = self.cpu
        if self.depend:
            resources = '%s,depend=%s' % (resources, depends[i])
        xmsub = cmd + ' -d %s -l %s,partition=%s -O %s -E %s -r y -q %s -N %s -t %s' % (
            home, resources, self.partition, stdout, stderr, self.queue, self.runname, call)

        time.sleep(1)
        if logger:
            logger.info(xmsub)

        # submit on different host if that is given
        if self.host:
            try:
                job_id, ssh_err = self.ssh_submit(self.host, xmsub)
            except Exception as err:
                # report the actual failure (the original printed the path of
                # the stderr log file here), then retry once
                print(err)
                print('Job error, waiting 1m')
                time.sleep(60)
                job_id, ssh_err = self.ssh_submit(self.host, xmsub)
            ids.append(job_id)
        else:
            try:
                output = subprocess.check_output(xmsub, shell=True)
            except (subprocess.CalledProcessError, OSError):
                print('Job error, waiting 1m')
                time.sleep(60)
                output = subprocess.check_output(xmsub, shell=True)
            # the job id is taken from the second line of the xmsub output
            ids.append(output.split('\n')[1])
    return ids
def create_jobs(prog, args, sfile, logger):
    '''Create an xmsub command from prog and args and submit it.

    Every attribute of args is turned back into a command line option for
    prog, with these special cases:
      assembler            -- skipped
      value None           -- skipped
      wait                 -- replaced by '--sfile <sfile>'
      booleans             -- '--<key>' when true, nothing when false
      sample               -- always forwarded as '--sample None'
      add_solid/add_velveth/add_velvetg -- value wrapped in double quotes

    prog   -- program (with path) to run through xmsub
    args   -- parsed argument namespace; partition, q and assembler are also
              used for the xmsub options
    sfile  -- semaphore file name handed to prog
    logger -- logger receiving the final command line

    Raises ValueError for a value that is not a list, string, int or float.
    '''

    import subprocess
    import os
    import mlst_modules
    paths = mlst_modules.setSystem()

    # create commands
    msub = '%sxmsub -d %s -l nodes=1:ppn=1,mem=256mb,walltime=172800,partition=%s -q %s -r y -N run_%s -O run_%s.out -E run_%s.err -t' % (
        paths['mlst_home'], os.getcwd(), args.partition, args.q,
        args.assembler, args.assembler, args.assembler)
    cmd = [msub, prog]

    # free-text options that must survive the shell as one argument
    quoted_keys = ('add_solid', 'add_velveth', 'add_velvetg')

    # create parameters for assembler
    for key, value in vars(args).items():

        # special cases
        if key == 'assembler' or value is None:
            continue
        if key == 'wait':
            cmd.append('--sfile %s' % sfile)
            continue
        if isinstance(value, bool):
            if value:
                cmd.append('--%s' % key)
            continue
        if key == 'sample':
            cmd.append('--sample None')
            continue
        if key in quoted_keys:
            cmd.append('--%s "%s"' % (key, value))
            continue

        # key-value parameters
        cmd.append('--%s' % key)
        if isinstance(value, list):
            # str() so that lists of numbers can be forwarded as well
            cmd.append(' '.join(str(item) for item in value))
        elif isinstance(value, (str, int, float)):
            cmd.append('%s' % value)
        else:
            raise ValueError('%s, %s is a %s, should be either list, string, int or float' % (key, value, type(value)))

    # submit job
    # NOTE(review): the command is run through the shell; values in args are
    # not escaped, so they must come from a trusted source.
    cmd = ' '.join(cmd)
    logger.info(cmd)
    job = subprocess.check_output(cmd, shell=True)
    job = job.strip('\n')
    print('Jobs are spawned by: %s' % job)
def postprocess(args):
    '''Create the post-processing calls and write them to postprocess.sh.

    When more than one k-mer size was requested the script parses the
    stats.txt of every <outpath>_<k> assembly with R, accepts one assembly
    and cleans up. A semaphore line is appended when args.sfile is set.

    args -- namespace providing ksizes (lower limit, upper limit and an
            optional step; the step defaults to 2), outpath and sfile.

    Returns ['sh postprocess.sh'], the call that runs the written script.
    Raises ValueError if more than three ksizes are given.
    '''

    import mlst_modules
    paths = mlst_modules.setSystem()

    calls = []
    # NOTE(review): the accept and clean calls are assumed to belong to the
    # multi-k branch together with the parse call; indentation was lost in
    # the source this was reconstructed from - confirm against the original.
    if len(args.ksizes) > 1:
        if len(args.ksizes) == 2:
            step = 2
        elif len(args.ksizes) == 3:
            step = args.ksizes[2]
        else:
            raise ValueError('ksizes must be one value giving ksize, two values giving lower and upper limit (step will be 2) or three values giving lower limit, upper limit and step')

        ## parse_assemblies: one "<stats file> <k>" pair per assembly
        # (the upper limit is exclusive because range() is used)
        stats = ['%s_%s/stats.txt %s' % (args.outpath, k, k)
                 for k in range(int(args.ksizes[0]), int(args.ksizes[1]), int(step))]
        calls.append('%sR --vanilla ' % paths['R_home'] + ' '.join(stats) +
                     ' < %smlst_denovo_velvet_parse.R' % (paths['mlst_home']))

        ## accept assembly
        calls.append('%smlst_denovo_velvet_accept.py %s' % (paths['mlst_home'], args.outpath))

        ## clean
        calls.append('%smlst_denovo_velvet_clean.py' % (paths['mlst_home']))

    ## write semaphore (the string 'None' means "no semaphore file")
    if args.sfile and args.sfile != 'None':
        calls.append('echo "done" > %s' % args.sfile)

    ## write in bash script; `with` closes the file even if a write fails
    with open('postprocess.sh', 'w') as fh:
        fh.write('#!/bin/sh\n\n')
        for call in calls:
            fh.write(call + '\n')

    return ['sh postprocess.sh']
def wait(self):
    '''Wait for the semaphore file to be created.

    Submits a small xmsub job ('echo done') with depend=<self.semaphore_ids
    joined by ':'> whose stdout is written to a randomly named file under
    semaphores/, then polls for that file every self.check_interval seconds.

    Reads self.file_prefix, self.semaphore_ids, self.home, self.queue,
    self.host, self.max_time and self.check_interval.

    Raises SystemExit if the file has not appeared within self.max_time
    seconds.
    '''

    from time import sleep
    import string
    import random
    import os
    import mlst_modules
    import subprocess

    paths = mlst_modules.setSystem()

    # add directory and set semaphore filename
    if not os.path.exists('semaphores/'):
        os.makedirs('semaphores/')

    rand = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(10))
    semaphore_file = 'semaphores/' + self.file_prefix + '.' + rand
    semaphore_file_err = 'log/' + self.file_prefix + '.' + rand + '.err'

    # NOTE(review): the original referenced a bare name `partition` that is
    # not defined in this method. The instance attribute is preferred (as
    # submit_xmsub does); a module-level `partition` is used as fallback so
    # that a previously working setup keeps working. Confirm which is meant.
    partition = getattr(self, 'partition', None)
    if partition is None:
        partition = globals().get('partition')
    if partition is None:
        raise AttributeError('no partition available for the semaphore job')

    # create job
    depends = ':'.join(self.semaphore_ids)
    xmsub = '%sxmsub -d %s -l ncpus=1,mem=10mb,walltime=180,depend=%s,partition=%s -O %s -q %s -N semaphores -E %s -r y -t echo done' % (
        paths['mlst_home'], self.home, depends, partition, semaphore_file, self.queue, semaphore_file_err)

    # submit job (the returned job id is not needed)
    if self.host:
        dummy_id, stderr = self.ssh_submit(self.host, xmsub)
        if stderr:
            print(stderr)
    else:
        subprocess.check_output(xmsub, shell=True)

    # check for file to appear
    cnt = self.max_time
    while cnt > 0:
        if os.path.isfile(semaphore_file):
            break
        cnt -= self.check_interval
        sleep(self.check_interval)
    if cnt <= 0:
        # the original formatted this message with an empty tuple, which
        # raised TypeError instead of the intended SystemExit
        raise SystemExit('%s did not finish in %is' % (semaphore_file, self.max_time))
def start_assembly(args, logger):
    '''Submit the newbler assembly and its statistics job, then release both.

    args   -- namespace providing partition, n, m and q plus everything
              needed by newbler() and newbler_stats()
    logger -- logger handed to the Moab jobs
    '''

    import mlst_modules
    from mlst_classes import Moab, Semaphore

    # result is not used in this function
    # NOTE(review): call kept in case setSystem() has side effects
    mlst_modules.setSystem()

    # set queueing: the 'uv' partition is asked for ncpus, others for nodes/ppn
    if args.partition == 'uv':
        resources = 'ncpus=%i,mem=%s,walltime=172800'
    else:
        resources = 'nodes=1:ppn=%i,mem=%s,walltime=172800'
    assembly_resources = resources % (args.n, args.m)
    stats_resources = resources % (1, '512mb')

    assembly_calls = newbler(args)
    stats_calls = newbler_stats(args)

    # set environment variable (add newbler binaries to bin):
    env_var = 'PATH=/panvol1/simon/bin/454/bin/'

    submit_host = 'cge-s2.cbs.dtu.dk'
    shared = dict(logfile=logger, queue=args.q, partition=args.partition, host=submit_host)

    # submit jobs; the statistics job depends one-to-one on the assembly job
    print("Submitting jobs")
    newbler_moab = Moab(assembly_calls, runname='run_mlst_newbler',
                        cpu=assembly_resources, env=env_var, **shared)
    newblerstats_moab = Moab(stats_calls, runname='run_mlst_newblerstats',
                             cpu=stats_resources, depend=True, depend_type='one2one',
                             depend_val=[1], depend_ids=newbler_moab.ids, **shared)

    # release jobs
    for job in (newbler_moab, newblerstats_moab):
        job.release(submit_host)
def start_assembly(args, logger):
    '''Submit the assembly of solid reads as a single job and release it.

    args   -- namespace providing partition, n, m and q plus everything
              needed by solid()
    logger -- logger handed to the Moab job
    '''

    import mlst_modules
    from mlst_classes import Moab, Semaphore

    paths = mlst_modules.setSystem()

    # set queueing: the 'uv' partition is asked for ncpus, others for nodes/ppn
    if args.partition == 'uv':
        resources = 'ncpus=%i,mem=%s,walltime=172800'
    else:
        resources = 'nodes=1:ppn=%i,mem=%s,walltime=172800'
    assembly_resources = resources % (args.n, args.m)

    assembly_calls = solid(args)

    # set environment variable (add solid binaries to bin):
    env_var = 'denovo2=%s' % paths['solid_home']

    submit_host = 'cge-s2.cbs.dtu.dk'

    # submit and release jobs
    print("Submitting jobs")
    solid_moab = Moab(assembly_calls, logfile=logger, runname='run_mlst_solid',
                      queue=args.q, cpu=assembly_resources, env=env_var,
                      partition=args.partition, host=submit_host)

    # release jobs
    solid_moab.release(host=submit_host)
def newbler(args):
    '''Create the newbler calls and write them to newbler.sh.

    Input files are first made readable for newbler (see convert_fastq),
    then a project is created in args.outpath, every single-end file is
    added as a shotgun run, every paired-end file as library PE<index>, and
    the project is run on args.n cpus.

    args -- namespace providing se, pe, n and outpath. args.se and args.pe
            are REPLACED by the lists of prepared file names (empty lists
            when no input of that kind was given).

    Returns ['sh newbler.sh'], the call that runs the written script.
    '''

    # os is needed by convert_fastq; imported here so that the function does
    # not rely on a module-level import
    import os
    import mlst_modules

    def convert_fastq(args, paths):
        '''If input is fastq convert to fasta+qual, if gzipped fasta unpack it.

        Returns (commands, single-end files, paired-end files).
        '''

        def prepare(files):
            '''Return (commands, file names to hand to newbler) for one input list'''
            cmds = []
            ready = []
            for f in files:
                ftype = mlst_modules.set_filetype(f)
                basename = os.path.split(f)[1]
                if ftype == 'fastq':
                    cmds.append('%smlst_fastq2fastaqual.py --i %s --p %s' % (paths['mlst_home'], f, basename))
                    ready.append(basename + '.fasta')
                elif ftype == 'fasta' and f.endswith('.gz'):
                    # unpack into the working directory and use that copy
                    fnew = os.path.splitext(basename)[0]
                    cmds.append('''%spigz -dc -p %s %s > %s''' % (paths['pigz_home'], args.n, f, fnew))
                    ready.append(fnew)

                    # look for qual file (dont add to path because newbler will pick it up)
                    stem = os.path.splitext(os.path.splitext(f)[0])[0]
                    possible_qual = stem + '.qual.gz'
                    if os.path.exists(possible_qual):
                        qnew = os.path.split(stem + '.qual')[1]
                        cmds.append('''%spigz -dc -p %s %s > %s''' % (paths['pigz_home'], args.n, possible_qual, qnew))
                else:
                    # uncompressed fasta and all other file types are used as given
                    ready.append(f)
            return cmds, ready

        # Both input kinds get identical treatment. The original paired-end
        # copy of this code appended uncompressed fasta files to the
        # single-end list and registered unpacked files under the path of the
        # compressed file instead of the name pigz writes to.
        se_cmds, se = prepare(args.se) if args.se else ([], [])
        pe_cmds, pe = prepare(args.pe) if args.pe else ([], [])
        return se_cmds + pe_cmds, se, pe

    paths = mlst_modules.setSystem()

    cmds, args.se, args.pe = convert_fastq(args, paths)

    cmds.append('newAssembly %s' % args.outpath)
    for f in args.se:
        cmds.append('addRun -lib shotgun %s %s' % (args.outpath, f))
    for i, f in enumerate(args.pe):
        cmds.append('addRun -p -lib PE%i %s %s' % (i, args.outpath, f))
    cmds.append('runProject -cpu %s %s' % (args.n, args.outpath))

    # write in bash script; `with` closes the file even if a write fails
    with open('newbler.sh', 'w') as fh:
        fh.write('#!/bin/sh\n\n')
        for line in cmds:
            fh.write(line + '\n')

    # return command (NB. add env. variable to run)
    return ['sh newbler.sh']
def start_assembly(args, logger):
    '''Submit trimming (optional), velvet and post-processing jobs and release them.

    args   -- namespace providing partition, n, m, q, ksizes and trim plus
              everything needed by the call-creating functions used below
    logger -- logger handed to the Moab jobs
    '''

    import os
    import mlst_modules
    from mlst_classes import Moab, Semaphore

    # result is not used in this function
    # NOTE(review): call kept in case setSystem() has side effects
    mlst_modules.setSystem()

    # set queueing: the 'uv' partition is asked for procs on shared memory
    if args.partition == 'uv':
        resources = 'procs=%i,mem=%s,walltime=172800,flags=sharedmem'
    else:
        resources = 'nodes=1:ppn=%i,mem=%s,walltime=172800'
    velvet_resources = resources % (args.n, args.m)
    post_resources = resources % (1, '512mb')
    trim_resources = resources % (2, args.m)

    # set kmersizes (if auto)
    if args.ksizes == ['auto']:
        args.ksizes = set_kmersizes(args)

    # trimming calls
    # NOTE(review): the directory creation is assumed to belong to the
    # trimming branch; indentation was lost in the source - confirm.
    if args.trim:
        illuminatrim_calls = illumina_trim(args, int(args.ksizes[0]), 15, 20, 15, False)
        if not os.path.exists('trimmed'):
            os.makedirs('trimmed')

    # velvet calls
    velvet_calls = create_velvet_calls(args)

    # velvet parse calls
    postprocess_calls = postprocess(args)

    # set environment variable:
    env_var = 'OMP_NUM_THREADS=%i' % int(args.n - 1)

    # submit and release jobs
    # NB: mlst_denovo_velvet is run from a compute node, it will then ssh to "host" and submit the jobs from there (cge-s2)
    submit_host = 'cge-s2.cbs.dtu.dk'
    shared = dict(logfile=logger, queue=args.q, partition=args.partition, host=submit_host)

    print("Submitting jobs")
    velvet_options = dict(runname='run_mlst_velvet', cpu=velvet_resources, env=env_var)
    if args.trim:
        # velvet has to wait for all trimming jobs
        illuminatrim_moab = Moab(illuminatrim_calls, runname='run_mlst_trim',
                                 cpu=trim_resources, **shared)
        velvet_options.update(depend=True, depend_type='all', depend_val=[1],
                              depend_ids=illuminatrim_moab.ids)
    velvet_options.update(shared)
    velvet_moab = Moab(velvet_calls, **velvet_options)

    # submit job for postprocessing
    postprocess_moab = Moab(postprocess_calls, runname='run_mlst_postprocess',
                            cpu=post_resources, depend=True, depend_type='conc',
                            depend_val=[len(velvet_calls)], depend_ids=velvet_moab.ids,
                            **shared)

    # release jobs
    print("Releasing jobs")
    if args.trim and illuminatrim_calls:
        illuminatrim_moab.release(host=submit_host)
    velvet_moab.release(submit_host)
    postprocess_moab.release(host=submit_host)
def _velvet_targets(args):
    '''Return (output directory, k) pairs for every requested k-mer size'''
    ksizes = args.ksizes
    if len(ksizes) == 1:
        return [(args.outpath, ksizes[0])]
    if 2 <= len(ksizes) <= 3:
        step = ksizes[2] if len(ksizes) == 3 else 2
        # the upper limit is exclusive because range() is used
        return [('%s_%s' % (args.outpath, k), k)
                for k in range(int(ksizes[0]), int(ksizes[1]), int(step))]
    raise ValueError('ksizes must be one value giving ksize, two values giving lower and upper limit (step will be 2) or three values giving lower limit, upper limit and step')


def _velveth_read_args(args):
    '''Return the velveth read-category arguments (format and files) as one string'''

    def unpaired(flag, reads):
        return ' -%s -%s %s' % (flag, reads[0], ' '.join(reads[1:]))

    def paired(flag, reads):
        # format + one file: interleaved; format + two files: -separate
        if len(reads) == 2:
            return ' -%s -%s %s' % (flag, reads[0], reads[1])
        if len(reads) == 3:
            return ' -%s -separate -%s %s %s' % (flag, reads[0], reads[1], reads[2])
        # any other length adds nothing, as before
        return ''

    layout = (('short', unpaired), ('short2', unpaired),
              ('shortPaired', paired), ('shortPaired2', paired),
              ('long', unpaired), ('longPaired', paired))
    arg = ''
    for flag, fmt in layout:
        reads = getattr(args, flag)
        if reads:
            arg = arg + fmt(flag, reads)
    return arg


def create_velvet_calls(args):
    '''Create velveth/velvetg calls and write one velvet<i>.sh per k-mer size.

    args.ksizes holds either one k, or a lower and upper limit (step 2), or
    lower limit, upper limit and step. With one k the assembly is written to
    args.outpath, otherwise to <args.outpath>_<k> for every k in the range.

    args -- namespace providing ksizes, outpath, the read categories (short,
            short2, shortPaired, shortPaired2, long, longPaired; each a list
            whose first entry is the file format), add_velveth,
            min_contig_lgth, cov_cutoff, exp_cov, ins_length and add_velvetg.

    Returns the list of 'sh velvet<i>.sh' calls, one per k-mer size.
    Raises ValueError if ksizes does not hold one, two or three values.

    Fixes: the -separate form of shortPaired2 used to be selected by the
    length of shortPaired instead of shortPaired2.
    '''

    import mlst_modules
    paths = mlst_modules.setSystem()

    targets = _velvet_targets(args)

    # VELVETH: everything but output directory and k is shared by all calls
    velveth_tail = _velveth_read_args(args)
    if args.add_velveth:
        velveth_tail = velveth_tail + ' %s' % args.add_velveth

    # VELVETG: cov_cutoff, exp_cov, ins_length, add_velvetg are shared as well
    velvetg_tail = ' -min_contig_lgth %i' % args.min_contig_lgth
    if args.cov_cutoff:
        velvetg_tail = velvetg_tail + ' -cov_cutoff %f' % args.cov_cutoff
    # exp_cov is unset when it is the string "None"; a real None is skipped
    # too instead of being passed on as '-exp_cov None'
    if args.exp_cov is not None and args.exp_cov != "None":
        velvetg_tail = velvetg_tail + ' -exp_cov %s' % args.exp_cov
    if args.ins_length:
        velvetg_tail = velvetg_tail + ' -ins_length %i' % args.ins_length
    if args.add_velvetg:
        velvetg_tail = velvetg_tail + ' %s' % args.add_velvetg

    # COMBINE IN SH-FILES #
    sh_calls = []
    for i, (outdir, k) in enumerate(targets):
        velveth = '%svelveth' % paths['velvet_home'] + ' %s %s -create_binary ' % (outdir, k) + velveth_tail
        velvetg = '%svelvetg %s' % (paths['velvet_home'], outdir) + velvetg_tail
        with open('velvet%i.sh' % i, 'w') as fh:
            fh.write('#!/bin/sh\n\n')
            fh.write(velveth + '\n')
            fh.write(velvetg + '\n')
        sh_calls.append('sh velvet%i.sh' % i)

    return sh_calls
def illumina_trim(args, min_length, min_baseq, min_avgq, min_adaptor_match, keep_n):
    '''Create trim calls for all fastq read categories and redirect args to the trimmed files.

    Handles args.short, args.short2, args.shortPaired and args.shortPaired2.
    Each is a list whose first entry is the file format followed by the read
    files. For every category that is trimmed, the file entries of the list
    are replaced IN PLACE by the names of the trimmed files (trimmed/<name>).
    An interleaved paired file (format + one file) is split into _1/_2
    output files, so the list holds two files afterwards.

    min_length, min_baseq, min_avgq, min_adaptor_match -- integers passed on
        to the trimmer
    keep_n -- when true '--keep_n' is added to every call

    Returns the list of trim calls; the directory 'trimmed' is created when
    there is at least one call.
    Raises ValueError if a paired category does not hold one or two files.

    Fixes: trimming of shortPaired2 used to be decided by the format of
    shortPaired.
    '''

    import os
    import mlst_modules
    paths = mlst_modules.setSystem()

    cmd = '%smlst_illumina_trim_h.py' % (paths['mlst_home'])
    thresholds = (min_length, min_baseq, min_avgq, min_adaptor_match)
    calls = []

    def trimmed_name(path, suffix='.trim.fq'):
        '''Name of the trimmed copy of path inside trimmed/'''
        return 'trimmed/' + os.path.split(path)[1] + suffix

    def add_call(inputs, outputs, padding=''):
        '''Append one trimmer call; padding keeps the historical trailing blank'''
        arg = ' --i %s --min_length %i --min_baseq %i --min_avgq %i --min_adaptor_match %i --o %s' % (
            (' '.join(inputs),) + thresholds + (' '.join(outputs),))
        arg = arg + padding
        if keep_n:
            arg = arg + ' --keep_n'
        calls.append(cmd + arg)

    def trim_single(reads):
        '''One call per file of an unpaired category'''
        if not reads or reads[0] not in ('fastq', 'fastq.gz'):
            return
        outfiles = []
        for f in reads[1:]:
            outfile = trimmed_name(f)
            outfiles.append(outfile)
            add_call([f], [outfile], padding=' ')
        reads[1:] = outfiles

    def trim_paired(reads, label):
        '''One call for both mates of a paired category'''
        if not reads or reads[0].find('fastq') == -1:
            return
        if len(reads) == 3:
            # separate files for the two mates
            inputs = [reads[1], reads[2]]
            outfiles = [trimmed_name(reads[1]), trimmed_name(reads[2])]
        elif len(reads) == 2:
            # one interleaved file, written as two files
            inputs = [reads[1]]
            outfiles = [trimmed_name(reads[1], '_1.trim.fq'), trimmed_name(reads[1], '_2.trim.fq')]
        else:
            raise ValueError('Length of input to %s is not correct' % label)
        add_call(inputs, outfiles)
        reads[1:] = outfiles

    trim_single(args.short)
    trim_single(args.short2)
    trim_paired(args.shortPaired, 'shortPaired')
    trim_paired(args.shortPaired2, 'shortPaired2')

    if calls and not os.path.exists('trimmed'):
        os.makedirs('trimmed')
    return calls
# Remaining options of the solid sub-parser.
# NOTE(review): the help text of --ins_length_sd repeats the one of
# --ins_length; it presumably means the standard deviation - confirm.
parser_solid.add_argument('--ins_length', help='estimate of mate/paired end insert length eg. (1200/170)', type=int, required=True)
parser_solid.add_argument('--ins_length_sd', help='estimate of mate/paired end insert length eg. (300/30)', type=int, required=True)
parser_solid.add_argument('--add_solid', help='additional parameters to solid assembler', default=None)

# Parse the command line; the commented lines are example invocations kept
# for manual testing.
args = parser.parse_args()
#args = parser.parse_args('velvet --shortPaired Kleb-10-213361_2_1_sequence.txt Kleb-10-213361_2_2_sequence.txt --ksizes 41 55 4 --trim'.split())
#args = parser.parse_args('velvet --shortPaired Kleb-10-213361_2.interleaved.fastq --trim --sample Kleb_auto'.split())
#args = parser.parse_args('velvet --short 110601_I238_FCB067HABXX_L3_ESCqslRAADIAAPEI-2_1.fq --ksizes 45 75 --sample E_coli_TY2482_illumina --trim'.split())
#args = parser.parse_args('velvet --shortPaired test_kleb_1.fq test_kleb_2.fq --ksizes 41 55 4 --sample test_kleb --cov_cutoff 8'.split())
#args = parser.parse_args('newbler --se life_unimuenster_sff/*.sff --sample test_newbler --wait'.split())
#args = parser.parse_args('solid --mp ecoli_600x_F3.csfasta ecoli_600x_F3.qual ecoli_600x_R3.csfasta ecoli_600x_R3.qual --rf 5000000 --ins_length 1300 --ins_length_sd 300 --m 7gb --sample solid_test --wait'.split())
#args = parser.parse_args('velvet --shortPaired test_kleb_1.fq test_kleb_2.fq --ksizes 41 55 4'.split())

# set pythonpath (overwrites any PYTHONPATH inherited from the caller)
os.environ['PYTHONPATH'] = '/panvol1/simon/lib/python/:/home/panfs/cbs/projects/cge/servers/MLST/assembly/'
paths = mlst_modules.setSystem()

# If working dir is given, create and move to working directory else run where program is invoked
if args.sample:
    if not os.path.exists(args.sample):
        os.makedirs(args.sample)
        #os.chmod(args.sample, 0777)
    os.chdir(args.sample)
else:
    pass

# create log dir (relative to the working directory chosen above)
if not os.path.exists('log'):
    os.makedirs('log')

# set logging