def run(self, run_id, inputs):
    """Run the CNVnator pipeline and convert its calls into a VCF file.

    Six shell commands are executed in order (tree extraction, histogram,
    statistics, partitioning, calling and the cnvnator-to-VCF conversion).
    Afterwards the per-run scratch directory is deleted with ``rm -rf``.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    # [1a] validate the inputs and derive the output names
    if not all(key in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for genome_strip.py")
        return None

    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        # several BAMs are reported under one shared name
        stripped_name = 'joint'

    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    sub_dir = stage_prefix + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    root_base = sub_dir + 'temp'
    out_names = {
        '.root': root_base,
        '.calls': root_base + out_exts[1],
        '.vcf': stage_prefix + out_exts[2],
    }

    # [2a] build the command lines; '>' is interpreted by the shell
    cnvnator = self.tools['CNVNATOR']
    cnv2vcf = self.tools['CNVNATOR2VCF']
    bin_size = str(150)
    tree_root = out_names['.root'] + '.tree.root'
    his_root = out_names['.root'] + '.his.root'
    pipeline = [
        [cnvnator, '-unique', '-root', tree_root, '-tree'] + bam_files,
        [cnvnator, '-root', tree_root, '-outroot', his_root,
         '-his', bin_size, '-d', sub_dir],
        [cnvnator, '-root', his_root, '-stat', bin_size],
        [cnvnator, '-root', his_root, '-partition', bin_size],
        [cnvnator, '-root', his_root, '-call', bin_size,
         '>', out_names['.calls']],
        ['perl', cnv2vcf, out_names['.calls'], '>', out_names['.vcf']],
    ]

    # [3a] execute every step, stopping at the first failure
    banner = "<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n"
    output, err = '', {}
    try:
        for step in pipeline:
            print(banner)
            command_line = " ".join(step)
            print(command_line)
            output += subprocess.check_output(
                command_line, stderr=subprocess.STDOUT, shell=True) + '\n'
        print(banner)
        cleanup = 'rm -rf %s' % sub_dir
        print(cleanup)
        output += subprocess.check_output(
            cleanup, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as E:
        # a command exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # the command could not be started at all
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<cnvnator sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<cnvnator failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run BreakSeq on the input BAMs and unpack its gzipped VCF.

    BreakSeq is run inside a per-run work directory. If it produced
    ``breakseq.vcf.gz`` there, the file is decompressed to the stage's VCF
    path. The work directory is then removed.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'. Optional keys: 'genome' ('hg19' or 'hg38'
            selects a breakpoint library) and 'threads'.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    import shutil  # local import: only this method needs it

    # [1a] validate the inputs and derive the output names
    if any(key not in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for breakseq")
        return None

    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        stripped_name = 'joint'
    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    out_names = {'.vcf': stage_prefix + out_exts[0]}

    # [2a] build the command line; BreakSeq works inside sub_dir
    sub_dir = stage_prefix + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # 'genome' is optional: without it no breakpoint library is passed
    genome = inputs['genome'] if 'genome' in inputs else None
    gff = ''
    if genome == 'hg19':
        gff = self.files['BREAKSEQ-HG19']
    elif genome == 'hg38':
        gff = self.files['BREAKSEQ-HG38']

    python = sys.executable
    samtools = self.tools['SAMTOOLS']
    bwa = self.tools['BWA']
    breakseq = self.tools['BREAKSEQ']
    call = [
        python, breakseq, '--bwa', bwa, '--samtools', samtools,
        '--reference', inputs['.fa'], '--work', sub_dir,
        '--min_span', str(2), '--window', str(500),
        '--min_overlap', str(2), '--junction_length', str(1000), '--bams'
    ] + bam_files
    if 'threads' in inputs:
        call += ['--nthreads', str(inputs['threads'])]
    if gff != '':
        call += ['--bplib_gff', gff]

    # [3a] execute the command
    output, err = '', {}
    try:
        print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
        print(" ".join(call))
        # NOTE: env replaces the whole environment, so the child process
        # only sees PYTHONPATH.
        output += subprocess.check_output(
            ' '.join(call), stderr=subprocess.STDOUT, shell=True,
            env={'PYTHONPATH': self.tools['BREAKSEQ_PATH']})
        gz_path = sub_dir + 'breakseq.vcf.gz'
        if os.path.isfile(gz_path):
            # stream the decompressed VCF instead of loading it whole
            with gzip.open(gz_path, 'rb') as gz_in:
                with open(out_names['.vcf'], 'wb') as vcf_out:
                    shutil.copyfileobj(gz_in, vcf_out)
        # sub_dir is a directory: os.remove() cannot delete it
        shutil.rmtree(sub_dir)
    except subprocess.CalledProcessError as E:
        # BreakSeq exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # the command could not be started or a file operation failed
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    except Exception as E:
        print('vcf write os/file IO error')
        err['output'] = 'vcf write os/file IO error'
        err['message'] = 'vcf write os/file IO error'
        err['code'] = 1
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err != {}:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<breakseq sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<breakseq failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run the Hydra-Multi workflow and copy the resulting VCF out.

    The steps are: write a sample stub file, build the Hydra config,
    extract discordant alignments per sample, route, assemble, merge,
    cluster, compute frequencies, convert the final calls to VCF (creating
    the reference ``.2bit`` file first when it is missing), copy the VCF to
    the stage output path and delete the work directory.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    # [1a] validate the inputs and derive the output names
    if any(key not in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for hydra")
        return None

    bam_files = inputs['.bam']
    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        stripped_name = 'joint'
    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    sub_dir = stage_prefix + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)
    out_names = {'.vcf': stage_prefix + out_exts[0]}

    # [a] locations of the scripts that run as separate processes
    python = sys.executable
    hydra = self.tools['HYDRA_PATH'] + '/'
    hydra_to_vcf = self.tools['SVE_HOME'] + '/stages/utils/hydra_to_vcf.py'

    # environments handed to the child processes
    PATH = (hydra + 'bin:' + hydra + 'scripts:' +
            self.tools['SAMTOOLS-0.1.19_PATH'])
    if 'PATH' in os.environ:
        PATH += ':' + os.environ['PATH']
    path_env = {'PATH': PATH}
    lib_env = dict(path_env)
    # LD_LIBRARY_PATH is forwarded only when it is set, so an unset
    # variable no longer raises KeyError
    if 'LD_LIBRARY_PATH' in os.environ:
        lib_env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']

    # [0] stub file: one "<sample name>\t<bam path>" line per BAM
    # NOTE(review): the stub is written to the current working directory,
    # not to sub_dir - confirm this is intended.
    sample_names = ['sample%s' % i for i in range(len(bam_files))]
    bams = 'bam.stub'
    with open(bams, 'w') as f:
        f.write('\n'.join(
            name + '\t' + bam for name, bam in zip(sample_names, bam_files)))

    # [1] make a config file
    cfg = sub_dir + 'bam.stub.config'
    make_cfg = [
        python, hydra + 'scripts/make_hydra_config.py', '-i', bams,
        '-s', str(int(1E5)), '-n', str(16), '>', cfg
    ]
    # [2] extract discordant alignments (sample name appended per call)
    extract = [
        python, hydra + 'scripts/extract_discordants.py', '-c', cfg, '-d'
    ]
    # [3] run hydra router
    routed_bams = sub_dir + 'bam.routed'
    route = [
        hydra + 'bin/hydra-router', '-config', cfg,
        '-routedList', routed_bams
    ]
    # [4] assemble SV breakpoint clusters
    assemble = [
        hydra + 'scripts/assemble-routed-files.sh', cfg, routed_bams,
        str(1), str(60)
    ]
    # [5] merge SV assembly files found in the current directory
    asm = sub_dir + 'all.assembled'
    merge = [hydra + 'scripts/combine-assembled-files.sh', '.', asm]
    # [6] finalize SV breakpoints
    svs = sub_dir + 'all-sv.calls'
    cluster = [
        python, hydra + 'scripts/forceOneClusterPerPairMem.py',
        '-i', asm, '-o', svs
    ]
    # [7] annotate SV breakpoints on samples
    freq_name = svs + '.freq'
    freqs = [
        python, hydra + 'scripts/frequency.py', '-c', cfg,
        '-f', svs + '.final', '-d', svs + '.detail', '>', freq_name
    ]
    # [8] footprint-to-breakpoint conversion; only printed, never executed
    final_name = svs + '.final'
    vcf_name = svs + '.vcf'
    bkpts_name = svs + '.bkpts'
    bkpts = [
        'grep', '-v', '"#"', freq_name, '|', python,
        hydra + 'scripts/hydraToBreakpoint.py', '-i', 'stdin',
        '>', bkpts_name
    ]
    # [9] convert to VCF; needs a .2bit file next to the reference
    fasta_2bit = inputs['.fa'] + '.2bit'
    bkpt2vcf = [python, hydra_to_vcf, final_name, fasta_2bit]
    # [10] copy out and clean up
    copy = ['cp', vcf_name, out_names['.vcf']]
    clean = ['rm', '-rf', sub_dir]

    # [3a] execute the commands
    banner = "<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n"
    log, err = [], {}

    def run_shell(args, env=None):
        # Run one command through the shell and keep its combined output.
        log.append(subprocess.check_output(
            ' '.join(args), stderr=subprocess.STDOUT, shell=True,
            env=env) + '\n')

    try:
        print(banner)
        print('making the hydra configuration')
        print(' '.join(make_cfg))
        run_shell(make_cfg, path_env)
        for name in sample_names:
            print('extracting discordants for %s' % name)
            print(' '.join(extract + [name]))
            run_shell(extract + [name], path_env)
        print(banner)
        print('routing all samples into hydra router')
        print(' '.join(route))
        run_shell(route, lib_env)
        print(banner)
        print('combining hydra assembly files')
        print(' '.join(assemble))
        run_shell(assemble, lib_env)
        print(banner)
        print('merging results')
        print(' '.join(merge))
        run_shell(merge, path_env)
        print(banner)
        print('starting hydra clustering')
        print(' '.join(cluster))
        run_shell(cluster, path_env)
        print(banner)
        print('computing hydra frequencies')
        print(' '.join(freqs))
        run_shell(freqs)
        print(banner)
        print('converting hydra to vcf format')
        print(' '.join(bkpt2vcf))
        if not os.path.isfile(fasta_2bit):
            run_shell([self.tools['FATO2BIT'], inputs['.fa'], fasta_2bit])
        run_shell(bkpt2vcf)
        print(banner)
        print('copying files and cleaning sub directory')
        run_shell(copy)
        run_shell(clean)
    except subprocess.CalledProcessError as E:
        # a command exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # a command could not be started
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    except Exception as E:
        print('vcf write os/file IO error')
        err['output'] = 'vcf write os/file IO error'
        err['message'] = 'vcf write os/file IO error'
        err['code'] = 1
    print('output:\n' + ''.join(log))
    print('vcf file %s exists=%s' % (out_names['.vcf'],
                                     os.path.exists(out_names['.vcf'])))
    # The breakpoint conversion is disabled; the log lines are kept so the
    # stage output stays the same.
    print('computing hydra breakpoints')
    print(' '.join(bkpts))
    print('all hydra stages completed')

    # [3b] success is decided by the content of the VCF, not by err
    if err != {}:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<hydra sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<hydra failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run lumpyexpress on the input BAMs and return the VCF it writes.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    import shutil  # local import: only this method needs it

    # [1a] validate the inputs and derive the output names
    if any(key not in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for lumpy")
        return None

    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        stripped_name = 'joint'
    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    out_names = {
        '.calls': stage_prefix + out_exts[0],
        '.vcf': stage_prefix + out_exts[1],
    }

    # [2a] build the command line; all BAMs go in as one comma-joined value
    lumpy = self.tools['LUMPY-EXPRESS']
    temp_dir = stage_prefix + '/temp'
    sv_call = [lumpy, '-B', ','.join(bam_files),
               '-T', temp_dir, '-P', '-m 2', '-o', out_names['.vcf']]

    # [3a] execute the command
    output, err = '', {}
    try:
        print(' '.join(sv_call))
        output += subprocess.check_output(
            ' '.join(sv_call), stderr=subprocess.STDOUT, shell=True) + '\n'
        # temp_dir is a directory, so os.remove() cannot delete it. The
        # isdir check covers the tool having already removed it
        # (presumably it does - TODO confirm).
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
    except subprocess.CalledProcessError as E:
        # lumpyexpress exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # the command could not be started or the cleanup failed
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    except Exception as E:
        print('vcf write os/file IO error')
        err['output'] = 'vcf write os/file IO error'
        err['message'] = 'vcf write os/file IO error'
        err['code'] = 1
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err != {}:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<lumpy sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<lumpy failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run Delly per SV type on every BAM and concatenate the results.

    Every BAM is called once per SV type. With more than one BAM the
    per-sample calls are merged into a site list per type, every sample is
    re-genotyped against that list and the genotyped files are merged.
    Finally the per-type BCFs are concatenated into one VCF with bcftools
    and the work directory is deleted.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'. Optional key 'genome' ('hg19' or 'hg38')
            selects an exclusion file.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    # [1a] validate the inputs and derive the output names
    if any(key not in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for delly")
        return None

    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    # The output is always named after the first BAM, also for multi-sample
    # runs: the original computed 'joint' and then overwrote it.
    stripped_name = self.strip_path(self.strip_in_ext(bam_files[0], '.bam'))
    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    out_names = {'.vcf': stage_prefix + out_exts[0]}

    # [2a] work directory and tool configuration
    sub_dir = stage_prefix + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)
    delly = self.tools['DELLY']
    # 'genome' is optional: without it no exclusion file is passed
    genome = inputs['genome'] if 'genome' in inputs else None
    excl = ''
    if genome == 'hg19':
        excl = self.files['DELLY-HG19']
    elif genome == 'hg38':
        excl = self.files['DELLY-HG38']

    sv_types = ['del', 'dup', 'inv', 'bnd', 'ins']
    # final per-type BCF handed to bcftools concat
    bcfs = dict((sv_type, sub_dir + sv_type + '.bcf') for sv_type in sv_types)

    def sample_bcf(index, sv_type, suffix='.bcf'):
        # Path of one sample's per-type file inside the work directory.
        return sub_dir + str(index) + '.' + sv_type + suffix

    def run_shell(args):
        # Print one command, run it through the shell, return its output.
        print(" ".join(args))
        return subprocess.check_output(
            ' '.join(args), stderr=subprocess.STDOUT, shell=True) + '\n'

    # [3a] execute the commands
    output, err = '', {}
    try:
        # Delly call: one run per sample and SV type
        count = 0
        for bam in bam_files:
            delly_call = [delly, 'call', '-g', inputs['.fa'], '-n']
            if excl != '':
                delly_call += ['-x', excl]
            for sv_type in sv_types:
                output += run_shell(delly_call + [
                    '-t', sv_type.upper(), '-o', sample_bcf(count, sv_type),
                    bam
                ])
            count += 1

        if count > 1:
            # Delly merge: one site list per SV type, kept in sub_dir
            sites = dict((sv_type, sub_dir + 'b_geno_' + sv_type + '.bcf')
                         for sv_type in sv_types)
            delly_merge = [
                delly, 'merge', '-r', str(0.5), '-b', str(500),
                '-n', str(1000000), '-m', str(500)
            ]
            for sv_type in sv_types:
                output += run_shell(
                    delly_merge +
                    ['-t', sv_type.upper(), '-o', sites[sv_type]] +
                    [sample_bcf(i, sv_type) for i in range(count)])

            # Delly re-genotype: every sample writes its own .geno.bcf
            for index, bam in enumerate(bam_files):
                delly_geno = [delly, 'call', '-g', inputs['.fa']]
                if excl != '':
                    delly_geno += ['-x', excl]
                for sv_type in sv_types:
                    output += run_shell(delly_geno + [
                        '-v', sites[sv_type], '-t', sv_type.upper(),
                        '-o', sample_bcf(index, sv_type, '.geno.bcf'), bam
                    ])

            # Merge the re-genotyped files into the final per-type BCF.
            # NOTE(review): Delly's documentation performs this step with
            # "bcftools merge -m id -O b"; confirm that "delly merge"
            # accepts these options for the installed version.
            delly_geno_merge = [delly, 'merge', '-m', 'id', '-O', 'b']
            for sv_type in sv_types:
                output += run_shell(
                    delly_geno_merge + ['-o', bcfs[sv_type]] +
                    [sample_bcf(i, sv_type, '.geno.bcf')
                     for i in range(count)])
        elif count == 1:
            # single sample: use the raw per-type calls directly
            for sv_type in sv_types:
                bcfs[sv_type] = sample_bcf(0, sv_type)
    except subprocess.CalledProcessError as E:
        # a command exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # a command could not be started
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    print('output:\n' + output)

    # concatenate all per-type calls into one VCF with bcftools
    bcftools = self.tools['BCFTOOLS']
    concat = [bcftools, 'concat', '-a', '-o', out_names['.vcf'], '-O', 'v']
    concat += [bcfs[sv_type] for sv_type in sv_types]
    try:
        print(' '.join(concat))
        output += subprocess.check_output(
            ' '.join(concat), stderr=subprocess.STDOUT, shell=True)
        print('rm -rf %s' % sub_dir)
        subprocess.check_output(
            'rm -rf %s' % sub_dir, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as E:
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err != {}:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<delly sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<delly failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run the cn.mops R script on the input BAMs and return its VCF.

    The stage parameters held in ``self.params`` are updated in place
    (mode, normal, cir_seg, window and optionally cores) and handed to the
    R script as ``name=value`` arguments.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'. Optional key 'threads' sets the 'cores'
            parameter.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    # [1a] validate the inputs and derive the output names
    if not all(key in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for genome_strip.py")
        return None

    out_ext = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        stripped_name = 'joint'
    vcf_path = out_dir + stripped_name + '_S' + str(self.stage_id) + out_ext[0]
    out_names = {'.vcf': vcf_path}

    # [2a] locate the tools and update the parameters for the R script
    rscript = self.tools['RSCRIPT']
    cnmops_r = self.tools['SVE_HOME'] + '/stages/utils/cnmops.R'

    defaults = self.params  # same object: the updates below persist
    sample_count = len(bam_files)
    if sample_count <= 1:
        mode = 3
    elif sample_count == 2:
        mode = 1
    else:
        mode = 0
    defaults['mode']['value'] = mode
    for name, value in (('normal', 3), ('cir_seg', True), ('window', 1000)):
        defaults[name]['value'] = value
    if 'threads' in inputs:
        defaults['cores']['value'] = inputs['threads']

    params = []
    for name in defaults:
        params.append(name + '=' + str(defaults[name]['value']))

    command = [
        rscript,
        cnmops_r,
        'ref_seq=' + inputs['.fa'],
        'in_bams=' + ','.join(bam_files),
        'out_vcf=' + out_names['.vcf'],
    ]
    command.extend(params)

    # [3a] execute the command
    output, err = '', {}
    try:
        print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
        command_line = ' '.join(command)
        print(command_line)
        output = subprocess.check_output(
            command_line, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as E:
        # the R script exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # the command could not be started
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<cnmops sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<cnmops failure>>>>>>>>>>>>>>>\n")
    return None
def run(self, run_id, inputs):
    """Run BreakDancer on the input BAMs and convert its calls to VCF.

    ``bam2cfg.pl`` writes a configuration file into a per-run work
    directory and ``breakdancer-max`` writes its calls from that
    configuration. The calls are then converted to VCF through the ``bd``
    helpers, and the calls file and the configuration file are deleted.

    Args:
        run_id: Identifier of the current run (not used by this method).
        inputs: Mapping that must contain '.fa', '.bam' (list of BAM paths)
            and 'out_dir'.

    Returns:
        The path of the VCF when GetCallCount reports at least one call for
        it, otherwise None. None is also returned when a required key is
        missing from ``inputs``.
    """
    # [1a] validate the inputs and derive the output names
    if not all(key in inputs for key in ('.fa', '.bam', 'out_dir')):
        print("ERROR: .fa, .bam, and out_dir are required for genome_strip.py")
        return None

    out_exts = self.split_out_exts()
    out_dir = inputs['out_dir'] + '/'
    bam_files = inputs['.bam']
    if len(bam_files) == 1:
        stripped_name = self.strip_path(
            self.strip_in_ext(bam_files[0], '.bam'))
    else:
        stripped_name = 'joint'

    # [2a] work directory for the configuration file
    stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
    sub_dir = stage_prefix + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)
    calls_path = stage_prefix + out_exts[0]
    vcf_path = stage_prefix + out_exts[1]
    out_names = {'.calls': calls_path, '.vcf': vcf_path}

    # environment for bam2cfg.pl
    PERL5LIB = self.tools['PERL_LIB_PATH'] + '/lib/perl5'
    if 'PERL5LIB' in os.environ:
        PERL5LIB = PERL5LIB + ':' + os.environ['PERL5LIB']
    PATH = self.tools['SAMTOOLS_PATH'] + ':' + os.environ['PATH']
    perl_env = {'PERL5LIB': PERL5LIB, 'PATH': PATH}

    cfg = sub_dir + "bd_confg.txt"
    breakdancer_home = self.tools['BREAKDANCER_PATH']
    config = breakdancer_home + '/perl/bam2cfg.pl'
    breakd = breakdancer_home + '/build/bin/breakdancer-max'
    # '>' is interpreted by the shell as an output redirection
    configure = ['perl', config, '-q', '30', '-n', '10000']
    configure = configure + bam_files + ['>', cfg]
    sv_call = [breakd, cfg, '>', out_names['.calls']]

    # [3a] execute the commands
    banner = "<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n"
    output, err = '', {}
    try:
        print(banner)
        print(' '.join(configure))
        output += subprocess.check_output(
            ' '.join(configure), stderr=subprocess.STDOUT, shell=True,
            env=perl_env) + '\n'
        print(banner)
        print(' '.join(sv_call))
        output += subprocess.check_output(
            ' '.join(sv_call), stderr=subprocess.STDOUT, shell=True) + '\n'

        # convert the BreakDancer table into a VCF
        table = bd.read_breakdancer(out_names['.calls'])
        header = bd.vcf_header(inputs['.fa'])
        records = bd.build_vcf(table)
        bd.write_vcf(out_names['.vcf'], header, records)

        # drop the intermediate files
        for leftover in (out_names['.calls'], cfg):
            os.remove(leftover)
    except subprocess.CalledProcessError as E:
        # a command exited with a non-zero status
        print('call error: ' + E.output)
        err['output'] = E.output
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.returncode))
        err['code'] = E.returncode
    except OSError as E:
        # a command could not be started or a file could not be removed
        print('os error: ' + E.strerror)
        err['output'] = E.strerror
        print('message: ' + E.message)
        err['message'] = E.message
        print('code: ' + str(E.errno))
        err['code'] = E.errno
    except Exception as E:
        print('vcf write os/file IO error')
        err['output'] = 'vcf write os/file IO error'
        err['message'] = 'vcf write os/file IO error'
        err['code'] = 1
    print('output:\n' + output)

    # [3b] success is decided by the content of the VCF, not by err
    if err:
        print(err)
    if GetCallCount(out_names['.vcf']) > 0:
        print("<<<<<<<<<<<<<breakdancer sucessfull>>>>>>>>>>>>>>>\n")
        return out_names['.vcf']
    print("<<<<<<<<<<<<<breakdancer failure>>>>>>>>>>>>>>>\n")
    return None