示例#1
0
    def run(self, run_id, inputs):
        """Run the CNVnator command chain on the input BAM(s) and emit a VCF.

        Six shell commands are executed in order (tree extraction, histogram,
        statistics, partitioning, calling, conversion to VCF with the
        CNVNATOR2VCF perl script); intermediate files live in a per-stage
        scratch directory that is removed once every command has succeeded.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: mapping that must contain '.fa', '.bam' (a list of BAM
                paths) and 'out_dir' (output directory, no trailing slash).

        Returns:
            The path of the produced .vcf file when GetCallCount reports at
            least one call in it, otherwise None. None is also returned
            straight away when a required input key is missing.
        """
        #[1a]get input names and output names setup
        if ('.fa' not in inputs) or ('.bam' not in inputs) or ('out_dir'
                                                               not in inputs):
            #the message used to name genome_strip.py (copy/paste leftover)
            print("ERROR: .fa, .bam, and out_dir are required for cnvnator")
            return None

        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        #a single BAM names the outputs after itself, several share 'joint'
        if len(inputs['.bam']) == 1:
            stripped_name = self.strip_path(
                self.strip_in_ext(inputs['.bam'][0], '.bam'))
        else:
            stripped_name = 'joint'
        prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
        sub_dir = prefix + '/'  #scratch directory for intermediate files
        if not os.path.exists(sub_dir): os.makedirs(sub_dir)
        out_names = {
            '.root': sub_dir + 'temp',
            '.calls': sub_dir + 'temp' + out_exts[1],
            '.vcf': prefix + out_exts[2]
        }

        #[2a]build command args
        cnvnator = self.tools['CNVNATOR']
        cnv2vcf = self.tools['CNVNATOR2VCF']
        bin_size = str(150)
        tree_root = out_names['.root'] + '.tree.root'
        his_root = out_names['.root'] + '.his.root'

        extr = [cnvnator, '-unique', '-root', tree_root, '-tree'
                ] + inputs['.bam']
        #NOTE(review): '-d' is pointed at the scratch directory; confirm that
        #this is where CNVnator should look for its reference files
        hist = [
            cnvnator, '-root', tree_root, '-outroot', his_root, '-his',
            bin_size, '-d', sub_dir
        ]
        stats = [cnvnator, '-root', his_root, '-stat', bin_size]
        sig = [cnvnator, '-root', his_root, '-partition', bin_size]
        #'>' is a shell redirect: every command below runs with shell=True
        call = [
            cnvnator, '-root', his_root, '-call', bin_size, '>',
            out_names['.calls']
        ]
        conv = ['perl', cnv2vcf, out_names['.calls'], '>', out_names['.vcf']]

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            for step in [extr, hist, stats, sig, call, conv]:
                command = ' '.join(step)
                print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
                print(command)
                output += subprocess.check_output(
                    command, stderr=subprocess.STDOUT, shell=True) + '\n'
            #only reached when every step succeeded, so a failed run keeps
            #its scratch directory on disk
            cleanup = 'rm -rf %s' % sub_dir
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print(cleanup)
            output += subprocess.check_output(cleanup,
                                              stderr=subprocess.STDOUT,
                                              shell=True)
        #catch all errors that arise under normal call behavior
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.returncode))  #exit status of the command
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.errno))  #the error num
            err['code'] = E.errno
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err != {}:
            print(err)
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<cnvnator sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        else:
            print("<<<<<<<<<<<<<cnvnator failure>>>>>>>>>>>>>>>\n")
            return None
示例#2
0
文件: breakseq.py 项目: xtmgah/SVE
    def run(self, run_id, inputs):
        """Run BreakSeq on the input BAM(s) and unpack its gzipped VCF.

        BreakSeq is started through the current Python interpreter inside a
        per-stage scratch directory. When it leaves 'breakseq.vcf.gz' there,
        the file is decompressed into the stage's .vcf output, and the scratch
        directory is removed afterwards.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: mapping that must contain '.fa', '.bam' (a list of BAM
                paths) and 'out_dir'. Optional keys: 'genome' ('hg19' or
                'hg38' selects a breakpoint library file) and 'threads'.

        Returns:
            The path of the .vcf file when GetCallCount reports at least one
            call in it, otherwise None. None is also returned straight away
            when a required input key is missing.
        """
        import shutil  #local import: only needed to remove the scratch dir

        #[1a]get input names and output names setup
        if ('.fa' not in inputs) or ('.bam' not in inputs) or ('out_dir'
                                                               not in inputs):
            #the message used to name genome_strip.py (copy/paste leftover)
            print("ERROR: .fa, .bam, and out_dir are required for breakseq.py")
            return None

        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        #a single BAM names the outputs after itself, several share 'joint'
        if len(inputs['.bam']) == 1:
            stripped_name = self.strip_path(
                self.strip_in_ext(inputs['.bam'][0], '.bam'))
        else:
            stripped_name = 'joint'
        prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
        out_names = {'.vcf': prefix + out_exts[0]}

        #[2a]build command args
        #build temp directory to work in
        sub_dir = prefix + '/'
        if not os.path.exists(sub_dir): os.makedirs(sub_dir)

        #'genome' is not one of the validated keys, so tolerate its absence
        #instead of raising KeyError; no library file is passed in that case
        genome = inputs['genome'] if 'genome' in inputs else None
        gff = ''
        if genome == 'hg19': gff = self.files['BREAKSEQ-HG19']
        elif genome == 'hg38': gff = self.files['BREAKSEQ-HG38']

        python = sys.executable
        samtools = self.tools['SAMTOOLS']
        bwa = self.tools['BWA']
        breakseq = self.tools['BREAKSEQ']

        call = [
            python, breakseq, '--bwa', bwa, '--samtools', samtools,
            '--reference', inputs['.fa'], '--work', sub_dir, '--min_span',
            str(2), '--window',
            str(500), '--min_overlap',
            str(2), '--junction_length',
            str(1000), '--bams'
        ] + inputs['.bam']

        if 'threads' in inputs: call += ['--nthreads', str(inputs['threads'])]
        if gff != '': call += ['--bplib_gff', gff]

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print(" ".join(call))
            #NOTE(review): env replaces the whole environment, so the child
            #only sees PYTHONPATH - confirm that dropping PATH is intended
            output += subprocess.check_output(
                ' '.join(call),
                stderr=subprocess.STDOUT,
                shell=True,
                env={'PYTHONPATH': self.tools['BREAKSEQ_PATH']})
            packed_vcf = sub_dir + 'breakseq.vcf.gz'
            if os.path.isfile(packed_vcf):
                with gzip.open(packed_vcf, 'rb') as in_file:
                    gz_in = in_file.read()
                with open(out_names['.vcf'], 'w') as f:
                    f.write(gz_in)
            #sub_dir is a directory: os.remove() always raised OSError here,
            #leaving the scratch files behind and recording a bogus error
            shutil.rmtree(sub_dir)
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.returncode))  #exit status of the command
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.errno))  #the error num
            err['code'] = E.errno
        except Exception:
            #anything else is reported as a failure to write the vcf
            print('vcf write os/file IO error')
            err['output'] = 'vcf write os/file IO error'
            err['message'] = 'vcf write os/file IO error'
            err['code'] = 1
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err != {}:
            print(err)
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<breakseq sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        else:
            print("<<<<<<<<<<<<<breakseq failure>>>>>>>>>>>>>>>\n")
            return None
示例#3
0
文件: hydra.py 项目: xtmgah/SVE
    def run(self, run_id, inputs):
        """Run the Hydra command chain on the input BAM(s) and copy out its VCF.

        The steps are: write a sample/BAM stub file, build a Hydra config,
        extract discordant alignments per sample, route, assemble, merge,
        cluster, compute frequencies, convert to VCF (creating the .2bit
        reference first when it is missing), then copy the VCF to the stage
        output and delete the scratch directory.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: mapping that must contain '.fa' (reference path), '.bam'
                (a list of BAM paths) and 'out_dir'.

        Returns:
            The path of the .vcf file when GetCallCount reports at least one
            call in it, otherwise None. None is also returned straight away
            when a required input key is missing.
        """
        #[1a]get input names and output names setup
        if ('.fa' not in inputs) or ('.bam' not in inputs) or ('out_dir'
                                                               not in inputs):
            #the message used to name genome_strip.py (copy/paste leftover)
            print("ERROR: .fa, .bam, and out_dir are required for hydra.py")
            return None

        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        #a single BAM names the outputs after itself, several share 'joint'
        if len(inputs['.bam']) == 1:
            stripped_name = self.strip_path(
                self.strip_in_ext(inputs['.bam'][0], '.bam'))
        else:
            stripped_name = 'joint'
        prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
        sub_dir = prefix + '/'  #scratch directory for intermediate files
        if not os.path.exists(sub_dir): os.makedirs(sub_dir)
        out_names = {'.vcf': prefix + out_exts[0]}

        #[a]use to run several sub scripts via command line/seperate process
        python = sys.executable
        hydra = self.tools['HYDRA_PATH'] + '/'
        hydra_to_vcf = self.tools['SVE_HOME'] + '/stages/utils/hydra_to_vcf.py'
        #ENV
        PATH = hydra+'bin:'+hydra+'scripts:'+\
               self.tools['SAMTOOLS-0.1.19_PATH']
        if 'PATH' in os.environ: PATH += ':' + os.environ['PATH']
        path_env = {'PATH': PATH}
        #LD_LIBRARY_PATH is forwarded only when it is set; reading it
        #unconditionally raised KeyError on hosts that do not define it
        lib_env = dict(path_env)
        if 'LD_LIBRARY_PATH' in os.environ:
            lib_env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']

        #[0] stub file generation
        #kept inside the scratch directory so it is cleaned up with it and
        #does not collide with other runs started from the same directory
        bams = sub_dir + 'bam.stub'
        samples = ['sample%s' % i for i in range(len(inputs['.bam']))]
        bam_names = '\n'.join(
            [name + '\t' + bam for name, bam in zip(samples, inputs['.bam'])])
        with open(bams, 'w') as f:
            f.write(bam_names)  #one "<sample>\t<bam path>" line per input

        #[1] make a config file
        cfg = sub_dir + 'bam.stub.config'
        #python scripts/make_hydra_config.py -i config.stub.txt > config.hydra.txt
        make_cfg = [
            python, hydra + 'scripts/make_hydra_config.py', '-i', bams, '-s',
            str(int(1E5)), '-n',
            str(16), '>', cfg
        ]

        #[2] extract discordant alignments for each sample .bam file
        #python scripts/extract_discordants.py -c config.hydra.txt -d <sample_name>
        extract = [
            python, hydra + 'scripts/extract_discordants.py', '-c', cfg, '-d'
        ]

        #[3] run hydra router
        #hydra-router -config config.hydra.txt -routedList routed-files.txt
        routed_bams = sub_dir + 'bam.routed'
        route = [
            hydra + 'bin/hydra-router', '-config', cfg, '-routedList',
            routed_bams
        ]

        #[4] assemble SV breakpoint clusters
        #assemble-routed-files.sh <config file> <routed file list file> <number of processes> <punt parameter>
        #punt should be 5x the average read depth over all samples
        assemble_command = hydra + 'scripts/assemble-routed-files.sh'
        assemble = [assemble_command, cfg, routed_bams, str(1), str(60)]

        #[5] merge SV assembly files
        #sh scripts/combine-assembled-files.sh /full/path/to/assembled/files/ all.assembled
        asm = sub_dir + 'all.assembled'
        merge_command = hydra + 'scripts/combine-assembled-files.sh'
        merge = [merge_command, '.', asm]

        #[6] finalize SV breakpoints
        #scripts/forceOneClusterPerPairMem.py -i all.assembled -o all.sv-calls
        svs = sub_dir + 'all-sv.calls'
        cluster = [
            python, hydra + 'scripts/forceOneClusterPerPairMem.py', '-i', asm,
            '-o', svs
        ]

        #[7] annotate SV breakpoints on samples
        #scripts/frequency.py -f all.sv-calls.final -d all.sv-calls.detail > all.sv-calls.freq
        freq_name = svs + '.freq'
        freqs = [
            python, hydra + 'scripts/frequency.py', '-c', cfg, '-f',
            svs + '.final', '-d', svs + '.detail', '>', freq_name
        ]

        #[8] change footprint intervals into breakpoint intervals
        final_name = svs + '.final'  #not sure if this is correct in general case...
        vcf_name = svs + '.vcf'
        bkpts_name = svs + '.bkpts'
        #this command is only printed further down, it is never executed
        bkpts = [
            'grep', '-v', '"#"', freq_name, '|', python,
            hydra + 'scripts/hydraToBreakpoint.py', '-i', 'stdin', '>',
            bkpts_name
        ]

        #[9] convert to VCF using the utils/hydra_to_vcf.py tool
        fasta_2bit = inputs['.fa'] + '.2bit'
        bkpt2vcf = [python, hydra_to_vcf, final_name, fasta_2bit]

        #[10] Copy out and Clean up files
        copy = ['cp', vcf_name, out_names['.vcf']]
        clean = ['rm', '-rf', sub_dir]

        def shell(cmd, env=None):
            #run one command line through the shell, stderr folded into the
            #returned text; env=None inherits the current environment
            return subprocess.check_output(' '.join(cmd),
                                           stderr=subprocess.STDOUT,
                                           shell=True,
                                           env=env) + '\n'

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('making the hydra configuration')
            print(' '.join(make_cfg))
            output += shell(make_cfg, path_env)

            for k in samples:
                print('extracting discordants for %s' % k)
                print(' '.join(extract + [k]))
                output += shell(extract + [k], path_env)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('routing all samples into hydra router')
            print(' '.join(route))
            output += shell(route, lib_env)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('combining hydra assembly files')
            print(' '.join(assemble))
            output += shell(assemble, lib_env)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('merging results')
            print(' '.join(merge))
            output += shell(merge, path_env)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('starting hydra clustering')
            print(' '.join(cluster))
            output += shell(cluster, path_env)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('computing hydra frequencies')
            print(' '.join(freqs))
            output += shell(freqs)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('converting hydra to vcf format')
            print(' '.join(bkpt2vcf))
            #the converter needs a .2bit reference; build it when missing
            if not os.path.isfile(fasta_2bit):
                output += shell(
                    [self.tools['FATO2BIT'], inputs['.fa'], fasta_2bit])
            output += shell(bkpt2vcf)

            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print('copying files and cleaning sub directory')
            output += shell(copy)
            output += shell(clean)
        #catch all errors that arise under normal call behavior
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.returncode))  #exit status of the command
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.errno))  #the error num
            err['code'] = E.errno
        except Exception:
            #anything else is reported as a failure to write the vcf
            print('vcf write os/file IO error')
            err['output'] = 'vcf write os/file IO error'
            err['message'] = 'vcf write os/file IO error'
            err['code'] = 1
        print('output:\n' + output)

        print('vcf file %s exists=%s' %
              (out_names['.vcf'], os.path.exists(out_names['.vcf'])))
        #the breakpoint conversion is disabled: its command line is still
        #logged, but nothing runs, so the former try/except around these
        #prints could never fire and has been dropped
        print('computing hydra breakpoints')
        print(' '.join(bkpts))
        print('all hydra stages completed')

        #[3b]check results--------------------------------------------------
        if err != {}:
            print(err)
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<hydra sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        else:
            print("<<<<<<<<<<<<<hydra failure>>>>>>>>>>>>>>>\n")
            return None
示例#4
0
文件: lumpy.py 项目: xtmgah/SVE
    def run(self, run_id, inputs):
        """Run the LUMPY-EXPRESS tool on the input BAM(s) and produce a VCF.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: mapping that must contain '.fa', '.bam' (a list of BAM
                paths, passed to the tool as one comma-separated argument)
                and 'out_dir'.

        Returns:
            The path of the .vcf file when GetCallCount reports at least one
            call in it, otherwise None. None is also returned straight away
            when a required input key is missing.
        """
        import shutil  #local import: only needed to remove the temp dir

        #[1a]get input names and output names setup
        if ('.fa' not in inputs) or ('.bam' not in inputs) or ('out_dir'
                                                               not in inputs):
            #the message used to name genome_strip.py (copy/paste leftover)
            print("ERROR: .fa, .bam, and out_dir are required for lumpy.py")
            return None

        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        #a single BAM names the outputs after itself, several share 'joint'
        if len(inputs['.bam']) == 1:
            stripped_name = self.strip_path(
                self.strip_in_ext(inputs['.bam'][0], '.bam'))
        else:
            stripped_name = 'joint'
        prefix = out_dir + stripped_name + '_S' + str(self.stage_id)
        out_names = {
            '.calls': prefix + out_exts[0],
            '.vcf': prefix + out_exts[1]
        }

        #[2a]build command args
        lumpy = self.tools['LUMPY-EXPRESS']
        temp_dir = prefix + '/temp'
        sv_call = [lumpy, '-B'] + [','.join(inputs['.bam'])] + [
            '-T', temp_dir, '-P', '-m 2', '-o', out_names['.vcf']
        ]  #more work on params

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            print(' '.join(sv_call))
            output += subprocess.check_output(
                ' '.join(sv_call), stderr=subprocess.STDOUT, shell=True) + '\n'
            #temp_dir is a directory, so os.remove() raised OSError after a
            #successful run; remove the tree, and only when it is still there
            if os.path.isdir(temp_dir):
                shutil.rmtree(temp_dir)
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.returncode))  #exit status of the command
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)  #usually empty
            err['message'] = E.message
            print('code: ' + str(E.errno))  #the error num
            err['code'] = E.errno
        except Exception:
            #anything else is reported as a failure to write the vcf
            print('vcf write os/file IO error')
            err['output'] = 'vcf write os/file IO error'
            err['message'] = 'vcf write os/file IO error'
            err['code'] = 1
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err != {}:
            print(err)
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<lumpy sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        else:
            print("<<<<<<<<<<<<<lumpy failure>>>>>>>>>>>>>>>\n")
            return None
示例#5
0
    def run(self, run_id, inputs):
        #workflow is to run through the stage correctly and then check for error handles

        #[1a]get input names and output names setup
        if ('.fa' not in inputs) or ('.bam' not in inputs) or ('out_dir'
                                                               not in inputs):
            print "ERROR: .fa, .bam, and out_dir are required for genome_strip.py"
            return None
        #will have to figure out output file name handling
        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        stripped_name = ''
        if len(inputs['.bam']) == 1:
            stripped_name = self.strip_path(
                self.strip_in_ext(inputs['.bam'][0], '.bam'))
        else:
            stripped_name = 'joint'
        stripped_name = self.strip_path(
            self.strip_in_ext(inputs['.bam'][0], '.bam'))
        out_names = {
            '.vcf':
            out_dir + stripped_name + '_S' + str(self.stage_id) + out_exts[0]
        }
        #[2a]build command args
        sub_dir = out_dir + stripped_name + '_S' + str(self.stage_id) + '/'
        if not os.path.exists(sub_dir): os.makedirs(sub_dir)

        #add load libs parameters for OPEN_MP to do || processing
        #will have to make some connection changes here
        delly = self.tools['DELLY']
        excl = ''
        if inputs['genome'] == 'hg19': excl = self.files['DELLY-HG19']
        elif inputs['genome'] == 'hg38': excl = self.files['DELLY-HG38']

        bcfs = {}
        type_list = ['del', 'dup', 'inv', 'bnd', 'ins']
        for type in type_list:
            bcfs[type] = sub_dir + type + '.bcf'

        #self.db_start(run_id,in_names['.bam'][0])
        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:  #should split these up for better robustness...
            count = 0
            # Delly call
            #if threads in inputs: p1 = mp.Pool(processes = inputs['threads'])
            #p1 = mp.Pool(processes = 1)
            for bam in inputs['.bam']:
                delly_call = [delly, 'call', '-g', inputs['.fa'], '-n']
                if excl != '': delly_call += ['-x', excl]
                for type in type_list:
                    type_call = delly_call + [
                        '-t',
                        type.upper(), '-o',
                        sub_dir + str(count) + '.' + type + '.bcf'
                    ] + [bam]
                    print(" ".join(type_call))
                    #p1.apply_async(call,args=(type_call, output),callback=collect_results)
                    output += subprocess.check_output(' '.join(type_call),
                                                      stderr=subprocess.STDOUT,
                                                      shell=True) + '\n'
                count += 1

            # Delly merge
            if count > 1:
                delly_merge = [
                    delly, 'merge', '-r',
                    str(0.5), '-b',
                    str(500), '-n',
                    str(1000000), '-m',
                    str(500)
                ]
                for type in type_list:
                    type_merge = delly_merge + [
                        '-t', type.upper(), '-o', 'b_geno_' + bcfs[type]
                    ]
                    for i in range(count):
                        type_merge += [
                            sub_dir + str(count) + '.' + type + '.bcf'
                        ]
                    output += subprocess.check_output(' '.join(type_merge),
                                                      stderr=subprocess.STDOUT,
                                                      shell=True) + '\n'

                # Delly renotype
                for bam in inputs['.bam']:
                    delly_geno = [delly, 'call', '-g', inputs['.fa']]
                    if excl != '': delly_geno += ['-x', excl]
                    for type in type_list:
                        type_geno = delly_geno + [
                            '-v', 'b_geno_' + bcfs[type], '-t',
                            type.upper(), '-o',
                            sub_dir + str(count) + '.' + type + '.geno.bcf'
                        ] + [bam]
                        print(" ".join(type_geno))
                        output += subprocess.check_output(
                            ' '.join(type_geno),
                            stderr=subprocess.STDOUT,
                            shell=True) + '\n'

                # Merge regeno bcf
                delly_geno_merge = [delly, 'merge', '-m', 'id', '-O', 'b']
                for type in type_list:
                    type_geno_merge += ['-o', bcfs[type]]
                    for i in range(count):
                        type_geno_merge += [
                            sub_dir + str(count) + '.' + type + '.geno.bcf'
                        ]
                    output += subprocess.check_output(
                        ' '.join(type_geno_merge),
                        stderr=subprocess.STDOUT,
                        shell=True) + '\n'

            elif count == 1:
                for type in type_list:
                    bcfs[type] = sub_dir + str(count - 1) + '.' + type + '.bcf'

        #catch all errors that arise under normal call behavior
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            #the python exception issues (shouldn't have any...
            print('message: ' + E.message)  #?? empty
            err['message'] = E.message
            #return codes used for failure....
            print('code: ' + str(E.returncode))  #return 1 for a fail in art?
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            #the python exception issues (shouldn't have any...
            print('message: ' + E.message)  #?? empty
            err['message'] = E.message
            #the error num
            print('code: ' + str(E.errno))
            err['code'] = E.errno
        print('output:\n' + output)

        #merge/filter all the calls into one .vcf with vcftools
        bcftools = self.tools['BCFTOOLS']
        concat = [bcftools, 'concat', '-a', '-o', out_names['.vcf'], '-O', 'v']
        for type in type_list:
            concat += [bcfs[type]]

        try:
            print(' '.join(concat))
            output += subprocess.check_output(' '.join(concat),
                                              stderr=subprocess.STDOUT,
                                              shell=True)
            print('rm -rf %s' % sub_dir)
            subprocess.check_output('rm -rf %s' % sub_dir,
                                    stderr=subprocess.STDOUT,
                                    shell=True)
        #catch all errors that arise under normal call behavior
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            #the python exception issues (shouldn't have any...
            print('message: ' + E.message)  #?? empty
            err['message'] = E.message
            #return codes used for failure....
            print('code: ' + str(E.returncode))  #return 1 for a fail in art?
            err['code'] = E.returncode
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            #the python exception issues (shouldn't have any...
            print('message: ' + E.message)  #?? empty
            err['message'] = E.message
            #the error num
            print('code: ' + str(E.errno))
            err['code'] = E.errno
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err != {}:
            print err
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<delly sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']  #return a list of names
        else:
            print("<<<<<<<<<<<<<delly failure>>>>>>>>>>>>>>>\n")
            return None
示例#6
0
文件: cnmops.py 项目: xtmgah/SVE
    def run(self, run_id, inputs):
        """Run the cn.MOPS R script on the input BAM file(s) and return the VCF path.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: dict that must contain '.fa' (concatenated into the
                command line as a string), '.bam' (list of BAM paths) and
                'out_dir'. An optional 'threads' entry overrides the 'cores'
                parameter passed to the R script.

        Returns:
            The path of the output .vcf when GetCallCount reports more than
            zero calls for it; None otherwise, including when a required
            input key is missing.
        """
        #[1a]get input names and output names setup
        required = ('.fa', '.bam', 'out_dir')
        if any(key not in inputs for key in required):
            print("ERROR: .fa, .bam, and out_dir are required for cnmops.py")
            return None

        out_ext = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        bams = inputs['.bam']
        # a single bam lends its base name to the output; otherwise 'joint' is used
        if len(bams) == 1:
            stripped_name = self.strip_path(self.strip_in_ext(bams[0], '.bam'))
        else:
            stripped_name = 'joint'
        out_names = {
            '.vcf':
            out_dir + stripped_name + '_S' + str(self.stage_id) + out_ext[0]
        }

        #[2a]build command args
        rscript = self.tools['RSCRIPT']
        cnmops_r = self.tools['SVE_HOME'] + '/stages/utils/cnmops.R'

        # NOTE: `defaults` is the same object as self.params, so (assuming it is
        # a plain dict) the overrides below stay on the instance after this call.
        defaults = self.params
        n_bams = len(bams)
        if n_bams <= 1:
            defaults['mode']['value'] = 3
        elif n_bams == 2:
            defaults['mode']['value'] = 1
        else:
            defaults['mode']['value'] = 0
        defaults['normal']['value'] = 3
        defaults['cir_seg']['value'] = True
        defaults['window']['value'] = 1000
        if 'threads' in inputs:
            defaults['cores']['value'] = inputs['threads']

        # every parameter is handed to the R script as a key=value token
        params = [k + '=' + str(defaults[k]['value']) for k in defaults]

        #NOTE (original author): the cn.mops ref=x string is off and needs to
        #be set up for chr1,chr2,chr3...
        command = [
            rscript,
            cnmops_r,
            'ref_seq=' + inputs['.fa'],
            'in_bams=' + ','.join(bams),
            'out_vcf=' + out_names['.vcf'],
        ] + params

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print(' '.join(command))
            # the command is run as one shell string with stderr folded into stdout
            output = subprocess.check_output(' '.join(command),
                                             stderr=subprocess.STDOUT,
                                             shell=True)
        #non-zero exit status of the Rscript call
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)
            err['message'] = E.message
            print('code: ' + str(E.returncode))
            err['code'] = E.returncode
        #the process could not be started at all
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)
            err['message'] = E.message
            print('code: ' + str(E.errno))
            err['code'] = E.errno
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err:
            print(err)
        # success is decided by the call count of the vcf, not by the error dict
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<cnmops sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        print("<<<<<<<<<<<<<cnmops failure>>>>>>>>>>>>>>>\n")
        return None
示例#7
0
文件: breakdancer.py 项目: xtmgah/SVE
    def run(self, run_id, inputs):
        """Run bam2cfg.pl and breakdancer-max on the BAM file(s), then write a VCF.

        Args:
            run_id: identifier of the current run (not used by this method).
            inputs: dict that must contain '.fa' (passed to bd.vcf_header),
                '.bam' (list of BAM paths) and 'out_dir'.

        Returns:
            The path of the output .vcf when GetCallCount reports more than
            zero calls for it; None otherwise, including when a required
            input key is missing.
        """
        #[1a]get input names and output names setup
        required = ('.fa', '.bam', 'out_dir')
        if any(key not in inputs for key in required):
            print("ERROR: .fa, .bam, and out_dir are required for breakdancer.py")
            return None

        out_exts = self.split_out_exts()
        out_dir = inputs['out_dir'] + '/'
        bams = inputs['.bam']
        # a single bam lends its base name to the output; otherwise 'joint' is used
        if len(bams) == 1:
            stripped_name = self.strip_path(self.strip_in_ext(bams[0], '.bam'))
        else:
            stripped_name = 'joint'
        stage_prefix = out_dir + stripped_name + '_S' + str(self.stage_id)

        #build temp directory to work in (it holds the generated config file)
        sub_dir = stage_prefix + '/'
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

        out_names = {
            '.calls': stage_prefix + out_exts[0],
            '.vcf': stage_prefix + out_exts[1]
        }

        #[2a]build command args
        PERL5LIB = self.tools['PERL_LIB_PATH'] + '/lib/perl5'
        if 'PERL5LIB' in os.environ:
            PERL5LIB += ':' + os.environ['PERL5LIB']
        PATH = self.tools['SAMTOOLS_PATH'] + ':' + os.environ['PATH']
        cfg = sub_dir + "bd_confg.txt"  #new version 1.1.2 working!
        config = self.tools['BREAKDANCER_PATH'] + '/perl/bam2cfg.pl'
        breakd = self.tools['BREAKDANCER_PATH'] + '/build/bin/breakdancer-max'
        # both commands use '>' redirection, so they have to run through a shell
        configure = (['perl', config, '-q', '30', '-n', '10000'] + bams +
                     ['>', cfg])
        sv_call = [breakd, cfg, '>', out_names['.calls']]

        #[3a]execute the command here----------------------------------------------------
        output, err = '', {}
        try:
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print(' '.join(configure))
            # only the config step gets the reduced PERL5LIB/PATH environment
            output += subprocess.check_output(' '.join(configure),
                                              stderr=subprocess.STDOUT,
                                              shell=True,
                                              env={
                                                  'PERL5LIB': PERL5LIB,
                                                  'PATH': PATH
                                              }) + '\n'
            print("<<<<<<<<<<<<<SVE command>>>>>>>>>>>>>>>\n")
            print(' '.join(sv_call))
            output += subprocess.check_output(
                ' '.join(sv_call), stderr=subprocess.STDOUT, shell=True) + '\n'
            # convert the breakdancer call table into a vcf, then drop the
            # intermediate call and config files
            table = bd.read_breakdancer(out_names['.calls'])
            bd.write_vcf(out_names['.vcf'], bd.vcf_header(inputs['.fa']),
                         bd.build_vcf(table))
            os.remove(out_names['.calls'])
            os.remove(cfg)
        #non-zero exit status of one of the shell commands
        except subprocess.CalledProcessError as E:
            print('call error: ' + E.output)  #what you would see in the term
            err['output'] = E.output
            print('message: ' + E.message)
            err['message'] = E.message
            print('code: ' + str(E.returncode))
            err['code'] = E.returncode
        #process start-up or file removal failure
        except OSError as E:
            print('os error: ' + E.strerror)  #what you would see in the term
            err['output'] = E.strerror
            print('message: ' + E.message)
            err['message'] = E.message
            print('code: ' + str(E.errno))
            err['code'] = E.errno
        #any other failure (e.g. raised by the bd.* conversion calls); keep the
        #exception text so the cause is not lost
        except Exception as E:
            detail = repr(E)
            print('vcf write os/file IO error')
            print('message: ' + detail)
            err['output'] = 'vcf write os/file IO error'
            err['message'] = detail
            err['code'] = 1
        print('output:\n' + output)

        #[3b]check results--------------------------------------------------
        if err:
            print(err)
        # success is decided by the call count of the vcf, not by the error dict
        if GetCallCount(out_names['.vcf']) > 0:
            print("<<<<<<<<<<<<<breakdancer sucessfull>>>>>>>>>>>>>>>\n")
            return out_names['.vcf']
        print("<<<<<<<<<<<<<breakdancer failure>>>>>>>>>>>>>>>\n")
        return None