def main(argv=sys.argv):
    """Entry point for FALCON-unzip: parse the run configuration and call unzip_all().

    argv[1] must name an INI-style config file; [General] and [Unzip] options
    override the hard-coded defaults below.
    """
    global LOG
    LOG = support.setup_logger(None)
    if len(argv) < 2 or argv[1].startswith('-'):
        # FIX: usage errors go to stderr (consistent with the other entry
        # points in this file) so stdout stays clean for pipeline output.
        print >> sys.stderr, 'you need to provide a configuration file to specific a couple cluster running environment'
        sys.exit(1)
    config_fn = argv[1]
    # Use a distinct name for the parser so it does not shadow the final
    # `config` dict handed to unzip_all().
    parser = ConfigParser.ConfigParser()
    parser.read(config_fn)

    def get(section, option, default):
        # Return the config value when present, else the supplied default.
        if parser.has_option(section, option):
            return parser.get(section, option)
        return default

    job_type = get('General', 'job_type', 'SGE')
    sge_blasr_aln = get('Unzip', 'sge_blasr_aln', ' -pe smp 24 -q bigmem ')
    smrt_bin = get('Unzip', 'smrt_bin',
                   '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/')
    sge_phasing = get('Unzip', 'sge_phasing', ' -pe smp 12 -q bigmem')
    sge_hasm = get('Unzip', 'sge_hasm', ' -pe smp 48 -q bigmem')
    sge_track_reads = get('Unzip', 'sge_track_reads', ' -pe smp 12 -q bigmem')
    unzip_concurrent_jobs = 8
    if parser.has_option('Unzip', 'unzip_concurrent_jobs'):
        unzip_concurrent_jobs = parser.getint('Unzip', 'unzip_concurrent_jobs')

    config = {
        'job_type': job_type,
        'sge_blasr_aln': sge_blasr_aln,
        'smrt_bin': smrt_bin,
        'sge_phasing': sge_phasing,
        'sge_hasm': sge_hasm,
        'sge_track_reads': sge_track_reads,
        'unzip_concurrent_jobs': unzip_concurrent_jobs,
    }
    #support.job_type = 'SGE' #tmp hack until we have a configuration parser
    unzip_all(config)
def main(argv=sys.argv):
    """Entry point for the quiver consensus stage (PypeThreadWorkflow variant).

    Reads the [General]/[Unzip] sections of the config file named by argv[1],
    then builds a workflow that (1) tracks reads per haplotig, (2) runs one
    quiver job per contig that has an aligned-read SAM, and (3) concatenates
    the per-contig consensus into 4-quiver/cns_output/.
    """
    global fc_run_logger
    fc_run_logger = support.setup_logger(None)

    # BUG FIX: honor the `argv` parameter instead of always reading sys.argv,
    # so the function can be driven programmatically; errors go to stderr.
    if len(argv) < 2:
        print >> sys.stderr, "you need to provide a configuration file to specific a couple cluster running environment"
        sys.exit(1)
    config_fn = argv[1]
    config = ConfigParser.ConfigParser()
    config.read(config_fn)

    job_type = "SGE"
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')
    sge_track_reads = " -pe smp 12 -q bigmem"
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')
    sge_quiver = " -pe smp 24 -q bigmem "
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')
    smrt_bin = "/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/"
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')
    input_bam_fofn = "input_bam.fofn"
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')
    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

    # Rebind `config` as the plain dict consumed by the tasks below.
    config = {"job_type": job_type,
              "sge_quiver": sge_quiver,
              "sge_track_reads": sge_track_reads,
              "input_bam_fofn": input_bam_fofn,
              "smrt_bin": smrt_bin}

    support.job_type = "SGE"  #tmp hack until we have a configuration parser

    ctg_ids = []
    PypeThreadWorkflow.setNumThreadAllowed(quiver_concurrent_jobs, quiver_concurrent_jobs)
    wf = PypeThreadWorkflow()

    # Stage 1: track reads per haplotig (depends on 3-unzip having finished).
    parameters = {"wd": os.path.abspath("."), "config": config}
    hasm_done = makePypeLocalFile("./3-unzip/1-hasm/hasm_done")
    job_done = makePypeLocalFile(os.path.join(parameters["wd"], "track_reads_h_done"))
    make_track_reads_task = PypeTask(inputs={"hasm_done": hasm_done},
                                     outputs={"job_done": job_done},
                                     parameters=parameters,
                                     TaskType=PypeThreadTaskBase,
                                     URL="task://localhost/track_reads_h")
    track_reads_task = make_track_reads_task(task_track_reads)
    wf.addTask(track_reads_task)
    wf.refreshTargets()  # force refresh now, will put proper dependence later

    # Load every primary ("p") and haplotig ("h") contig sequence.
    ref_seq_data = {}
    ctg_types = {}
    for r in FastaReader("./3-unzip/all_p_ctg.fa"):
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "p"
    for r in FastaReader("./3-unzip/all_h_ctg.fa"):
        rid = r.name.split()[0]
        ref_seq_data[rid] = r.sequence
        ctg_types[rid] = "h"

    ctg_ids = sorted(ref_seq_data.keys())
    p_ctg_out = []
    h_ctg_out = []
    # Stage 2: one quiver task per contig that actually has aligned reads.
    for ctg_id in ctg_ids:
        sequence = ref_seq_data[ctg_id]
        m_ctg_id = ctg_id.split("-")[0]  # haplotigs share the primary contig's directory
        wd = os.path.join(os.getcwd(), "./4-quiver/", m_ctg_id)
        mkdir(wd)
        ref_fasta = makePypeLocalFile(os.path.join(wd, "{ctg_id}_ref.fa".format(ctg_id=ctg_id)))
        read_sam = makePypeLocalFile(os.path.join(os.getcwd(), "./4-quiver/reads/{ctg_id}.sam".format(ctg_id=ctg_id)))
        cns_fasta = makePypeLocalFile(os.path.join(wd, "cns-{ctg_id}.fasta.gz".format(ctg_id=ctg_id)))
        cns_fastq = makePypeLocalFile(os.path.join(wd, "cns-{ctg_id}.fastq.gz".format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(os.path.join(wd, "{ctg_id}_quiver_done".format(ctg_id=ctg_id)))

        if os.path.exists(fn(read_sam)):
            if ctg_types[ctg_id] == "p":
                p_ctg_out.append((cns_fasta, cns_fastq))
            if ctg_types[ctg_id] == "h":
                h_ctg_out.append((cns_fasta, cns_fastq))
            if not os.path.exists(fn(ref_fasta)):
                # Write the single-contig reference used by this quiver job.
                with open(fn(ref_fasta), "w") as f:
                    print >> f, ">" + ctg_id
                    print >> f, sequence
            parameters = {"job_uid": "q-" + ctg_id, "wd": wd,
                          "config": config, "ctg_id": ctg_id}
            make_quiver_task = PypeTask(inputs={"ref_fasta": ref_fasta,
                                                "read_sam": read_sam},
                                        outputs={"cns_fasta": cns_fasta,
                                                 "cns_fastq": cns_fastq,
                                                 "job_done": job_done},
                                        parameters=parameters,
                                        TaskType=PypeThreadTaskBase,
                                        URL="task://localhost/q_{ctg_id}".format(ctg_id=ctg_id))
            quiver_task = make_quiver_task(task_run_quiver)
            wf.addTask(quiver_task)

    wf.refreshTargets()
    os.system("sleep 30")  # crude settle time before reading task outputs

    # Stage 3: concatenate gzipped per-contig consensus into the final files.
    mkdir("./4-quiver/cns_output")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_p_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(p_ctg_out):
        os.system("zcat {cns_fasta} >> ./4-quiver/cns_output/cns_p_ctg.fasta".format(cns_fasta=fn(cns_fasta)))
        os.system("zcat {cns_fastq} >> ./4-quiver/cns_output/cns_p_ctg.fastq".format(cns_fastq=fn(cns_fastq)))
    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fasta")
    os.system("rm ./4-quiver/cns_output/cns_h_ctg.fastq")
    for cns_fasta, cns_fastq in sorted(h_ctg_out):
        os.system("zcat {cns_fasta} >> ./4-quiver/cns_output/cns_h_ctg.fasta".format(cns_fasta=fn(cns_fasta)))
        os.system("zcat {cns_fastq} >> ./4-quiver/cns_output/cns_h_ctg.fastq".format(cns_fastq=fn(cns_fastq)))
from falcon_kit import run_support as support
from pypeflow.data import PypeLocalFile, makePypeLocalFile, fn
from pypeflow.task import PypeTask, PypeThreadTaskBase, PypeTaskBase
from pypeflow.controller import PypeWorkflow, PypeThreadWorkflow
from falcon_kit.FastaReader import FastaReader
import glob
import os
import re
import sys
import time
import ConfigParser

# Module-level logger used by system() and the task bodies below.
global fc_run_logger
fc_run_logger = support.setup_logger(None)

support.job_type = "SGE" #tmp hack until we have a configuration parser

# Polling interval (seconds) — presumably used by a wait loop elsewhere in
# this file; TODO confirm the consumer.
wait_time = 5
# NOTE(review): this rebinds fc_run_logger to None AFTER the setup_logger()
# call above, discarding the configured logger at import time. It looks like
# a leftover forward declaration from another version of this file — confirm
# which assignment is intended before relying on module-level logging.
fc_run_logger = None

def system(call, check=False):
    """Run `call` via os.system, logging the command and its return code.

    A nonzero return code is logged as a warning; when `check` is true it is
    escalated to an Exception carrying the command and code.
    """
    fc_run_logger.debug('$(%s)' % repr(call))
    rc = os.system(call)
    msg = "Call %r returned %d." % (call, rc)
    if rc:
        fc_run_logger.warning(msg)
        if check:
            raise Exception(msg)
    else:
        fc_run_logger.debug(msg)
script.append( "date" ) script.append( "touch {job_done}".format(job_done = job_done) ) with open(script_fn,"w") as script_file: script_file.write("\n".join(script) + '\n') job_data = support.make_job_data(self.URL, script_fn) job_data["sge_option"] = sge_quiver run_script(job_data, job_type = job_type) wait_for_file(job_done, task=self, job_name=job_data['job_name']) if __name__ == "__main__": global fc_run_logger fc_run_logger = support.setup_logger(None) if len(sys.argv) < 2: print "you need to provide a configuration file to specific a couple cluster running environment" sys.exit(1) config_fn = sys.argv[1] config = ConfigParser.ConfigParser() config.read(config_fn) job_type = "SGE" if config.has_option('General', 'job_type'): job_type = config.get('General', 'job_type')
def main(argv=sys.argv):
    """Entry point for the quiver stage (PypeProcWatcherWorkflow variant).

    argv[1] names the INI config file. Builds a workflow that tracks reads,
    scatters one quiver job per contig, gathers the per-contig consensus, and
    zcat's the results into 4-quiver/cns_output/.
    """
    global LOG
    LOG = support.setup_logger(None)
    # BUG FIX: honor the `argv` parameter instead of always reading sys.argv,
    # so the function can be driven programmatically.
    if len(argv) < 2:
        print >> sys.stderr, 'you need to provide a configuration file to specific a couple cluster running environment'
        sys.exit(1)
    config_fn = argv[1]
    # Relative paths in the config resolve against the config file's own dir.
    config_absbasedir = os.path.dirname(os.path.abspath(config_fn))
    config = ConfigParser.ConfigParser()
    config.read(config_fn)

    job_type = 'SGE'
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')
    sge_track_reads = ' -pe smp 12 -q bigmem'
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')
    sge_quiver = ' -pe smp 24 -q bigmem '
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')
    smrt_bin = '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/'
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')
    input_bam_fofn = 'input_bam.fofn'
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')
    if not os.path.isabs(input_bam_fofn):
        input_bam_fofn = os.path.join(config_absbasedir, input_bam_fofn)
    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

    # Rebind `config` as the plain dict consumed by the tasks below.
    config = {
        'job_type': job_type,
        'sge_quiver': sge_quiver,
        'sge_track_reads': sge_track_reads,
        'input_bam_fofn': input_bam_fofn,
        'smrt_bin': smrt_bin
    }
    LOG.info('config={}'.format(pprint.pformat(config)))
    #support.job_type = 'SGE' #tmp hack until we have a configuration parser

    wf = PypeProcWatcherWorkflow(max_jobs=quiver_concurrent_jobs, )

    # Stage 1: track reads per haplotig once 3-unzip/1-hasm is done.
    abscwd = os.path.abspath('.')
    parameters = {
        'wd': os.path.join(abscwd, '4-quiver', 'track_reads_h'),
        'config': config
    }
    hasm_done_plf = makePypeLocalFile(
        './3-unzip/1-hasm/hasm_done')  # by convention
    track_reads_h_done_plf = makePypeLocalFile(
        os.path.join(parameters['wd'], 'track_reads_h_done'))
    make_track_reads_task = PypeTask(
        inputs={'hasm_done': hasm_done_plf},
        outputs={'job_done': track_reads_h_done_plf},
        parameters=parameters,
    )
    track_reads_task = make_track_reads_task(task_track_reads)
    #sge_track_reads = config['sge_track_reads']
    wf.addTask(track_reads_task)

    # Stage 2: scatter — one JSON description covering all quiver jobs.
    scattered_quiver_plf = makePypeLocalFile('4-quiver/quiver_scatter/scattered.json')
    make_task = PypeTask(
        inputs={
            'p_ctg_fa': makePypeLocalFile('3-unzip/all_p_ctg.fa'),
            'h_ctg_fa': makePypeLocalFile('3-unzip/all_h_ctg.fa'),
            'track_reads_h_done': track_reads_h_done_plf,
        },
        outputs={
            'scattered_quiver_json': scattered_quiver_plf,
        },
        parameters={},
    )
    wf.addTask(make_task(task_scatter_quiver))
    wf.refreshTargets()

    p_ctg_out, h_ctg_out, job_done_plfs = create_quiver_jobs(scattered_quiver_plf)

    # Stage 3: gather — write manifests of per-contig outputs.
    gathered_p_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/p_ctg.txt')
    gathered_h_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/h_ctg.txt')
    gather_done_plf = makePypeLocalFile('4-quiver/cns_gather/job_done')
    mkdir('4-quiver/cns_gather')
    # FIX: renamed handle from `ifs` — these files are written, not read.
    with open(fn(gathered_p_ctg_plf), 'w') as out:
        for cns_fasta_fn, cns_fastq_fn in sorted(p_ctg_out):
            out.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))
    with open(fn(gathered_h_ctg_plf), 'w') as out:
        for cns_fasta_fn, cns_fastq_fn in sorted(h_ctg_out):
            out.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))

    make_task = PypeTask(
        inputs=job_done_plfs,
        outputs={
            'job_done': gather_done_plf,
        },
        parameters={},
    )
    wf.addTask(make_task(task_gather_quiver))
    wf.refreshTargets()

    # Stage 4: zcat everything into the published consensus files.
    cns_p_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fasta')
    cns_p_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fastq')
    cns_h_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fasta')
    cns_h_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fastq')
    zcat_done_plf = makePypeLocalFile('4-quiver/cns_output/job_done')
    make_task = PypeTask(
        inputs={
            'gathered_p_ctg': gathered_p_ctg_plf,
            'gathered_h_ctg': gathered_h_ctg_plf,
            'gather_done': gather_done_plf,
        },
        outputs={
            'cns_p_ctg_fasta': cns_p_ctg_fasta_plf,
            'cns_p_ctg_fastq': cns_p_ctg_fastq_plf,
            'cns_h_ctg_fasta': cns_h_ctg_fasta_plf,
            'cns_h_ctg_fastq': cns_h_ctg_fastq_plf,
            'job_done': zcat_done_plf,
        },
    )
    wf.addTask(make_task(task_cns_zcat))
    wf.refreshTargets()
def main(argv=sys.argv):
    """Entry point for the quiver stage (pwatcher/job_queue variant).

    argv[1] names the INI config file. Configures a PypeProcWatcherWorkflow
    from [General]/[Unzip], then runs track-reads, quiver scatter, per-contig
    quiver jobs, gather, and the final zcat into 4-quiver/cns_output/.
    """
    global LOG
    LOG = support.setup_logger(None)
    # BUG FIX: honor the `argv` parameter instead of always reading sys.argv,
    # so the function can be driven programmatically.
    if len(argv) < 2:
        print >> sys.stderr, 'you need to provide a configuration file to specific a couple cluster running environment'
        sys.exit(1)
    config_fn = argv[1]
    # Relative paths in the config resolve against the config file's own dir.
    config_absbasedir = os.path.dirname(os.path.abspath(config_fn))
    config = ConfigParser.ConfigParser()
    config.read(config_fn)

    job_type = 'SGE'
    if config.has_option('General', 'job_type'):
        job_type = config.get('General', 'job_type')
    job_queue = 'default'
    if config.has_option('General', 'job_queue'):
        job_queue = config.get('General', 'job_queue')
    pwatcher_type = 'fs_based'
    if config.has_option('General', 'pwatcher_type'):
        pwatcher_type = config.get('General', 'pwatcher_type')
    sge_track_reads = ' -pe smp 12 -q bigmem'
    if config.has_option('Unzip', 'sge_track_reads'):
        sge_track_reads = config.get('Unzip', 'sge_track_reads')
    sge_quiver = ' -pe smp 24 -q bigmem '
    if config.has_option('Unzip', 'sge_quiver'):
        sge_quiver = config.get('Unzip', 'sge_quiver')
    smrt_bin = '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/'
    if config.has_option('Unzip', 'smrt_bin'):
        smrt_bin = config.get('Unzip', 'smrt_bin')
    input_bam_fofn = 'input_bam.fofn'
    if config.has_option('Unzip', 'input_bam_fofn'):
        input_bam_fofn = config.get('Unzip', 'input_bam_fofn')
    if not os.path.isabs(input_bam_fofn):
        input_bam_fofn = os.path.join(config_absbasedir, input_bam_fofn)
    quiver_concurrent_jobs = 8
    if config.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = config.getint('Unzip', 'quiver_concurrent_jobs')

    # Rebind `config` as the plain dict consumed by the tasks below.
    config = {'job_type': job_type,
              'job_queue': job_queue,
              'sge_quiver': sge_quiver,
              'sge_track_reads': sge_track_reads,
              'input_bam_fofn': input_bam_fofn,
              'pwatcher_type': pwatcher_type,
              'smrt_bin': smrt_bin}
    LOG.info('config={}'.format(pprint.pformat(config)))
    #support.job_type = 'SGE' #tmp hack until we have a configuration parser

    # dict.get() is used for keys ('sge_option', 'use_tmpdir') that this
    # config dict does not define, yielding None for workflow defaults.
    wf = PypeProcWatcherWorkflow(
        max_jobs=quiver_concurrent_jobs,
        job_type=config['job_type'],
        job_queue=config.get('job_queue'),
        sge_option=config.get('sge_option'),
        watcher_type=config.get('pwatcher_type'),
        #watcher_directory=config.get('pwatcher_directory', 'mypwatcher'),
        use_tmpdir=config.get('use_tmpdir'),
    )

    # Stage 1: track reads per haplotig once 3-unzip/1-hasm is done.
    abscwd = os.path.abspath('.')
    parameters = {
        'sge_option': config['sge_track_reads'],
    }
    input_bam_fofn_fn = config['input_bam_fofn']
    input_bam_fofn_plf = makePypeLocalFile(input_bam_fofn_fn)
    hasm_done_plf = makePypeLocalFile('./3-unzip/1-hasm/hasm_done')  # by convention
    track_reads_h_done_plf = makePypeLocalFile('./4-quiver/reads/track_reads_h_done')
    make_track_reads_task = PypeTask(
        inputs={
            'input_bam_fofn': input_bam_fofn_plf,
            'hasm_done': hasm_done_plf},
        outputs={'job_done': track_reads_h_done_plf},
        parameters=parameters,
    )
    track_reads_task = make_track_reads_task(task_track_reads)
    wf.addTask(track_reads_task)

    # Stage 2: scatter — one JSON description covering all quiver jobs.
    scattered_quiver_plf = makePypeLocalFile('4-quiver/quiver_scatter/scattered.json')
    parameters = {
        'config': config,
    }
    make_task = PypeTask(
        inputs={
            'p_ctg_fa': makePypeLocalFile('3-unzip/all_p_ctg.fa'),
            'h_ctg_fa': makePypeLocalFile('3-unzip/all_h_ctg.fa'),
            'track_reads_h_done': track_reads_h_done_plf,
        },
        outputs={
            'scattered_quiver_json': scattered_quiver_plf,
        },
        parameters=parameters,
    )
    wf.addTask(make_task(task_scatter_quiver))
    wf.refreshTargets()

    p_ctg_out, h_ctg_out, job_done_plfs = create_quiver_jobs(wf, scattered_quiver_plf)

    # Stage 3: gather — write manifests of per-contig outputs.
    gathered_p_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/p_ctg.txt')
    gathered_h_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/h_ctg.txt')
    gather_done_plf = makePypeLocalFile('4-quiver/cns_gather/job_done')
    mkdir('4-quiver/cns_gather')
    # FIX: renamed handle from `ifs` — these files are written, not read.
    with open(fn(gathered_p_ctg_plf), 'w') as out:
        for cns_fasta_fn, cns_fastq_fn in sorted(p_ctg_out):
            out.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))
    with open(fn(gathered_h_ctg_plf), 'w') as out:
        for cns_fasta_fn, cns_fastq_fn in sorted(h_ctg_out):
            out.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))

    make_task = PypeTask(
        inputs=job_done_plfs,
        outputs={
            'job_done': gather_done_plf,
        },
        parameters={},
    )
    wf.addTask(make_task(task_gather_quiver))
    wf.refreshTargets()

    # Stage 4: zcat everything into the published consensus files.
    cns_p_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fasta')
    cns_p_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fastq')
    cns_h_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fasta')
    cns_h_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fastq')
    zcat_done_plf = makePypeLocalFile('4-quiver/cns_output/job_done')
    make_task = PypeTask(
        inputs={
            'gathered_p_ctg': gathered_p_ctg_plf,
            'gathered_h_ctg': gathered_h_ctg_plf,
            'gather_done': gather_done_plf,
        },
        outputs={
            'cns_p_ctg_fasta': cns_p_ctg_fasta_plf,
            'cns_p_ctg_fastq': cns_p_ctg_fastq_plf,
            'cns_h_ctg_fasta': cns_h_ctg_fasta_plf,
            'cns_h_ctg_fastq': cns_h_ctg_fastq_plf,
            'job_done': zcat_done_plf,
        },
    )
    wf.addTask(make_task(task_cns_zcat))
    wf.refreshTargets()
def main(argv=sys.argv):
    """Entry point for FALCON-unzip (pwatcher variant): parse config, run unzip_all().

    argv[1] must name an INI-style config file; [General] and [Unzip] options
    override the hard-coded defaults below.
    """
    global LOG
    LOG = support.setup_logger(None)
    if len(argv) < 2 or argv[1].startswith('-'):
        # FIX: usage errors go to stderr, matching the other entry points in
        # this file (the original printed to stdout).
        print >> sys.stderr, 'you need to provide a configuration file to specific a couple cluster running environment'
        sys.exit(1)
    config_fn = argv[1]
    # Use a distinct name for the parser so it does not shadow the final
    # `config` dict handed to unzip_all().
    parser = ConfigParser.ConfigParser()
    parser.read(config_fn)

    def get(section, option, default):
        # String option with fallback default.
        if parser.has_option(section, option):
            return parser.get(section, option)
        return default

    def getint(section, option, default):
        # Integer option with fallback default.
        if parser.has_option(section, option):
            return parser.getint(section, option)
        return default

    job_type = get('General', 'job_type', 'SGE')
    job_queue = get('General', 'job_queue', 'default')
    pwatcher_type = get('General', 'pwatcher_type', 'fs_based')
    sge_blasr_aln = get('Unzip', 'sge_blasr_aln', ' -pe smp 24 -q bigmem ')
    smrt_bin = get('Unzip', 'smrt_bin',
                   '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/')
    sge_phasing = get('Unzip', 'sge_phasing', ' -pe smp 12 -q bigmem')
    sge_hasm = get('Unzip', 'sge_hasm', ' -pe smp 48 -q bigmem')
    sge_track_reads = get('Unzip', 'sge_track_reads', ' -pe smp 12 -q bigmem')
    unzip_blasr_concurrent_jobs = getint('Unzip', 'unzip_blasr_concurrent_jobs', 8)
    unzip_phasing_concurrent_jobs = getint('Unzip', 'unzip_phasing_concurrent_jobs', 8)

    config = {'job_type': job_type,
              'job_queue': job_queue,
              'sge_blasr_aln': sge_blasr_aln,
              'smrt_bin': smrt_bin,
              'sge_phasing': sge_phasing,
              'sge_hasm': sge_hasm,
              'sge_track_reads': sge_track_reads,
              'unzip_blasr_concurrent_jobs': unzip_blasr_concurrent_jobs,
              'unzip_phasing_concurrent_jobs': unzip_phasing_concurrent_jobs,
              'pwatcher_type': pwatcher_type,
              }
    #support.job_type = 'SGE' #tmp hack until we have a configuration parser
    unzip_all(config)