Exemplo n.º 1
0
def get_read_hctg_map(asm_dir, hasm_dir, read_to_contig_map_fn):
    """Run ``generate_read_to_hctg_map`` as a single-task workflow.

    Args:
        asm_dir: Directory containing ``read_maps/dump_rawread_ids/rawread_ids``
            and ``read_maps/dump_pread_ids/pread_ids``.
        hasm_dir: Directory containing ``all_h_ctg_edges``,
            ``all_p_ctg_edges`` and ``all_h_ctg_ids``.
        read_to_contig_map_fn: Path of the map file the task produces.
    """
    # TODO: Why was NumThreads ever set? There is only one task!
    workflow = PypeProcWatcherWorkflow(max_jobs=12)

    def asm_file(relative_path):
        """Wrap a path below ``asm_dir`` as a pype local file."""
        return makePypeLocalFile(os.path.join(asm_dir, relative_path))

    def hasm_file(name):
        """Wrap a file directly inside ``hasm_dir`` as a pype local file."""
        return makePypeLocalFile(os.path.join(hasm_dir, name))

    task_inputs = {
        'rawread_id_file': asm_file('read_maps/dump_rawread_ids/rawread_ids'),
        'pread_id_file': asm_file('read_maps/dump_pread_ids/pread_ids'),
        'h_ctg_edges': hasm_file('all_h_ctg_edges'),
        'p_ctg_edges': hasm_file('all_p_ctg_edges'),
        'h_ctg_ids': hasm_file("all_h_ctg_ids"),
    }
    # The output directory is not created here (original note: the workflow
    # does this).
    task_outputs = {
        'read_to_contig_map': makePypeLocalFile(read_to_contig_map_fn),
    }

    task_factory = PypeTask(inputs=task_inputs, outputs=task_outputs)
    workflow.addTask(task_factory(generate_read_to_hctg_map))
    workflow.refreshTargets()  # block
Exemplo n.º 2
0
def get_read_ctg_map(rawread_dir, pread_dir, asm_dir):
    """Dump raw-read and pread ids, then build the read-to-contig map.

    Everything is written below ``<asm_dir>/read_maps``, which is created
    first. The two id-dump tasks are run to completion before the map task
    is registered and run.

    Args:
        rawread_dir: Directory containing ``raw_reads.db``.
        pread_dir: Directory containing ``preads.db``.
        asm_dir: Directory containing ``sg_edges_list``, ``utg_data`` and
            ``ctg_paths``.
    """
    read_map_dir = os.path.abspath(os.path.join(asm_dir, 'read_maps'))
    make_dirs(read_map_dir)

    # Workflow options that are not passed for now:
    #     job_type=config['job_type'],
    #     job_queue=config['job_queue'],
    #     sge_option=config.get('sge_option', ''),
    #     watcher_type=config['pwatcher_type'],
    #     watcher_directory=config['pwatcher_directory'])
    workflow = PypeProcWatcherWorkflow(max_jobs=12)

    # --- Stage 1a: raw-read ids -------------------------------------------
    rawread_db = makePypeLocalFile(os.path.join(rawread_dir, 'raw_reads.db'))
    rawread_id_file = makePypeLocalFile(
        os.path.join(read_map_dir, 'dump_rawread_ids', 'rawread_ids'))
    make_rawread_task = PypeTask(
        inputs={'rawread_db': rawread_db},
        outputs={'rawread_id_file': rawread_id_file},
        TaskType=PypeThreadTaskBase,
        URL='task://localhost/dump_rawread_ids')
    workflow.addTask(make_rawread_task(dump_rawread_ids))

    # --- Stage 1b: pread ids ----------------------------------------------
    pread_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db'))
    pread_id_file = makePypeLocalFile(
        os.path.join(read_map_dir, 'dump_pread_ids', 'pread_ids'))
    make_pread_task = PypeTask(
        inputs={'pread_db': pread_db},
        outputs={'pread_id_file': pread_id_file},
        TaskType=PypeThreadTaskBase,
        URL='task://localhost/dump_pread_ids')
    workflow.addTask(make_pread_task(dump_pread_ids))

    workflow.refreshTargets()  # block

    # --- Stage 2: read-to-contig map --------------------------------------
    map_inputs = {
        'rawread_id_file': rawread_id_file,
        'pread_id_file': pread_id_file,
        'sg_edges_list': makePypeLocalFile(
            os.path.join(asm_dir, 'sg_edges_list')),
        'utg_data': makePypeLocalFile(os.path.join(asm_dir, 'utg_data')),
        'ctg_paths': makePypeLocalFile(os.path.join(asm_dir, 'ctg_paths')),
    }
    read_to_contig_map = makePypeLocalFile(
        os.path.join(read_map_dir, 'get_ctg_read_map', 'read_to_contig_map'))
    make_map_task = PypeTask(
        inputs=map_inputs,
        outputs={'read_to_contig_map': read_to_contig_map},
        TaskType=PypeThreadTaskBase,
        URL='task://localhost/get_ctg_read_map')
    workflow.addTask(make_map_task(generate_read_to_ctg_map))

    workflow.refreshTargets()  # block
Exemplo n.º 3
0
def get_read_ctg_map(rawread_dir, pread_dir, asm_dir):
    """Dump raw-read and pread ids, then build the read-to-contig map.

    Outputs go below ``<asm_dir>/read_maps``, which is created up front.
    The two id-dump tasks are run to completion before the map task is
    registered and run. Task bodies come from ``pype_tasks``.

    Args:
        rawread_dir: Directory containing ``raw_reads.db``.
        pread_dir: Directory containing ``preads.db``.
        asm_dir: Directory containing ``sg_edges_list``, ``utg_data`` and
            ``ctg_paths``.
    """
    read_map_dir = os.path.abspath(os.path.join(asm_dir, 'read_maps'))
    make_dirs(read_map_dir)

    # Workflow options that are not passed for now:
    #     job_type=config['job_type'],
    #     job_queue=config['job_queue'],
    #     sge_option=config.get('sge_option', ''),
    #     watcher_type=config['pwatcher_type'],
    #     watcher_directory=config['pwatcher_directory'])
    workflow = PypeProcWatcherWorkflow(max_jobs=12)

    def in_read_maps(*parts):
        """Wrap a path below ``read_map_dir`` as a pype local file."""
        return makePypeLocalFile(os.path.join(read_map_dir, *parts))

    def in_asm_dir(name):
        """Wrap a file directly inside ``asm_dir`` as a pype local file."""
        return makePypeLocalFile(os.path.join(asm_dir, name))

    # --- Stage 1a: raw-read ids -------------------------------------------
    rawread_db = makePypeLocalFile(os.path.join(rawread_dir, 'raw_reads.db'))
    rawread_id_file = in_read_maps('dump_rawread_ids', 'rawread_ids')
    make_rawread_task = PypeTask(
        inputs={'rawread_db': rawread_db},
        outputs={'rawread_id_file': rawread_id_file},
    )
    workflow.addTask(make_rawread_task(pype_tasks.task_dump_rawread_ids))

    # --- Stage 1b: pread ids ----------------------------------------------
    pread_db = makePypeLocalFile(os.path.join(pread_dir, 'preads.db'))
    pread_id_file = in_read_maps('dump_pread_ids', 'pread_ids')
    make_pread_task = PypeTask(
        inputs={'pread_db': pread_db},
        outputs={'pread_id_file': pread_id_file},
    )
    workflow.addTask(make_pread_task(pype_tasks.task_dump_pread_ids))

    workflow.refreshTargets()  # block

    # --- Stage 2: read-to-contig map --------------------------------------
    map_inputs = {
        'rawread_id_file': rawread_id_file,
        'pread_id_file': pread_id_file,
        'sg_edges_list': in_asm_dir('sg_edges_list'),
        'utg_data': in_asm_dir('utg_data'),
        'ctg_paths': in_asm_dir('ctg_paths'),
    }
    read_to_contig_map = in_read_maps('get_ctg_read_map', 'read_to_contig_map')
    make_map_task = PypeTask(
        inputs=map_inputs,
        outputs={'read_to_contig_map': read_to_contig_map},
    )
    workflow.addTask(make_map_task(pype_tasks.task_generate_read_to_ctg_map))

    workflow.refreshTargets()  # block
Exemplo n.º 4
0
def main(argv=sys.argv):
    """Drive the quiver workflow described by a configuration file.

    Reads the ``General`` and ``Unzip`` sections of the configuration file
    named by ``argv[1]`` and then, in order: registers the track-reads and
    quiver-scatter tasks and runs them, calls ``create_quiver_jobs``, writes
    the two gather listings, runs the gather task, and runs the zcat task.

    Args:
        argv: Argument list; ``argv[1]`` must be the configuration file
            path. Defaults to ``sys.argv``.

    Exits with status 1, after a message on stderr, when no configuration
    file is given.
    """
    global LOG
    LOG = support.setup_logger(None)

    # Use the argument list that was passed in; the parameter used to be
    # ignored in favour of sys.argv.
    if len(argv) < 2:
        sys.stderr.write(
            'you need to provide a configuration file to specific a couple cluster running environment\n')
        sys.exit(1)

    config_fn = argv[1]
    config_absbasedir = os.path.dirname(os.path.abspath(config_fn))

    parser = ConfigParser.ConfigParser()
    parser.read(config_fn)

    def option(section, name, default):
        """Return the configured string for section/name, or ``default``."""
        if parser.has_option(section, name):
            return parser.get(section, name)
        return default

    job_type = option('General', 'job_type', 'SGE')
    sge_track_reads = option('Unzip', 'sge_track_reads',
                             ' -pe smp 12 -q bigmem')
    sge_quiver = option('Unzip', 'sge_quiver', ' -pe smp 24 -q bigmem ')
    smrt_bin = option(
        'Unzip', 'smrt_bin',
        '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/')

    input_bam_fofn = option('Unzip', 'input_bam_fofn', 'input_bam.fofn')
    if not os.path.isabs(input_bam_fofn):
        # Relative paths are resolved against the config file's directory.
        input_bam_fofn = os.path.join(config_absbasedir, input_bam_fofn)

    quiver_concurrent_jobs = 8
    if parser.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = parser.getint('Unzip',
                                               'quiver_concurrent_jobs')

    config = {
        'job_type': job_type,
        'sge_quiver': sge_quiver,
        'sge_track_reads': sge_track_reads,
        'input_bam_fofn': input_bam_fofn,
        'smrt_bin': smrt_bin
    }
    LOG.info('config={}'.format(pprint.pformat(config)))

    #support.job_type = 'SGE' #tmp hack until we have a configuration parser

    wf = PypeProcWatcherWorkflow(max_jobs=quiver_concurrent_jobs, )

    # --- track reads ------------------------------------------------------
    abscwd = os.path.abspath('.')
    parameters = {
        'wd': os.path.join(abscwd, '4-quiver', 'track_reads_h'),
        'config': config
    }
    hasm_done_plf = makePypeLocalFile(
        './3-unzip/1-hasm/hasm_done')  # by convention
    track_reads_h_done_plf = makePypeLocalFile(
        os.path.join(parameters['wd'], 'track_reads_h_done'))
    make_track_reads_task = PypeTask(
        inputs={'hasm_done': hasm_done_plf},
        outputs={'job_done': track_reads_h_done_plf},
        parameters=parameters,
    )
    wf.addTask(make_track_reads_task(task_track_reads))

    # --- scatter ----------------------------------------------------------
    scattered_quiver_plf = makePypeLocalFile(
        '4-quiver/quiver_scatter/scattered.json')
    make_task = PypeTask(
        inputs={
            'p_ctg_fa': makePypeLocalFile('3-unzip/all_p_ctg.fa'),
            'h_ctg_fa': makePypeLocalFile('3-unzip/all_h_ctg.fa'),
            'track_reads_h_done': track_reads_h_done_plf,
        },
        outputs={
            'scattered_quiver_json': scattered_quiver_plf,
        },
        parameters={},
    )
    wf.addTask(make_task(task_scatter_quiver))
    wf.refreshTargets()

    p_ctg_out, h_ctg_out, job_done_plfs = create_quiver_jobs(
        scattered_quiver_plf)

    # --- gather -----------------------------------------------------------
    gathered_p_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/p_ctg.txt')
    gathered_h_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/h_ctg.txt')
    gather_done_plf = makePypeLocalFile('4-quiver/cns_gather/job_done')
    mkdir('4-quiver/cns_gather')
    # Each listing holds one "<fasta> <fastq>" pair per line, in sorted order.
    for listing_plf, pairs in ((gathered_p_ctg_plf, p_ctg_out),
                               (gathered_h_ctg_plf, h_ctg_out)):
        with open(fn(listing_plf), 'w') as ofs:
            for cns_fasta_fn, cns_fastq_fn in sorted(pairs):
                ofs.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))

    make_task = PypeTask(
        inputs=job_done_plfs,
        outputs={
            'job_done': gather_done_plf,
        },
        parameters={},
    )
    wf.addTask(make_task(task_gather_quiver))
    wf.refreshTargets()

    # --- concatenate consensus output -------------------------------------
    cns_p_ctg_fasta_plf = makePypeLocalFile(
        '4-quiver/cns_output/cns_p_ctg.fasta')
    cns_p_ctg_fastq_plf = makePypeLocalFile(
        '4-quiver/cns_output/cns_p_ctg.fastq')
    cns_h_ctg_fasta_plf = makePypeLocalFile(
        '4-quiver/cns_output/cns_h_ctg.fasta')
    cns_h_ctg_fastq_plf = makePypeLocalFile(
        '4-quiver/cns_output/cns_h_ctg.fastq')
    zcat_done_plf = makePypeLocalFile('4-quiver/cns_output/job_done')
    make_task = PypeTask(
        inputs={
            'gathered_p_ctg': gathered_p_ctg_plf,
            'gathered_h_ctg': gathered_h_ctg_plf,
            'gather_done': gather_done_plf,
        },
        outputs={
            'cns_p_ctg_fasta': cns_p_ctg_fasta_plf,
            'cns_p_ctg_fastq': cns_p_ctg_fastq_plf,
            'cns_h_ctg_fasta': cns_h_ctg_fasta_plf,
            'cns_h_ctg_fastq': cns_h_ctg_fastq_plf,
            'job_done': zcat_done_plf,
        },
    )
    wf.addTask(make_task(task_cns_zcat))

    wf.refreshTargets()
Exemplo n.º 5
0
def unzip_all(config):
    """Build and run the unzip workflow in four stages.

    Stages, each run to completion before the next is registered:
    track reads, one blasr alignment task per contig id, one phasing task
    per contig id, and finally the rid-to-phase gather plus hasm tasks.
    Contig ids are read from ``./3-unzip/reads/ctg_list`` after the first
    stage.

    Args:
        config: Mapping of settings. Read with ``[]``:
            ``unzip_blasr_concurrent_jobs``, ``unzip_phasing_concurrent_jobs``,
            ``job_type``, ``sge_track_reads``, ``sge_hasm`` and, when there is
            at least one contig id, ``sge_blasr_aln`` and ``sge_phasing``.
            Read with ``.get``: ``job_queue``, ``sge_option``,
            ``pwatcher_type``, ``use_tmpdir``.
    """
    unzip_blasr_concurrent_jobs = config['unzip_blasr_concurrent_jobs']
    unzip_phasing_concurrent_jobs = config['unzip_phasing_concurrent_jobs']
    wf = PypeProcWatcherWorkflow(
        max_jobs=unzip_blasr_concurrent_jobs,
        job_type=config['job_type'],
        job_queue=config.get('job_queue'),
        sge_option=config.get('sge_option'),
        watcher_type=config.get('pwatcher_type'),
        #watcher_directory=config.get('pwatcher_directory', 'mypwatcher'),
        use_tmpdir=config.get('use_tmpdir'),
    )

    # --- Stage 1: track reads ---------------------------------------------
    ctg_list_file = makePypeLocalFile('./3-unzip/reads/ctg_list')
    falcon_asm_done = makePypeLocalFile('./2-asm-falcon/falcon_asm_done')
    wdir = os.path.abspath('./3-unzip/reads')
    parameters = {
        'wd': wdir,
        'config': config,
        'sge_option': config['sge_track_reads'],
    }
    job_done = makePypeLocalFile(
        os.path.join(parameters['wd'], 'track_reads_done'))
    make_track_reads_task = PypeTask(
        inputs={'falcon_asm_done': falcon_asm_done},
        outputs={
            'job_done': job_done,
            'ctg_list_file': ctg_list_file
        },
        parameters=parameters,
        wdir=wdir,
    )
    wf.addTask(make_track_reads_task(task_track_reads))
    wf.refreshTargets()  #force refresh now, will put proper dependence later

    # One contig id per line; surrounding whitespace is stripped.
    with open('./3-unzip/reads/ctg_list') as f:
        ctg_ids = [row.strip() for row in f]

    all_ctg_out = {}

    # --- Stage 2: blasr alignment per contig ------------------------------
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile(
            './3-unzip/reads/{ctg_id}_ref.fa'.format(ctg_id=ctg_id))
        read_fasta = makePypeLocalFile(
            './3-unzip/reads/{ctg_id}_reads.fa'.format(ctg_id=ctg_id))

        # outputs
        wd = os.path.join(
            os.getcwd(), './3-unzip/0-phasing/{ctg_id}/'.format(ctg_id=ctg_id))
        #mkdir(wd)
        blasr_dir = os.path.join(wd, 'blasr')
        ctg_aln_out = makePypeLocalFile(
            os.path.join(blasr_dir,
                         '{ctg_id}_sorted.bam'.format(ctg_id=ctg_id)))
        job_done = makePypeLocalFile(
            os.path.join(blasr_dir, 'aln_{ctg_id}_done'.format(ctg_id=ctg_id)))

        parameters = {
            'job_uid': 'aln-' + ctg_id,
            'wd': blasr_dir,
            'config': config,
            'ctg_id': ctg_id,
            'sge_option': config['sge_blasr_aln'],
        }
        make_blasr_task = PypeTask(
            inputs={
                'ref_fasta': ref_fasta,
                'read_fasta': read_fasta
            },
            outputs={
                'ctg_aln_out': ctg_aln_out,
                'job_done': job_done
            },
            parameters=parameters,
        )
        wf.addTask(make_blasr_task(task_run_blasr))
    wf.refreshTargets()

    # --- Stage 3: phasing per contig --------------------------------------
    wf.max_jobs = unzip_phasing_concurrent_jobs
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile(
            './3-unzip/reads/{ctg_id}_ref.fa'.format(ctg_id=ctg_id))
        # NOTE(review): read_fasta is not used by the phasing task; the call
        # is kept in case makePypeLocalFile has effects beyond construction --
        # TODO confirm and drop.
        read_fasta = makePypeLocalFile(
            './3-unzip/reads/{ctg_id}_reads.fa'.format(ctg_id=ctg_id))

        # outputs
        wd = os.path.join(
            os.getcwd(), './3-unzip/0-phasing/{ctg_id}/'.format(ctg_id=ctg_id))

        blasr_dir = os.path.join(wd, 'blasr')
        ctg_aln_out = makePypeLocalFile(
            os.path.join(blasr_dir,
                         '{ctg_id}_sorted.bam'.format(ctg_id=ctg_id)))

        phasing_dir = os.path.join(wd, 'phasing')
        job_done = makePypeLocalFile(
            os.path.join(phasing_dir, 'p_{ctg_id}_done'.format(ctg_id=ctg_id)))
        rid_to_phase_out = makePypeLocalFile(
            os.path.join(
                wd,
                'rid_to_phase.{ctg_id}'.format(ctg_id=ctg_id)))  # TODO: ???
        # Collected as input for the gather task below, although the phasing
        # task does not declare it as an output (original note: implicit
        # output?).
        all_ctg_out['r2p.{ctg_id}'.format(ctg_id=ctg_id)] = rid_to_phase_out

        parameters = {
            'job_uid': 'ha-' + ctg_id,
            'wd': wd,
            'config': config,
            'ctg_id': ctg_id,
            'sge_option': config['sge_phasing'],
        }
        make_phasing_task = PypeTask(
            inputs={
                'ref_fasta': ref_fasta,
                'aln_bam': ctg_aln_out
            },
            outputs={'job_done': job_done},
            parameters=parameters,
        )
        wf.addTask(make_phasing_task(task_phasing))
    wf.refreshTargets()

    # --- Stage 4: gather rid-to-phase files and run hasm ------------------
    hasm_wd = os.path.abspath('./3-unzip/1-hasm/')
    #mkdir(hasm_wd)
    rid_to_phase_all = makePypeLocalFile(
        os.path.join(hasm_wd, 'rid-to-phase-all', 'rid_to_phase.all'))
    task = PypeTask(
        inputs=all_ctg_out,
        outputs={'rid_to_phase_all': rid_to_phase_all},
    )(get_rid_to_phase_all)
    wf.addTask(task)

    # Start from the most recent parameters (same keys as before), but work
    # on a copy so the dict already handed to the previously registered task
    # is not modified after the fact.
    hasm_parameters = dict(parameters)
    hasm_parameters['wd'] = hasm_wd
    hasm_parameters['sge_option'] = config['sge_hasm']
    job_done = makePypeLocalFile(os.path.join(hasm_wd, 'hasm_done'))
    make_hasm_task = PypeTask(
        inputs={'rid_to_phase_all': rid_to_phase_all},
        outputs={'job_done': job_done},
        parameters=hasm_parameters,
    )
    wf.addTask(make_hasm_task(task_hasm))

    wf.refreshTargets()
Exemplo n.º 6
0
        max_jobs=12,
)

# Raw-read database (input) and the id listing written next to it (output).
rawread_db = makePypeLocalFile(os.path.join(rawread_dir, "raw_reads.db"))
rawread_id_file = makePypeLocalFile(os.path.join(rawread_dir, "raw_read_ids"))


@PypeTask(
    inputs={"rawread_db": rawread_db},
    outputs={"rawread_id_file": rawread_id_file},
    TaskType=PypeThreadTaskBase,
    URL="task://localhost/dump_rawread_ids",
)
def dump_rawread_ids(self):
    """Write the raw-read ids to the id file via a shell pipeline.

    Runs ``DBshow -n`` on the database, deletes every '>' character and
    keeps the first whitespace-separated field of each line. The exit
    status of the pipeline is not checked.
    """
    db_path = fn(self.rawread_db)
    ids_path = fn(self.rawread_id_file)
    command = "DBshow -n %s | tr -d '>' | awk '{print $1}' > %s" % (db_path, ids_path)
    os.system(command)


wf.addTask(dump_rawread_ids)

# Pread database (input) and the id listing written next to it (output).
pread_db = makePypeLocalFile(os.path.join(pread_dir, "preads.db"))
pread_id_file = makePypeLocalFile(os.path.join(pread_dir, "pread_ids"))


@PypeTask(
    inputs={"pread_db": pread_db},
    outputs={"pread_id_file": pread_id_file},
    TaskType=PypeThreadTaskBase,
    URL="task://localhost/dump_pread_ids",
)
def dump_pread_ids(self):
    """Write the pread ids to the id file via a shell pipeline.

    Runs ``DBshow -n`` on the database, deletes every '>' character and
    keeps the first whitespace-separated field of each line. The exit
    status of the pipeline is not checked.
    """
    db_path = fn(self.pread_db)
    ids_path = fn(self.pread_id_file)
    command = "DBshow -n %s | tr -d '>' | awk '{print $1}' > %s" % (db_path, ids_path)
    os.system(command)


wf.addTask(dump_pread_ids)
wf.refreshTargets()  # block
Exemplo n.º 7
0
def main(argv=sys.argv):
    """Drive the quiver workflow described by a configuration file.

    Reads the ``General`` and ``Unzip`` sections of the configuration file
    named by ``argv[1]`` and then, in order: registers the track-reads and
    quiver-scatter tasks and runs them, calls ``create_quiver_jobs`` with
    the workflow, writes the two gather listings, runs the gather task, and
    runs the zcat task.

    Args:
        argv: Argument list; ``argv[1]`` must be the configuration file
            path. Defaults to ``sys.argv``.

    Exits with status 1, after a message on stderr, when no configuration
    file is given.
    """
    global LOG
    LOG = support.setup_logger(None)

    # Use the argument list that was passed in; the parameter used to be
    # ignored in favour of sys.argv.
    if len(argv) < 2:
        sys.stderr.write(
            'you need to provide a configuration file to specific a couple cluster running environment\n')
        sys.exit(1)

    config_fn = argv[1]
    config_absbasedir = os.path.dirname(os.path.abspath(config_fn))

    parser = ConfigParser.ConfigParser()
    parser.read(config_fn)

    def option(section, name, default):
        """Return the configured string for section/name, or ``default``."""
        if parser.has_option(section, name):
            return parser.get(section, name)
        return default

    job_type = option('General', 'job_type', 'SGE')
    job_queue = option('General', 'job_queue', 'default')
    pwatcher_type = option('General', 'pwatcher_type', 'fs_based')
    sge_track_reads = option('Unzip', 'sge_track_reads',
                             ' -pe smp 12 -q bigmem')
    sge_quiver = option('Unzip', 'sge_quiver', ' -pe smp 24 -q bigmem ')
    smrt_bin = option(
        'Unzip', 'smrt_bin',
        '/mnt/secondary/builds/full/3.0.0/prod/smrtanalysis_3.0.0.153854/smrtcmds/bin/')

    input_bam_fofn = option('Unzip', 'input_bam_fofn', 'input_bam.fofn')
    if not os.path.isabs(input_bam_fofn):
        # Relative paths are resolved against the config file's directory.
        input_bam_fofn = os.path.join(config_absbasedir, input_bam_fofn)

    quiver_concurrent_jobs = 8
    if parser.has_option('Unzip', 'quiver_concurrent_jobs'):
        quiver_concurrent_jobs = parser.getint('Unzip', 'quiver_concurrent_jobs')

    config = {'job_type': job_type,
              'job_queue': job_queue,
              'sge_quiver': sge_quiver,
              'sge_track_reads': sge_track_reads,
              'input_bam_fofn': input_bam_fofn,
              'pwatcher_type': pwatcher_type,
              'smrt_bin': smrt_bin}
    LOG.info('config={}'.format(pprint.pformat(config)))

    #support.job_type = 'SGE' #tmp hack until we have a configuration parser

    # 'sge_option' and 'use_tmpdir' are not keys of the dict built above, so
    # these .get() calls pass None for them.
    wf = PypeProcWatcherWorkflow(
            max_jobs=quiver_concurrent_jobs,
            job_type=config['job_type'],
            job_queue=config.get('job_queue'),
            sge_option=config.get('sge_option'),
            watcher_type=config.get('pwatcher_type'),
            #watcher_directory=config.get('pwatcher_directory', 'mypwatcher'),
            use_tmpdir=config.get('use_tmpdir'),
    )

    # --- track reads ------------------------------------------------------
    parameters = {
            'sge_option': config['sge_track_reads'],
    }
    input_bam_fofn_fn = config['input_bam_fofn']
    input_bam_fofn_plf = makePypeLocalFile(input_bam_fofn_fn)
    hasm_done_plf = makePypeLocalFile('./3-unzip/1-hasm/hasm_done') # by convention
    track_reads_h_done_plf = makePypeLocalFile('./4-quiver/reads/track_reads_h_done')
    make_track_reads_task = PypeTask(
            inputs={
                'input_bam_fofn': input_bam_fofn_plf,
                'hasm_done': hasm_done_plf,
            },
            outputs={'job_done': track_reads_h_done_plf},
            parameters=parameters,
    )
    wf.addTask(make_track_reads_task(task_track_reads))

    # --- scatter ----------------------------------------------------------
    scattered_quiver_plf = makePypeLocalFile('4-quiver/quiver_scatter/scattered.json')
    parameters = {
            'config': config,
    }
    make_task = PypeTask(
            inputs={
                'p_ctg_fa': makePypeLocalFile('3-unzip/all_p_ctg.fa'),
                'h_ctg_fa': makePypeLocalFile('3-unzip/all_h_ctg.fa'),
                'track_reads_h_done': track_reads_h_done_plf,
            },
            outputs={
                'scattered_quiver_json': scattered_quiver_plf,
            },
            parameters=parameters,
    )
    wf.addTask(make_task(task_scatter_quiver))
    wf.refreshTargets()

    p_ctg_out, h_ctg_out, job_done_plfs = create_quiver_jobs(wf, scattered_quiver_plf)

    # --- gather -----------------------------------------------------------
    gathered_p_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/p_ctg.txt')
    gathered_h_ctg_plf = makePypeLocalFile('4-quiver/cns_gather/h_ctg.txt')
    gather_done_plf = makePypeLocalFile('4-quiver/cns_gather/job_done')
    mkdir('4-quiver/cns_gather')
    # Each listing holds one "<fasta> <fastq>" pair per line, in sorted order.
    for listing_plf, pairs in ((gathered_p_ctg_plf, p_ctg_out),
                               (gathered_h_ctg_plf, h_ctg_out)):
        with open(fn(listing_plf), 'w') as ofs:
            for cns_fasta_fn, cns_fastq_fn in sorted(pairs):
                ofs.write('{} {}\n'.format(cns_fasta_fn, cns_fastq_fn))

    make_task = PypeTask(
            inputs=job_done_plfs,
            outputs={
                'job_done': gather_done_plf,
            },
            parameters={},
    )
    wf.addTask(make_task(task_gather_quiver))
    wf.refreshTargets()

    # --- concatenate consensus output -------------------------------------
    cns_p_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fasta')
    cns_p_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_p_ctg.fastq')
    cns_h_ctg_fasta_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fasta')
    cns_h_ctg_fastq_plf = makePypeLocalFile('4-quiver/cns_output/cns_h_ctg.fastq')
    zcat_done_plf = makePypeLocalFile('4-quiver/cns_output/job_done')
    make_task = PypeTask(
            inputs={
                'gathered_p_ctg': gathered_p_ctg_plf,
                'gathered_h_ctg': gathered_h_ctg_plf,
                'gather_done': gather_done_plf,
            },
            outputs={
                'cns_p_ctg_fasta': cns_p_ctg_fasta_plf,
                'cns_p_ctg_fastq': cns_p_ctg_fastq_plf,
                'cns_h_ctg_fasta': cns_h_ctg_fasta_plf,
                'cns_h_ctg_fastq': cns_h_ctg_fastq_plf,
                'job_done': zcat_done_plf,
            },
    )
    wf.addTask(make_task(task_cns_zcat))

    wf.refreshTargets()
Exemplo n.º 8
0
def unzip_all(config):
    """Build and run the unzip workflow in four stages.

    Stages, each run to completion before the next is registered:
    track reads, one blasr alignment task per contig id, one phasing task
    per contig id, and finally the rid-to-phase gather plus hasm tasks.
    Contig ids are read from ``./3-unzip/reads/ctg_list`` after the first
    stage.

    Args:
        config: Mapping of settings. Read with ``[]``:
            ``unzip_blasr_concurrent_jobs``, ``unzip_phasing_concurrent_jobs``,
            ``job_type``, ``sge_track_reads``, ``sge_hasm`` and, when there is
            at least one contig id, ``sge_blasr_aln`` and ``sge_phasing``.
            Read with ``.get``: ``job_queue``, ``sge_option``,
            ``pwatcher_type``, ``use_tmpdir``.
    """
    unzip_blasr_concurrent_jobs = config['unzip_blasr_concurrent_jobs']
    unzip_phasing_concurrent_jobs = config['unzip_phasing_concurrent_jobs']
    wf = PypeProcWatcherWorkflow(
            max_jobs=unzip_blasr_concurrent_jobs,
            job_type=config['job_type'],
            job_queue=config.get('job_queue'),
            sge_option=config.get('sge_option'),
            watcher_type=config.get('pwatcher_type'),
            #watcher_directory=config.get('pwatcher_directory', 'mypwatcher'),
            use_tmpdir=config.get('use_tmpdir'),
    )

    # --- Stage 1: track reads ---------------------------------------------
    ctg_list_file = makePypeLocalFile('./3-unzip/reads/ctg_list')
    falcon_asm_done = makePypeLocalFile('./2-asm-falcon/falcon_asm_done')
    wdir = os.path.abspath('./3-unzip/reads')
    parameters = {
            'wd': wdir,
            'config': config,
            'sge_option': config['sge_track_reads'],
    }
    job_done = makePypeLocalFile(os.path.join(parameters['wd'], 'track_reads_done'))
    make_track_reads_task = PypeTask(
            inputs={'falcon_asm_done': falcon_asm_done},
            outputs={'job_done': job_done, 'ctg_list_file': ctg_list_file},
            parameters=parameters,
            wdir=wdir,
    )
    wf.addTask(make_track_reads_task(task_track_reads))
    wf.refreshTargets() #force refresh now, will put proper dependence later

    # One contig id per line; surrounding whitespace is stripped.
    with open('./3-unzip/reads/ctg_list') as f:
        ctg_ids = [row.strip() for row in f]

    all_ctg_out = {}

    # --- Stage 2: blasr alignment per contig ------------------------------
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile('./3-unzip/reads/{ctg_id}_ref.fa'.format(ctg_id = ctg_id))
        read_fasta = makePypeLocalFile('./3-unzip/reads/{ctg_id}_reads.fa'.format(ctg_id = ctg_id))

        # outputs
        wd = os.path.join(os.getcwd(), './3-unzip/0-phasing/{ctg_id}/'.format(ctg_id = ctg_id))
        #mkdir(wd)
        blasr_dir = os.path.join(wd, 'blasr')
        ctg_aln_out = makePypeLocalFile(os.path.join(blasr_dir, '{ctg_id}_sorted.bam'.format(ctg_id = ctg_id)))
        job_done = makePypeLocalFile(os.path.join(blasr_dir, 'aln_{ctg_id}_done'.format(ctg_id = ctg_id)))

        parameters = {
                'job_uid': 'aln-' + ctg_id,
                'wd': blasr_dir,
                'config': config,
                'ctg_id': ctg_id,
                'sge_option': config['sge_blasr_aln'],
        }
        make_blasr_task = PypeTask(
                inputs={'ref_fasta': ref_fasta, 'read_fasta': read_fasta},
                outputs={'ctg_aln_out': ctg_aln_out, 'job_done': job_done},
                parameters=parameters,
        )
        wf.addTask(make_blasr_task(task_run_blasr))
    wf.refreshTargets()

    # --- Stage 3: phasing per contig --------------------------------------
    wf.max_jobs = unzip_phasing_concurrent_jobs
    for ctg_id in ctg_ids:
        # inputs
        ref_fasta = makePypeLocalFile('./3-unzip/reads/{ctg_id}_ref.fa'.format(ctg_id = ctg_id))
        # NOTE(review): read_fasta is not used by the phasing task; the call
        # is kept in case makePypeLocalFile has effects beyond construction --
        # TODO confirm and drop.
        read_fasta = makePypeLocalFile('./3-unzip/reads/{ctg_id}_reads.fa'.format(ctg_id = ctg_id))

        # outputs
        wd = os.path.join(os.getcwd(), './3-unzip/0-phasing/{ctg_id}/'.format(ctg_id = ctg_id))

        blasr_dir = os.path.join(wd, 'blasr')
        ctg_aln_out = makePypeLocalFile(os.path.join(blasr_dir, '{ctg_id}_sorted.bam'.format(ctg_id = ctg_id)))

        phasing_dir = os.path.join(wd, 'phasing')
        job_done = makePypeLocalFile(os.path.join(phasing_dir, 'p_{ctg_id}_done'.format(ctg_id = ctg_id)))
        rid_to_phase_out = makePypeLocalFile(os.path.join(wd, 'rid_to_phase.{ctg_id}'.format(ctg_id = ctg_id))) # TODO: ???
        # Collected as input for the gather task below, although the phasing
        # task does not declare it as an output (original note: implicit
        # output?).
        all_ctg_out['r2p.{ctg_id}'.format(ctg_id = ctg_id)] = rid_to_phase_out

        parameters = {
                'job_uid': 'ha-' + ctg_id,
                'wd': wd,
                'config': config,
                'ctg_id': ctg_id,
                'sge_option': config['sge_phasing'],
        }
        make_phasing_task = PypeTask(
                inputs={'ref_fasta': ref_fasta, 'aln_bam': ctg_aln_out},
                outputs={'job_done': job_done},
                parameters=parameters,
        )
        wf.addTask(make_phasing_task(task_phasing))
    wf.refreshTargets()

    # --- Stage 4: gather rid-to-phase files and run hasm ------------------
    hasm_wd = os.path.abspath('./3-unzip/1-hasm/')
    #mkdir(hasm_wd)
    rid_to_phase_all = makePypeLocalFile(os.path.join(hasm_wd, 'rid-to-phase-all', 'rid_to_phase.all'))
    task = PypeTask(
            inputs=all_ctg_out,
            outputs={'rid_to_phase_all': rid_to_phase_all},
    )(get_rid_to_phase_all)
    wf.addTask(task)

    # Start from the most recent parameters (same keys as before), but work
    # on a copy so the dict already handed to the previously registered task
    # is not modified after the fact.
    hasm_parameters = dict(parameters)
    hasm_parameters['wd'] = hasm_wd
    hasm_parameters['sge_option'] = config['sge_hasm']
    job_done = makePypeLocalFile(os.path.join(hasm_wd, 'hasm_done'))
    make_hasm_task = PypeTask(
            inputs={'rid_to_phase_all': rid_to_phase_all},
            outputs={'job_done': job_done},
            parameters=hasm_parameters,
    )
    wf.addTask(make_hasm_task(task_hasm))

    wf.refreshTargets()