Example #1
0
def complete_pipeline(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Run the final merge/sort/cleanup step of the pipeline.

    Writes a small shell script that invokes ``mergesort.py``, submits it as
    a task via ``pipelineHelpers.RunTask``, and waits on its completion
    through the sentinel mechanism.

    Args:
        inputs: input file lists (unused here; kept for the task interface).
        output_sentinel: sentinel file written when this step completes.
        outputs: output file lists (unused here; kept for the task interface).
        sample_id: sample identifier used for logging and script paths.
        prev_sentinel: sentinel of the previous step; this step only runs if
            ``pipelineHelpers.CheckSentinel`` accepts it.
    """
    task_list = []
    log_msg = ' [Final merge] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(
                sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('high')
        mergedbamname = params.GetOutputFileName()

        # Use a context manager so the script file is always closed, even if
        # a write raises; keep the name around for RunTask below.
        script_name = '{0}mergesort.sh'.format(script_path)
        with open(script_name, 'w') as script:
            script.write('#!/bin/bash\n')
            script.write('#\n')
            script.write('#$ -cwd \n')
            script.write('module load sambamba \n')
            script.write('python {path}/mergesort.py '
                         ' {mergedfinal} {finalbamdir}\n'.format(
                             path=current_path,
                             mergedfinal=mergedbamname,
                             finalbamdir=finalbams_path))

        process = pipelineHelpers.RunTask(os.path.abspath(script_name), 4,
                                          bamgineer_mem, sample_id,
                                          bamhelp.name)
        task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
                    task_list, output_sentinel, log, log_msg)

    pipelineHelpers.Logging('INFO', log, log_msg + 'COMPLETE!')
def run_pipeline(results_path):
    """Drive the CNV pipeline: locate ROI bams and implement CNVs per
    chromosome in a worker pool, then merge/sort the results into the
    final output bam and remove the temporary bam directory.
    """
    global haplotype_path,cancer_dir_path,tmpbams_path, finalbams_path,log_path, logfile ,terminating,logger,logQueue
    (haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path,
     log_path, logfile) = handle.GetProjectPaths(results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    start_time = time.time()
    merged_bam_fn = params.GetOutputFileName()
    chromosome_event = create_chr_event_list()
    chromosomes_bamfiles = create_chr_bam_list()
    logger.debug('pipeline started!')

    initialize(results_path, haplotype_path, cancer_dir_path)
    worker_pool = multiprocessing.Pool(
        processes=4,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:
        # .get() with a huge timeout keeps the main process interruptible.
        worker_pool.map_async(find_roi_bam, chromosome_event).get(9999999)
        worker_pool.map_async(implement_cnv, chromosome_event).get(9999999)
        worker_pool.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        worker_pool.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        worker_pool.terminate()
    finally:
        worker_pool.join()
    time.sleep(.1)
    mergeSortBamFiles(merged_bam_fn, finalbams_path)
    end_time = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in ' + str(round((end_time - start_time)/60.0, 1)) +' minutes ***** ')
    logging.shutdown()
Example #3
0
def complete_pipeline_gain_task_list():
    """Yield job-parameter tuples for the final sort/merge task.

    Assembles the input bam list, output file name, sentinels, and sample
    ids expected by ``taskHelpers.CreateTaskList`` and yields its jobs one
    at a time.
    """
    (sentinel_path, results_path, haplotype_path, cancer_dir_path,
     tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()
    inputs = []
    outputs = []
    prev_sentinels = []

    # This step runs only after the subsample-gain step has completed.
    prev_sentinels.append(
        taskHelpers.CreateFileList('{0}_subsample_gain.sentinel', 1,
                                   sentinel_path))

    sentinels = taskHelpers.CreateFileList('{0}_sortmerge.sentinel', 1,
                                           sentinel_path)

    # NOTE(review): 88 appears to be the expected count of "FINAL" bams
    # produced upstream — confirm against the step that writes them.
    inputs.append(
        taskHelpers.CreateFileList('{0}_{1}_{2}.bam', 88, finalbams_path,
                                   "FINAL"))

    outputs.append(
        taskHelpers.CreateFileList(params.GetOutputFileName(), 1,
                                   finalbams_path))

    sample_ids = taskHelpers.CreateFileList('{0}', 1, '')

    # Delegate to the generator directly instead of re-yielding by hand.
    yield from taskHelpers.CreateTaskList(inputs, sentinels, outputs,
                                          sample_ids, prev_sentinels)
Example #4
0
def run_pipeline(results_path):
    """Drive the full CNV pipeline rooted at *results_path*.

    Phases the VCFs for each CNV file, splits the input bam by chromosome
    when no split-bams path is configured, extracts ROI bams and implements
    CNVs in a worker pool, then merges/sorts the per-chromosome results
    into the final output bam and removes the temporary bam directory.
    """
    print(results_path)
    global haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, log_path, logfile, terminating, logger, logQueue, res_path
    res_path = results_path
    haplotype_path, cancer_dir_path, tmpbams_path, finalbams_path, log_path, logfile = handle.GetProjectPaths(
        results_path)
    terminating, logger, logQueue = handle.GetLoggings(logfile)

    # chr1..chr22 plus the sex chromosomes.
    chr_list = ['chr' + str(x) for x in range(1, 23)]
    chr_list.extend(['chrX', 'chrY'])

    t0 = time.time()
    outbamfn = params.GetOutputFileName()

    cnv_list = glob.glob("/".join([params.GetCNVDir(), '*.*']))
    chromosome_event = create_chr_event_list(cnv_list, chr_list)

    logger.debug('pipeline started!')

    phase_path = '/'.join([results_path, 'phasedvcfdir'])
    # Reuse phase_path instead of rebuilding the same join for the check.
    if not os.path.exists(phase_path):
        os.makedirs(phase_path)

    initialize0(phase_path, cancer_dir_path)

    for cnv_path in cnv_list:
        initialize_pipeline(phase_path, haplotype_path, cnv_path)

    pool1 = multiprocessing.Pool(
        processes=12,
        initializer=initPool,
        initargs=[logQueue, logger.getEffectiveLevel(), terminating])
    try:

        if not params.GetSplitBamsPath():
            split_path = "/".join([res_path, 'splitbams'])
            if not os.path.exists(split_path):
                os.makedirs(split_path)
            # Bug fix: record the split-bams path unconditionally. The old
            # code called SetSplitBamsPath only when the directory had to be
            # created, so a pre-existing splitbams/ left the path unset.
            params.SetSplitBamsPath(split_path)

            result0 = pool1.map_async(split_bam_by_chr,
                                      chromosome_event).get(9999999)

        # .get() with a huge timeout keeps the main process interruptible.
        result1 = pool1.map_async(find_roi_bam, chromosome_event).get(9999999)
        result2 = pool1.map_async(implement_cnv, chromosome_event).get(9999999)
        pool1.close()
    except KeyboardInterrupt:
        logger.debug('You cancelled the program!')
        pool1.terminate()
    except Exception as e:
        logger.exception("Exception in main %s", e)
        pool1.terminate()
    finally:
        pool1.join()
    time.sleep(.1)
    mergeSortBamFiles(outbamfn, finalbams_path)
    t1 = time.time()
    shutil.rmtree(tmpbams_path)
    logger.debug(' ***** pipeline finished in ' +
                 str(round((t1 - t0) / 60.0, 1)) + ' minutes ***** ')
    logging.shutdown()