Пример #1
0
def complete_pipeline(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit the final merge/sort job for one sample.

    When ``pipelineHelpers.CheckSentinel`` accepts ``prev_sentinel``, a
    ``mergesort.sh`` script is generated under the sample's script path,
    submitted through ``pipelineHelpers.RunTask`` and then handed to
    ``pipelineHelpers.CheckTaskStatus`` together with ``output_sentinel``.

    Args:
        inputs: Not used by this stage.
        output_sentinel: Passed through to ``CheckTaskStatus``.
        outputs: Not used by this stage.
        sample_id: Sample identifier; used in log messages, the script
            path lookup and the task submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [Final merge] ' + '[' + sample_id + '] '

    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('high')
        mergedbamname = params.GetOutputFileName()

        # NOTE(review): finalbams_path is not assigned anywhere in this
        # function, so it has to exist at module scope -- confirm, otherwise
        # the format() call below raises NameError.
        script_file = '{0}mergesort.sh'.format(script_path)

        # The context manager closes the script even if a write fails, so a
        # half-written file handle is never left open.
        with open(script_file, 'w') as script:
            script.write('#!/bin/bash\n')
            script.write('#\n')
            script.write('#$ -cwd \n')
            script.write('module load sambamba \n')
            script.write('python {path}/mergesort.py '
                         ' {mergedfinal} {finalbamdir}\n'.format(
                             path=current_path,
                             mergedfinal=mergedbamname,
                             finalbamdir=finalbams_path))

        # The literal 4 is forwarded unchanged to RunTask; presumably a
        # slot/core count -- TODO confirm against pipelineHelpers.
        process = pipelineHelpers.RunTask(
            os.path.abspath(script_file), 4, bamgineer_mem,
            sample_id, bamhelp.name)
        task_list.append(process)
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)

    # Logged whether or not the sentinel check allowed the stage to run.
    pipelineHelpers.Logging('INFO', log, log_msg + 'COMPLETE!')
Пример #2
0
def subsample_loss(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one ``subsample_loss.py`` job per input BAM.

    For every path in ``inputs[0]`` a shell script is generated that runs
    ``subsample_loss.py`` on that BAM and targets
    ``<finalbams_path>/CHR<N>_LOSS.bam``. All jobs are submitted first and
    ``pipelineHelpers.CheckTaskStatus`` is called once afterwards with the
    complete task list, so the status check covers every submitted job
    instead of running on a partial list after each submission.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths.
        output_sentinel: Passed through to ``CheckTaskStatus``.
        outputs: Not used by this stage.
        sample_id: Sample identifier; used in log messages, the script
            path lookup and the task submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.

    Raises:
        IndexError: If the part of an input's basename before the first
            ``_`` contains no digits (the chromosome number cannot be
            extracted).
    """
    task_list = []
    log_msg = ' [subsample loss events] ' + '[' + sample_id + '] '
    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        # Loop-invariant: resolve the project directories once, not per input.
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        for inp in inputs[0]:
            # Basename prefix before the first "_" (e.g. "chr21"), then the
            # first run of digits inside it.
            chrevent = os.path.basename(inp).strip().split("_")[0]
            chrom = re.split(r'(\d+)', chrevent)[1]
            loss_final = "/".join(
                [finalbams_path, 'CHR' + str(chrom).upper() + '_LOSS.bam'])

            script_file = '{0}sample_{1}_{2}.sh'.format(
                script_path, 'chr' + str(chrom), "loss")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write(
                    'python {path}/subsample_loss.py {inbam} {fl} \n'.format(
                        path=current_path, inbam=inp, fl=loss_final))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        # Checked once, after every job has been submitted.
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Sampling Loss Event')
Пример #3
0
def repair_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one ``re-pair.py`` job per input BAM.

    For every path in ``inputs[0]`` a shell script named
    ``re-pair_<prefix>_gain.sh`` is generated, where ``<prefix>`` is the
    part of the input's basename before the first ``.``. After all jobs are
    submitted, ``pipelineHelpers.CheckTaskStatus`` is called once with the
    full task list.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths.
        output_sentinel: Passed through to ``CheckTaskStatus``.
        outputs: Not used by this stage.
        sample_id: Sample identifier; used in log messages, the script
            path lookup and the task submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [re-pairing reads] ' + '[' + sample_id + '] '
    pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')

        for inp in inputs[0]:
            # Basename prefix before the first "."; presumably the
            # chromosome label -- confirm against the upstream file naming.
            chrom = os.path.basename(inp).strip().split(".")[0]

            script_file = '{0}re-pair_{1}_{2}.sh'.format(
                script_path, chrom, "gain")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n\n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('python {path}/re-pair.py {inbam} \n'.format(
                    inbam=inp, path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Re-pairing')
Пример #4
0
def mutate_gain(inputs, output_sentinel, outputs, sample_id, prev_sentinel):
    """Write and submit one mutation job per repaired gain BAM.

    For every path in ``inputs[0]`` a shell script named
    ``mutate_<prefix>_gain.sh`` is generated. The script de-duplicates the
    matching ``gain_het_snp_<prefix>.bed`` file, runs ``mutate.py``, sorts
    the resulting het BAM, diffs it against the repaired BAM, merges the
    sorted het BAM with the ``diff_only1_*`` BAM, removes intermediates and
    finally runs ``rename-reads.py``. All jobs are submitted first and
    ``pipelineHelpers.CheckTaskStatus`` is called once afterwards with the
    complete task list.

    Args:
        inputs: Sequence whose first element is an iterable of BAM paths;
            output names are derived by replacing a trailing
            ``.gain.roi.repaired.sorted.bam``.
        output_sentinel: Passed through to ``CheckTaskStatus``.
        outputs: Not used by this stage.
        sample_id: Sample identifier; used in log messages, the script
            path lookup and the task submission.
        prev_sentinel: Sentinel of the preceding stage, checked before any
            work is done.
    """
    task_list = []
    log_msg = ' [implement_cnv] ' + '[' + sample_id + '] '

    if pipelineHelpers.CheckSentinel(prev_sentinel, log, log_msg):
        pipelineHelpers.Logging('INFO', log, log_msg + 'Starting')

        current_path = params.GetProgramPath()
        script_path = pipelineHelpers.GetScriptPath(sample_id, bamhelp.name)
        bamgineer_mem = bamhelp.GetBamgineerMem('med')
        (sentinel_path, results_path, haplotype_path, cancer_dir_path,
         tmpbams_path, finalbams_path) = taskHelpers.GetProjectNamePathRunID()

        # Dots are escaped so only the literal suffix is rewritten.
        repaired_suffix = r'\.gain\.roi\.repaired\.sorted\.bam$'

        # NOTE(review): every generated script writes to the same diff.bam
        # path; if the jobs run concurrently they may overwrite each other's
        # file -- confirm whether that output is actually consumed.
        diffn = "/".join([tmpbams_path, "diff.bam"])

        for inp in inputs[0]:
            # Basename prefix before the first "."; presumably the
            # chromosome label -- confirm against the upstream file naming.
            chrom = os.path.basename(inp).strip().split(".")[0]

            bedfn = "/".join([haplotype_path, 'gain_het_snp_' + chrom + '.bed'])
            nonhet = "/".join(
                [tmpbams_path, 'diff_only1_' + os.path.basename(inp)])
            hetfn = sub(repaired_suffix, '.gain.mutated.het.bam', inp)
            hetfnsorted = sub(
                repaired_suffix, '.gain.mutated.het.sorted.bam', inp)
            mergedsortfn = sub(
                repaired_suffix, '.gain.mutated.merged.sorted.bam', inp)
            mergedrenamedfn = sub(
                repaired_suffix, '.gain.renamed.mutated.merged.sorted.bam', inp)

            script_file = '{0}mutate_{1}_{2}.sh'.format(
                script_path, chrom, "gain")
            # The context manager closes the script even if a write fails.
            with open(script_file, 'w') as script:
                script.write('#!/bin/bash\n')
                # The newline matters: without it the next write would be
                # glued on as "##$ -cwd" and no longer start with "#$".
                script.write('#\n')
                script.write('#$ -cwd \n')
                script.write('module load samtools/1.2 \n')
                script.write('module load sambamba \n')
                script.write('module load bamUtil \n')

                script.write('sort -u {bf} -o {bf}\n\n'.format(bf=bedfn))
                script.write(
                    'python {path}/mutate.py {repairedbam} {bf} {happath}\n\n'.format(
                        repairedbam=inp, bf=bedfn, path=current_path,
                        happath=haplotype_path))
                script.write('sambamba sort {het} -o {hetsort}\n\n'.format(
                    het=hetfn, hetsort=hetfnsorted))
                script.write(
                    'bam diff --in1 {repairedbam} --in2 {hetsort} --out {dif}\n\n'.format(
                        repairedbam=inp, hetsort=hetfnsorted, dif=diffn))
                script.write(
                    'sambamba merge {merged} {hetonly} {nonhetonly}\n\n'.format(
                        merged=mergedsortfn, hetonly=hetfnsorted,
                        nonhetonly=nonhet))
                script.write('rm {het} {nonhetonly}  \n\n'.format(
                    het=hetfn, nonhetonly=nonhet))
                script.write(
                    'python {path}/rename-reads.py {inp2} {outp}\n\n'.format(
                        inp2=mergedsortfn, outp=mergedrenamedfn,
                        path=current_path))

            process = pipelineHelpers.RunTask(
                os.path.abspath(script_file), 4, bamgineer_mem,
                sample_id, bamhelp.name)
            task_list.append(process)

        # Checked once, after every job has been submitted.
        pipelineHelpers.CheckTaskStatus(
            task_list, output_sentinel, log, log_msg)
    pipelineHelpers.Logging('INFO', log, log_msg + 'Finished Mutating')
Пример #5
0
import os
import sys
from re import sub
from ruffus import *
from helpers import depthOfCoverageTasks, depthOfCoverageHelpers, pipelineHelpers
from helpers import runIDHelpers as rid
from helpers import parameters as params
import subprocess 

# Program path as reported by the parameters helper; resolved once at import.
current_path = params.GetProgramPath()
# Module-wide log handle obtained for the 'DepthOfCoverage' name.
log = pipelineHelpers.GetLogFile('DepthOfCoverage')

        
def runCommand(cmd):
    """Run ``cmd`` through the shell and wait for it to finish.

    The command's stdout is captured via a pipe but, within the lines
    visible here, neither the output nor the exit status is returned or
    inspected. An ``OSError`` raised while launching or communicating with
    the process is reported with ``print`` and otherwise swallowed.

    Args:
        cmd: Command line handed to ``subprocess.Popen`` with
            ``shell=True``.
    """
    try:
        # stderr is not redirected, so communicate() yields None for it.
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, shell=True)
        # communicate() blocks until the child process exits.
        stdout, stderr = process.communicate()
    except OSError as e:
        print("Execution failed:", e)