def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampNL=[]):
    """Launch pipe_s_rsq2skip.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        the first '_splice'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    sampNL: when not empty, samples absent from this list are skipped.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for inPath in inputFilePathL:
        sampN = inPath.split('/')[-1].split('_splice')[0]
        if not (sampNL == [] or sampN in sampNL):
            continue
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2skip.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, inPath, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_skip.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_skip.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_cs2mut.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name handed to mypipe.prepare_baseDir(mkdir=True) and
        passed to the child as '-p'.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    projectDir = mypipe.prepare_baseDir(projectN, mkdir=True)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]
        print('%s %s' % (sampN, fqPattern))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_cs2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Xsq.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def tcga(tumorDirN, bloodDirN, projectN='TCGA_somatic', pbs=False, server='smc2', genome='hg19'):
    """Pair each blood BAM with its tumor BAMs and launch pipe_s_xsq2somatic.py.

    Normal files are globbed from '<bloodDirN>/*/*recal.bam'; the part of the
    file name before '-B.recal.bam' is the subject id. Tumor files are globbed
    from '<tumorDirN>/*/<subject id>*recal.bam'. A file name that does not
    match its regular expression raises AttributeError (re.match gives None).
    A tumor whose directory '<storage root>/<projectN>/<tid>' already exists
    is skipped.

    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for normalBam in glob('%s/*/*recal.bam' % bloodDirN):
        sid = re.match('(.*)-B.recal.bam', os.path.basename(normalBam)).group(1)

        for tumorBam in glob('%s/*/%s*recal.bam' % (tumorDirN, sid)):
            tid = re.match(r'(.*)\.recal.bam$', os.path.basename(tumorBam)).group(1)

            # an existing output directory means this tumor is not launched again
            if os.path.isdir('%s/%s/%s' % (storageRoot, projectN, tid)):
                continue

            cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (
                mysetting.SRC_HOME, tumorBam, normalBam, tid, projectN, False, server, genome)
            print(cmd)

            if pbs:
                log = '%s/%s/%s.Xsq2somatic.qlog' % (projectDir, tid, tid)
                shellCmd = 'echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, tid, log)
            else:
                log = '%s/%s.Xsq2somatic.qlog' % (projectDir, tid)
                shellCmd = '(%s) 2> %s' % (cmd, log)
            os.system(shellCmd)
# main -- batch launcher for pipe_s_xsq2phylotree.py.
#
# Ensures '<storage root>/<projectN>' exists (mkdir + chmod a+w through
# os.system), reads the trio table with mypipe.read_trio(trioFileN,
# mysetting.wxsBamDirL, tidL), and skips a trio when tidL is non-empty and does
# not contain it, or when its 'Normal' or 'prim_id' list is empty.
# For a trio that is kept, the first 'Normal' BAM, the first 'Primary' BAM and
# every 'Recurrent' BAM are collected in a set, and the set is comma-joined into
# the '-i' argument (set iteration order, so the BAM order is not fixed).
# The sample name used for '-n', the qsub job name and the log file is the
# first 'prim_id' entry.
# pbs=True pipes the command to 'qsub -q <server>'; otherwise the command runs
# through os.system with stderr redirected to '<project dir>/<sampN>.Xsq2phylotree.qlog'.
# 'clean' is not referenced: the child script always receives '-c False'.
#
# NOTE(review): this definition is stored on one physical line, so its original
# indentation is lost. It cannot be told from here whether the command is built
# for every kept trio or only for trios with a non-empty 'recur_id' -- confirm
# against the upstream file before re-indenting.
# NOTE(review): the second 'prim_id' != [] test looks always true after the
# guard that precedes it -- confirm.
def main(trioFileN, projectN, tidL=[], clean=False, pbs=False, server='smc1', genome='hg19'): storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/' apacheBase = storageBase if glob(storageBase + projectN): print('File directory: already exists') else: os.system('mkdir %s/%s; \ chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN)) print('File directory: created') if glob(apacheBase + projectN): print('Log directory: already exists') else: os.system('mkdir %s/%s; \ chmod a+w %s/%s' % (apacheBase, projectN, apacheBase, projectN)) print('Log directory: created') bamDirL = mysetting.wxsBamDirL trioH = mypipe.read_trio(trioFileN, bamDirL, tidL) ## assume 1 primary & normal per trio for tid in trioH: if tidL != [] and tid not in tidL: continue if trioH[tid]['Normal'] == [] or trioH[tid]['prim_id'] == []: continue bamS = set() if trioH[tid]['prim_id'] != []: ##primary bamS.add(trioH[tid]['Normal'][0]) bamS.add(trioH[tid]['Primary'][0]) if trioH[tid]['recur_id'] != []: ##recurrent for recur in range(len(trioH[tid]['Recurrent'])): bamS.add(trioH[tid]['Recurrent'][recur]) sampN = trioH[tid]['prim_id'][0] cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2phylotree.py -i %s -n %s -p %s -c %s -s %s -g %s' % ( mysetting.SRC_HOME, ','.join( list(bamS)), sampN, projectN, False, server, genome) print cmd if pbs: log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN + '/' + sampN, sampN) os.system('echo "%s" | qsub -q %s -N x2phylotree_%s -o %s -j oe' % (cmd, server, sampN, log)) else: log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN, sampN) os.system('(%s) 2> %s' % (cmd, log))
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
    """Launch pipe_s_xsq2cnCorr.py for samples that have a purity estimate.

    inputFilePathL: '.ngCGH' file paths. The sample name is the file name up
        to the last '.ngCGH'; the purity id is the part before the
        '_<X|C|T..>_' tag, with the tag appended when it is not plain 'T'.
        A file name that does not match raises AttributeError.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.

    A sample is launched only when xsq_purity holds a tumor_frac other than
    'ND' for it and no entry of /EQL3/pipeline/CNA_corr contains its id.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    (con, cursor) = mymysql.connectDB(user=mysqlH['smc1']['user'], passwd=mysqlH['smc1']['passwd'],
                                      db='ircr1', host=mysqlH['smc1']['host'])

    for inPath in inputFilePathL:
        fileN = inPath.split('/')[-1]
        sampN = re.match(r'(.*)\.ngCGH', fileN).group(1)
        (sid, tag) = re.match(r'(.*)_([XCT].{0,2})_.*\.ngCGH', fileN).groups()
        if tag != 'T':
            sid = '%s_%s' % (sid, tag)

        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
        rows = cursor.fetchall()

        # purity must have been calculated for this sample
        if len(rows) == 0 or rows[0][0] == 'ND':
            continue
        # corrected copy number already present
        if any(sid in entry for entry in os.listdir('/EQL3/pipeline/CNA_corr')):
            continue

        print(sid)
        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (
            mysetting.SRC_HOME, inPath, sampN, projectN, False, server)
        print(cmd)

        if pbs:
            log = '%s/%s/%s.Xsq_cnCorr.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq_cnCorr.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(trioFileN, projectN, tidL=[], clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2somatic.py for the tumors of each trio.

    trioFileN: trio table, read with mypipe.read_trio together with
        mysetting.wxsBamDirL and tidL.
    projectN: project name, also the directory name under the storage root.
    tidL: when not empty, trios absent from this list are skipped.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.

    Trios with an empty 'Normal' list are skipped. The first primary BAM and
    every recurrent BAM are each paired with the first normal BAM.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    def launch(sampN, tumor, normal):
        # One tumor/normal comparison: print the command, then queue or run it.
        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, tumor, normal, sampN, projectN, False, server, genome)
        print(cmd)
        if pbs:
            log = '%s/%s/%s.Xsq2somatic.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq2somatic.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)

    trioH = mypipe.read_trio(trioFileN, mysetting.wxsBamDirL, tidL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        if not (tidL == [] or tid in tidL):
            continue

        trio = trioH[tid]
        if trio['Normal'] == []:
            continue

        if trio['prim_id'] != []:  ## primary
            primId = trio['prim_id'][0]
            print('%s %s' % (tid, trio['Primary']))
            launch(primId, trio['Primary'][0], trio['Normal'][0])

        if trio['recur_id'] != []:  ## recurrent
            for idx, recurBam in enumerate(trio['Recurrent']):
                launch(trio['recur_id'][idx], recurBam, trio['Normal'][0])
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_rsq2mut.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_mut.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_mut.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def pooled(inputFileL, projectN, pool='SGI', clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2cn.py against a pooled reference for every input file.

    inputFileL: input file paths; trailing whitespace (for example the newline
        kept by readlines()) is stripped before the path is used. The sample
        name is the file name up to its first '.'.
    projectN: project name, also the directory name under the storage root.
    pool: 'SGI' selects '--use_pool_sgi', 'CS' selects '--cancerscan', any
        other value selects '--use_pool_dlink'.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.

    Samples whose name contains 'KN' and samples whose output directory
    already exists are skipped.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageBase + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN))
            print('%s directory: created' % label)

    # The flag depends only on `pool`, so it is chosen once, before the loop.
    flag = {'SGI': '--use_pool_sgi', 'CS': '--cancerscan'}.get(pool, '--use_pool_dlink')

    for inputFile in inputFileL:
        # Use the stripped path everywhere: the sample name was already derived
        # from it, and an unstripped newline would cut the shell command in two.
        inputFile = inputFile.rstrip()
        sampN = inputFile.split('/')[-1].split('.')[0]

        if 'KN' in sampN:
            continue
        if os.path.isdir(projectDir + '/' + sampN):
            ## the sample has been processed already
            continue

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cn.py -i %s -n %s -p %s -c %s -s %s -g %s %s' % (
            mysetting.SRC_HOME, inputFile, sampN, projectN, False, server, genome, flag)
        print('%s %s %s' % (sampN, cmd, storageBase))

        if pbs:
            log = '%s/%s/%s.Xsq2cn.qlog' % (projectDir, sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N x2cn_%s -o %s -j oe' % (cmd, server, sampN, log))
        else:
            log = '%s/%s.Xsq2cn.qlog' % (projectDir, sampN)
            os.system('(%s) 2> %s' % (cmd, log))
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_rsq2expr.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name without
        its last eight characters.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1][:-8]
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2expr.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_expr.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_expr.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def single(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2somatic.py without a matched normal ('-j' is not passed).

    inputFilePathL: input file paths; any path containing '_B_' is skipped.
        The sample name is the base name up to its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for bamPath in inputFilePathL:
        if '_B_' in bamPath:
            continue

        sampN = os.path.basename(bamPath).split('.')[0]
        print('%s %s' % (sampN, bamPath))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, bamPath, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Xsq2somatic_s.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N x2somatic_%s_s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq2somatic_s.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19',
         sampPrefixL=('IRCR_GBM11_117',)):
    """Launch pipe_s_xsq2mut.py for the selected input files, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    sampPrefixL: sample-name prefixes to process (a list/tuple, or one string).
        The default keeps the filter that used to be hard-coded in the loop,
        so existing callers see no change; pass None to process every sample.
        An empty sequence selects nothing.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageBase + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN))
            print('%s directory: created' % label)

    # str.startswith takes a tuple of alternatives; normalise once up front.
    if sampPrefixL is None:
        prefixT = None
    elif isinstance(sampPrefixL, str):
        prefixT = (sampPrefixL,)
    else:
        prefixT = tuple(sampPrefixL)

    for inputFileP in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        inputFileP2 = inputFileP[:-7] + '\\*.fq.gz'
        sampN = inputFileP.split('/')[-1].split('.')[0]

        if prefixT is not None and not sampN.startswith(prefixT):
            continue
        print('%s %s' % (sampN, inputFileP2))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, inputFileP2, sampN, projectN, False, server, genome)

        if pbs:
            log = '%s/%s/%s.Xsq.qlog' % (projectDir, sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log))
        else:
            log = '%s/%s.Xsq.qlog' % (projectDir, sampN)
            os.system('(%s) 2> %s' % (cmd, log))
def tcga(tumorDirN, bloodDirN, projectN='TCGA_somatic', pbs=False, server='smc2', genome='hg19'):
    """Pair each blood BAM with its tumor BAMs and launch pipe_s_xsq2somatic.py.

    Normal files are globbed from '<bloodDirN>/*/*recal.bam'; the part of the
    file name before '-B.recal.bam' is the subject id. Tumor files are globbed
    from '<tumorDirN>/*/<subject id>*recal.bam'. A file name that does not
    match its regular expression raises AttributeError (re.match gives None).
    A tumor whose directory '<storage root>/<projectN>/<tid>' already exists
    is skipped.

    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for normalBam in glob('%s/*/*recal.bam' % bloodDirN):
        sid = re.match('(.*)-B.recal.bam', os.path.basename(normalBam)).group(1)

        for tumorBam in glob('%s/*/%s*recal.bam' % (tumorDirN, sid)):
            tid = re.match(r'(.*)\.recal.bam$', os.path.basename(tumorBam)).group(1)

            # an existing output directory means this tumor is not launched again
            if os.path.isdir('%s/%s/%s' % (storageRoot, projectN, tid)):
                continue

            cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (
                mysetting.SRC_HOME, tumorBam, normalBam, tid, projectN, False, server, genome)
            print(cmd)

            if pbs:
                log = '%s/%s/%s.Xsq2somatic.qlog' % (projectDir, tid, tid)
                shellCmd = 'echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, tid, log)
            else:
                log = '%s/%s.Xsq2somatic.qlog' % (projectDir, tid)
                shellCmd = '(%s) 2> %s' % (cmd, log)
            os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampNL=[]):
    """Launch pipe_s_rsq2skip.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        the first '_splice'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    sampNL: when not empty, samples absent from this list are skipped.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for inPath in inputFilePathL:
        sampN = inPath.split('/')[-1].split('_splice')[0]
        if not (sampNL == [] or sampN in sampNL):
            continue
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2skip.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, inPath, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_skip.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_skip.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
    """Launch pipe_s_xsq2cnCorr.py for samples that have a purity estimate.

    inputFilePathL: '.ngCGH' file paths. The sample name is the file name up
        to the last '.ngCGH'; the purity id is the part before the
        '_<X|C|T..>_' tag, with the tag appended when it is not plain 'T'.
        A file name that does not match raises AttributeError.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.

    A sample is launched only when xsq_purity holds a tumor_frac other than
    'ND' for it and no entry of /EQL3/pipeline/CNA_corr contains its id.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    (con, cursor) = mymysql.connectDB(user=mysqlH['smc1']['user'], passwd=mysqlH['smc1']['passwd'],
                                      db='ircr1', host=mysqlH['smc1']['host'])

    for inPath in inputFilePathL:
        fileN = inPath.split('/')[-1]
        sampN = re.match(r'(.*)\.ngCGH', fileN).group(1)
        (sid, tag) = re.match(r'(.*)_([XCT].{0,2})_.*\.ngCGH', fileN).groups()
        if tag != 'T':
            sid = '%s_%s' % (sid, tag)

        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % sid)
        rows = cursor.fetchall()

        # purity must have been calculated for this sample
        if len(rows) == 0 or rows[0][0] == 'ND':
            continue
        # corrected copy number already present
        if any(sid in entry for entry in os.listdir('/EQL3/pipeline/CNA_corr')):
            continue

        print(sid)
        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (
            mysetting.SRC_HOME, inPath, sampN, projectN, False, server)
        print(cmd)

        if pbs:
            log = '%s/%s/%s.Xsq_cnCorr.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq_cnCorr.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_rsq2mut.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_mut.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_mut.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_cs2mut.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name handed to mypipe.prepare_baseDir(mkdir=True) and
        passed to the child as '-p'.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    projectDir = mypipe.prepare_baseDir(projectN, mkdir=True)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]
        print('%s %s' % (sampN, fqPattern))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_cs2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Xsq.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19',
         sampPrefixL=('IRCR_GBM11_117',)):
    """Launch pipe_s_xsq2mut.py for the selected input files, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name up to
        its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    sampPrefixL: sample-name prefixes to process (a list/tuple, or one string).
        The default keeps the filter that used to be hard-coded in the loop,
        so existing callers see no change; pass None to process every sample.
        An empty sequence selects nothing.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageBase + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN))
            print('%s directory: created' % label)

    # str.startswith takes a tuple of alternatives; normalise once up front.
    if sampPrefixL is None:
        prefixT = None
    elif isinstance(sampPrefixL, str):
        prefixT = (sampPrefixL,)
    else:
        prefixT = tuple(sampPrefixL)

    for inputFileP in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        inputFileP2 = inputFileP[:-7] + '\\*.fq.gz'
        sampN = inputFileP.split('/')[-1].split('.')[0]

        if prefixT is not None and not sampN.startswith(prefixT):
            continue
        print('%s %s' % (sampN, inputFileP2))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, inputFileP2, sampN, projectN, False, server, genome)

        if pbs:
            log = '%s/%s/%s.Xsq.qlog' % (projectDir, sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log))
        else:
            log = '%s/%s.Xsq.qlog' % (projectDir, sampN)
            os.system('(%s) 2> %s' % (cmd, log))
def single(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_xsq2somatic.py without a matched normal ('-j' is not passed).

    inputFilePathL: input file paths; any path containing '_B_' is skipped.
        The sample name is the base name up to its first '.'.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for bamPath in inputFilePathL:
        if '_B_' in bamPath:
            continue

        sampN = os.path.basename(bamPath).split('.')[0]
        print('%s %s' % (sampN, bamPath))

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, bamPath, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Xsq2somatic_s.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N x2somatic_%s_s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Xsq2somatic_s.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
    """Launch pipe_s_rsq2expr.py for every input file, locally or via qsub.

    inputFilePathL: input file paths; the sample name is the file name without
        its last eight characters.
    projectN: project name, also the directory name under the storage root.
    clean: accepted but never forwarded; the child always receives '-c False'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    """
    storageRoot = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageRoot + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageRoot, projectN, storageRoot, projectN))
            print('%s directory: created' % label)

    for fqPath in inputFilePathL:
        # Swap the last seven characters of the path for a backslash-escaped
        # '*' followed by '.fq.gz'.
        fqPattern = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1][:-8]
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2expr.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, fqPattern, sampN, projectN, False, server, genome)

        if pbs:
            # queued jobs log into the per-sample directory
            log = '%s/%s/%s.Rsq_expr.qlog' % (projectDir, sampN, sampN)
            shellCmd = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
        else:
            log = '%s/%s.Rsq_expr.qlog' % (projectDir, sampN)
            shellCmd = '(%s) 2> %s' % (cmd, log)
        os.system(shellCmd)
def main(trioFileN, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampL=[]):
    """Launch pipe_s_xsq2purity.py for the primary and recurrent tumors of each trio.

    trioFileN: trio table, read with mypipe.read_trio and mysetting.wxsBamDirL.
    projectN: project name, also the directory name under the storage root.
    clean: forwarded to the child script as '-c'.
    pbs: when true the command is piped to qsub, otherwise it is run through
        os.system with its stderr redirected to the log file.
    server: passed as '-s' and used as the qsub queue name.
    genome: passed as '-g'.
    sampL: when not empty, only tumor samples named in this list are launched.

    For each trio the normal sample's '.mutscan' file is searched for with
    'find' in mysetting.wxsMutscanDirL; a trio without a normal id, or whose
    '.mutscan' cannot be found, is skipped (the latter with a message on
    stderr). For each tumor the '.pileup_proc' files and the '.ngCGH.seg' file
    are located the same way; if either is missing the program writes a
    message to stderr and exits with status 1.
    """
    storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    projectDir = storageBase + projectN

    # The log root is the storage root itself, so the 'Log' pass re-checks
    # the directory the 'File' pass has just dealt with.
    for label in ('File', 'Log'):
        if glob(projectDir):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; chmod a+w %s/%s' % (storageBase, projectN, storageBase, projectN))
            print('%s directory: created' % label)

    def _find(searchDir, namePattern):
        # Output lines of `find <searchDir> -name <namePattern>`.
        return os.popen('find %s -name %s' % (searchDir, namePattern)).readlines()

    def _launch(sampN, mutscanN):
        # Locate the inputs of one tumor sample, then queue or run the purity step.
        if not (sampL == [] or sampN in sampL):
            return

        procN = ''
        for procRoot in mysetting.wxsPileupProcDirL:
            fileL = _find(procRoot, '%s*chr*.pileup_proc' % sampN)
            if len(fileL) > 0:
                # Several directories may hold matches; take the first in sorted
                # order so the choice does not depend on set iteration order.
                procDir = sorted(set(os.path.dirname(p.rstrip()) for p in fileL))[0]
                procN = '%s/%s*chr*.pileup_proc' % (procDir, sampN)
                break
        if procN == '':  ## .pileup_proc not found
            sys.stderr.write('Can\'t find .pileup_proc\n')
            sys.exit(1)

        segL = _find(mysetting.wxsCNADir, '%s*.ngCGH.seg' % sampN)
        if len(segL) == 0:  ## .ngCGH.seg not found
            # Report and stop the way a missing .pileup_proc does, instead of
            # failing with a bare IndexError.
            sys.stderr.write('Can\'t find .ngCGH.seg\n')
            sys.exit(1)
        cnN = segL[0].rstrip()

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2purity.py -i \'%s\' -j %s -k %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, procN, mutscanN, cnN, sampN, projectN, clean, server, genome)
        print(sampN)
        print('%s %s %s' % (procN, mutscanN, cnN))

        if pbs:
            log = '%s/%s/%s.Xsq2purity.qlog' % (projectDir, sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N x2purity_%s -o %s -j oe' % (cmd, server, sampN, log))
        else:
            log = '%s/%s.Xsq2purity.qlog' % (projectDir, sampN)
            os.system('(%s) 2> %s' % (cmd, log))

    trioH = mypipe.read_trio(trioFileN, mysetting.wxsBamDirL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        trio = trioH[tid]

        ## must have normal sample
        if trio['norm_id'] == []:
            continue
        norm_id = trio['norm_id'][0]

        mutscanN = ''
        for mutscanRoot in mysetting.wxsMutscanDirL:
            mutscanL = _find(mutscanRoot, '%s*.mutscan' % norm_id)
            if len(mutscanL) > 0:
                mutscanN = mutscanL[0].rstrip()
                break
        if mutscanN == '':  ## .mutscan not found
            print(norm_id)
            sys.stderr.write('Can\'t find .mutscan\n')
            continue

        ## primary of pair
        if trio['prim_id'] != []:
            _launch(trio['prim_id'][0], mutscanN)

        ## recurrent samples, all sharing the trio's normal
        if trio['recur_id'] != []:
            for idx in range(len(trio['Recurrent'])):
                _launch(trio['recur_id'][idx], mutscanN)