示例#1
0
文件: pipe_rsq2skip.py 项目: SMC1/JK1
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampNL=None):
	"""Run pipe_s_rsq2skip.py once for every selected input file.

	inputFilePathL -- input file paths; the sample name is the file name up
		to the first '_splice'.
	projectN -- project name; <storage base>/<projectN> is created and made
		world-writable when it does not exist yet.
	clean -- accepted but not used here; the per-sample script is always
		called with '-c False'.
	pbs -- if true, pipe each command to qsub (log under
		<project dir>/<sample>/); otherwise run it in a local subshell with
		stderr redirected to <project dir>/<sample>.Rsq_skip.qlog.
	server -- queue name for 'qsub -q', also forwarded as '-s'.
	genome -- forwarded as '-g'.
	sampNL -- optional collection of sample names to restrict the run to;
		None or an empty collection means no filtering.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location
	projectDir = storageBase + projectN

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
			print(createdMsg)

	for inputFileP in inputFilePathL:
		sampN = inputFileP.split('/')[-1].split('_splice')[0]

		# The default is None rather than a shared mutable list; an empty
		# filter keeps its old meaning of "process everything".
		if sampNL and sampN not in sampNL:
			continue
		print(sampN)

		cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2skip.py -i %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, inputFileP, sampN, projectN, False, server, genome)

		# qsub logs go into the per-sample directory, local logs into the project directory
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Rsq_skip.qlog' % (logDir, sampN)
		if pbs:
			os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log))
		else:
			os.system('(%s) 2> %s' % (cmd, log))
示例#2
0
def main(inputFilePathL,
         projectN,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_cs2mut.py once per input path.

    The project base directory comes from
    mypipe.prepare_baseDir(projectN, mkdir=True).  For each path the last 7
    characters are replaced by a backslash-escaped '*.fq.gz' wildcard
    (presumably the path ends in something like '1.fq.gz' -- confirm with
    callers) and the sample name is the file name up to its first '.'.

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    and logs to <baseDir>/<sample>/<sample>.Xsq.qlog; otherwise it runs in
    a subshell with stderr redirected to <baseDir>/<sample>.Xsq.qlog.
    """
    baseDir = mypipe.prepare_baseDir(projectN, mkdir=True)
    cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_cs2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

    for fqPath in inputFilePathL:
        fqWildcard = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]

        print('%s %s' % (sampN, fqWildcard))
        cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN,
                             False, server, genome)

        # qsub logs live in the per-sample directory, local logs in baseDir
        logDir = (baseDir + '/' + sampN) if pbs else baseDir
        log = '%s/%s.Xsq.qlog' % (logDir, sampN)

        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#3
0
def tcga(tumorDirN, bloodDirN, projectN='TCGA_somatic', pbs=False, server='smc2', genome='hg19'):
	"""Launch pipe_s_xsq2somatic.py for every tumor/normal BAM pair found on disk.

	tumorDirN -- directory searched as <tumorDirN>/*/<sid>*recal.bam.
	bloodDirN -- directory searched as <bloodDirN>/*/*recal.bam; the id <sid>
		is the part of the file name before '-B.recal.bam'.
	projectN -- project name; <storage base>/<projectN> is created and made
		world-writable when it does not exist yet.
	pbs -- if true, pipe each command to qsub (log under
		<project dir>/<tid>/); otherwise run it in a local subshell with
		stderr redirected to <project dir>/<tid>.Xsq2somatic.qlog.
	server -- queue name for 'qsub -q', also forwarded as '-s'.
	genome -- forwarded as '-g'.

	Tumors whose directory <project dir>/<tid> already exists are skipped.
	BAM files whose names do not follow the expected pattern are skipped
	with a message instead of raising AttributeError.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location
	projectDir = storageBase + projectN

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
			print(createdMsg)

	for normalFileN in glob('%s/*/*recal.bam' % bloodDirN):
		# The glob is broader than the naming pattern, so a match is not guaranteed.
		normalMatch = re.match(r'(.*)-B.recal.bam', os.path.basename(normalFileN))
		if normalMatch is None:
			print('Skipping %s: file name does not match <id>-B.recal.bam' % normalFileN)
			continue
		sid = normalMatch.group(1)

		for tumorFileN in glob('%s/*/%s*recal.bam' % (tumorDirN, sid)):
			tumorMatch = re.match(r'(.*)\.recal.bam$', os.path.basename(tumorFileN))
			if tumorMatch is None:
				print('Skipping %s: file name does not match <id>.recal.bam' % tumorFileN)
				continue
			tid = tumorMatch.group(1)

			# an existing output directory means this tumor was handled before
			if os.path.isdir('%s/%s/%s' % (storageBase, projectN, tid)):
				continue

			cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, tumorFileN, normalFileN, tid, projectN, False, server, genome)
			print(cmd)

			logDir = (projectDir + '/' + tid) if pbs else projectDir
			log = '%s/%s.Xsq2somatic.qlog' % (logDir, tid)
			if pbs:
				os.system('echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, tid, log))
			else:
				os.system('(%s) 2> %s' % (cmd, log))
示例#4
0
def main(trioFileN,
         projectN,
         tidL=None,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_xsq2phylotree.py once per trio that has a normal and a primary.

    trioFileN -- trio definition file, read through mypipe.read_trio together
        with mysetting.wxsBamDirL.
    projectN -- project name; <storage base>/<projectN> is created and made
        world-writable when it does not exist yet.
    tidL -- optional list of trio ids to restrict the run to; None or empty
        means every trio.  A fresh list is passed on to mypipe.read_trio.
    clean -- accepted but not used here; the per-sample script is always
        called with '-c False'.
    pbs -- if true, pipe each command to qsub (log under
        <project dir>/<sample>/); otherwise run it in a local subshell with
        stderr redirected to <project dir>/<sample>.Xsq2phylotree.qlog.
    server -- queue name for 'qsub -q', also forwarded as '-s'.
    genome -- forwarded as '-g'.

    Assumes one primary and one normal per trio: only the first 'Normal' and
    first 'Primary' BAM are used, plus every 'Recurrent' BAM.
    """
    # Never share one default list between calls (it is handed to read_trio).
    tidL = [] if tidL is None else tidL

    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location
    projectDir = storageBase + projectN

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
        else:
            os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                      (parentDir, projectN, parentDir, projectN))
            print(createdMsg)

    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

    for tid in trioH:
        if tidL and tid not in tidL:
            continue
        trio = trioH[tid]
        if not trio['Normal'] or not trio['prim_id']:
            continue

        # A primary is guaranteed at this point, so no second prim_id check.
        bamS = set([trio['Normal'][0], trio['Primary'][0]])
        if trio['recur_id']:
            bamS.update(trio['Recurrent'])

        sampN = trio['prim_id'][0]

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2phylotree.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, ','.join(bamS), sampN, projectN, False,
            server, genome)
        print(cmd)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Xsq2phylotree.qlog' % (logDir, sampN)
        if pbs:
            os.system('echo "%s" | qsub -q %s -N x2phylotree_%s -o %s -j oe' %
                      (cmd, server, sampN, log))
        else:
            os.system('(%s) 2> %s' % (cmd, log))
示例#5
0
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
    """Launch pipe_s_xsq2cnCorr.py for samples with a usable purity estimate.

    For each '*.ngCGH' input the sample id is parsed from the file name and
    looked up in the xsq_purity table (database 'ircr1').  The job is started
    only when a row exists, its tumor_frac is not 'ND', and no entry of
    /EQL3/pipeline/CNA_corr contains the sample id in its name.

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    (log under <project dir>/<sample>/); otherwise it runs in a subshell with
    stderr redirected to <project dir>/<sample>.Xsq_cnCorr.qlog.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
            continue
        os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                  (parentDir, projectN, parentDir, projectN))
        print(createdMsg)

    dbCred = mysqlH['smc1']
    (con, cursor) = mymysql.connectDB(user=dbCred['user'],
                                      passwd=dbCred['passwd'],
                                      db='ircr1',
                                      host=dbCred['host'])

    for cghPath in inputFilePathL:
        cghName = cghPath.split('/')[-1]
        sampN = re.match(r'(.*)\.ngCGH', cghName).group(1)
        (sid, tag) = re.match(r'(.*)_([XCT].{0,2})_.*\.ngCGH', cghName).groups()
        # any tag other than plain 'T' stays part of the sample id
        if tag != 'T':
            sid = '%s_%s' % (sid, tag)

        # NOTE(review): the query is assembled by string interpolation
        cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' %
                       (sid))
        purityRows = cursor.fetchall()

        # only samples for which purity was calculated
        if len(purityRows) == 0 or purityRows[0][0] == 'ND':
            continue
        # only those for which corrected cn were not calculated yet
        if any(sid in entry for entry in os.listdir('/EQL3/pipeline/CNA_corr')):
            continue

        print(sid)
        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (
            mysetting.SRC_HOME, cghPath, sampN, projectN, False, server)
        print(cmd)

        projectDir = storageBase + projectN
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Xsq_cnCorr.qlog' % (logDir, sampN)
        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#6
0
def main(trioFileN, projectN, tidL=None, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_xsq2somatic.py for the primary and every recurrent tumor of each trio.

	trioFileN -- trio definition file, read through mypipe.read_trio together
		with mysetting.wxsBamDirL.
	projectN -- project name; <storage base>/<projectN> is created and made
		world-writable when it does not exist yet.
	tidL -- optional list of trio ids to restrict the run to; None or empty
		means every trio.  A fresh list is passed on to mypipe.read_trio.
	clean -- accepted but not used here; the per-sample script is always
		called with '-c False'.
	pbs -- if true, pipe each command to qsub (log under
		<project dir>/<sample>/); otherwise run it in a local subshell with
		stderr redirected to <project dir>/<sample>.Xsq2somatic.qlog.
	server -- queue name for 'qsub -q', also forwarded as '-s'.
	genome -- forwarded as '-g'.

	Assumes one primary and one normal per trio: the first 'Normal' BAM is
	paired with the first 'Primary' BAM and with each 'Recurrent' BAM.
	Trios without a normal are skipped.
	"""
	# Never share one default list between calls (it is handed to read_trio).
	tidL = [] if tidL is None else tidL

	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location
	projectDir = storageBase + projectN

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
			print(createdMsg)

	def _launch(sampN, tumor, normal):
		# Build, echo and run one tumor/normal job (shared by primary and recurrent).
		cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, tumor, normal, sampN, projectN, False, server, genome)
		print(cmd)
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Xsq2somatic.qlog' % (logDir, sampN)
		if pbs:
			os.system('echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, sampN, log))
		else:
			os.system('(%s) 2> %s' % (cmd, log))

	bamDirL = mysetting.wxsBamDirL
	trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

	for tid in trioH:
		if tidL and tid not in tidL:
			continue
		trio = trioH[tid]
		if not trio['Normal']:
			continue
		normal = trio['Normal'][0]

		if trio['prim_id']:  # primary
			print('%s %s' % (tid, trio['Primary']))
			_launch(trio['prim_id'][0], trio['Primary'][0], normal)

		if trio['recur_id']:  # recurrent
			for recur, tumor in enumerate(trio['Recurrent']):
				_launch(trio['recur_id'][recur], tumor, normal)
示例#7
0
def main(inputFilePathL,
         projectN,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_rsq2mut.py once per input path.

    <storage base>/<projectN> is created and made world-writable when it
    does not exist yet.  For each path the last 7 characters are replaced by
    a backslash-escaped '*.fq.gz' wildcard (presumably the path ends in
    something like '1.fq.gz' -- confirm with callers) and the sample name is
    the file name up to its first '.'.

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    (log under <project dir>/<sample>/); otherwise it runs in a subshell with
    stderr redirected to <project dir>/<sample>.Rsq_mut.qlog.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
            continue
        os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                  (parentDir, projectN, parentDir, projectN))
        print(createdMsg)

    projectDir = storageBase + projectN
    cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

    for fqPath in inputFilePathL:
        fqWildcard = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1].split('.')[0]

        print(sampN)
        cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN,
                             False, server, genome)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Rsq_mut.qlog' % (logDir, sampN)
        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#8
0
def pooled(inputFileL, projectN, pool='SGI', clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_xsq2cn.py for every input that has no output directory yet.

	inputFileL -- input paths; trailing whitespace (e.g. a newline left over
		from reading a list file) is stripped before the path is used, both
		for the sample name and for the shell command.
	projectN -- project name; <storage base>/<projectN> is created and made
		world-writable when it does not exist yet.
	pool -- selects the extra flag passed to the child script: 'SGI' ->
		'--use_pool_sgi', 'CS' -> '--cancerscan', anything else ->
		'--use_pool_dlink'.
	clean -- accepted but not used here; the per-sample script is always
		called with '-c False'.
	pbs -- if true, pipe each command to qsub (log under
		<project dir>/<sample>/); otherwise run it in a local subshell with
		stderr redirected to <project dir>/<sample>.Xsq2cn.qlog.
	server -- queue name for 'qsub -q', also forwarded as '-s'.
	genome -- forwarded as '-g'.

	Samples whose name contains 'KN' and samples whose directory
	<project dir>/<sample> already exists are skipped.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location
	projectDir = storageBase + projectN

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
			print(createdMsg)

	# The pool flag does not depend on the sample, so pick it once.
	if pool == 'SGI':
		flag = '--use_pool_sgi'
	elif pool == 'CS':
		flag = '--cancerscan'
	else:
		flag = '--use_pool_dlink'

	for inputFile in inputFileL:
		# Strip once and reuse: an unstripped trailing newline inside the
		# shell command would split it into two commands.
		inputFile = inputFile.rstrip()
		sampN = inputFile.split('/')[-1].split('.')[0]
		if 'KN' in sampN:
			continue

		# skip samples that have been processed already
		if os.path.isdir(projectDir + '/' + sampN):
			continue

		cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cn.py -i %s -n %s -p %s -c %s -s %s -g %s %s' % (mysetting.SRC_HOME, inputFile, sampN, projectN, False, server, genome, flag)
		print('%s %s %s' % (sampN, cmd, storageBase))

		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Xsq2cn.qlog' % (logDir, sampN)
		if pbs:
			os.system('echo "%s" | qsub -q %s -N x2cn_%s -o %s -j oe' % (cmd, server, sampN, log))
		else:
			os.system('(%s) 2> %s' % (cmd, log))
示例#9
0
def main(inputFilePathL,
         projectN,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_rsq2expr.py once per input path.

    <storage base>/<projectN> is created and made world-writable when it
    does not exist yet.  For each path the last 7 characters are replaced by
    a backslash-escaped '*.fq.gz' wildcard and the sample name is the file
    name without its last 8 characters (presumably a suffix such as
    '.1.fq.gz' -- confirm with callers).

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    (log under <project dir>/<sample>/); otherwise it runs in a subshell with
    stderr redirected to <project dir>/<sample>.Rsq_expr.qlog.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
            continue
        os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                  (parentDir, projectN, parentDir, projectN))
        print(createdMsg)

    projectDir = storageBase + projectN
    cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2expr.py -i %s -n %s -p %s -c %s -s %s -g %s'

    for fqPath in inputFilePathL:
        fqWildcard = fqPath[:-7] + '\\*.fq.gz'
        sampN = fqPath.split('/')[-1][:-8]
        print(sampN)

        cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN,
                             False, server, genome)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Rsq_expr.qlog' % (logDir, sampN)
        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#10
0
def single(inputFilePathL,
           projectN,
           clean=False,
           pbs=False,
           server='smc1',
           genome='hg19'):
    """Launch pipe_s_xsq2somatic.py in single-input mode (no '-j' argument).

    <storage base>/<projectN> is created and made world-writable when it
    does not exist yet.  Paths containing '_B_' are skipped (presumably
    blood/normal samples -- confirm); for the rest the sample name is the
    base name up to its first '.'.

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    (log under <project dir>/<sample>/); otherwise it runs in a subshell with
    stderr redirected to <project dir>/<sample>.Xsq2somatic_s.qlog.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
            continue
        os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                  (parentDir, projectN, parentDir, projectN))
        print(createdMsg)

    projectDir = storageBase + projectN
    cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -n %s -p %s -c %s -s %s -g %s'

    for bamPath in (p for p in inputFilePathL if '_B_' not in p):
        sampN = os.path.basename(bamPath).split('.')[0]
        print('%s %s' % (sampN, bamPath))

        cmd = cmdTemplate % (mysetting.SRC_HOME, bamPath, sampN, projectN,
                             False, server, genome)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Xsq2somatic_s.qlog' % (logDir, sampN)
        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N x2somatic_%s_s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#11
0
def main(inputFilePathL,
         projectN,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_xsq2mut.py for the inputs that pass the sample filter.

    <storage base>/<projectN> is created and made world-writable when it
    does not exist yet.  The sample name is the file name up to its first
    '.'; only samples whose first 14 characters equal 'IRCR_GBM11_117' are
    processed (hard-coded filter).  For those, the last 7 characters of the
    path are replaced by a backslash-escaped '*.fq.gz' wildcard.

    clean is accepted but not used: the child script always gets
    '-c False'.  With pbs the command is piped to qsub on queue `server`
    (log under <project dir>/<sample>/); otherwise it runs in a subshell with
    stderr redirected to <project dir>/<sample>.Xsq.qlog.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
            continue
        os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                  (parentDir, projectN, parentDir, projectN))
        print(createdMsg)

    projectDir = storageBase + projectN
    wantedPrefixL = ['IRCR_GBM11_117']  # hard-coded sample selection
    cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

    for fqPath in inputFilePathL:
        sampN = fqPath.split('/')[-1].split('.')[0]
        if sampN[:14] not in wantedPrefixL:
            continue

        fqWildcard = fqPath[:-7] + '\\*.fq.gz'
        print('%s %s' % (sampN, fqWildcard))
        cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN,
                             False, server, genome)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Xsq.qlog' % (logDir, sampN)
        if pbs:
            shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (
                cmd, server, sampN, log)
        else:
            shellLine = '(%s) 2> %s' % (cmd, log)
        os.system(shellLine)
示例#12
0
def tcga(tumorDirN,
         bloodDirN,
         projectN='TCGA_somatic',
         pbs=False,
         server='smc2',
         genome='hg19'):
    """Launch pipe_s_xsq2somatic.py for every tumor/normal BAM pair found on disk.

    tumorDirN -- directory searched as <tumorDirN>/*/<sid>*recal.bam.
    bloodDirN -- directory searched as <bloodDirN>/*/*recal.bam; the id <sid>
        is the part of the file name before '-B.recal.bam'.
    projectN -- project name; <storage base>/<projectN> is created and made
        world-writable when it does not exist yet.
    pbs -- if true, pipe each command to qsub (log under
        <project dir>/<tid>/); otherwise run it in a local subshell with
        stderr redirected to <project dir>/<tid>.Xsq2somatic.qlog.
    server -- queue name for 'qsub -q', also forwarded as '-s'.
    genome -- forwarded as '-g'.

    Tumors whose directory <project dir>/<tid> already exists are skipped.
    BAM files whose names do not follow the expected pattern are skipped
    with a message instead of raising AttributeError.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location
    projectDir = storageBase + projectN

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
        else:
            os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                      (parentDir, projectN, parentDir, projectN))
            print(createdMsg)

    for normalFileN in glob('%s/*/*recal.bam' % bloodDirN):
        # The glob is broader than the naming pattern, so a match is not guaranteed.
        normalMatch = re.match(r'(.*)-B.recal.bam',
                               os.path.basename(normalFileN))
        if normalMatch is None:
            print('Skipping %s: file name does not match <id>-B.recal.bam' %
                  normalFileN)
            continue
        sid = normalMatch.group(1)

        for tumorFileN in glob('%s/*/%s*recal.bam' % (tumorDirN, sid)):
            tumorMatch = re.match(r'(.*)\.recal.bam$',
                                  os.path.basename(tumorFileN))
            if tumorMatch is None:
                print('Skipping %s: file name does not match <id>.recal.bam' %
                      tumorFileN)
                continue
            tid = tumorMatch.group(1)

            # an existing output directory means this tumor was handled before
            if os.path.isdir('%s/%s/%s' % (storageBase, projectN, tid)):
                continue

            cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (
                mysetting.SRC_HOME, tumorFileN, normalFileN, tid, projectN,
                False, server, genome)
            print(cmd)

            logDir = (projectDir + '/' + tid) if pbs else projectDir
            log = '%s/%s.Xsq2somatic.qlog' % (logDir, tid)
            if pbs:
                os.system(
                    'echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' %
                    (cmd, server, tid, log))
            else:
                os.system('(%s) 2> %s' % (cmd, log))
示例#13
0
def main(inputFilePathL,
         projectN,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19',
         sampNL=None):
    """Run pipe_s_rsq2skip.py once for every selected input file.

    inputFilePathL -- input file paths; the sample name is the file name up
        to the first '_splice'.
    projectN -- project name; <storage base>/<projectN> is created and made
        world-writable when it does not exist yet.
    clean -- accepted but not used here; the per-sample script is always
        called with '-c False'.
    pbs -- if true, pipe each command to qsub (log under
        <project dir>/<sample>/); otherwise run it in a local subshell with
        stderr redirected to <project dir>/<sample>.Rsq_skip.qlog.
    server -- queue name for 'qsub -q', also forwarded as '-s'.
    genome -- forwarded as '-g'.
    sampNL -- optional collection of sample names to restrict the run to;
        None or an empty collection means no filtering.
    """
    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase  # logs currently share the storage location
    projectDir = storageBase + projectN

    # Same check-then-create step for the file area and the log area.
    dirChecks = (
        (storageBase, 'File directory: already exists', 'File directory: created'),
        (apacheBase, 'Log directory: already exists', 'Log directory: created'),
    )
    for parentDir, existsMsg, createdMsg in dirChecks:
        if glob(parentDir + projectN):
            print(existsMsg)
        else:
            os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                      (parentDir, projectN, parentDir, projectN))
            print(createdMsg)

    for inputFileP in inputFilePathL:
        sampN = inputFileP.split('/')[-1].split('_splice')[0]

        # The default is None rather than a shared mutable list; an empty
        # filter keeps its old meaning of "process everything".
        if sampNL and sampN not in sampNL:
            continue
        print(sampN)

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2skip.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, inputFileP, sampN, projectN, False, server,
            genome)

        # qsub logs go into the per-sample directory, local logs into the project directory
        logDir = (projectDir + '/' + sampN) if pbs else projectDir
        log = '%s/%s.Rsq_skip.qlog' % (logDir, sampN)
        if pbs:
            os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' %
                      (cmd, server, sampN, log))
        else:
            os.system('(%s) 2> %s' % (cmd, log))
示例#14
0
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1'):
	"""Launch pipe_s_xsq2cnCorr.py for samples with a usable purity estimate.

	For each '*.ngCGH' input the sample id is parsed from the file name and
	looked up in the xsq_purity table (database 'ircr1').  The job is started
	only when a row exists, its tumor_frac is not 'ND', and no entry of
	/EQL3/pipeline/CNA_corr contains the sample id in its name.

	clean is accepted but not used: the child script always gets
	'-c False'.  With pbs the command is piped to qsub on queue `server`
	(log under <project dir>/<sample>/); otherwise it runs in a subshell with
	stderr redirected to <project dir>/<sample>.Xsq_cnCorr.qlog.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
			continue
		os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
		print(createdMsg)

	dbCred = mysqlH['smc1']
	(con, cursor) = mymysql.connectDB(user=dbCred['user'], passwd=dbCred['passwd'], db='ircr1', host=dbCred['host'])

	for cghPath in inputFilePathL:
		cghName = cghPath.split('/')[-1]
		sampN = re.match(r'(.*)\.ngCGH', cghName).group(1)
		(sid, tag) = re.match(r'(.*)_([XCT].{0,2})_.*\.ngCGH', cghName).groups()
		# any tag other than plain 'T' stays part of the sample id
		if tag != 'T':
			sid = '%s_%s' % (sid, tag)

		# NOTE(review): the query is assembled by string interpolation
		cursor.execute('SELECT tumor_frac FROM xsq_purity WHERE samp_id="%s"' % (sid))
		purityRows = cursor.fetchall()

		# only samples for which purity was calculated
		if len(purityRows) == 0 or purityRows[0][0] == 'ND':
			continue
		# only those for which corrected cn were not calculated yet
		if any(sid in entry for entry in os.listdir('/EQL3/pipeline/CNA_corr')):
			continue

		print(sid)
		cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2cnCorr.py -i %s -n %s -p %s -c %s -s %s' % (mysetting.SRC_HOME, cghPath, sampN, projectN, False, server)
		print(cmd)

		projectDir = storageBase + projectN
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Xsq_cnCorr.qlog' % (logDir, sampN)
		if pbs:
			shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
		else:
			shellLine = '(%s) 2> %s' % (cmd, log)
		os.system(shellLine)
示例#15
0
文件: pipe_rsq2mut.py 项目: SMC1/JK1
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_rsq2mut.py once per input path.

	<storage base>/<projectN> is created and made world-writable when it
	does not exist yet.  For each path the last 7 characters are replaced by
	a backslash-escaped '*.fq.gz' wildcard (presumably the path ends in
	something like '1.fq.gz' -- confirm with callers) and the sample name is
	the file name up to its first '.'.

	clean is accepted but not used: the child script always gets
	'-c False'.  With pbs the command is piped to qsub on queue `server`
	(log under <project dir>/<sample>/); otherwise it runs in a subshell with
	stderr redirected to <project dir>/<sample>.Rsq_mut.qlog.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
			continue
		os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
		print(createdMsg)

	projectDir = storageBase + projectN
	cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

	for fqPath in inputFilePathL:
		fqWildcard = fqPath[:-7] + '\\*.fq.gz'
		sampN = fqPath.split('/')[-1].split('.')[0]

		print(sampN)
		cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN, False, server, genome)

		# qsub logs go into the per-sample directory, local logs into the project directory
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Rsq_mut.qlog' % (logDir, sampN)
		if pbs:
			shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
		else:
			shellLine = '(%s) 2> %s' % (cmd, log)
		os.system(shellLine)
示例#16
0
文件: pipe_cs2mut.py 项目: SMC1/JK1
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_cs2mut.py once per input path.

	The project base directory comes from
	mypipe.prepare_baseDir(projectN, mkdir=True).  For each path the last 7
	characters are replaced by a backslash-escaped '*.fq.gz' wildcard
	(presumably the path ends in something like '1.fq.gz' -- confirm with
	callers) and the sample name is the file name up to its first '.'.

	clean is accepted but not used: the child script always gets
	'-c False'.  With pbs the command is piped to qsub on queue `server`
	and logs to <baseDir>/<sample>/<sample>.Xsq.qlog; otherwise it runs in
	a subshell with stderr redirected to <baseDir>/<sample>.Xsq.qlog.
	"""
	baseDir = mypipe.prepare_baseDir(projectN, mkdir=True)
	cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_cs2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

	for fqPath in inputFilePathL:
		fqWildcard = fqPath[:-7] + '\\*.fq.gz'
		sampN = fqPath.split('/')[-1].split('.')[0]

		print('%s %s' % (sampN, fqWildcard))
		cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN, False, server, genome)

		# qsub logs live in the per-sample directory, local logs in baseDir
		logDir = (baseDir + '/' + sampN) if pbs else baseDir
		log = '%s/%s.Xsq.qlog' % (logDir, sampN)

		if pbs:
			shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
		else:
			shellLine = '(%s) 2> %s' % (cmd, log)
		os.system(shellLine)
示例#17
0
文件: pipe_xsq2mut.py 项目: SMC1/JK1
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_xsq2mut.py for the inputs that pass the sample filter.

	<storage base>/<projectN> is created and made world-writable when it
	does not exist yet.  The sample name is the file name up to its first
	'.'; only samples whose first 14 characters equal 'IRCR_GBM11_117' are
	processed (hard-coded filter).  For those, the last 7 characters of the
	path are replaced by a backslash-escaped '*.fq.gz' wildcard.

	clean is accepted but not used: the child script always gets
	'-c False'.  With pbs the command is piped to qsub on queue `server`
	(log under <project dir>/<sample>/); otherwise it runs in a subshell with
	stderr redirected to <project dir>/<sample>.Xsq.qlog.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
			continue
		os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
		print(createdMsg)

	projectDir = storageBase + projectN
	wantedPrefixL = ['IRCR_GBM11_117']  # hard-coded sample selection
	cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2mut.py -i %s -n %s -p %s -c %s -s %s -g %s'

	for fqPath in inputFilePathL:
		sampN = fqPath.split('/')[-1].split('.')[0]
		if sampN[:14] not in wantedPrefixL:
			continue

		fqWildcard = fqPath[:-7] + '\\*.fq.gz'
		print('%s %s' % (sampN, fqWildcard))
		cmd = cmdTemplate % (mysetting.SRC_HOME, fqWildcard, sampN, projectN, False, server, genome)

		# qsub logs go into the per-sample directory, local logs into the project directory
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Xsq.qlog' % (logDir, sampN)
		if pbs:
			shellLine = 'echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log)
		else:
			shellLine = '(%s) 2> %s' % (cmd, log)
		os.system(shellLine)
示例#18
0
def single(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_xsq2somatic.py in single-input mode (no '-j' argument).

	<storage base>/<projectN> is created and made world-writable when it
	does not exist yet.  Paths containing '_B_' are skipped (presumably
	blood/normal samples -- confirm); for the rest the sample name is the
	base name up to its first '.'.

	clean is accepted but not used: the child script always gets
	'-c False'.  With pbs the command is piped to qsub on queue `server`
	(log under <project dir>/<sample>/); otherwise it runs in a subshell with
	stderr redirected to <project dir>/<sample>.Xsq2somatic_s.qlog.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase  # logs currently share the storage location

	# Same check-then-create step for the file area and the log area.
	dirChecks = (
		(storageBase, 'File directory: already exists', 'File directory: created'),
		(apacheBase, 'Log directory: already exists', 'Log directory: created'),
	)
	for parentDir, existsMsg, createdMsg in dirChecks:
		if glob(parentDir + projectN):
			print(existsMsg)
			continue
		os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (parentDir, projectN, parentDir, projectN))
		print(createdMsg)

	projectDir = storageBase + projectN
	cmdTemplate = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -n %s -p %s -c %s -s %s -g %s'

	for bamPath in (p for p in inputFilePathL if '_B_' not in p):
		sampN = os.path.basename(bamPath).split('.')[0]
		print('%s %s' % (sampN, bamPath))

		cmd = cmdTemplate % (mysetting.SRC_HOME, bamPath, sampN, projectN, False, server, genome)

		# qsub logs go into the per-sample directory, local logs into the project directory
		logDir = (projectDir + '/' + sampN) if pbs else projectDir
		log = '%s/%s.Xsq2somatic_s.qlog' % (logDir, sampN)
		if pbs:
			shellLine = 'echo "%s" | qsub -q %s -N x2somatic_%s_s -o %s -j oe' % (cmd, server, sampN, log)
		else:
			shellLine = '(%s) 2> %s' % (cmd, log)
		os.system(shellLine)
示例#19
0
文件: pipe_rsq2expr.py 项目: SMC1/JK1
def main(inputFilePathL, projectN, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_rsq2expr.py once for every input file.

	inputFilePathL -- iterable of input file paths. For each path the last
	                  7 characters are replaced by '\\*.fq.gz' to form the
	                  -i argument, and the file name minus its last 8
	                  characters is used as the sample name (presumably the
	                  paths end in '.N.fq.gz' -- TODO confirm).
	projectN       -- project name; also the directory name under the
	                  storage base where logs are written.
	clean          -- accepted for interface compatibility only: the
	                  per-sample script is always invoked with '-c False'.
	pbs            -- if true, submit each command through qsub to queue
	                  `server`; otherwise run it directly via os.system.
	server, genome -- forwarded to the per-sample script as -s / -g.

	Returns None.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase

	projectDir = storageBase+projectN
	if not glob(projectDir):
		os.system('mkdir %s/%s; \
			chmod a+w %s/%s' % (storageBase,projectN, storageBase,projectN))
		print('File directory: created')
	else:
		print ('File directory: already exists')

	# apacheBase is the same path as storageBase, so this normally just
	# reports the directory handled above.
	logProjectDir = apacheBase+projectN
	if not glob(logProjectDir):
		os.system('mkdir %s/%s; \
			chmod a+w %s/%s' % (apacheBase,projectN, apacheBase,projectN))
		print('Log directory: created')
	else:
		print ('Log directory: already exists')

	for inputFileP in inputFilePathL:
		# The backslash is passed through literally so the shell hands the
		# wildcard to the per-sample script instead of expanding it here.
		fastqPattern = inputFileP[:-7] + '\*.fq.gz'
		fileName = inputFileP.split('/')[-1]
		sampN = fileName[:-8]
		print(sampN)

		cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_rsq2expr.py -i %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, fastqPattern, sampN, projectN, False, server, genome)

		if not pbs:
			# Direct run: stderr of the command goes to the project-level log.
			log = '%s/%s.Rsq_expr.qlog' % (projectDir,sampN)
			os.system('(%s) 2> %s' % (cmd, log))
			continue

		# Queue run: qsub writes the joined stdout/stderr under the
		# per-sample sub-directory.
		log = '%s/%s.Rsq_expr.qlog' % (projectDir+'/'+sampN,sampN)
		os.system('echo "%s" | qsub -q %s -N %s -o %s -j oe' % (cmd, server, sampN, log))
示例#20
0
def _purity_find_pileup_proc(sampN):
	"""Return the shell glob matching sampN's per-chromosome .pileup_proc files.

	Searches the directories in mysetting.wxsPileupProcDirL in order and
	uses the first one in which `find` reports a match. Returns '' when no
	directory contains a match.
	"""
	for procBase in mysetting.wxsPileupProcDirL:
		# NOTE(review): the -name pattern is unquoted, so the shell may expand
		# it against the current directory before find sees it.
		fileL = os.popen('find %s -name %s*chr*.pileup_proc' % (procBase, sampN)).readlines()
		if len(fileL) > 0:
			procDir = list(set(os.path.dirname(x.rstrip()) for x in fileL))[0]
			return '%s/%s*chr*.pileup_proc' % (procDir,sampN)
	return ''


def _purity_run_sample(sampN, mutscanN, storageBase, projectN, clean, pbs, server, genome):
	"""Locate the inputs of one tumor sample and launch pipe_s_xsq2purity.py.

	Exits the process with status 1 (after a message on stderr) when either
	the .pileup_proc files or the .ngCGH.seg file of sampN cannot be found.
	"""
	procN = _purity_find_pileup_proc(sampN)
	if procN == '': ## .pileup_proc not found
		sys.stderr.write('Can\'t find .pileup_proc\n')
		sys.exit(1)

	segL = os.popen('find %s -name %s*.ngCGH.seg' % (mysetting.wxsCNADir,sampN)).readlines()
	if len(segL) == 0: ## .ngCGH.seg not found
		# Previously this surfaced as a bare IndexError on readlines()[0].
		sys.stderr.write('Can\'t find .ngCGH.seg\n')
		sys.exit(1)
	cnN = segL[0].rstrip()

	# procN is a glob, so it is single-quoted to reach the child script intact.
	cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2purity.py -i \'%s\' -j %s -k %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, procN, mutscanN, cnN, sampN, projectN, clean, server, genome)
	print(sampN)
	print('%s %s %s' % (procN, mutscanN, cnN))
	if pbs:
		log = '%s/%s.Xsq2purity.qlog' % (storageBase+projectN+'/'+sampN,sampN)
		os.system('echo "%s" | qsub -q %s -N x2purity_%s -o %s -j oe' % (cmd, server, sampN, log))
	else:
		log = '%s/%s.Xsq2purity.qlog' % (storageBase+projectN, sampN)
		os.system('(%s) 2> %s' % (cmd, log))


def main(trioFileN, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampL=[]):
	"""Launch pipe_s_xsq2purity.py for the tumor samples listed in a trio file.

	trioFileN      -- trio description file, parsed by mypipe.read_trio
	                  together with mysetting.wxsBamDirL.
	projectN       -- project name; also the directory name under the
	                  storage base where logs are written.
	clean          -- forwarded to the per-sample script as -c.
	pbs            -- if true, submit each command through qsub to queue
	                  `server`; otherwise run it directly via os.system.
	server, genome -- forwarded to the per-sample script as -s / -g.
	sampL          -- optional list of sample names; when non-empty only
	                  those samples are processed. The default list is never
	                  mutated here.

	Trios without a normal sample, or whose normal sample has no .mutscan
	file, are skipped. A tumor sample whose .pileup_proc or .ngCGH.seg input
	is missing terminates the process with status 1. Returns None.
	"""
	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase
	if glob(storageBase+projectN):
		print ('File directory: already exists')
	else:
		os.system('mkdir %s/%s; \
			chmod a+w %s/%s' % (storageBase,projectN, storageBase,projectN))
		print('File directory: created')

	if glob(apacheBase+projectN):
		print ('Log directory: already exists')
	else:
		os.system('mkdir %s/%s; \
			chmod a+w %s/%s' % (apacheBase,projectN, apacheBase,projectN))
		print('Log directory: created')

	bamDirL = mysetting.wxsBamDirL
	trioH = mypipe.read_trio(trioFileN, bamDirL)

	## assume 1 primary & normal per trio
	for tid in trioH:
		trio = trioH[tid]

		## must have normal sample
		if trio['norm_id'] == []:
			continue
		norm_id = trio['norm_id'][0]

		# First .mutscan found for the normal sample, in directory order.
		mutscanN = ''
		for mutscanDir in mysetting.wxsMutscanDirL:
			mutscanL = os.popen('find %s -name %s*.mutscan' % (mutscanDir, norm_id)).readlines()
			if len(mutscanL) > 0:
				mutscanN = mutscanL[0].rstrip()
				break
		if mutscanN == '': ## .mutscan not found
			print(norm_id)
			sys.stderr.write('Can\'t find .mutscan\n')
			continue

		## primary of pair
		if trio['prim_id'] != []:
			sampN = trio['prim_id'][0]
			if sampL == [] or sampN in sampL:
				_purity_run_sample(sampN, mutscanN, storageBase, projectN, clean, pbs, server, genome)

		## recurrent sample(s) of pair
		if trio['recur_id'] != []:
			# NOTE(review): the loop length comes from 'Recurrent' while the
			# names come from 'recur_id' -- presumably parallel lists built by
			# mypipe.read_trio; confirm.
			for recur in range(len(trio['Recurrent'])):
				sampN = trio['recur_id'][recur]
				if sampL == [] or sampN in sampL:
					_purity_run_sample(sampN, mutscanN, storageBase, projectN, clean, pbs, server, genome)