Пример #1
0
def main(trioFileN,
         projectN,
         tidL=None,
         clean=False,
         pbs=False,
         server='smc1',
         genome='hg19'):
    """Launch pipe_s_xsq2phylotree.py once per eligible trio.

    A trio is eligible when it has at least one normal BAM and a primary
    sample id. Its normal BAM, primary BAM and (if any) recurrent BAMs are
    de-duplicated and passed comma-separated via ``-i``; the primary sample
    id is passed via ``-n``.

    Args:
        trioFileN: trio description file handed to ``mypipe.read_trio``.
        projectN: project name; used as the output sub-directory name and
            passed to the pipeline via ``-p``.
        tidL: optional collection of trio ids to process. ``None`` or an
            empty collection processes every trio.
        clean: accepted for interface compatibility but not forwarded; the
            pipeline is always started with ``-c False``.
            NOTE(review): confirm whether this should be passed through.
        pbs: if true, each command is piped to ``qsub``; otherwise it is run
            synchronously through ``os.system``.
        server: value of the pipeline's ``-s`` option and, with ``pbs``, the
            qsub queue name.
        genome: value of the pipeline's ``-g`` option.
    """
    # None sentinel instead of a mutable [] default shared between calls.
    if tidL is None:
        tidL = []

    storageBase = os.path.dirname(
        mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
    apacheBase = storageBase

    # Both locations are checked even though they currently share one path.
    for label, base in (('File', storageBase), ('Log', apacheBase)):
        if glob(base + projectN):
            print('%s directory: already exists' % label)
        else:
            os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' %
                      (base, projectN, base, projectN))
            print('%s directory: created' % label)

    bamDirL = mysetting.wxsBamDirL
    trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

    ## assume 1 primary & normal per trio
    for tid in trioH:
        if tidL and tid not in tidL:
            continue
        trio = trioH[tid]
        if trio['Normal'] == [] or trio['prim_id'] == []:
            continue

        # prim_id is known to be non-empty here, so the normal and primary
        # BAMs are always part of the input set.
        bamS = set()
        bamS.add(trio['Normal'][0])
        bamS.add(trio['Primary'][0])
        if trio['recur_id'] != []:  ##recurrent
            bamS.update(trio['Recurrent'])

        sampN = trio['prim_id'][0]

        cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2phylotree.py -i %s -n %s -p %s -c %s -s %s -g %s' % (
            mysetting.SRC_HOME, ','.join(bamS), sampN, projectN, False,
            server, genome)
        print(cmd)

        if pbs:
            # Queued jobs log into the per-sample sub-directory.
            log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN + '/' +
                                                sampN, sampN)
            os.system('echo "%s" | qsub -q %s -N x2phylotree_%s -o %s -j oe' %
                      (cmd, server, sampN, log))
        else:
            # Local runs only capture stderr, directly in the project directory.
            log = '%s/%s.Xsq2phylotree.qlog' % (storageBase + projectN, sampN)
            os.system('(%s) 2> %s' % (cmd, log))
Пример #2
0
def _submit_xsq2somatic(tumor, normal, sampN, projectN, storageBase, pbs, server, genome):
	"""Build, print and launch one pipe_s_xsq2somatic.py command for a tumor/normal BAM pair."""
	cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2somatic.py -i %s -j %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, tumor, normal, sampN, projectN, False, server, genome)
	print(cmd)

	if pbs:
		# Queued jobs log into the per-sample sub-directory.
		log = '%s/%s.Xsq2somatic.qlog' % (storageBase+projectN+'/'+sampN,sampN)
		os.system('echo "%s" | qsub -q %s -N x2somatic_%s -o %s -j oe' % (cmd, server, sampN, log))
	else:
		# Local runs only capture stderr, directly in the project directory.
		log = '%s/%s.Xsq2somatic.qlog' % (storageBase+projectN, sampN)
		os.system('(%s) 2> %s' % (cmd, log))


def main(trioFileN, projectN, tidL=None, clean=False, pbs=False, server='smc1', genome='hg19'):
	"""Launch pipe_s_xsq2somatic.py for every tumor sample of every trio that has a normal BAM.

	The first normal BAM of a trio is paired with its primary BAM (when a
	primary sample id exists) and then with each recurrent BAM in turn.

	Args:
		trioFileN: trio description file handed to ``mypipe.read_trio``.
		projectN: project name; used as the output sub-directory name and
			passed to the pipeline via ``-p``.
		tidL: optional collection of trio ids to process. ``None`` or an
			empty collection processes every trio.
		clean: accepted for interface compatibility but not forwarded; the
			pipeline is always started with ``-c False``.
			NOTE(review): confirm whether this should be passed through.
		pbs: if true, each command is piped to ``qsub``; otherwise it is run
			synchronously through ``os.system``.
		server: value of the pipeline's ``-s`` option and, with ``pbs``, the
			qsub queue name.
		genome: value of the pipeline's ``-g`` option.
	"""
	# None sentinel instead of a mutable [] default shared between calls.
	if tidL is None:
		tidL = []

	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase

	# Both locations are checked even though they currently share one path.
	for label, base in (('File', storageBase), ('Log', apacheBase)):
		if glob(base+projectN):
			print('%s directory: already exists' % label)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (base,projectN, base,projectN))
			print('%s directory: created' % label)

	bamDirL = mysetting.wxsBamDirL
	trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)

	## assume 1 primary & normal per trio
	for tid in trioH:
		if tidL and tid not in tidL:
			continue
		trio = trioH[tid]
		if trio['Normal'] == []:
			continue
		normal = trio['Normal'][0]

		if trio['prim_id'] != []: ##primary
			print('%s %s' % (tid, trio['Primary']))
			_submit_xsq2somatic(trio['Primary'][0], normal, trio['prim_id'][0], projectN, storageBase, pbs, server, genome)

		if trio['recur_id'] != []: ##recurrent
			# The number of jobs is driven by the recurrent BAM list; ids are
			# looked up by position, so a shorter id list still raises IndexError.
			for recur, tumor in enumerate(trio['Recurrent']):
				_submit_xsq2somatic(tumor, normal, trio['recur_id'][recur], projectN, storageBase, pbs, server, genome)
Пример #3
0
def _find_pileup_proc_glob(sampN):
	"""Return a '<dir>/<sampN>*chr*.pileup_proc' pattern from the first configured directory with matches, or ''."""
	for procBase in mysetting.wxsPileupProcDirL:
		fileL = os.popen('find %s -name %s*chr*.pileup_proc' % (procBase, sampN)).readlines()
		if len(fileL) > 0:
			# If the hits span several directories, an arbitrary one is used.
			procDir = list(set(os.path.dirname(fileN.rstrip()) for fileN in fileL))[0]
			return '%s/%s*chr*.pileup_proc' % (procDir, sampN)
	return ''


def _submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome):
	"""Locate the inputs of one tumor sample and launch pipe_s_xsq2purity.py; exits with status 1 if an input is missing."""
	procN = _find_pileup_proc_glob(sampN)
	if procN == '': ## .pileup_proc not found
		sys.stderr.write('Can\'t find .pileup_proc\n')
		sys.exit(1)

	# Guard the lookup instead of failing with a bare IndexError on [0].
	segL = os.popen('find %s -name %s*.ngCGH.seg' % (mysetting.wxsCNADir,sampN)).readlines()
	if not segL: ## .ngCGH.seg not found
		sys.stderr.write('Can\'t find .ngCGH.seg for %s\n' % sampN)
		sys.exit(1)
	cnN = segL[0].rstrip()

	# The pileup pattern is single-quoted so the shell passes it on unexpanded.
	cmd = '/usr/bin/python %s/NGS/pipeline/pipe_s_xsq2purity.py -i \'%s\' -j %s -k %s -n %s -p %s -c %s -s %s -g %s' % (mysetting.SRC_HOME, procN, mutscanN, cnN, sampN, projectN, clean, server, genome)
	print(sampN)
	print('%s %s %s' % (procN, mutscanN, cnN))

	if pbs:
		# Queued jobs log into the per-sample sub-directory.
		log = '%s/%s.Xsq2purity.qlog' % (storageBase+projectN+'/'+sampN,sampN)
		os.system('echo "%s" | qsub -q %s -N x2purity_%s -o %s -j oe' % (cmd, server, sampN, log))
	else:
		# Local runs only capture stderr, directly in the project directory.
		log = '%s/%s.Xsq2purity.qlog' % (storageBase+projectN, sampN)
		os.system('(%s) 2> %s' % (cmd, log))


def main(trioFileN, projectN, clean=False, pbs=False, server='smc1', genome='hg19', sampL=None):
	"""Launch pipe_s_xsq2purity.py for the primary and recurrent samples of every trio with a normal sample id.

	For each trio, the normal sample's ``.mutscan`` file is searched for in
	``mysetting.wxsMutscanDirL``; a trio without one is reported on stderr and
	skipped. For each selected tumor sample, the ``.pileup_proc`` files and the
	``.ngCGH.seg`` file are located with ``find``; if either is missing the
	process exits with status 1.

	Args:
		trioFileN: trio description file handed to ``mypipe.read_trio``.
		projectN: project name; used as the output sub-directory name and
			passed to the pipeline via ``-p``.
		clean: forwarded to the pipeline via ``-c``.
		pbs: if true, each command is piped to ``qsub``; otherwise it is run
			synchronously through ``os.system``.
		server: value of the pipeline's ``-s`` option and, with ``pbs``, the
			qsub queue name.
		genome: value of the pipeline's ``-g`` option.
		sampL: optional collection of sample ids to process. ``None`` or an
			empty collection processes every sample.
	"""
	# None sentinel instead of a mutable [] default shared between calls.
	if sampL is None:
		sampL = []

	storageBase = os.path.dirname(mypipe.prepare_baseDir(projectN, mkdir=False)) + '/'
	apacheBase = storageBase

	# Both locations are checked even though they currently share one path.
	for label, base in (('File', storageBase), ('Log', apacheBase)):
		if glob(base+projectN):
			print('%s directory: already exists' % label)
		else:
			os.system('mkdir %s/%s; \t\t\tchmod a+w %s/%s' % (base,projectN, base,projectN))
			print('%s directory: created' % label)

	bamDirL = mysetting.wxsBamDirL
	trioH = mypipe.read_trio(trioFileN, bamDirL)

	## assume 1 primary & normal per trio
	for tid in trioH:
		trio = trioH[tid]
		## must have normal sample
		if trio['norm_id'] == []:
			continue
		norm_id = trio['norm_id'][0]

		# First .mutscan hit in the first configured directory that has one.
		mutscanN = ''
		for mutscanDir in mysetting.wxsMutscanDirL:
			mutscanL = os.popen('find %s -name %s*.mutscan' % (mutscanDir, norm_id)).readlines()
			if len(mutscanL) > 0:
				mutscanN = mutscanL[0].rstrip()
				break
		if mutscanN == '': ## .mutscan not found: report and skip this trio
			print(norm_id)
			sys.stderr.write('Can\'t find .mutscan\n')
			continue

		## primary of pair
		if trio['prim_id'] != []:
			sampN = trio['prim_id'][0]
			if not sampL or sampN in sampL:
				_submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome)

		## recurrent sample(s) of pair
		if trio['recur_id'] != []:
			# The number of samples is driven by the recurrent BAM list; ids are
			# looked up by position.
			for recur in range(len(trio['Recurrent'])):
				sampN = trio['recur_id'][recur]
				if not sampL or sampN in sampL:
					_submit_xsq2purity(sampN, mutscanN, projectN, storageBase, clean, pbs, server, genome)
Пример #4
0
#!/usr/bin/python

import sys, os, re
import mysetting, mybasic

mybasic.add_module_path(['NGS/pipeline','NGS/mutation'])
import mutect_batch, somaticindeldetector_batch

import mypipe

# BAM search directories come from the site settings module.
bamDirL = mysetting.wxsBamDirL
# Trio table indexed by trio id; the loop below reads each entry's 'norm_id' list.
trioH = mypipe.read_trio('/EQL1/NSL/clinical/trio_info.txt', bamDirL)

# Disabled debug dump of the ids and per-role entries for trios 59-61.
#for tid in sorted(trioH.keys()):
#	if tid not in ['59','60','61']:
#		continue
#	print tid, trioH[tid]['prim_id'], trioH[tid]['recur_id']
#	for role in ['Normal','Primary','Recurrent']:
#		print role,trioH[tid][role]
#sys.exit(1)

# NOTE(review): outDir is not referenced in the visible part of this script.
outDir='/EQL3/pipeline/somatic_mutect'

## assume 1 primary & normal per trio
for tid in trioH:
	# Skip trios without a normal sample id.
	if trioH[tid]['norm_id'] == []:
		continue
	# Hard-coded filter: only trio '63' is processed.
	if tid not in ['63']:
		continue

	# NOTE(review): the loop body appears to continue beyond this line in the full script.
	norm = trioH[tid]['norm_id'][0]
Пример #5
0
		colL = line.rstrip().split('\t')
		rm = re.match('(chr[^:]*):([0-9]*)~([0-9]*)', colL[idxH['locus']])
		(chr,chrSta,chrEnd) = rm.groups()
		ref = colL[idxH['ref']]
		alt = colL[idxH['alt']]
		if (chr,chrSta,chrEnd,ref,alt) not in annotH:
			annotH[(chr,chrSta,chrEnd,ref,alt)] = {}
			for col in ['gene_symL','ch_dna','ch_aa','ch_type','cosmic','mutsig']:
				annotH[(chr,chrSta,chrEnd,ref,alt)][col] = colL[idxH[col]]
	return annotH

### until it is merged into pipeline
import mybasic
mybasic.add_module_path(['NGS/pipeline'])
import mypipe
# Build pairH: primary sample id -> recurrent sample ids of the same trio,
# each id with its last 5 characters dropped. Trios without a recurrent
# sample are left out.
trioH = mypipe.read_trio(bamDirL=mysetting.wxsBamDirL)
pairH = {}
for tid in trioH:
	if trioH[tid]['recur_id'] == []:
		continue
	pid = trioH[tid]['prim_id'][0][:-5]
	pairH[pid] = [rid[:-5] for rid in trioH[tid]['recur_id']]
####
#(con,cursor) = mymysql.connectDB(db='ircr1')
#tag = 'pair_R:%'
#cursor.execute('select distinct samp_id from sample_tag where tag like "%s"' % tag)
#sIdL_p = [x for (x,) in cursor.fetchall()]
#
#tag = 'XSeq%%,N'
#cursor.execute('select distinct samp_id from sample_tag where tag like "%s"' % tag)
#wxsL = [x for (x,) in cursor.fetchall()]
#
def _load_signature_fractions(fileN, datH, role):
	"""Parse one mutation_signature.txt file and store column 4 / column 6 in datH[(mutation, context)][role]."""
	# 'with' closes the file even when a malformed line raises.
	with open(fileN, 'r') as inFile:
		inFile.readline()  # first line is discarded (presumably a header)
		for line in inFile:
			colL = line.rstrip().split('\t')
			mut = colL[1]
			context = colL[2]
			frac = float(colL[3])/float(colL[5])
			datH[(mut,context)][role] = frac


def diff_batch(trioFileN, tidL=None):
	"""Run the mutation-signature diff plot for every primary/recurrent pair.

	A trio is processed only if it has a normal BAM, a primary sample id and
	at least one recurrent sample id. For each recurrent sample whose
	signature file exists alongside the primary's, both files are parsed and
	``mutect_mutation_signature_diff_plot.R`` is invoked with the two sample
	ids.

	Args:
		trioFileN: trio description file handed to ``mypipe.read_trio``.
		tidL: optional collection of trio ids to process. ``None`` or an
			empty collection processes every trio.
	"""
	# None sentinel instead of a mutable [] default shared between calls.
	if tidL is None:
		tidL = []

	bamDirL = mysetting.wxsBamDirL
	trioH = mypipe.read_trio(trioFileN, bamDirL, tidL)
	baseL = ['A','C','G','T']

	for tid in trioH:
		if tidL and tid not in tidL:
			continue
		trio = trioH[tid]
		if trio['Normal'] == [] or trio['prim_id'] == [] or trio['recur_id'] == []:
			continue

		# One zeroed record per (substitution, trinucleotide context):
		# 2 reference bases x 3 alternatives x 16 flanking combinations.
		datH = {}
		for ref in ['C','T']:
			for alt in baseL:
				if alt == ref:
					continue
				mut = '%s>%s' % (ref, alt)
				for a in baseL:
					for b in baseL:
						datH[(mut, a + ref + b)] = {'prim':0.0, 'recur':0.0, 'delta':0.0}

		print('%s %s %s' % (tid, trio['prim_id'], trio['recur_id']))

		# The primary side is the same for every recurrent sample of the trio.
		pid = trio['prim_id'][0]
		p_file = '/EQL3/pipeline/somatic_mutation/%s/%s.mutation_signature.txt' % (pid,pid)

		for rid in trio['recur_id']:
			r_file = '/EQL3/pipeline/somatic_mutation/%s/%s.mutation_signature.txt' % (rid,rid)

			if os.path.isfile(p_file) and os.path.isfile(r_file):
				print(p_file)
				_load_signature_fractions(p_file, datH, 'prim')
				print(r_file)
				_load_signature_fractions(r_file, datH, 'recur')

				# datH currently only feeds the disabled writer below; the R
				# script is given just the two sample ids.
				# NOTE(review): datH is shared by all recurrent samples of a
				# trio; reset it per pair if the writer is re-enabled.
#				ofileN = '/EQL3/pipeline/somatic_mutation/%s/%s.mutation_signature_diff.txt' % (rid,rid)
#				outFile = open(ofileN, 'w')
#				outFile.write('prim_id\trecur_id\tmutation\tcontext\tp_frac\tr_frac\tdelta\n')
#				for key in datH:
#					(mut, context) = key
#					datH[key]['delta'] = datH[key]['recur'] - datH[key]['prim']
#					outFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (pid, rid, mut, context, datH[key]['prim'], datH[key]['recur'], datH[key]['delta']))
#				outFile.flush()
#				outFile.close()
				os.system('Rscript %s/NGS/mutation/mutect_mutation_signature_diff_plot.R %s %s' % (mysetting.SRC_HOME, pid, rid))