Пример #1
0
def getAlleleCount(bamfile, snpfile, outfile):
	"""Count A/C/G/T reads at the SNP sites of a BAM file.

	Runs the read-count executable (module-level `bamrc`, with the
	module-level reference `ref`) on `bamfile`, restricted to the sites
	listed in `snpfile`. The raw output goes to `outfile + '.tmp'` and is
	then reformatted into `outfile` with the columns
	Chrm, pos, A, C, G, T, Total, refCount, mutCount.
	The header line is not written by this function.

	The raw output and the SNP file are each walked once, front to back,
	so a raw row can only be matched to a SNP that has not been passed yet.

	Args:
		bamfile: BAM file handed to the read-count executable.
		snpfile: Header-less TSV of SNP sites. Column 0 is compared with
			the chromosome and column 2 with the position of each raw row;
			columns 6 and 7 are looked up as reference and mutant alleles.
		outfile: Path of the reformatted table.
	"""
	brcparams   = Box()
	brcparams.f = ref
	brcparams.w = 0
	brcparams.l = snpfile

	brcparams[''] = bamfile
	tmpfile = outfile + '.tmp'
	cmd = '{bamrc} {args} > {outfile!r}'.format(
		bamrc = bamrc, args = cmdargs(brcparams, equal = ' '), outfile = tmpfile)
	runcmd(cmd)

	# reformat the raw output to the desired format; a raw row looks like:
	#chr1	564773	C	14	=:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00	C:14:...	G:0:...	T:0:...	N:0:...
	reader = TsvReader(tmpfile, cnames = False)
	snper  = TsvReader(snpfile, cnames = False)
	writer = TsvWriter(outfile)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']

	try:
		for r in reader:
			while True:
				try:
					snp = next(snper)
				except StopIteration:
					# no SNPs left, nothing can match this row
					break
				# use the end position, in case it's 0-based
				if snp[0] != r[0] or snp[2] != r[1]:
					# this SNP has no raw row (yet): try the next SNP
					continue

				# columns 5-8 hold "<base>:<count>:..." for A, C, G and T
				counts = dict(
					(base, r[col].split(':', 2)[1])
					for col, base in enumerate('ACGT', 5)
				)
				rec       = TsvRecord()
				rec.Chrm  = r[0]
				rec.pos   = r[1]
				rec.Total = r[3]
				rec.A = counts['A']
				rec.C = counts['C']
				rec.G = counts['G']
				rec.T = counts['T']
				# if reference allele is unknown, assuming all are ref alleles
				rec.refCount = counts.get(snp[6].upper(), r[3])
				# if mut allele is unknown, assuming no mutations happened
				rec.mutCount = counts.get(snp[7].upper(), 0)
				writer.write(rec)
				# matched: move on to the next raw row
				break
	finally:
		# release all file handles even if a row is malformed
		reader.close()
		snper.close()
		writer.close()
Пример #2
0
# save the data file
# expfile
"""
	S1	S2	..	Sn
G1	...
G2	...
"""
# Keep only the expression rows whose first field is a requested gene or TF.
expreader  = TsvReader(expfile)
expdata    = [r for r in expreader if r[0] in genes or r[0] in tfs]
expreader.close()

# Write the kept rows transposed: the kept row names become the header and
# every further input column becomes one output row.
datawriter = TsvWriter(outdata)
for i, cname in enumerate(expreader.cnames):
	if i == 0:
		# genes + tfs
		datawriter.cnames = [r[0] for r in expdata]
		datawriter.writeHead()
	else:
		datawriter.write([cname] + [r[i] for r in expdata])
datawriter.close()
del expdata

# Drop genes/TFs that are absent from the expression data. Sets are built
# once so each membership test is O(1) instead of a scan over a list.
keptnames = set(datawriter.cnames)
genes = [g for g in genes if g in keptnames]
tfs   = [g for g in tfs if g in keptnames]

keptgenes = set(genes)
kepttfs   = set(tfs)
genetfs = {g: [tf for tf in gtfs if tf in kepttfs] for g, gtfs in genetfs.items() if g in keptgenes}

# save the group file
# mutfile
"""
	S1	S2	..	Sn
M1	... (0/1/2/NA)
Пример #3
0
from bioprocs.utils.tsvio2 import TsvWriter, TsvRecord
from gff import Gff

# Template placeholders: these values are substituted before the script runs.
infile    = {{i.infile | quote}}
outfile   = {{o.outfile | quote}}
# Inserted without a filter; when truthy it is called with the record
# attributes as keyword arguments to produce the name.
attr2name = {{args.attr2name}}
keepinfo  = {{args.keepinfo | repr}}

# Output columns; an extra ORIGINAL column is appended when keepinfo is truthy.
writer = TsvWriter(outfile)
writer.cnames = ['CHR', 'START', 'END', 'NAME', 'SCORE', 'STRAND']
if keepinfo:
	writer.cnames.append('ORIGINAL')

def getNameFromAttrs(attrs):
	"""Pick the value to use as a record's name from its attributes.

	If the module-level `attr2name` is set, it is called with the
	attributes as keyword arguments and its result is returned.
	Otherwise the attribute keys are visited in sorted order, ignoring
	keys that are also output column names (`writer.cnames`):

	1. the first key containing 'id' or 'name' (case-insensitive) wins;
	2. if there is none, the first remaining key is used;
	3. if no key remains, None is returned.

	Args:
		attrs: Mapping of attribute name to value.

	Returns:
		The chosen attribute value, the result of `attr2name`, or None.
	"""
	if attr2name:
		return attr2name(**attrs)

	candidates = [key for key in sorted(attrs.keys()) if key not in writer.cnames]
	for key in candidates:
		lowered = key.lower()
		if 'id' in lowered or 'name' in lowered:
			return attrs[key]
	# no id/name-like attribute: fall back to the first eligible one
	return attrs[candidates[0]] if candidates else None

gff = Gff(infile)
for record in gff:
	r        = TsvRecord()
	r.CHR    = record['seqid']
	r.START  = record['start']
	r.END    = record['end']
Пример #4
0
# Template placeholders: these values are substituted before the script runs.
outfile = {{ o.outfile | quote}}
outdir  = {{ o.outdir | quote}}
params = {{ args.params | repr}}
idxfile = {{ args.idxfile | quote}}
kallisto = {{ args.kallisto | quote}}
nthread = {{ args.nthread | repr}}

# NOTE(review): presumably registers the executable path with the shell
# wrapper - confirm against the shell module.
shell.TOOLS.kallisto = kallisto
# Options passed to the quant subcommand as i, o and t.
params.i = idxfile
params.o = outdir
params.t = nthread
# fq1/fq2 are defined outside this excerpt; presumably the "_" entry carries
# the positional arguments - TODO confirm.
params._ = [fq1, fq2]

# From here on `kallisto` is the wrapper's command object, no longer the
# executable path assigned above.
kallisto = shell.Shell(subcmd = True).kallisto
kallisto.quant(**params).run()

# Reduce the abundance table in outdir to two columns:
# the target id and the estimated count rounded to an integer.
imfile        = path.join(outdir, 'abundance.tsv')
reader        = TsvReader(imfile)
writer        = TsvWriter(outfile)
writer.cnames = ['target_id', 'est_counts']
writer.writeHead()

try:
	for r in reader:
		# keep only the part of the id before the first '::'
		r.target_id = r.target_id.split('::')[0]
		try:
			r.est_counts = int(round(float(r.est_counts)))
		except (TypeError, ValueError, OverflowError):
			# missing value (TypeError), non-numeric text or nan (ValueError),
			# or an infinite estimate (OverflowError): report a zero count
			r.est_counts = 0
		writer.write(r)
finally:
	# release both file handles even if a row cannot be processed
	reader.close()
	writer.close()
Пример #5
0
logger.info('%s motifs loaded', len(motifs))

if tool == 'meme':
	cmdparams        = []
	params.thresh    = pval
	params.verbosity = 4
	for motif, name in motifs.items():
		params.oc    = path.join(outdir, name + '.' + re.sub(r'[^\w_]', '', motif))
		params.motif = motif
		params[""]   = [tfmotifs, sfile]
		cmdparams.append((meme, cmdargs(params, dash = '--', equal = ' ')))
	Parallel(nthread, raiseExc = True).run('{} {}', cmdparams)

	writer = TsvWriter(outfile)
	writer.cnames = [
		"CHR", "START", "END", "NAME", "SCORE", "STRAND", "MOTIF", "SEQ", "STARTONSEQ",
		"STOPONSEQ", "RAWSCORE", "PVAL", "QVAL", "MATCHEDSEQ", "UCSCLINK"
	]
	writer.writeHead(callback = lambda cnames: "#" + "\t".join(cnames))

	def rowfactory(r):
		r.PVAL       = float(r['p-value'])
		if r.PVAL >= pval:
			return None
		r.RAWSCORE = r.score
		try:
			r.SCORE = int(float(r.score) * 10)
		except TypeError:
			r.SCORE = 0
		r.STRAND   = r.strand
		r.MOTIF    = r.motif_id
		# split motif_alt_id
Пример #6
0
		)))
		for _ in range(dist):
			writer.write(next(reader))
		writer.close()
	
	para   = Parallel(nthread, raiseExc = True)
	para.run(getAlleleCount, [
		(tumbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)
		), path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)) for i in range(nthread)
	])
	# merge to tumsnp
	writer = TsvWriter(tumsnp)
	writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']
	writer.writeHead(lambda cn: "#" + "\t".join(cn))
	for i in range(nthread):
		subrc = path.join(
			thdir, '{tumbn}.thread{i}.bamrc'.format(tumbn = path.basename(tumbam), i = i)
		)
		reader = TsvReader(subrc, cnames = False)
		for r in reader:
			writer.write(r.values())
		reader.close()
	writer.close()

	# normal
	para.run(getAlleleCount, [
		(normbam, path.join(
			thdir, '{bname}.thread{i}.snp'.format(bname = asbname, i = i)