Exemplo n.º 1
0
        # Write one chunk of the input motifs to its own file `qfile`,
        # carrying over the metadata of the source reader.
        writer = MemeWriter(qfile)
        writer.meta = reader.meta
        writer.writeMeta()
        # Copy at most joblist[i] records into this chunk; stop early when the
        # reader has no more records.
        for _ in range(joblist[i]):
            try:
                writer.write(reader.next())
            except StopIteration:
                break
        writer.close()
        # Work on a copy so the chunk-specific settings below do not end up in
        # the shared `params`.
        thparams = params.copy()
        # NOTE(review): the "" key presumably holds the positional arguments
        # for cmdargs (query file, then target file) -- confirm.
        thparams[""] = [qfile, mfile2]
        thparams.thresh = qval
        thparams.oc = ocdir
        # Queue (executable, argument string) for the parallel run below.
        cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' ')))
    reader.close()
    # Each queued pair fills the '{} {}' command template.
    Parallel(nthread, raiseExc=True).run('{} {}', cmdps)

    # Merge the tomtom.txt of every output directory into `outfile`.
    # The column names are taken from the first directory's file: the header
    # line loses its first character and is split on tabs; '##' is passed as
    # the comment marker.
    writer = TsvWriter(outfile)
    reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'),
                       comment='##',
                       cnames=lambda header: header[1:].strip().split("\t"))
    writer.cnames = reader.cnames
    # The merged header is written back with a leading '#'.
    writer.writeHead(lambda cnames: "#" + "\t".join(cnames))
    reader.close()
    # Append the records of every chunk result, first directory included.
    # NOTE(review): no close() for these readers or for `writer` is visible in
    # this part of the file -- confirm they are closed further down.
    for ocdir in ocdirs:
        reader = TsvReader(
            path.join(ocdir, 'tomtom.txt'),
            comment='##',
            cnames=lambda header: header[1:].strip().split("\t"))
        for r in reader:
            writer.write(r)
Exemplo n.º 2
0
pathview = {{args.pathview | repr}}

# Register the configured Rscript executable with the shell tool table.
shell.TOOLS.Rscript = Rscript

# A dict cutoff may name its criterion with the short aliases 'p' / 'q';
# expand them to 'Pval' / 'AdjPval'.
if isinstance(cutoff, dict):
	if cutoff['by'] == 'p':
		cutoff['by'] = 'Pval'
	elif cutoff['by'] == 'q':
		cutoff['by'] = 'AdjPval'

# Collect the gene column from every record of the input file.
reader = TsvReader(infile, **inopts)
genes  = []
for r in reader:
	genes.append(r[genecol])

# Create the Enrichr client and submit the gene list, described by the
# base name of the input file.
en = Enrichr(cutoff = cutoff, top = top, Rscript = Rscript)
en.addList(genes, description = path.basename(infile))

para = Parallel(nthread = nthread)

def runPathview(r, hsa):
	"""Run `r` with argument `hsa` through the shell wrapper's Rscript tool."""
	return shell.Shell().Rscript(r, hsa).run()
for db in dbs:
	outfile = path.join(outdir, prefix + '.' + db + '.txt')
	en.enrich(db)
	en.export(outfile, top = 100)
	if plot:
		plotfile = path.join(outdir, prefix + '.' + db + '.png')
		en.plot(plotfile, res = devpars.res, width = devpars.width, height = devpars.height)
	if pathview and 'KEGG' in db:
		pathviewRDir  = path.join(outdir, prefix + '.' + db + '.pathview')
		pathviewRfile = path.join(pathviewRDir, 'pathview.R')
		shell.mkdir(pathviewRDir)
		with open(pathviewRfile, 'w') as f:
			f.write("""
			{rimport}('__init__.r')
Exemplo n.º 3
0
# Job inputs, outputs and arguments. The double-brace placeholders are
# presumably filled in by the pipeline's template engine before this script
# is executed -- confirm against the process definition.
invcfs = {{i.infiles | repr}}
outfile = {{o.outfile | quote}}
nthread = {{args.nthread | int}}
joboutdir = {{job.outdir | quote}}
vcftools = {{args.vcftools | quote}}
gatk = {{args.gatk | quote}}
tabix = {{args.tabix | quote}}
ref = {{args.ref | quote}}
params = {{args.params | repr}}
tool = {{args.tool | quote}}

# Register the tool executables with the shell tool table.
shell.TOOLS.vcftools = vcftools
shell.TOOLS.gatk = gatk

# Run vcfIndex on every input VCF in parallel, one (vcf, tabix) argument tuple
# per file, and continue with whatever the run returns.
# NOTE(review): presumably the returned list holds the indexed VCF paths -- confirm.
para = Parallel(nthread, raiseExc=True)
invcfs = para.run(vcfIndex, [(vcf, tabix) for vcf in invcfs])


def run_vcftools():
    """Run the vcftools command on the input VCFs via the shell wrapper.

    Fills in the module-level ``params`` in place: the ``d`` and ``t``
    options default to True when not already present, ``_`` receives the
    input VCF list and ``_stdout`` receives the output file. The options
    are then passed as keyword arguments to the ``vcftools`` tool of a
    shell built with ``equal=' '``.
    """
    # Keep caller-supplied values for these two flags; otherwise enable them.
    for flag in ('d', 't'):
        setattr(params, flag, params.get(flag, True))
    params._ = invcfs
    params._stdout = outfile
    runner = shell.Shell(equal=' ')
    runner.vcftools(**params).run()


def run_gatk():
    params.T = 'CombineVariants'
    params.o = outfile
    params.R = ref
Exemplo n.º 4
0
	# First command: run vcftools on the input file with the options in
	# vtparams (set outside this view) and redirect its output to a
	# per-sample VCF in the job output directory.
	samplevcf     = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample
	cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf)

	# Example of the vcf2maf command line assembled below:
	# vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf  --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin
	params['input-vcf']  = samplevcf
	params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	params['vep-data']   = {{args.vepDb | quote}}
	params['vep-forks']  = {{args.nthread}}
	params['filter-vcf'] = {{args.filtervcf | quote}}
	params['ref-fasta']  = {{args.ref | quote}}
	# vcf2maf is given the directory that contains the vep executable.
	params['vep-path']   = path.dirname(vep)

	# Second command: vcf2maf with the sample as tumor id, chained after the
	# first with ';'.
	# NOTE(review): ';' runs vcf2maf even when the first command fails;
	# '&&' may be what is intended -- confirm.
	cmd = cmd + '; {{args.vcf2maf}} --tumor-id %s %s' % (sample, cmdargs(params, equal=' '))
	cmds.append(cmd)

{% 		if args.nthread == 1 %}
# Single thread: run the per-sample commands one after another.
for cmd in cmds: runcmd(cmd)
{% 		else %}
# Several threads: hand every command to the Parallel runner as a
# one-element argument tuple for the command template.
# Note the threads may be hanging on here.
p = Parallel({{args.nthread}})
p.run('{}', [(cmd,) for cmd in cmds])
{% 		endif %}

# Concatenate the per-sample MAF files into the output file.
for i, sample in enumerate(samples):
	singlemaf = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
	if i > 0:
		# Later files are appended without their '#' lines and without the
		# repeated column header line.
		mergecmd = 'egrep -v "^#|^Hugo_Symbol" "%s" >> {{o.outfile | quote}}' % singlemaf
	else:
		# The first file is copied whole and (re)creates the output file.
		mergecmd = 'cat "%s" > {{o.outfile | quote}}' % singlemaf
	runcmd(mergecmd)
{% 	endif %}
{% endif %}
Exemplo n.º 5
0
# Map every file name listed in the metadata to its extension and to its
# sample id. For gzipped files the extension keeps the inner suffix as well
# (two trailing components). The sample id is the first 15 characters of the
# submitter id of the first associated entity.
exts = {}
for sam in sam_meta:
	fname = sam['file_name']
	pieces = fname.split('.')
	ext = '.' + pieces[-1]
	if ext == '.gz':
		ext = '.' + pieces[-2] + '.gz'
	exts[fname] = ext
	sample_ids[fname] = sam['associated_entities'][0]['entity_submitter_id'][:15]

# Gather candidate files for each distinct extension: files directly in the
# input directory, then files one level below it (the layout of a direct
# TCGA download).
samfiles = []
for ext in set(exts.values()):
	indir_abs = os.path.abspath(indir)
	patterns = (
		os.path.join(indir_abs, "*" + ext),
		os.path.join(indir_abs, "*", "*" + ext),
	)
	for pattern in patterns:
		samfiles.extend(glob.glob(pattern))

# Serializes the remove-then-create sequence so that two workers producing
# the same target file cannot interleave.
lock = Lock()
def single(samfile):
	"""Place one downloaded file in `outdir` under its sample-id based name.

	Files whose base name is not listed in `sample_ids` are ignored.
	The target name is the sample id followed by the recorded extension.
	An existing target is replaced. Depending on `method`, the target is
	a symlink to `samfile` (any method containing 'link') or a copy of it
	(method 'copy'); any other method only removes an existing target.

	Args:
		samfile: path of the source file.

	Returns:
		None.
	"""
	bn = os.path.basename(samfile)
	if bn not in sample_ids:
		return
	newfile = os.path.join(outdir, sample_ids[bn] + exts[bn])
	with lock:
		# lexists instead of exists: a dangling symlink left by an earlier
		# run does not "exist" for os.path.exists, yet it still occupies the
		# name and would make os.symlink fail with EEXIST.
		if os.path.lexists(newfile):
			os.remove(newfile)
		if 'link' in method:
			os.symlink(samfile, newfile)
		elif method == 'copy':
			copyfile(samfile, newfile)

# Apply single() to every candidate file, one argument tuple per file, using
# the 'threading' backend of the Parallel runner.
# NOTE(review): raiseExc = True presumably re-raises worker exceptions in the
# caller -- confirm against the Parallel implementation.
p = Parallel(nthread = nthread, backend = 'threading', raiseExc = True)
p.run(single, [(samfile,) for samfile in samfiles])