Пример #1
0
def read_tfgenes(tgfile):
    """Read TF-gene pairs and group the TFs by gene.

    Args:
        tgfile: The TF-gene pair file handed to ``TsvReader`` (opened with
            ``cnames=False``). Each row is read as ``row[0]`` = TF and
            ``row[1]`` = gene.

    Returns:
        dict: Maps each gene (``row[1]``) to the set of TFs (``row[0]``)
        that appear with it in the file. Empty if the file has no rows.
    """
    logger.info('Reading TF-gene pairs in %s ...', tgfile)
    reader = TsvReader(tgfile, cnames=False)
    ret = {}  # gene => tf
    try:
        for row in reader:
            ret.setdefault(row[1], set()).add(row[0])
    finally:
        # Release the reader even if a short/malformed row raises
        # part-way through the iteration.
        reader.close()
    return ret
Пример #2
0
# get gene, snp pairs
"""
chr1	12463073	12463074	AADACL4	0	+	chr1	12463073	12463074	chr1_12463073_rs7547740_A_G	0	+
chr1	12480504	12480505	AADACL4	0	+	chr1	12480504	12480505	chr1_12480504_rs6660365_T_C	0	+
chr1	12496021	12496022	AADACL4	0	+	chr1	12496021	12496022	chr1_12496021_rs6541023_T_C	0	+
"""
# SNP id (column 10, r[9]) => list of genes (column 4, r[3]) paired with it
mutgenes   = defaultdict(list)
genes      = set()
intereader = TsvReader(interfile)
try:
	for r in intereader:
		# skip genes that are not keys of genetfs
		if r[3] not in genetfs:
			continue
		mutgenes[r[9]].append(r[3])
		genes.add(r[3])
finally:
	# release the reader even if a short/malformed row raises
	intereader.close()

# shrink the sets: keep only the genes seen above and the TFs attached to them
genetfs = {g: genetfs[g] for g in genes}
tfs     = list({tf for gtfs in genetfs.values() for tf in gtfs})

# nothing, write empty files
if not mutgenes or not genes:
	for emptyfile in (outdata, outgroup, outcase):
		# opening in 'w' mode creates/truncates the file; nothing is written
		with open(emptyfile, 'w'):
			pass
	# `exit` is an interactive helper injected by the `site` module and is not
	# guaranteed to exist; raising SystemExit ends the script with status 0
	# without needing any import.
	raise SystemExit(0)

# save the data file
# expfile
"""
Пример #3
0
# Inputs, outputs and arguments of the script.
# NOTE(review): the {{...}} expressions look like template placeholders that
# are rendered into Python literals before this script runs - confirm.
infile = {{i.infile | quote}}
snpfile = {{o.snpfile | quote}}
genefile = {{o.genefile | quote}}
snppergene = {{args.snppergene | repr}}
nchr = {{args.nchr | repr}}
seed = {{args.seed | repr}}
# distances between genes
dist = {{args.dist | repr}}

# Seed the module-level RNG used by the random.choices() call below.
random.seed(seed)

# Collect the distinct values returned by dump(0) and dump(1) for infile.
# NOTE(review): presumably dump(i) yields column i (0: SNPs, 1: genes) and
# rewind() restarts the reader for the second pass - verify against TsvReader.
reader = TsvReader(infile, cnames=False)
allsnps = set(reader.dump(0))
reader.rewind()
allgenes = set(reader.dump(1))
reader.close()

# Give each SNP a random integer drawn (with replacement) from
# range(ngenes * snppergene); the original note called this a "probability".
# NOTE(review): allsnps is a set, so which SNP gets which value follows set
# iteration order; if the items are strings this may differ between runs even
# with a fixed seed (hash randomization) - confirm whether that matters.
nsnps = len(allsnps)
ngenes = len(allgenes)
snp_probs = dict(zip(allsnps, random.choices(range(ngenes * snppergene),
                                             k=nsnps)))

# Writers for the two output files (genefile and snpfile).
genebed = TsvWriter(genefile)
snpbed = TsvWriter(snpfile)

# Number of genes per chromosome, rounded up.
geneperchr = math.ceil(float(ngenes) / float(nchr))
for i, gene in enumerate(allgenes):
    chrname = 'chr' + str(int(i % nchr) + 1)
    start = (int(i / nchr) + 1) * dist
    end = start + 1