def getregpot(bw, tss, output, alpha, exclude):
    """Run ``_bw.getrp`` on ``bw`` and rewrite ``output`` as an annotated table.

    The function works in three steps:

    1. Parse ``exclude`` and call ``_bw.getrp`` so that it writes its raw
       result to ``output``.
    2. Read that raw result back, keeping the fifth column of every row
       keyed by the tab-joined first four columns.
    3. Overwrite ``output`` with one row per line of ``tss``, combining the
       TSS columns with the value looked up in step 2.

    Args:
        bw: Passed unchanged to ``_bw.getrp`` (presumably a bigWig path --
            confirm against the ``_bw`` extension).
        tss: Path to a whitespace-delimited annotation file.  The fourth
            column must have the form ``<a>:<b>``; the first four columns
            must match the first four columns that ``_bw.getrp`` writes.
        output: Path that ``_bw.getrp`` writes to and that is then
            overwritten with the final table.
        alpha: Passed unchanged to ``_bw.getrp``.
        exclude: String of the form ``"left,right"`` holding two integers.
            Equal values are forwarded as ``0, 0``; otherwise ``left`` must
            be smaller than ``right``.

    Raises:
        SystemExit: With status 1 when ``left`` is greater than ``right``.
        KeyError: When a row of ``tss`` has no matching row in the raw
            ``_bw.getrp`` output.
    """
    bounds = exclude.split(',')
    left = int(bounds[0])
    right = int(bounds[1])

    # Reject an inverted range before doing any work.
    if left > right:
        sys.stderr.write("--exclude left should be smaller than right\n")
        sys.exit(1)

    if left == right:
        # Equal bounds are normalised to (0, 0), as in the original code.
        _bw.getrp(bw, tss, output, alpha, 0, 0)
    else:
        _bw.getrp(bw, tss, output, alpha, left, right)

    # The raw output must be read completely before the same path is
    # reopened for writing below.
    rp = {}
    with open(output) as inf:
        for line in inf:
            fields = line.strip().split()
            rp['\t'.join(fields[:4])] = fields[4]

    # Context managers guarantee the rewritten file is flushed and closed
    # (the previous version leaked the output handle).
    with open(tss) as inf, open(output, 'w') as fout:
        for line in inf:
            fields = line.strip().split()
            name_parts = fields[3].split(':')
            row = [
                fields[0],
                fields[1],
                fields[2],
                name_parts[0],
                rp['\t'.join(fields[:4])],
                name_parts[1],
                fields[-1],
            ]
            fout.write('\t'.join(row) + '\n')
# use _bw to get rp for each of the files status = 0 shape2 = 0 files = [] for key in factors: fin = glob.glob(os.path.join(directory[int(key)], '*treat*bw')) if fin: print(fin[0]) # check file existence #if not os.path.exists(os.path.basename(fin[0])+'.txt'): if not os.path.exists(os.path.basename(fin[0]) + '_1kb.txt'): # check status status = os.system('bigWigInfo %s' % fin[0]) if status == 0: ##_bw.getrp(fin[0], 'histonerp/test/hg38.tss', os.path.basename(fin[0])+'.txt', 1e4, 0, 0) _bw.getrp(fin[0], 'histonerp/test/hg38.tss', os.path.basename(fin[0]) + '_1kb.txt', 1e3, 0, 0) if os.path.getsize(os.path.basename(fin[0]) + '_1kb.txt') > 0: shape2 += 1 files.append(os.path.basename(fin[0]) + '_1kb.txt') else: # get shape 2 if os.path.getsize(os.path.basename(fin[0]) + '_1kb.txt') > 0: shape2 += 1 files.append(os.path.basename(fin[0]) + '_1kb.txt') f = h5py.File('margeRP_%s_1kb.h5' % factor, 'a') if not ('RP' in f.keys()): RP = f.create_dataset("RP", dtype=np.float32, shape=(l, shape2),
"""Compute per-sample RP values, store them in HDF5, and export bin counts.

This script uses a ``snakemake`` object that it does not define itself; it
is expected to be supplied by the caller (e.g. Snakemake's ``script:``
directive) with the attributes read below:

* ``snakemake.params.sp``      -- used to build the ``<sp>.tss`` resource name
* ``snakemake.params.bin1kb``  -- second argument to ``bigWigAverageOverBed``
* ``snakemake.input.bw``       -- input given to ``_bw.getrp`` and to
  ``bigWigAverageOverBed``
* ``snakemake.output.rp``      -- text file written by ``_bw.getrp``
* ``snakemake.output.h5``      -- HDF5 file receiving the ``RP`` dataset
* ``snakemake.output.ct``      -- file receiving ``bigWigAverageOverBed`` output
"""
import gzip
import os
import subprocess

import h5py
import numpy as np
from pkg_resources import resource_filename

import _bw

# Locate the "<sp>.tss" resource shipped inside the _bw package.
meta = resource_filename("_bw", "%s.tss" % (snakemake.params.sp))
_bw.getrp(snakemake.input.bw, meta, snakemake.output.rp, 1e4, 0, 0)

# single sample RP to HDF5
# Only the fifth whitespace-separated column of each row is kept.
with open(snakemake.output.rp) as inf:
    tmp = [float(line.split()[4]) for line in inf]
n = len(tmp)
tmp = np.array(tmp, dtype='float32')

with h5py.File(snakemake.output.h5, 'a') as store:
    # A single-column (n, 1) dataset holding this sample's values.
    RP = store.create_dataset("RP", dtype=np.float32, shape=(n, 1),
                              compression='gzip', shuffle=True,
                              fletcher32=True)
    RP[:, 0] = tmp
    store.flush()

# Run the external tool with an argument list instead of a shell string so
# that paths containing spaces or shell metacharacters are passed through
# verbatim.  The literal "stdout" output argument is kept and the process's
# standard output is redirected into the target file, matching the previous
# "stdout > file" shell redirection.  A non-zero exit status now raises
# CalledProcessError instead of being silently ignored.
with open(str(snakemake.output.ct), 'wb') as ct_out:
    subprocess.check_call(
        [
            'bigWigAverageOverBed',
            str(snakemake.input.bw),
            str(snakemake.params.bin1kb),
            'stdout',
        ],
        stdout=ct_out,
    )
# check status status = os.system('bigWigInfo %s' % fin[0]) if status == 0: # without promoter +/- 1000bp #if factors[key] == 'H3K4me3': # _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e3, -1000, 1000) #else: # _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e4, -1000, 1000) # with promoter +/- 1000bp #if factors[key] == 'H3K4me3': # _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e3, 0, 0) #else: # _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e4, 0, 0) _bw.getrp(fin[0], '../MARGEData/histonerp/test/mm10.tss', os.path.basename(fin[0]) + '.txt', 1e4, 0, 0) if os.path.getsize(os.path.basename(fin[0]) + '.txt') > 0: shape2 += 1 files.append(os.path.basename(fin[0]) + '.txt') else: # get shape 2 if os.path.getsize(os.path.basename(fin[0]) + '.txt') > 0: shape2 += 1 files.append(os.path.basename(fin[0]) + '.txt') f = h5py.File('margeRP_%s_mm.h5' % factor, 'a') if not ('RP' in f.keys()): RP = f.create_dataset("RP", dtype=np.float32, shape=(l, shape2),