/
junc2bed.py
46 lines (40 loc) · 1.59 KB
/
junc2bed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from bbcflib.track import track
from bbcflib.gfminer.common import *
from bbcflib.gfminer.stream import concatenate
import itertools
import os,sys
"""junc2bed <filename> <assembly>
Transforms .junc files from SOAPsplice to BED format."""
def main(argv=None):
    """Command-line entry point: convert one .junc file to BED.

    :param argv: list of command-line arguments without the program name,
        i.e. ``[filename, assembly]``. Defaults to ``sys.argv[1:]``.
        Arguments beyond the first two are ignored.
    :returns: the value returned by `to_bed`, or 2 when fewer than two
        arguments were given (a usage line is written to stderr).
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.stderr.write("Usage: junc2bed <filename> <assembly>\n")
        return 2
    filename, assembly = argv[0], argv[1]
    # Echo the inputs. Writing to sys.stdout yields the same text as the
    # former print statement and is valid under both Python 2 and 3.
    sys.stdout.write("%s %s\n" % (filename, assembly))
    return to_bed(filename, assembly)
def _bed_path(filename):
    """Return the BED output path derived from the input path `filename`."""
    # str.rstrip('junc') would strip any trailing run of the characters
    # j/u/n/c rather than the suffix, so replace the extension explicitly.
    extension = '.junc'
    if filename.endswith(extension):
        return filename[:-len(extension)] + '.bed'
    return filename + '.bed'


def to_bed(filename, assembly):
    """Convert a SOAPsplice .junc file into a BED file next to it.

    The input is read as a text track with the fields
    chr, start, end, strand, score. The output track has the fields
    chr, start, end, name, score, strand, where each record's name is
    'junction<N>' with N counting up from 0 in reading order.

    :param filename: path to the .junc file. The output path is the same
        with '.junc' replaced by '.bed' ('.bed' is appended if the input
        does not end with '.junc').
    :param assembly: assembly identifier, passed to `track` as `chrmeta`.
    :returns: None.
    """
    t = track(filename, fields=['chr', 'start', 'end', 'strand', 'score'],
              chrmeta=assembly, format='txt')
    # Translate chr names
    s1 = map_chromosomes(t.read(), t.assembly.chromosomes)
    # Prepare output bed file
    out = track(_bed_path(filename),
                fields=['chr', 'start', 'end', 'name', 'score', 'strand'])
    try:
        out.make_header({'name': filename, 'description': filename})
        # Add junction names: create a 'name' field from 'chr' (presumably a
        # plain copy -- see bbcflib `duplicate`), then overwrite its value
        # with a running junction identifier.
        c = itertools.count()
        s2 = duplicate(s1, 'chr', 'name')
        s3 = apply(s2, 'name', lambda x: 'junction' + str(next(c)))
        # Write in append mode, presumably so the header written above is
        # kept -- TODO confirm against bbcflib.track.
        out.write(s3, mode='append')
    finally:
        # Close the output even if reading or writing fails.
        out.close()
def merge_junc_files(trackList, assembly, output='all.junc'):
    """Merge several .junc files into a single one, chromosome by chromosome.

    For every chromosome of the assembly, the matching records of all input
    files are concatenated; records are grouped by (chr, start, end) and the
    scores within a group are summed. The result is appended to `output`.

    :param trackList: iterable of paths to .junc files, each read as a text
        track with the fields chr, start, end, strand, score.
    :param assembly: assembly identifier passed to `bbcflib.genrep.Assembly`.
    :param output: path of the merged file (default: 'all.junc').
    :returns: None.
    """
    from bbcflib.genrep import Assembly
    fields = ('chr', 'start', 'end', 'strand', 'score')
    out = track(output, format='txt', fields=list(fields))
    try:
        for c in Assembly(assembly).chromosomes:
            # Chromosome selection string built from the three components of
            # the chromosome key: '<c[0]>_<c[1]>.<c[2]>'.
            selection = str(c[0]) + '_' + c[1] + '.' + str(c[2])
            streams = [track(t, fields=list(fields), format='txt').read(selection)
                       for t in trackList]
            merged = concatenate(streams, group_by=['chr', 'start', 'end'],
                                 aggregate={'score': sum})
            out.write(merged, mode='append')
    finally:
        # The output track was previously never closed.
        out.close()
# Script entry point: run main() only when executed directly and use its
# return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())