示例#1
0
def listIntervals(operon,fastaindex):
    """Expand every gene of an operon into per-position (function, offset) pairs.

    The hits are filtered with ``interval_filter.overlaps(operon, fastaindex,
    False)`` and sorted by field 5.  Field 5 of the first sorted hit is the
    reference coordinate ("front") that all offsets are measured from.

    Parameters:
        operon: iterable of hit records.  Field 1 is a dotted name whose
            first component is used as the function label; fields 5 and 6
            are the two coordinates and are converted with ``int``.
        fastaindex: passed through unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, position)`` tuples covering every integer
        position in ``[low - front, high - front)`` for each gene, in sorted
        gene order, where ``low``/``high`` are the gene's two coordinates in
        ascending order.  An empty list when no hits remain after filtering.

    Side effects:
        Prints one "Functions [...]" line listing the function labels.
    """
    # NOTE(review): the sort compares field 5 as stored; if the records carry
    # coordinates as strings this is a lexicographic order - confirm with callers.
    operon = sorted(interval_filter.overlaps(operon,fastaindex,False),
                    key=lambda x:x[5])
    if not operon:
        # Nothing survived the filter, so there is no reference gene.
        return []

    front = int(operon[0][5])
    functions = [gene[1].split('.')[0] for gene in operon]
    # Single pre-formatted argument: same output as the former print
    # statement, and valid syntax on both Python 2 and Python 3.
    print("Functions %s" % (functions,))

    ints = []
    for function,gene in zip(functions,operon):
        st,end = map(int,gene[5:7])
        if st>end:
            # Coordinates may be stored high-to-low; normalise to ascending.
            st,end = end,st
        ints.extend((function,pos) for pos in range(st-front,end-front))
    return ints
示例#2
0
def countMidpoints(operon,fastaindex):
    """Return the midpoint offset of every gene after the first in an operon.

    The hits are filtered with ``interval_filter.overlaps(operon, fastaindex,
    backtran=False)`` and sorted by field 5.  Field 5 of the first sorted hit
    is the reference coordinate ("front"); that first hit itself is not
    included in the result.

    Parameters:
        operon: iterable of hit records.  Field 1 is a dotted name whose
            first component is used as the function label; fields 5 and 6
            are the two coordinates and are converted with ``int``.
        fastaindex: passed through unchanged to ``interval_filter.overlaps``.

    Returns:
        A list of ``(function, midpoint)`` tuples, one per gene after the
        first, where ``midpoint`` is ``(coord5 + coord6) // 2 - front``.
        An empty list when no hits remain after filtering.
    """
    # NOTE(review): the sort compares field 5 as stored; if the records carry
    # coordinates as strings this is a lexicographic order - confirm with callers.
    operon = sorted(interval_filter.overlaps(operon,fastaindex,backtran=False),
                    key=lambda x:x[5])
    if not operon:
        # Nothing survived the filter, so there is no reference gene.
        return []

    front = int(operon[0][5])
    intervals = []
    for gene in operon[1:]:
        function = gene[1].split('.')[0]
        st,end = map(int,gene[5:7])
        # Explicit floor division keeps the integer midpoint on any interpreter.
        intervals.append( (function,(st+end)//2 - front) )
    return intervals
示例#3
0
 def test2(self):
     """Check the result of ``interval_filter.overlaps`` for ``self.queries``.

     The filter is called with ``self.queries`` and ``self.testfai``; its
     output is compared with the expected records using
     ``assertItemsEqual``, so the order of the records does not matter.
     """
     accession = 'CP002279.1'
     description = 'Mesorhizobium opportunistum WSM2075, complete genome'
     # Only fields 1, 5, 6 and 9 differ between the expected records;
     # the remaining fields are identical for all of them.
     varying = [
         ('toxin.fa.cluster2.fa', 25000, 25100, 'AAQEIWJ.1'),
         ('transport.fa.cluster2.fa', 25200, 25300, 'BAQEIWJ.1'),
         ('modifier.fa.cluster2.fa', 25400, 25500, 'CAQEIWJ.1'),
         ('regulator.fa.cluster2.fa', 25600, 25700, 'DAQEIWJ.1'),
         ('immunity.fa.cluster2.fa', 25800, 26900, 'EAQEIWJ.1'),
         ('immunity.fa.cluster2.fa', 740038, 740138, 'FAQEIWJ.1'),
         ('transport.fa.cluster2.fa', 35127, 35356, 'GAQEIWJ.1'),
         ('transport.fa.cluster2.fa', 45127, 45356, 'KAQEIWJ.1'),
     ]
     expected = [
         (accession, cluster, 0, 0, 100, begin, finish, description, '-', ident)
         for cluster, begin, finish, ident in varying
     ]

     reduced = interval_filter.overlaps(self.queries, self.testfai)
     self.assertItemsEqual(reduced, expected)