Пример #1
0
def bootstrap(n, chr_len, sra_loci, feature_loci):
    """Score the observed overlap against ``n`` resampled overlaps.

    The overlap between ``sra_loci`` and ``feature_loci`` is measured with
    ``ol.overlap(..., percentage=True)``.  The same measurement is then
    repeated ``n`` times on loci produced by ``resample(chr_len, sra_loci)``.

    Returns whatever ``z_score`` yields for the observed value against the
    list of resampled values.
    """
    observed = ol.overlap(sra_loci, feature_loci, percentage=True)
    resampled_overlaps = [
        ol.overlap(resample(chr_len, sra_loci), feature_loci, percentage=True)
        for _ in range(n)
    ]
    return z_score(observed, resampled_overlaps)
Пример #2
0
def bootstrap(n, chr_len, sra_loci, feature_loci):
    """Return the z-score of the real overlap versus resampled overlaps.

    n: number of resampling rounds.
    chr_len, sra_loci: handed unchanged to ``resample`` on every round.
    feature_loci: loci that both the real and the resampled sets are
        overlapped with (``ol.overlap`` with ``percentage=True``).
    """
    def _overlap_percentage(loci):
        # Single place for the overlap call shared by both measurements.
        return ol.overlap(loci, feature_loci, percentage=True)

    observed = _overlap_percentage(sra_loci)
    simulated = []
    for _ in range(n):
        simulated.append(_overlap_percentage(resample(chr_len, sra_loci)))
    return z_score(observed, simulated)
Пример #3
0
 def count(cls, loci, norm_factor, distinct=True):
     '''Count sRNA reads within a given collection of loci.

     cls: iterable of sRNA records.  Index 0 is the chromosome, indices 1
         and 2 are handed to ``ol.overlap`` as the interval; indices 3 and
         4 are only read when ``distinct`` is false (presumably read count
         and number of mapping positions -- confirm against the caller).
     loci: [chr, start, end] MUST BE SORTED
     norm_factor: divisor applied to every contribution.
     distinct: when true every overlapping sRNA adds 1 / norm_factor,
         otherwise it adds record[3] / (record[4] * norm_factor).

     Returns a dict keyed by each chromosome present in both inputs; the
     value is a list with one number per entry of the ``ol.overlap``
     result (0 where that entry is empty).
     '''
     locus_chroms = set(entry[0] for entry in loci)
     srna_chroms = set(entry[0] for entry in cls)
     counts = {}
     for chrom in sorted(locus_chroms & srna_chroms):
         # Build real lists: the records are indexed by position below,
         # which the iterator returned by filter() on Python 3 cannot do.
         srnas = [rec for rec in cls if rec[0] == chrom]
         chrom_loci = [loc for loc in loci if loc[0] == chrom]
         hits_per_locus = ol.overlap(
             [[rec[1], rec[2]] for rec in srnas],
             [[loc[1], loc[2]] for loc in chrom_loci])
         per_locus = []
         for hits in hits_per_locus:
             total = 0
             if hits:
                 for j in hits:
                     if distinct:
                         total += 1.0 / norm_factor
                     else:
                         total += (float(srnas[j][3])
                                   / (srnas[j][4] * norm_factor))
             per_locus.append(total)
         counts[chrom] = per_locus
     return counts
Пример #4
0
 def count(cls, loci, norm_factor, distinct=True):
     '''Count sRNA reads within a given collection of loci.

     cls: iterable of sRNA records.  Index 0 is the chromosome, indices 1
         and 2 are handed to ``ol.overlap`` as the interval; indices 3 and
         4 are only read when ``distinct`` is false (presumably read count
         and number of mapping positions -- confirm against the caller).
     loci: [chr, start, end] MUST BE SORTED
     norm_factor: divisor applied to every contribution.
     distinct: when true every overlapping sRNA adds 1 / norm_factor,
         otherwise it adds record[3] / (record[4] * norm_factor).

     Returns a dict keyed by each chromosome present in both inputs; the
     value is a list with one number per entry of the ``ol.overlap``
     result (0 where that entry is empty).
     '''
     shared_chroms = sorted(
         set(entry[0] for entry in loci) & set(entry[0] for entry in cls))
     result = {}
     for chrom in shared_chroms:
         # List comprehensions instead of filter(): the sRNA records are
         # indexed below, and filter() yields an iterator on Python 3.
         reads = [rec for rec in cls if rec[0] == chrom]
         regions = [loc for loc in loci if loc[0] == chrom]
         overlaps = ol.overlap([[rec[1], rec[2]] for rec in reads],
                               [[loc[1], loc[2]] for loc in regions])
         result[chrom] = []
         for hits in overlaps:
             if not hits:
                 result[chrom].append(0)
                 continue
             total = 0
             for j in hits:
                 if distinct:
                     total += 1.0 / norm_factor
                 else:
                     total += (float(reads[j][3])
                               / (reads[j][4] * norm_factor))
             result[chrom].append(total)
     return result
Пример #5
0
    def locus_avg_sRNA(cls, locus, flank, window, norm_factor):
        '''Calculate the average distinct sRNA density in given loci.

        cls: collection of sRNA records providing ``sort_map``; index 0 is
            the chromosome, indices 1 and 2 are used as start and end, and
            the slice [1:4] is handed to ``ol.overlap``.
        locus: list of locus records with at least five fields; index 0 is
            the chromosome, 1 and 2 the start and end, index 3 is compared
            with "-" (presumably the strand -- confirm) and index 4 is
            passed through to ``ol.overlap``.  Passed to ``cls.sort_map``
            before use.
        flank: number of positions added on each side of every locus.
        window: number of positions averaged into one flank bin.
        norm_factor: divisor applied, together with the number of loci, to
            every bin.

        Returns a flat list of ``3 * (flank // window)`` values: upstream
        bins, locus-body bins, downstream bins.  The locus body is split
        into the same number of bins as a flank; bodies shorter than that
        number of positions contribute nothing to the body bins.  An empty
        ``locus`` (or a zero ``norm_factor``) raises ZeroDivisionError
        whenever there is at least one bin.
        '''
        cls.sort_map(locus)
        # Intersect as sets; "&" is not defined for two lists.
        locus_chroms = set(entry[0] for entry in locus)
        srna_chroms = set(entry[0] for entry in cls)
        locus_num = len(locus)
        # Floor division keeps the bin count an int on Python 2 and 3.
        point = flank // window
        up = [0.0] * point
        trans = [0.0] * point
        down = [0.0] * point

        for chrom in sorted(locus_chroms & srna_chroms):
            srnas = [rec for rec in cls if rec[0] == chrom]
            chrom_loci = [loc for loc in locus if loc[0] == chrom]
            hits_per_locus = ol.overlap(
                [rec[1:4] for rec in srnas],
                [[loc[1] - flank, loc[2] + flank, loc[3], loc[4]]
                 for loc in chrom_loci])
            for loc, hits in zip(chrom_loci, hits_per_locus):
                if not hits:
                    continue
                locus_len = loc[2] - loc[1] + 1 + flank * 2
                coverage = [0] * locus_len
                for j in hits:
                    # Coordinates relative to the start of the upstream
                    # flank, clamped to the valid index range so an sRNA
                    # running past either end cannot index out of bounds.
                    first = max(0, srnas[j][1] - loc[1] + flank)
                    last = min(locus_len - 1, srnas[j][2] - loc[1] + flank)
                    for k in range(first, last + 1):
                        coverage[k] += 1
                if loc[3] == "-":
                    coverage.reverse()

                up_list = coverage[:flank]
                trans_list = coverage[flank:locus_len - flank]
                down_list = coverage[locus_len - flank:]

                for p in range(point):
                    lo, hi = p * window, (p + 1) * window
                    up[p] += float(sum(up_list[lo:hi])) / window
                    down[p] += float(sum(down_list[lo:hi])) / window

                # The body is rescaled to `point` bins exactly once per
                # locus so it is weighted the same as the flanks.
                bin_size = len(trans_list) // point if point else 0
                if bin_size >= 1:
                    for p in range(point):
                        lo, hi = p * bin_size, (p + 1) * bin_size
                        trans[p] += float(sum(trans_list[lo:hi])) / bin_size
        return [value / (locus_num * norm_factor)
                for value in up + trans + down]
Пример #6
0
    def locus_avg_sRNA(cls, locus, flank, window, norm_factor):
        '''Calculate the average distinct sRNA density in given loci.

        cls: collection of sRNA records providing ``sort_map``; index 0 is
            the chromosome, indices 1 and 2 are used as start and end, and
            the slice [1:4] is handed to ``ol.overlap``.
        locus: list of locus records with at least five fields; index 0 is
            the chromosome, 1 and 2 the start and end, index 3 is compared
            with "-" (presumably the strand -- confirm) and index 4 is
            passed through to ``ol.overlap``.  Passed to ``cls.sort_map``
            before use.
        flank: number of positions added on each side of every locus.
        window: number of positions averaged into one flank bin.
        norm_factor: divisor applied, together with the number of loci, to
            every bin.

        Returns a flat list of ``3 * (flank // window)`` values: upstream
        bins, locus-body bins, downstream bins.  The locus body is split
        into the same number of bins as a flank; bodies shorter than that
        number of positions contribute nothing to the body bins.  An empty
        ``locus`` (or a zero ``norm_factor``) raises ZeroDivisionError
        whenever there is at least one bin.
        '''
        cls.sort_map(locus)
        # Intersect as sets; "&" is not defined for two lists.
        shared_chroms = sorted(set(entry[0] for entry in locus)
                               & set(entry[0] for entry in cls))
        locus_num = len(locus)
        # Floor division keeps the bin count an int on Python 2 and 3.
        point = flank // window
        up = [0.0] * point
        trans = [0.0] * point
        down = [0.0] * point

        for chrom in shared_chroms:
            reads = [rec for rec in cls if rec[0] == chrom]
            regions = [loc for loc in locus if loc[0] == chrom]
            overlaps = ol.overlap(
                [rec[1:4] for rec in reads],
                [[loc[1] - flank, loc[2] + flank, loc[3], loc[4]]
                 for loc in regions])
            for region, hits in zip(regions, overlaps):
                if not hits:
                    continue
                locus_len = region[2] - region[1] + 1 + flank * 2
                depth = [0] * locus_len
                for j in hits:
                    # Offsets are relative to the start of the upstream
                    # flank; clamp both ends to valid indices so reads
                    # extending past the region do not raise IndexError.
                    first = max(0, reads[j][1] - region[1] + flank)
                    last = min(locus_len - 1,
                               reads[j][2] - region[1] + flank)
                    for k in range(first, last + 1):
                        depth[k] += 1
                if region[3] == "-":
                    depth.reverse()

                up_list = depth[:flank]
                trans_list = depth[flank:locus_len - flank]
                down_list = depth[locus_len - flank:]

                for p in range(point):
                    lo, hi = p * window, (p + 1) * window
                    up[p] += float(sum(up_list[lo:hi])) / window
                    down[p] += float(sum(down_list[lo:hi])) / window

                # Rescale the body to `point` bins once per locus so it
                # carries the same weight as the two flanks.
                bin_size = len(trans_list) // point if point else 0
                if bin_size >= 1:
                    for p in range(point):
                        lo, hi = p * bin_size, (p + 1) * bin_size
                        trans[p] += float(sum(trans_list[lo:hi])) / bin_size
        return [value / (locus_num * norm_factor)
                for value in up + trans + down]
Пример #7
0
    def find(cls, locus, remove=False):
        '''Find all sRNAs mapped to the given genomic regions.

        cls: collection of sRNA records providing ``sort_map``; index 0 is
            the chromosome and the slice [1:3] is the interval handed to
            ``ol.overlap``.  Records are printed with a six-field format,
            so they must be 6-tuples when ``remove`` is false.
        locus: list of locus records; index 0 is the chromosome, [1:3] the
            interval, and an optional index 3 is printed as an identifier
            after "@".  Passed to ``cls.sort_map`` before use.
        remove: when false, print each locus identifier followed by the
            sRNAs overlapping it (or "NA") and return None.  When true,
            print no per-locus report and return a new list, passed through
            ``cls.sort_map``, holding every sRNA that overlaps none of the
            loci.
        '''
        # locus needs to be sorted before doing overlap
        cls.sort_map(locus)
        locus_chr = sorted(set(entry[0] for entry in locus))
        sRNA_chr = sorted(set(entry[0] for entry in cls))
        locus_chr_set = set(locus_chr)
        sRNA_chr_set = set(sRNA_chr)
        kept = []
        for c in sRNA_chr:
            srnas = [rec for rec in cls if rec[0] == c]
            if c not in locus_chr_set:
                # No locus on this chromosome: nothing can be removed.
                if remove:
                    kept.extend(srnas)
                continue

            chrom_loci = [loc for loc in locus if loc[0] == c]
            # extract start and end, overlap them
            hits_per_locus = ol.overlap([rec[1:3] for rec in srnas],
                                        [loc[1:3] for loc in chrom_loci])
            if all(len(hits) == 0 for hits in hits_per_locus):
                # no sRNAs were found to match any given loci
                kept.extend(srnas)
                print('no sRNAs were found on Chr%s' % c)
                continue

            matched = set()
            for i, hits in enumerate(hits_per_locus):  # for each locus
                if not remove:
                    try:  # if agi is provided
                        agi = chrom_loci[i][3]
                    except IndexError:
                        pass
                    else:
                        print('@' + agi)
                if hits:  # at least one sRNA in that locus
                    matched.update(hits)
                    if not remove:
                        for j in hits:
                            print('chr%s\t%d\t%d\t%d\t%d\t%s' % srnas[j])
                elif not remove:  # no sRNA is mapped to that locus
                    print('NA')
            if remove:
                # Filter by the set of matched positions instead of deleting
                # index by index: an sRNA overlapping several loci appears
                # more than once in the hits, and repeated or unordered
                # deletions would drop unrelated records.
                kept.extend(rec for idx, rec in enumerate(srnas)
                            if idx not in matched)

        if remove:
            cls.sort_map(kept)
            return kept

        for c in locus_chr:
            if c in sRNA_chr_set:
                continue
            # chromosome of this locus does not occur in the sRNA list
            for loc in locus:
                if loc[0] != c:
                    continue
                try:
                    agi = loc[3]
                except IndexError:
                    continue
                print('@' + agi)
                print('No accession found')
        return None
Пример #8
0
 def find(cls, locus, remove=False):
     '''Find all sRNAs mapped to the given genomic regions.

     cls: collection of sRNA records providing ``sort_map``; index 0 is
         the chromosome and the slice [1:3] is the interval handed to
         ``ol.overlap``.  Records are printed with a six-field format, so
         they must be 6-tuples when ``remove`` is false.
     locus: list of locus records; index 0 is the chromosome, [1:3] the
         interval, and an optional index 3 is printed as an identifier
         after "@".  Passed to ``cls.sort_map`` before use.
     remove: when false, print each locus identifier followed by the sRNAs
         overlapping it (or "NA") and return None.  When true, print no
         per-locus report and return a new list, passed through
         ``cls.sort_map``, holding every sRNA that overlaps none of the
         loci.
     '''
     # locus needs to be sorted before doing overlap
     cls.sort_map(locus)
     locus_chr = sorted(set(entry[0] for entry in locus))
     sRNA_chr = sorted(set(entry[0] for entry in cls))
     locus_chr_set = set(locus_chr)
     sRNA_chr_set = set(sRNA_chr)
     new_list = []
     for c in sRNA_chr:
         reads = [rec for rec in cls if rec[0] == c]
         if c not in locus_chr_set:
             # No locus on this chromosome: nothing can be removed.
             if remove:
                 new_list.extend(reads)
             continue

         regions = [loc for loc in locus if loc[0] == c]
         # extract start and end, overlap them
         overlaps = ol.overlap([rec[1:3] for rec in reads],
                               [loc[1:3] for loc in regions])
         if all(len(hits) == 0 for hits in overlaps):
             # no sRNAs were found to match any given loci
             new_list.extend(reads)
             print('no sRNAs were found on Chr%s' % c)
             continue

         to_drop = set()
         for i, hits in enumerate(overlaps):  # for each locus
             if not remove:
                 try:  # if agi is provided
                     agi = regions[i][3]
                 except IndexError:
                     pass
                 else:
                     print('@' + agi)
             if hits:  # at least one sRNA in that locus
                 to_drop.update(hits)
                 if not remove:
                     for j in hits:
                         print('chr%s\t%d\t%d\t%d\t%d\t%s' % reads[j])
             elif not remove:  # no sRNA is mapped to that locus
                 print('NA')
         if remove:
             # Keep by membership test rather than deleting positions one
             # at a time: a read overlapping several loci is listed more
             # than once, and repeated or unordered deletions would remove
             # unrelated reads.
             new_list.extend(rec for idx, rec in enumerate(reads)
                             if idx not in to_drop)

     if remove:
         cls.sort_map(new_list)
         return new_list

     for c in locus_chr:
         if c in sRNA_chr_set:
             continue
         # chromosome of this locus does not occur in the sRNA list
         for loc in locus:
             if loc[0] != c:
                 continue
             try:
                 agi = loc[3]
             except IndexError:
                 continue
             print('@' + agi)
             print('No accession found')
     return None