Exemplo n.º 1
0
 def ovarlap_search(self, s2, e2):
     """Return, per original item, its ranges that overlap the query (s2, e2).

     ``self._search(s2, e2)`` supplies indices into ``self.ranges`` (or
     ``None``).  Each ``self.ranges`` entry is a ``(s1, e1, set_indices)``
     triple; ``overlap_boundaries(s1, e1, s2, e2)`` yields the ``(s, e)``
     pair recorded for every index in ``set_indices``.

     Args:
         s2: Start of the query range.
         e2: End of the query range.

     Returns:
         A list of ``(self.array[index], ranges)`` tuples, where ``ranges``
         is the list of ``(s, e)`` pairs accumulated for that index.  The
         list is empty when ``_search`` returns ``None`` or no indices.
     """
     array_of_indices = self._search(s2, e2)
     all_original_indices_and_length = {}
     if array_of_indices is not None:
         for indice in array_of_indices:
             s1, e1, set_indices = self.ranges[indice]
             s, e = overlap_boundaries(s1, e1, s2, e2)
             if e < s:
                 # Inverted range: report it, but keep recording it as the
                 # original code did (best effort, not a hard failure).
                 # Single-argument print() behaves the same on Python 2
                 # and Python 3.
                 print('ERROR end %s is smaller than start %s' % (e, s))
                 print('%s %s %s %s' % (s1, e1, s2, e2))
             for original_i in set_indices:
                 if original_i in all_original_indices_and_length:
                     list_range = all_original_indices_and_length[original_i]
                     list_range.append((s, e))
                     # presumably merge_ranges collapses overlapping pairs;
                     # its result replaces the stored list.
                     all_original_indices_and_length[original_i] = (
                         merge_ranges(list_range))
                 else:
                     all_original_indices_and_length[original_i] = [(s, e)]
     return [(self.array[indice], list_range)
             for indice, list_range
             in all_original_indices_and_length.items()]
Exemplo n.º 2
0
def summarize(all_sites):
    """Fill in per-site contig alignment/overlap statistics, in place.

    For each site the function reads:

    * ``"contig_span"``: iterable of ``"start-end"`` strings, one per
      contig alignment.
    * ``"span_region"``: ``;``-separated ``"start-end"`` strings describing
      the expected regions.

    and updates:

    * ``"max_ol_length"``: raised to the largest per-contig value returned
      by ``get_length_of_list_range`` for that contig's overlaps (the key
      must already exist).
    * ``"contig_coverage1"`` .. ``"contig_coverage3"`` and
      ``"contig_coverage4plus"``: incremented once per covered position,
      keyed by how many contigs cover it (start and end inclusive).
      NOTE(review): these are updated with ``+=``, so they must be
      pre-initialised or ``site_info`` must provide defaults -- confirm
      against the caller.
    * ``"overlap_length"``: ``get_length_of_list_range`` of all overlaps.
    * ``"alignment_length"``: ``get_length_of_list_range`` of the merged
      contig spans.

    Args:
        all_sites: Mapping of site name to its mutable info mapping.

    Returns:
        The same ``all_sites`` object that was passed in.
    """
    for site_name, site_info in all_sites.items():
        contig_span_list = []
        for value in site_info.get("contig_span"):
            s, e = value.split('-')
            contig_span_list.append((int(s), int(e)))
        merged_contig_span_list = merge_ranges(contig_span_list)

        # Parse the expected regions once per site instead of once per
        # contig; this also reports an empty region a single time.
        span_region = site_info.get("span_region")
        expected_regions = []
        for region in span_region.split(';'):
            if not region:
                logging.warning("Site %s has no expected regions", site_name)
                continue
            start1, end1 = region.split('-')
            expected_regions.append((int(start1), int(end1)))

        overlap_ranges = []
        pileup = Counter()
        # Find overlap between the contigs and the expected position
        for start2, end2 in contig_span_list:
            # Count this contig once for every position it spans.
            pileup.update(range(start2, end2 + 1))
            tmp_overlap_ranges = []
            for start1, end1 in expected_regions:
                overlap_res = overlap.get_overlap_if_exist(
                    start1, end1, start2, end2)
                if overlap_res:
                    tmp_overlap_ranges.append(overlap_res)
            if not tmp_overlap_ranges:
                logging.warning(
                    "%s: No overlap found between expected region %s and contig alignment %s-%s",
                    site_name, span_region, start2, end2)
            else:
                contig_ol_length = get_length_of_list_range(tmp_overlap_ranges)
                if site_info["max_ol_length"] < contig_ol_length:
                    site_info["max_ol_length"] = contig_ol_length
                overlap_ranges.extend(tmp_overlap_ranges)

        for pileup_value in pileup.values():
            if pileup_value >= 4:
                site_info["contig_coverage4plus"] += 1
            else:
                site_info["contig_coverage%s" % pileup_value] += 1

        site_info["overlap_length"] = get_length_of_list_range(overlap_ranges)
        site_info["alignment_length"] = get_length_of_list_range(
            merged_contig_span_list)
    return all_sites
Exemplo n.º 3
0
def summarize(all_sites):
    """Update every site's contig alignment and overlap summary in place.

    Keys read from each site's info mapping:

    * ``"contig_span"``: iterable of ``"start-end"`` strings (one per
      contig alignment).
    * ``"span_region"``: expected regions as ``"start-end"`` strings joined
      with ``;``.

    Keys written:

    * ``"max_ol_length"``: replaced whenever a contig's overlap length (as
      returned by ``get_length_of_list_range``) exceeds the stored value;
      the key must already be present.
    * ``"contig_coverage<N>"`` for N below 4 and ``"contig_coverage4plus"``:
      incremented once per position, where N is the number of contigs
      covering that position (both span ends are counted).
      NOTE(review): incremented with ``+=``, so the keys presumably exist
      beforehand or the mapping supplies defaults -- verify with the caller.
    * ``"overlap_length"``: ``get_length_of_list_range`` over every overlap
      found for the site.
    * ``"alignment_length"``: ``get_length_of_list_range`` over the result
      of ``merge_ranges`` on the contig spans.

    Args:
        all_sites: Mapping from site name to that site's info mapping.

    Returns:
        ``all_sites`` itself, after the in-place updates.
    """
    for site_name, site_info in all_sites.items():
        contig_span_list = []
        for value in site_info.get("contig_span"):
            s, e = value.split('-')
            contig_span_list.append((int(s), int(e)))
        merged_contig_span_list = merge_ranges(contig_span_list)

        # The expected regions do not depend on the contig, so convert
        # them a single time here rather than inside the contig loop.  An
        # empty region is therefore warned about once per site.
        span_region = site_info.get("span_region")
        expected_regions = []
        for region in span_region.split(';'):
            if not region:
                logging.warning("Site %s has no expected regions", site_name)
                continue
            start1, end1 = region.split('-')
            expected_regions.append((int(start1), int(end1)))

        overlap_ranges = []
        pileup = Counter()
        # Find overlap between the contigs and the expected position
        for start2, end2 in contig_span_list:
            # Every position of the contig (ends included) gains one count.
            pileup.update(range(start2, end2 + 1))
            tmp_overlap_ranges = []
            for start1, end1 in expected_regions:
                overlap_res = overlap.get_overlap_if_exist(
                    start1, end1, start2, end2)
                if overlap_res:
                    tmp_overlap_ranges.append(overlap_res)
            if not tmp_overlap_ranges:
                logging.warning(
                    "%s: No overlap found between expected region %s and contig alignment %s-%s",
                    site_name, span_region, start2, end2)
            else:
                contig_ol_length = get_length_of_list_range(tmp_overlap_ranges)
                if site_info["max_ol_length"] < contig_ol_length:
                    site_info["max_ol_length"] = contig_ol_length
                overlap_ranges.extend(tmp_overlap_ranges)

        for pileup_value in pileup.values():
            if pileup_value >= 4:
                site_info["contig_coverage4plus"] += 1
            else:
                site_info["contig_coverage%s" % pileup_value] += 1

        site_info["overlap_length"] = get_length_of_list_range(overlap_ranges)
        site_info["alignment_length"] = get_length_of_list_range(
            merged_contig_span_list)
    return all_sites
Exemplo n.º 4
0
 def ovarlap_search(self, s2, e2):
     """Gather the overlaps between the query (s2, e2) and the stored ranges.

     Indices come from ``self._search(s2, e2)``, which may return ``None``.
     Each index selects a ``(s1, e1, set_indices)`` entry of
     ``self.ranges``.  The ``(s, e)`` pair from
     ``overlap_boundaries(s1, e1, s2, e2)`` is stored under every index
     found in ``set_indices``.

     Args:
         s2: Start of the query range.
         e2: End of the query range.

     Returns:
         A list of ``(self.array[index], ranges)`` tuples, one per index
         that received at least one ``(s, e)`` pair; ``ranges`` is the list
         of pairs kept for it.  Empty when ``_search`` gives ``None`` or no
         indices.
     """
     array_of_indices = self._search(s2, e2)
     overlaps_by_index = {}
     if array_of_indices is not None:
         for indice in array_of_indices:
             s1, e1, set_indices = self.ranges[indice]
             s, e = overlap_boundaries(s1, e1, s2, e2)
             if e < s:
                 # The pair is inverted.  It is reported but still stored,
                 # matching the previous best-effort behaviour.  print()
                 # with one argument works on both Python 2 and Python 3.
                 print('ERROR end %s is smaller than start %s' % (e, s))
                 print('%s %s %s %s' % (s1, e1, s2, e2))
             for original_i in set_indices:
                 if original_i in overlaps_by_index:
                     list_range = overlaps_by_index[original_i]
                     list_range.append((s, e))
                     # presumably merge_ranges fuses overlapping pairs; the
                     # list it returns becomes the stored value.
                     overlaps_by_index[original_i] = merge_ranges(list_range)
                 else:
                     overlaps_by_index[original_i] = [(s, e)]
     return [(self.array[indice], list_range)
             for indice, list_range in overlaps_by_index.items()]