예제 #1
0
 def overlap_repeats(self):
     """Annotate events with the repeat tracks their coordinates overlap.

     Walks the nested grouping returned by ``self.group_by_chr()``
     (chromosome -> event type -> list of event groups).  For every group
     the reference span of its *first* member is passed to
     ``repeat.find_overlaps`` together with ``self.repeat_overlaps``.  For
     each recognised track that reports hits, the hit with the shortest
     name is stored on every member of the group:

     * ``simple_repeats`` -> attribute ``within_simple_repeats``
     * ``segdup``         -> attribute ``within_segdup``
     * ``rmsk``           -> attribute ``repeatmasker``

     Tracks with any other name are ignored.  After all groups have been
     processed, ``finish()`` is called on every value of
     ``self.repeat_overlaps``.
     """
     # overlap track name -> attribute set on the event
     attr_by_repeat_type = {
         'simple_repeats': 'within_simple_repeats',
         'segdup': 'within_segdup',
         'rmsk': 'repeatmasker',
     }

     for chrom, events in self.group_by_chr().items():
         proper_chrom = tools.proper_chrom(chrom, chrom_proper=self.chrom_proper)

         for snv_type, snv_groups in events.items():
             # single-argument form prints the same text on Python 2 and 3
             print('processing repeat %s' % (snv_type,))
             for snvs in snv_groups:
                 # the first event's coordinates stand in for the whole group
                 region = {
                     'chrom': proper_chrom,
                     'start': int(snvs[0].ref_start),
                     'end': int(snvs[0].ref_end),
                 }
                 overlaps = repeat.find_overlaps(region, self.repeat_overlaps)
                 if not overlaps:
                     continue

                 attrs = {}
                 for repeat_type, types in overlaps.items():
                     attr = attr_by_repeat_type.get(repeat_type)
                     # skip unknown tracks instead of reusing a stale (or
                     # undefined) attribute name from an earlier iteration
                     if attr is None or not types:
                         continue
                     # only report one with shortest name; min() keeps the
                     # first of equally short names, like a stable sort
                     attrs[attr] = min(types.keys(), key=len)

                 # apply once per group, after every track has been examined
                 if attrs:
                     for snv in snvs:
                         tools.set_attrs(snv, attrs)

     # clears cache
     for repeat_olap in self.repeat_overlaps.values():
         repeat_olap.finish()
예제 #2
0
    def parse(cls, record):
        """Parse one tab-delimited output record into an Event.

        The record is split on tabs after its trailing newline is removed.
        Column 0 is skipped; the columns after it are matched, in order,
        against ``cls.headers`` followed by ``cls.headers_support``.

        Values of the ``spanning_reads`` and ``coverage`` columns are
        converted to ``int`` (or to a list of ``int`` when the value is
        comma-separated) unless the value is ``'na'``.  Every other value is
        kept as the raw string.

        Args:
            record: one line of tabulated output.

        Returns:
            An ``Event`` constructed from column 1, with every parsed column
            applied to it through ``set_attrs``.

        Raises:
            IndexError: if the record has fewer columns than there are headers.
            ValueError: if a numeric column holds a non-integer value.
        """
        cols = record.rstrip('\n').split('\t')

        headers = cls.headers[:]
        headers.extend(cls.headers_support)

        numeric_headers = ('spanning_reads', 'coverage')
        data = {}
        # column 0 has no header, so header i lines up with column i + 1
        for col_idx, header in enumerate(headers, 1):
            value = cols[col_idx]
            if value != 'na' and header in numeric_headers:
                if ',' in value:
                    data[header] = [int(n) for n in value.split(',')]
                else:
                    data[header] = int(value)
            else:
                data[header] = value

        e = Event(cols[1])
        set_attrs(e, data)
        return e
예제 #3
0
    def parse_results(self, snv_file, select_types=None, chrom=None):
        """Parse one tab-delimited results file into SNV objects.

        For each data line, every column except the first is matched by
        position against ``SNVCaller.output_headers``, renamed to its object
        attribute name where a conversion exists, and type-converted for a
        few fields (``expansion``/``from_end`` to int,
        ``confirm_contig_region`` to a list whose first two items are ints,
        ``at_least_1_read_opposite`` to bool).  Each record passing the
        optional filters becomes an ``SNV(method='psl')`` appended to
        ``self.snvs``.  Lines whose first column is ``id`` (the header) and
        blank lines are skipped.

        Args:
            snv_file: path of the results file to read.
            select_types: optional container of ``snv_type`` values to keep;
                a falsy value keeps every type.
            chrom: optional reference name; when truthy, only records whose
                ``ref`` attribute equals it are kept.

        Raises:
            IndexError: if a line has more columns than
                ``SNVCaller.output_headers`` has entries.
            ValueError: if an integer field holds a non-integer value.
            KeyError: if a filter is requested but the line provides no
                ``type`` / ``chr`` column.
        """
        names = SNVCaller.output_headers
        # conversion between header name and object attribute
        field_name_conversion = {
            'type': 'snv_type',
            'chr': 'ref',
            'chr_start': 'ref_start',
            'chr_end': 'ref_end',
            'ctg': 'var',
            'ctg_len': 'var_len',
            'ctg_start': 'var_start',
            'ctg_end': 'var_end',
            'len': 'snv_len',
            'ref': 'ref_seq',
            'alt': 'var_seq',
            'event_reads': 'nreads_event',
            'contig_reads': 'nreads_contig',
            'genome_reads': 'nreads_genome',
            'gene': 'gene',
            'from_end': 'from_end',
            'ctg_strand': 'query_strand',
        }

        # context manager guarantees the handle is closed, even on error
        with open(snv_file, 'r') as snv_handle:
            for line in snv_handle:
                # a blank line carries no record; without this guard it would
                # yield an attribute-less SNV (or a KeyError when filtering)
                if not line.strip():
                    continue

                cols = line.rstrip('\n').split('\t')

                # header line
                if cols[0] == 'id':
                    continue

                attributes = {}
                # column 0 is skipped; column i is described by names[i]
                for i, value in enumerate(cols[1:], 1):
                    header = names[i]
                    name = field_name_conversion.get(header, header)

                    if name in ('expansion', 'from_end'):
                        value = int(value)
                    elif name == 'confirm_contig_region':
                        value = value.split('-')
                        value[0] = int(value[0])
                        value[1] = int(value[1])
                    elif name == 'at_least_1_read_opposite':
                        value = (value == 'true')

                    attributes[name] = value

                if select_types and attributes['snv_type'] not in select_types:
                    continue

                if chrom and attributes['ref'] != chrom:
                    continue

                snv = SNV(method='psl')
                tools.set_attrs(snv, attributes)
                self.snvs.append(snv)