示例#1
0
    def calc_reconstruction(self, exons, blocks, size=None):
        """Calculate reconstruction given a list of alignment blocks and exons.

        Args:
            exons: exon spans; passed to ``intspan.intersect`` and, when
                ``size`` is not given, to ``intspan.cardinality_multi``.
            blocks: alignment block spans; passed to ``intspan.intersect``.
            size: denominator used for the reconstructed fraction.  Defaults
                to ``intspan.cardinality_multi(exons)`` when ``None``.

        Returns:
            A tuple ``(bases_reconstructed, fraction_reconstructed)`` where
            ``bases_reconstructed`` is the result of
            ``intspan.intersect(exons, blocks)`` and
            ``fraction_reconstructed`` is that value divided by ``size`` as a
            float (0.0 when ``size`` is zero).
        """
        bases_reconstructed = intspan.intersect(exons, blocks)
        if size is None:
            size = intspan.cardinality_multi(exons)

        size = float(size)
        if size == 0.0:
            # A zero-sized feature has nothing to reconstruct; report 0.0
            # instead of raising ZeroDivisionError.
            fraction_reconstructed = 0.0
        else:
            fraction_reconstructed = float(bases_reconstructed) / size

        return bases_reconstructed, fraction_reconstructed
示例#2
0
    def combine_results(self, records, add_reads=False):
        """Pool match records per transcript and per gene to calculate coverage.

        The records are grouped with ``self.group_results``.  For every gene
        of every model one pooled gene entry is built, followed by one pooled
        entry per transcript of that gene.  The pooled entries are stored in
        ``self.results`` following the order of ``self.models``; nothing is
        returned.

        Args:
            records: match records handed to ``self.group_results``.  Every
                grouped record is indexed by field name and must provide
                'contigs' (comma-separated string), 'best_contig',
                'best_contig_reconstruction', 'num_reads', 'depth',
                'align_blocks' and 'exons'.
            add_reads: when true, the read counts of pooled records are
                summed; otherwise the largest read count is kept.

        Note:
            'feature_size' is read from every pooled entry, so it has to be
            one of ``self.report_fields``.
        """
        # Fields that are recomputed while pooling instead of being copied
        # from the first record of a group.
        pool_headers = ('num_reads', 'depth', 'contigs', 'num_contigs',
                        'best_contig', 'best_contig_reconstruction')

        grouped_records = self.group_results(records)
        pooled = {}
        for model in grouped_records:
            pooled[model] = []

            gene_groups = grouped_records[model]['genes']
            for gene in gene_groups:
                gene_records = gene_groups[gene]
                pooled_gene = self._new_pooled_entry(gene_records[0], pool_headers)
                exons_gene = self.string_as_blocks(gene_records[0]['exons'])
                blocks_gene = []
                pooled_transcripts = []

                gene_transcripts = grouped_records[model]['transcripts'][gene]
                for transcript in gene_transcripts:
                    transcript_records = gene_transcripts[transcript]
                    pooled_transcript = self._new_pooled_entry(transcript_records[0],
                                                               pool_headers)
                    exons_transcript = self.string_as_blocks(transcript_records[0]['exons'])
                    blocks_transcript = []

                    for record in transcript_records:
                        # Contigs count towards both the transcript and its gene.
                        contigs = record['contigs'].split(',')
                        pooled_transcript['contigs_list'].extend(contigs)
                        pooled_gene['contigs_list'].extend(contigs)

                        self._update_best_contig(pooled_transcript, record)
                        self._merge_num_reads(pooled_transcript, record, add_reads)

                        # Alignment blocks feed the reconstruction of both levels.
                        blocks = self.string_as_blocks(record['align_blocks'])
                        blocks_transcript.append(blocks)
                        blocks_gene.append(blocks)

                    self._finalize_pooled_entry(pooled_transcript, exons_transcript,
                                                blocks_transcript)
                    pooled_transcripts.append(pooled_transcript)

                # The gene entry is pooled from its finalized transcripts.
                for pooled_transcript in pooled_transcripts:
                    self._update_best_contig(pooled_gene, pooled_transcript)
                    self._merge_num_reads(pooled_gene, pooled_transcript, add_reads)

                self._finalize_pooled_entry(pooled_gene, exons_gene, blocks_gene)

                # Gene entry first, then its transcripts.
                pooled[model].append(pooled_gene)
                pooled[model].extend(pooled_transcripts)

        self.results = []
        for model in self.models:
            if model in pooled:
                self.results.extend(pooled[model])

    def _new_pooled_entry(self, first_record, pool_headers):
        """Return a fresh pooled entry seeded with the non-pooled report fields."""
        entry = {'contigs_list': [],
                 'best_contig': None,
                 'best_contig_reconstruction': None,
                 'num_reads': None,
                 'depth': None,
                 'align_blocks': None,
                 'exons': None,
                 }
        for field in self.report_fields:
            if field not in pool_headers:
                entry[field] = first_record[field]
        return entry

    def _update_best_contig(self, pooled_entry, source):
        """Adopt the best contig of ``source`` if it beats the pooled entry's."""
        if (pooled_entry['best_contig'] is None or
                float(source['best_contig_reconstruction']) >
                float(pooled_entry['best_contig_reconstruction'])):
            pooled_entry['best_contig'] = source['best_contig']
            pooled_entry['best_contig_reconstruction'] = source['best_contig_reconstruction']

    def _merge_num_reads(self, pooled_entry, source, add_reads):
        """Fold the read count of ``source`` into ``pooled_entry`` (sum or max)."""
        missing = ('-', 'na')
        new_reads = source['num_reads']

        if pooled_entry['num_reads'] is None:
            # The first contribution is taken verbatim, together with its depth.
            pooled_entry['num_reads'] = new_reads
            pooled_entry['depth'] = source['depth']
            return
        if new_reads in missing:
            return

        new_reads = int(new_reads)
        current = pooled_entry['num_reads']
        if current in missing:
            # Nothing numeric pooled so far: the first real count replaces the
            # placeholder instead of failing on int('-').
            pooled_entry['num_reads'] = new_reads
        elif add_reads:
            pooled_entry['num_reads'] = int(current) + new_reads
        else:
            pooled_entry['num_reads'] = max(int(current), new_reads)

    def _finalize_pooled_entry(self, pooled_entry, exons, blocks_list):
        """Fill in contigs, depth, reconstruction and align_blocks of a pooled entry."""
        union_blocks = intspan.union(blocks_list)
        bases_reconstructed = intspan.intersect(exons, union_blocks)
        feature_size = float(pooled_entry['feature_size'])
        fraction_reconstructed = float(bases_reconstructed) / feature_size

        pooled_entry['contigs'] = ','.join(pooled_entry['contigs_list'])
        pooled_entry['num_contigs'] = len(pooled_entry['contigs_list'])
        if pooled_entry['num_reads'] not in ('-', 'na'):
            pooled_entry['depth'] = "%.3f" % (float(pooled_entry['num_reads']) / feature_size)
        pooled_entry['bases_reconstructed'] = bases_reconstructed
        pooled_entry['reconstruction'] = "%.3f" % (fraction_reconstructed)
        pooled_entry['align_blocks'] = self.blocks_as_string(union_blocks)