def fusions_in_blocks(self, blocks):
    '''Describes the junction between each pair of adjacent blocks.

    For every consecutive pair, looks up the genes at the end position of
    the first block and at the start position of the second block, and
    records whether each side of the junction falls inside a gene.

    blocks -- sequence of dicts; each has 'start' and 'end' entries that
              are themselves dicts with 'chrom' and 'pos' keys.

    Returns a list with one dict per junction (len(blocks) - 1 entries,
    empty when there are fewer than two blocks) with the keys:
        'start gene'      -- get_genes result at the first block's end
        'start is cut'    -- True if that result is non-empty
        'end gene'        -- get_genes result at the second block's start
        'end is cut'      -- True if that result is non-empty
        'genes are fused' -- True if both sides are cut
    '''
    # TODO use exons instead of genes
    # Exons never seem to overlap a cut, so the exon check
    # (ensembl_requests.get_exons at the cut position) is left out for now.
    fusions = []
    for upstream, downstream in zip(blocks, blocks[1:]):
        cut_end = upstream['end']
        cut_start = downstream['start']

        start_genes = ensembl_requests.get_genes(
            cut_end['chrom'], cut_end['pos'], cut_end['pos'])
        end_genes = ensembl_requests.get_genes(
            cut_start['chrom'], cut_start['pos'], cut_start['pos'])

        # bool() rather than `genes and len(genes) > 0`: the latter evaluates
        # to [] or None when nothing is found, which is not a real flag and
        # serializes to a value that is truthy in JavaScript.
        start_is_cut = bool(start_genes)
        end_is_cut = bool(end_genes)

        fusion = {}
        fusion['start gene'] = start_genes
        fusion['start is cut'] = start_is_cut
        fusion['end gene'] = end_genes
        fusion['end is cut'] = end_is_cut
        fusion['genes are fused'] = start_is_cut and end_is_cut
        fusions.append(fusion)
    return fusions
def fetch_genes(self, chrom_id, start, end, species='human'):
    '''Returns JSON list of genes in a given region, or None if request is bad.

    Maximum request region at a time is 5Mb.
    Ensembl accepts both the naming format used by UCSC and the one used by
    Meerkat.
    Note: No gene listings are available for the scaffolds and patches,
    which have names using the prefix GL-.
    '''
    # TODO get rid of this method - duplicated in web_viz
    # chrom_id goes first and species last, the same argument order the other
    # ensembl_requests.get_genes callers in this file use (see json_genes).
    # NOTE(review): confirm against the get_genes signature.
    return ensembl_requests.get_genes(chrom_id, start, end, species)
def genes_in_blocks(self, blocks):
    '''Looks up the genes covered by each block.

    blocks -- iterable of dicts; each has 'start' and 'end' entries that are
              dicts with a 'pos' key, and 'start' also has a 'chrom' key.
              'start' may be greater than 'end'; the coordinates are
              ordered before querying.

    Returns a list with one entry per block, in order. Each entry is the
    result of ensembl_requests.get_genes for the block's region. If the
    region is longer than the per-request limit (5,000,000 positions), the
    entry is instead the union (by gene 'id') of the genes found within one
    limit-sized window at each end of the block.

    Prints one progress line per block to stdout.
    '''
    request_limit = 5000000
    genes_per_block = []
    for block in blocks:
        chrom_id = block['start']['chrom']
        start = block['start']['pos']
        end = block['end']['pos']
        # One pre-formatted string, so the line reads the same whether print
        # is a statement or a function.
        print('genes in blocks %s %s %s' % (chrom_id, start, end))

        # Order the coordinates once so both branches query inside the block
        # even when the block is reversed (start > end).
        low, high = min(start, end), max(start, end)

        if high - low > request_limit:
            # For now, if the request region exceeds the Ensembl API limit,
            # we just return the genes near the ends of the sequence block.
            # A failed lookup (None) is treated as "no genes" for the merge.
            low_genes = ensembl_requests.get_genes(
                chrom_id, low, low + request_limit - 1) or []
            high_genes = ensembl_requests.get_genes(
                chrom_id, high - request_limit + 1, high) or []
            # Key by gene id so a gene reported by both windows appears once.
            genes_by_id = {gene['id']: gene for gene in low_genes}
            genes_by_id.update({gene['id']: gene for gene in high_genes})
            genes = list(genes_by_id.values())
        else:
            genes = ensembl_requests.get_genes(chrom_id, low, high)
        genes_per_block.append(genes)
    return genes_per_block
def json_genes(chrom_id, start, end, species='human'):
    '''Returns the ensembl_requests.get_genes result for the given region,
    serialized as a JSON string.'''
    # TODO may want to move request into javascript
    return json.dumps(
        ensembl_requests.get_genes(chrom_id, start, end, species))