Пример #1
0
    def fusions_in_blocks(self, blocks, get_genes=None):
        """Describe the possible gene fusion at each junction between blocks.

        A junction is the cut between the end of one block and the start of
        the block that follows it, so ``len(blocks) - 1`` junctions are
        examined (none when ``blocks`` has fewer than two entries).

        Args:
            blocks: Sliceable sequence of dicts. Each block is indexed as
                ``block['start']`` / ``block['end']``, and each of those as
                ``['chrom']`` and ``['pos']``.
            get_genes: Optional callable ``(chrom_id, start, end)`` that
                returns the genes overlapping a region. Defaults to
                ``ensembl_requests.get_genes``.

        Returns:
            A list with one dict per junction, holding:
                'start gene': lookup result at the end of the left block.
                'start is cut': True if that lookup returned any gene.
                'end gene': lookup result at the start of the right block.
                'end is cut': True if that lookup returned any gene.
                'genes are fused': True if both sides of the cut hit a gene.
        """
        # TODO use exons instead of genes
        # Exon-level checks (ensembl_requests.get_exons) were tried, but exons
        # never seemed to overlap a cut, so only genes are inspected for now.
        if get_genes is None:
            get_genes = ensembl_requests.get_genes

        fusions = []
        for left_block, right_block in zip(blocks, blocks[1:]):
            left_cut = left_block['end']
            right_cut = right_block['start']

            # Each cut is a single position, so the queried region is one base.
            start_genes = get_genes(
                left_cut['chrom'], left_cut['pos'], left_cut['pos'])
            end_genes = get_genes(
                right_cut['chrom'], right_cut['pos'], right_cut['pos'])

            # bool() keeps the flags real booleans; `genes and len(genes) > 0`
            # would store [] or None when the lookup is empty or fails.
            start_is_cut = bool(start_genes)
            end_is_cut = bool(end_genes)

            fusions.append({
                'start gene': start_genes,
                'start is cut': start_is_cut,
                'end gene': end_genes,
                'end is cut': end_is_cut,
                'genes are fused': start_is_cut and end_is_cut,
            })

        return fusions
Пример #2
0
 def fetch_genes(self, chrom_id, start, end, species='human'):
     '''Look up the genes in a region via ensembl_requests.get_genes.

     The result is the JSON list of genes for the region, or None when the
     request is bad. A single request may cover at most 5Mb. Ensembl
     accepts chromosome names in both the UCSC format and the Meerkat
     format. Scaffolds and patches (names prefixed with GL-) have no gene
     listings available.
     '''
     # TODO: get rid of this method - duplicated in web_viz
     # NOTE(review): species is passed as the FIRST positional argument;
     # confirm this matches the signature of ensembl_requests.get_genes.
     return ensembl_requests.get_genes(species, chrom_id, start, end)
Пример #3
0
    def genes_in_blocks(self, blocks, request_limit=5000000, get_genes=None):
        """Collect the genes overlapping each sequence block.

        Block coordinates may be given in either order; they are normalised
        to (low, high) before any lookup. A block longer than
        ``request_limit`` is not queried in full: only the first and the
        last ``request_limit`` bases are looked up and the two results are
        merged by gene ``'id'``, so genes in the middle of such a block are
        not reported.

        Prints one 'genes in blocks <chrom> <start> <end>' line per block.

        Args:
            blocks: Iterable of dicts read as ``block['start']['chrom']``,
                ``block['start']['pos']`` and ``block['end']['pos']``.
            request_limit: Largest region length sent in one lookup.
                Defaults to 5000000, the Ensembl API limit assumed by the
                original code.
            get_genes: Optional callable ``(chrom_id, start, end)`` that
                returns the genes in a region. Defaults to
                ``ensembl_requests.get_genes``.

        Returns:
            A list with one entry per block, in input order. For blocks
            within the limit the entry is whatever the lookup returned
            (possibly None); for oversized blocks it is a list of gene
            dicts, unique by ``'id'``.
        """
        if get_genes is None:
            get_genes = ensembl_requests.get_genes

        genes_per_block = []
        for block in blocks:
            chrom_id = block['start']['chrom']
            start = block['start']['pos']
            end = block['end']['pos']
            # One pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('genes in blocks %s %s %s' % (chrom_id, start, end))

            # Reversed blocks (start > end) must use the same windows as
            # forward ones; otherwise the end windows fall outside the block.
            low, high = min(start, end), max(start, end)

            if high - low > request_limit:
                # For now, if the region exceeds the request limit, only the
                # genes near the two ends of the block are returned.
                end_windows = (
                    (low, low + request_limit - 1),
                    (high - request_limit + 1, high),
                )
                genes_by_id = {}
                for window_low, window_high in end_windows:
                    # A failed lookup may yield None; treat it as no genes.
                    found = get_genes(chrom_id, window_low, window_high) or []
                    for gene in found:
                        genes_by_id[gene['id']] = gene
                genes = list(genes_by_id.values())
            else:
                genes = get_genes(chrom_id, low, high)

            genes_per_block.append(genes)
        return genes_per_block
Пример #4
0
def json_genes(chrom_id, start, end, species='human'):
    """Return the genes in a region, serialised as a JSON string.

    The lookup is delegated to ``ensembl_requests.get_genes`` with the
    arguments ``(chrom_id, start, end, species)``; whatever it returns is
    passed straight to ``json.dumps``.
    """
    # TODO may want to move request into javascript
    return json.dumps(ensembl_requests.get_genes(chrom_id, start, end, species))
Пример #5
0
def json_genes(chrom_id, start, end, species='human'):
    """Fetch the genes for a region and encode the result as JSON text.

    Calls ``ensembl_requests.get_genes(chrom_id, start, end, species)`` and
    returns ``json.dumps`` of its result unchanged.
    """
    # TODO may want to move request into javascript
    region_genes = ensembl_requests.get_genes(
        chrom_id, start, end, species)
    encoded = json.dumps(region_genes)
    return encoded