def get_descriptions(ensid):
    """Return formatted database/description lines for an identifier's xrefs.

    Requests ``/xrefs/id/<ensid>`` through ``ensembl_rest.get_endpoint`` using
    the module-level ``server`` and keeps only the cross-references whose
    ``description`` and ``dbname`` entries are both truthy.

    Args:
        ensid: Identifier interpolated into the ``/xrefs/id/%s`` request path.

    Returns:
        A list of strings, one per retained cross-reference, each holding the
        database name and the description separated by a tab. The order
        follows the order of the decoded response.
    """
    xrefs = ensembl_rest.get_endpoint(server, '/xrefs/id/%s' % (ensid))
    # Skip cross-references with an empty/missing-valued description or dbname.
    return [
        "DB: %s\tDescription: %s" % (xref['dbname'], xref['description'])
        for xref in xrefs
        if (xref['description']) and (xref['dbname'])
    ]
import json
import ensembl_rest

server = 'http://rest.ensembl.org'
vep_endpoint = '/vep/human/id/{}'
vep_post_endpoint = '/vep/human/id'
overlap_endpoint = '/overlap/region/human/{}?feature=variation'

# 1) Print VEP results for rs189863975
#    a) For each overlapping transcript (transcript_consequences) print
#       variant_allele,
#       transcript_id,
#       the consequence_terms
#       and if available the polyphen_score and polyphen_prediction
variant_effects = ensembl_rest.get_endpoint(
    server, vep_endpoint.format('rs189863975'))

for entry in variant_effects:
    for consequence in entry['transcript_consequences']:
        # One summary line per transcript consequence.
        print("Variant allele: {}, Transcript ID: {}, Consequence terms: {}".
              format(consequence['variant_allele'],
                     consequence['transcript_id'],
                     ','.join(consequence['consequence_terms'])))

        # PolyPhen values are optional; the placeholder string marks a
        # consequence that carries no score, in which case nothing is printed.
        polyphen_score = consequence.get('polyphen_score', 'no polyphen score')
        if polyphen_score == 'no polyphen score':
            continue
        polyphen_prediction = consequence.get('polyphen_prediction',
                                              'no polyphen prediction')
        print(" PolyPhen score: {}, PolyPhen prediction: {}".format(
            polyphen_score, polyphen_prediction))
import json
import ensembl_rest

server = "http://rest.ensembl.org"

# Gene tree endpoint exercises
# CG-6a: Get the information for the protein genetree with the stable id
#        ENSGT00390000003602. Output should be in the orthoxml format.
ext = "/genetree/id/ENSGT00390000003602?"

# The content type is passed explicitly so that the response is requested as
# OrthoXML; the result is printed as returned by the helper.
content_type = "text/x-orthoxml+xml"
endpoint = ensembl_rest.get_endpoint(server, ext, content_type)
print(endpoint)
from __future__ import (absolute_import, division, print_function, unicode_literals) import requests, json, sys, ensembl_rest server = "http://rest.ensembl.org" ## Export all microarray platforms that are annotated for humans in Ensembl and their associated information. request = '/regulatory/species/homo_sapiens/microarray' decoded = ensembl_rest.get_endpoint(server, request) print(json.dumps(decoded, indent=4, sort_keys=True)) """ You have performed a microarray experiment with the array HumanWG_6_V2. The following probes gave you a positive signal: ILMN_1763508, ILMN_1861090, ILMN_1890175, ILMN_1749304, ILMN_1894173, ILMN_1911643, ILMN_1891089, ILMN_1859810, ILMN_1843473, ILMN_1770856 a) Which transcripts do they map to? b) Which genes do these transcripts belong to? """ # Transcript and gene mappings for different probes array = 'HumanWG_6_V2' probes = [ 'ILMN_1763508', 'ILMN_1861090', 'ILMN_1890175', 'ILMN_1749304', 'ILMN_1894173', 'ILMN_1911643', 'ILMN_1891089', 'ILMN_1859810', 'ILMN_1843473', 'ILMN_1770856' ] for probe in probes: print(probe) request = '/regulatory/species/homo_sapiens/microarray/%s/probe/%s?content-type=application/json;gene=1;transcript=1' % ( array, probe) decoded = ensembl_rest.get_endpoint(server, request) if decoded: print("Probe length: %sbp Sequence: %s" %
import json
import ensembl_rest

# http://ebi-cli-003:3000 was left commented out next to this assignment.
server = "http://rest.ensembl.org"

# Alignment endpoint exercise
# CG-1a: Get in json format the LastZ pairwise alignment for
#        taeniopygia_guttata V gallus_gallus for region
#        2:106041430-106041480:1
ext = "/alignment/region/taeniopygia_guttata/2:106041430-106041480:1?method=LASTZ_NET;species_set=taeniopygia_guttata;species_set=gallus_gallus"

# A third parameter 'content_type' defaults to 'application/json', so there is
# no need to define it here.
endpoint = ensembl_rest.get_endpoint(server, ext)

# Pretty-print the decoded response with stable key ordering.
print(json.dumps(endpoint, sort_keys=True, indent=4))
import json
import ensembl_rest

server = "http://rest.ensembl.org"

# Family endpoint exercises
# CG-4: Get the information for families predicted for the human gene
#       ENSG00000283087. What do you notice?
ext = "/family/member/id/ENSG00000283087?"
endpoint = ensembl_rest.get_endpoint(server, ext)

# Pretty-print the decoded response with stable key ordering.
print(json.dumps(endpoint, sort_keys=True, indent=4))
import json, ensembl_rest server = 'http://rest.ensembl.org' phenotype_endpoint = '/phenotype/term/homo_sapiens/{}' variation_post_endpoint = '/variation/human?pops=1' # 1) Get all variants that are associated with the phenotype 'Coffee consumption'. For each variant print # a) the p-value for the association # b) the PMID for the publication which describes the association between that variant and Coffee consumption # c) the risk allele and the associated gene request = phenotype_endpoint.format('coffee consumption') associations = ensembl_rest.get_endpoint(server, request) variation2risk_allele = {} for association in associations: variation = association['Variation'] desc = association['description'] source = association['source'] mapped_to_accession = association['mapped_to_accession'] attributes = association['attributes'] p_value = attributes['p_value'] external_reference = attributes['external_reference'] associated_gene = attributes['associated_gene'] risk_allele = attributes.get('risk_allele', '') if risk_allele != '': variation2risk_allele[variation] = risk_allele print( "Variation: {}, Phenotype: {}, p-value: {}, PMID: {}, Associated gene(s): {}, Risk allele: {}" .format(variation, desc, p_value, external_reference, associated_gene,
import json
import ensembl_rest

server = 'http://rest.ensembl.org'
overlap_region_endpoint = '/overlap/region/human/{}?feature={}'
variation_post_endpoint = '/variation/human'
lookup_id_endpoint = '/lookup/id/{}?expand=1'
overlap_id_endpoint = '/overlap/id/{}?feature={}'

# 1) Print all variants that are located on chromosome 17 between 80348215
#    and 80348333.
#    Use the overlap endpoint to get the location (seq_region_name, start,
#    end), alleles, consequence_type and clinical_significance for each
#    variant in the region.
request = overlap_region_endpoint.format('17:80348215..80348333', 'variation')
variants = ensembl_rest.get_endpoint(server, request)

for v in variants:
    # Fields are read in the same order in which they appear in the output.
    print(
        "Location: {}:{}:{}-{}, Alleles: {}, Consequence: {}, Clinical significance: {}"
        .format(v['assembly_name'],
                v['seq_region_name'],
                v['start'],
                v['end'],
                '/'.join(v['alleles']),
                v['consequence_type'],
                v['clinical_significance']))

# 2) Get the variant class, evidence attributes, source and the
#    most_severe_consequence for all variants in that region from the variant
#    endpoint.
for t in efo['_embedded']['terms']: print("Link(IRI): %s" % (t['iri'])) if t['description']: for d in t['description']: print("Description: %s" % (d)) else: print('No description provided') print() ## main # 1. List all Epigenomes available in Ensembl Regulation server = "http://rest.ensembl.org" endpoint = '/regulatory/species/homo_sapiens/epigenome' decoded = ensembl_rest.get_endpoint(server, endpoint, 'application/json') print(json.dumps(decoded, indent=4, sort_keys=True)) # 2. Find additional information (where available) for each epigenome using the Ontology Lookup Service efo_server = "http://www.ebi.ac.uk/ols/api/ontologies/efo/terms?obo_id=" for r in decoded: print("Epigenome name: %s" % r['name']) # No EFO ID assigned to this epigenome if not r['efo_id']: print("No EFO ID assigned: %s\n" % (r['scientific_name'])) continue request = efo_server + r['efo_id'] efo = ensembl_rest.get_endpoint_efo(efo_server, request)
import json
import ensembl_rest

server = 'http://rest.ensembl.org'
ld_region_endpoint = '/ld/human/region/{}/{}'
ld_endpoint = '/ld/human/{}/{}'

# 1) Compute LD in the region 3:196064297-196068186
#    for the population 1000GENOMES:phase_3:CEU.
#    Print all results with r2=1 and d_prime=1.
ld_values = ensembl_rest.get_endpoint(
    server,
    ld_region_endpoint.format('3:196064297-196068186',
                              '1000GENOMES:phase_3:CEU'))

# Lazily select the records whose d_prime and r2 both convert to exactly 1.0.
high_ld_pairs = (
    ld_value
    for ld_value in ld_values
    if float(ld_value['d_prime']) == 1.0 and float(ld_value['r2']) == 1.0
)

for pair in high_ld_pairs:
    print("Pair: {}-{}".format(pair['variation1'], pair['variation2']))

# Blank separator line after the list of pairs.
print('')

# Compute pairwise LD for all variants that are not further away from
# rs535797132 than 500kb.
# Print all variants that are in LD (d_prime >= 0.8) with rs535797132.
# For each pair of variants also print d_prime and r2.
# Use 1000GENOMES:phase_3:FIN as the population.