def get(self, category):
    """
    Summarize a set of objects
    """
    # Flow: parse request args -> normalise subject CURIEs -> (for the
    # function category only) expand genes to their UniProt proteins ->
    # map2slim -> rewrite human UniProt subjects back to HGNC ids.
    args = parser.parse_args()
    # 'slim' and 'subject' are passed to map2slim explicitly, so take them
    # out of the dict that is later splatted as **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # str.replace is a no-op when the prefix is absent, so no guard is needed.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    if category == FUNCTION_CATEGORY:
        # get proteins for a gene only when the category is 'function'
        gene_prefixes = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
        slimmer_subjects = []
        for s in subjects:
            if any(prefix in s for prefix in gene_prefixes):
                prots = sg_dev.gene_to_uniprot_proteins(s)
                # Fall back to the gene id itself when no protein is known.
                slimmer_subjects += prots if prots else [s]
            else:
                slimmer_subjects.append(s)
    else:
        slimmer_subjects = subjects

    if category == ANATOMY_CATEGORY:
        category = 'anatomical entity'

    results = map2slim(subjects=slimmer_subjects,
                       slim=slim,
                       object_category=category,
                       user_agent=USER_AGENT,
                       **args)

    # To the fullest extent possible return HGNC ids
    # `checked` maps a UniProt id to its HGNC id, or to None when the lookup
    # found no HGNC gene; caching the miss avoids repeating the same remote
    # query for every association that shares the protein.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # Keep the last HGNC hit, as before.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
def get(self, id):
    """
    Returns function associations for a gene.

    IMPLEMENTATION DETAILS
    ----------------------

    Note: currently this is implemented as a query to the GO/AmiGO solr instance.
    This directly supports IDs such as:

     - ZFIN e.g. ZFIN:ZDB-GENE-050417-357

    Note that the AmiGO GOlr natively stores MGI annotations to MGI:MGI:nn. However,
    the standard for biolink is MGI:nnnn, so you should use this (will be transparently
    mapped to legacy ID)

    Additionally, for some species such as Human, GO has the annotation attached to the UniProt ID.
    Again, this should be transparently handled; e.g. you can use NCBIGene:6469, and this will be
    mapped behind the scenes for querying.
    """
    # Parse the request once; the same arguments are reused for the fallback
    # queries below instead of being re-parsed for every protein.
    query_args = core_parser.parse_args()

    assocs = search_associations(object_category='function', subject=id, **query_args)
    if assocs['associations']:
        return assocs

    # If there are no associations for the given ID, try other IDs.
    # Note the AmiGO instance does *not* support equivalent IDs
    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    logging.debug("Found no associations using {} - will try mapping to other IDs".format(id))
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')
    for prot in sg_dev.gene_to_uniprot_proteins(id):
        pr_assocs = search_associations(object_category='function', subject=prot, **query_args)
        # Merge each protein's hits into the (initially empty) result payload.
        assocs['associations'] += pr_assocs['associations']
    return assocs
def get(self):
    """
    For a given gene(s), summarize its annotations over a defined set of slim
    """
    # Flow: parse request args -> normalise subject CURIEs -> expand genes to
    # UniProt proteins -> map2slim over the 'function' category -> rewrite
    # human UniProt subjects back to HGNC ids.
    args = self.function_parser.parse_args()
    # 'slim' and 'subject' are passed to map2slim explicitly, so take them
    # out of the dict that is later splatted as **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # All ID conversion goes through `identifier_converter`; no SciGraph
    # client is needed here.

    # str.replace is a no-op when the prefix is absent, so no guard is needed.
    subjects = [x.replace('WormBase:', 'WB:') for x in subjects]

    gene_prefixes = ('HGNC:', 'NCBIGene:', 'ENSEMBL:')
    slimmer_subjects = []
    for s in subjects:
        if any(prefix in s for prefix in gene_prefixes):
            prots = identifier_converter.convert_gene_to_protein(s)
            # Fall back to the gene id itself when no protein is known.
            slimmer_subjects += prots if prots else [s]
        else:
            slimmer_subjects.append(s)

    results = map2slim(subjects=slimmer_subjects,
                       slim=slim,
                       object_category='function',
                       user_agent=USER_AGENT,
                       **args)

    # To the fullest extent possible return HGNC ids
    # `checked` maps a UniProt id to its HGNC id, or to None when the lookup
    # found no HGNC gene; caching the miss avoids repeating the same
    # conversion for every association that shares the protein.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in identifier_converter.convert_protein_to_gene(protein_id):
                    if gene.startswith('HGNC'):
                        # Keep the last HGNC hit, as before.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
class SciGraphIdentifierConverter(object):
    """
    ID conversion helper that delegates every lookup to a SciGraph client.
    """

    def __init__(self):
        # The endpoint comes from the 'scigraph_data' section of the biolink config.
        scigraph_url = get_biolink_config()['scigraph_data']['url']
        self.scigraph = SciGraph(scigraph_url)

    def convert_gene_to_protein(self, identifier):
        """
        Ask SciGraph for the UniProtKB protein IDs of the given gene ID
        and return whatever the client returns.
        """
        return self.scigraph.gene_to_uniprot_proteins(identifier)

    def convert_protein_to_gene(self, identifier):
        """
        Ask SciGraph for the gene IDs of the given UniProtKB ID
        and return whatever the client returns.
        """
        return self.scigraph.uniprot_protein_to_genes(identifier)
def get(self, category):
    """
    Summarize a set of objects
    """
    args = parser.parse_args()
    logging.info("category is {}".format(category))

    # 'slim' and 'subject' are handed to map2slim by name, so they must not
    # also appear in the **args splat.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    results = map2slim(subjects=subjects,
                       slim=slim,
                       rows=200,
                       object_category=category,
                       **args)

    # If there are no associations for the given ID, try other IDs.
    # Note the AmiGO instance does *not* support equivalent IDs
    total_assocs = sum(len(entry['assocs']) for entry in results)
    if total_assocs != 0 or len(subjects) != 1:
        return results

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # nota bene:
    # currently incomplete because code is not checking for the possibility of >1 subjects
    only_subject = subjects[0]
    logging.info("Found no associations using {} - will try mapping to other IDs".format(only_subject))
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')
    proteins = sg_dev.gene_to_uniprot_proteins(only_subject)
    if len(proteins) == 0:
        # Nothing to retry with; hand back the empty first attempt.
        return results

    return map2slim(subjects=proteins,
                    slim=slim,
                    rows=200,
                    object_category=category,
                    **args)
def get(self, category):
    """
    Summarize a set of objects
    """
    # Flow: parse request args -> normalise the first subject CURIE -> expand
    # a gene to its UniProt proteins -> map2slim -> rewrite human UniProt
    # subjects back to HGNC ids.
    args = parser.parse_args()
    # 'slim' and 'subject' are passed to map2slim explicitly, so take them
    # out of the dict that is later splatted as **args.
    slim = args.pop('slim')
    subjects = args.pop('subject')

    # One client serves both the gene->protein and the protein->gene lookups,
    # rather than building a new one for every protein inside the loop below.
    sg_dev = SciGraph(url='https://scigraph-data-dev.monarchinitiative.org/scigraph/')

    # Note that GO currently uses UniProt as primary ID for some sources: https://github.com/biolink/biolink-api/issues/66
    # https://github.com/monarch-initiative/dipper/issues/461
    # nota bene:
    # currently incomplete because code is not checking for the possibility of >1 subjects
    subjects[0] = subjects[0].replace('WormBase:', 'WB:', 1)

    prots = subjects
    if subjects[0].startswith(('HGNC', 'NCBIGene', 'ENSEMBL:')):
        mapped = sg_dev.gene_to_uniprot_proteins(subjects[0])
        if mapped:
            # Only switch to protein ids when the mapping found something.
            prots = mapped

    results = map2slim(subjects=prots,
                       slim=slim,
                       rows=200,
                       exclude_automatic_assertions=True,
                       object_category=category,
                       **args)

    # To the fullest extent possible return HGNC ids
    # `checked` maps a UniProt id to its HGNC id, or to None when the lookup
    # found no HGNC gene; caching the miss avoids repeating the same remote
    # query for every association that shares the protein.
    checked = {}
    for result in results:
        for association in result['assocs']:
            subject = association['subject']
            taxon = subject['taxon']['id']
            protein_id = subject['id']
            if taxon != 'NCBITaxon:9606' or not protein_id.startswith('UniProtKB:'):
                continue
            if protein_id not in checked:
                hgnc_id = None
                for gene in sg_dev.uniprot_protein_to_genes(protein_id):
                    if gene.startswith('HGNC'):
                        # Keep the last HGNC hit, as before.
                        hgnc_id = gene
                checked[protein_id] = hgnc_id
            if checked[protein_id] is not None:
                subject['id'] = checked[protein_id]

    return results
import logging

from flask import request
from flask_restplus import Resource, inputs
from biolink.datamodel.serializers import bbop_graph, bio_object
from biolink.error_handlers import NoResultFoundException, UnhandledException
from scigraph.scigraph_util import SciGraph
from scigraph.model.BBOPGraph import BBOPGraph
from biolink.api.restplus import api
from biolink.settings import get_biolink_config

log = logging.getLogger(__name__)

# Two SciGraph clients, one per configured endpoint: data and ontology.
sg_data = SciGraph(get_biolink_config()['scigraph_data']['url'])
sg_ont = SciGraph(get_biolink_config()['scigraph_ontology']['url'])


@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
class NodeResource(Resource):

    @api.marshal_list_with(bio_object)
    def get(self, id):
        """
        Returns a graph node.

        A node is an abstract representation of some kind of entity. The entity may be a physical thing such as a patient,
        a molecular entity such as a gene or protein, or a conceptual entity such as a class from an ontology.
        """
        # The lookup goes to the data endpoint; the result is handed straight
        # to the marshalling decorator.
        return sg_data.bioobject(id)
from scigraph.scigraph_util import SciGraph
import json

# Shared client built with the library's default arguments.
sg = SciGraph()


def test_node():
    """Fetching MP:0000272 yields a node whose label is the expected term name."""
    node = sg.node(id="MP:0000272")
    assert node.lbl == "abnormal aorta morphology"


def test_bio_operations():
    """The phenotype ZP:0004204 maps to at least one entity node."""
    phenotype_id = "ZP:0004204"
    entity_nodes = sg.phenotype_to_entity_list(id=phenotype_id)
    assert len(entity_nodes) > 0
import logging

from flask import request
from flask_restplus import Resource, inputs
from biolink.datamodel.serializers import association, bbop_graph, bio_object
from biolink.error_handlers import NoResultFoundException
from scigraph.scigraph_util import SciGraph
from scigraph.model.BBOPGraph import BBOPGraph
from biolink.api.restplus import api
from biolink.settings import get_biolink_config

log = logging.getLogger(__name__)

# SciGraph client pointed at the URL configured under 'scigraph_data'.
sg = SciGraph(get_biolink_config()['scigraph_data']['url'])

# Resource exposing a single GET that looks a node up by its CURIE.
@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
class NodeResource(Resource):

    @api.marshal_list_with(bio_object)
    def get(self, id):
        """
        Returns a graph node.

        A node is an abstract representation of some kind of entity. The entity may be a physical thing such as a patient,
        a molecular entity such as a gene or protein, or a conceptual entity such as a class from an ontology.
        """
        # The object returned by the client is passed through the
        # bio_object marshalling declared on this method.
        graph = sg.bioobject(id)
        return graph

@api.doc(params={'id': 'CURIE e.g. HP:0000465'})
# Namespace under which the bioentity routes are registered.
ns = api.namespace('bioentity', description='Retrieval of domain entities plus associations')

# Parser declaring the query arguments shared by the association endpoints.
core_parser = api.parser()
core_parser.add_argument('rows', type=int, required=False, default=100, help='number of rows')
core_parser.add_argument('start', type=int, required=False, default=1, help='row to start at')
# NOTE(review): the flags below use type=bool; bool() of any non-empty string
# (including "false") is True, so these may not parse as intended. Consider
# flask_restplus `inputs.boolean` - confirm against the request-parsing docs.
core_parser.add_argument('unselect_evidence', type=bool, help='If set, excludes evidence objects in response')
core_parser.add_argument('exclude_automatic_assertions', default=False, type=bool, help='If set, excludes associations that involve IEAs (ECO:0000501)')
core_parser.add_argument('fetch_objects', type=bool, default=True, help='If true, returns a distinct set of association.objects (typically ontology terms). This appears at the top level of the results payload')
core_parser.add_argument('use_compact_associations', type=bool, default=False, help='If true, returns results in compact associations format')
# action='append' collects repeated 'slim' parameters.
core_parser.add_argument('slim', action='append', help='Map objects up (slim) to a higher level category. Value can be ontology class ID or subset ID')
core_parser.add_argument('evidence', help="""Object id, e.g. ECO:0000501 (for IEA; Includes inferred by default) or a specific publication or other supporting ibject, e.g. ZFIN:ZDB-PUB-060503-2. 
 """)

# Module-level SciGraph client used by the get_object_* helpers below.
scigraph = SciGraph('https://scigraph-data.monarchinitiative.org/scigraph/')

# Relation value used when searching for homology associations.
homol_rel = HomologyTypes.Homolog.value

def get_object_gene(id, **args):
    """
    Build a 'Gene' bio-object for `id` with its association lists attached.

    The node is fetched through the module-level SciGraph client, then four
    association searches are run for the same subject (phenotype, homology,
    disease, genotype). The 'associations' list of each result is stored on
    the object, which is then returned. Extra keyword arguments are forwarded
    unchanged to every search_associations call.
    """
    obj = scigraph.bioobject(id, 'Gene')
    obj.phenotype_associations = search_associations(subject=id, object_category='phenotype', **args)['associations']
    # Homology is a gene-to-gene search restricted to the homolog relation.
    obj.homology_associations = search_associations(subject=id, rel=homol_rel, object_category='gene', **args)['associations']
    obj.disease_associations = search_associations(subject=id, object_category='disease', **args)['associations']
    # The genotype search is the only one run with invert_subject_object=True.
    obj.genotype_associations = search_associations(subject=id, invert_subject_object=True, object_category='genotype', **args)['associations']
    return(obj)

def get_object_genotype(id, **args):
    obj = scigraph.bioobject(id, 'Genotype')
    obj.phenotype_associations = search_associations(subject=id, object_category='phenotype', **args)['associations']
def __init__(self):
    """Create the SciGraph client from the 'scigraph_data' URL in the biolink config."""
    data_endpoint = get_biolink_config()['scigraph_data']['url']
    self.scigraph = SciGraph(data_endpoint)
default=False, help='Should only the longest entity be returned for an overlapping group') parser.add_argument('include_abbreviation', type=inputs.boolean, default=False, help='Should abbreviations be included') parser.add_argument('include_acronym', type=inputs.boolean, default=False, help='Should acronyms be included') parser.add_argument('include_numbers', type=inputs.boolean, default=False, help='Should numbers be included') scigraph = SciGraph(get_biolink_config()['scigraph_ontology']['url']) def parse_args_for_annotator(parser): """ Convenience method for parsing and preparing parameters for SciGraph annotator """ args = parser.parse_args() if 'include_category' in args: val = args.pop('include_category') args['includeCat'] = val if 'exclude_category' in args: val = args.pop('exclude_category') args['excludeCat'] = val if 'min_length' in args: val = args.pop('min_length')