def test_get_all_properties_with_class_range():
    """Check property lookup by class range, directly and via ancestors."""
    tk = Toolkit()

    direct = tk.get_all_properties_with_class_range('gene')
    assert 'has gene' in direct

    inherited = tk.get_all_properties_with_class_range(
        'gene', check_ancestors=True)
    assert 'subject' in inherited

    inherited_curies = tk.get_all_properties_with_class_range(
        'gene', check_ancestors=True, formatted=True)
    assert 'biolink:subject' in inherited_curies
def get_toolkit(biolink_release: Optional[str] = None) -> Toolkit:
    """
    Return a cached bmt.Toolkit, creating and caching it on first use.

    Toolkits are cached per release in ``_toolkit_versions``. When no
    release is given, the module-level default toolkit is used (created
    lazily) and also registered under the version it reports.

    Parameters
    ----------
    biolink_release: Optional[str]
        Identifier passed to ``get_biolink_model_schema`` to resolve the
        (Biolink) Model schema to load (default: None, use the default
        Biolink Model Toolkit schema)

    Returns
    -------
    Toolkit
        The toolkit for the requested release, or the default toolkit.
    """
    global _default_toolkit, _toolkit_versions

    if biolink_release:
        # Explicit release: build once, then always serve from the cache.
        if biolink_release not in _toolkit_versions:
            release_schema = get_biolink_model_schema(biolink_release)
            _toolkit_versions[biolink_release] = Toolkit(schema=release_schema)
        return _toolkit_versions[biolink_release]

    if _default_toolkit is None:
        _default_toolkit = Toolkit()

    # Make the default toolkit reachable by its own version string too.
    default_version = _default_toolkit.get_model_version()
    if default_version not in _toolkit_versions:
        _toolkit_versions[default_version] = _default_toolkit
    return _default_toolkit
def __init__(self, host, port, auth, query_timeout):
    """Create the Neo4j HTTP driver and a default-schema Toolkit.

    ``schema``, ``summary`` and ``meta_kg`` start out as ``None``;
    ``query_timeout`` is stored as given.
    """
    self.driver = Neo4jHTTPDriver(host=host, port=port, auth=auth)
    # Nothing is computed at construction time for these three.
    self.schema = self.summary = self.meta_kg = None
    self.query_timeout = query_timeout
    self.toolkit = Toolkit()
def __init__(self, host, port, auth):
    """Create the Neo4j HTTP driver and a Toolkit for the configured release.

    The Biolink version is read from ``config['BL_VERSION']`` (falling back
    to ``'1.5.0'``) and used to build the raw GitHub URL of the model YAML
    that the Toolkit is loaded from.
    """
    self.driver = Neo4jHTTPDriver(host=host, port=port, auth=auth)
    # Nothing is computed at construction time for these three.
    self.schema = self.summary = self.meta_kg = None

    self.bl_version = config.get('BL_VERSION', '1.5.0')
    self.bl_url = f'https://raw.githubusercontent.com/biolink/biolink-model/{self.bl_version}/biolink-model.yaml'
    self.toolkit = Toolkit(self.bl_url)
def test_get_all_predicates_with_class_range():
    """Check predicate lookup by class range, directly and via ancestors."""
    tk = Toolkit()

    direct = tk.get_all_predicates_with_class_range('disease')
    assert 'manifestation of' in direct

    inherited = tk.get_all_predicates_with_class_range(
        'disease', check_ancestors=True)
    assert 'disease has basis in' in inherited

    inherited_curies = tk.get_all_predicates_with_class_range(
        'disease', check_ancestors=True, formatted=True)
    assert 'biolink:disease_has_basis_in' in inherited_curies
class BiolinkModel:
    """Programmatic model of Biolink.

    Wraps a ``Toolkit`` loaded from the biolink-model YAML of one release
    and offers a few helpers for class-name conversion and for reducing a
    list of concepts to its leaves.
    """

    root_type = 'biolink:NamedThing'

    def __init__(self, bl_version='1.5.0'):
        """Load the Toolkit from the raw GitHub YAML of ``bl_version``."""
        self.bl_url = f'https://raw.githubusercontent.com/biolink/biolink-model/{bl_version}/biolink-model.yaml'
        self.toolkit = Toolkit(self.bl_url)

    def to_camel_case(self, snake_str):
        """Convert a snake case string to camel case.

        Every ``_``-separated component is title-cased, so the first letter
        of the result is upper case as well (``named_thing`` -> ``NamedThing``).
        """
        return ''.join(part.title() for part in snake_str.split('_'))

    def get_class(self, name):
        """Get a Python class from a string name.

        The name is looked up as an attribute of the already-imported
        ``biolink.model`` module; ``KeyError`` is raised if that module is
        not in ``sys.modules`` and ``AttributeError`` if the name is missing.
        """
        return getattr(sys.modules["biolink.model"], name)

    def is_derived(self, a_class_name, classes):
        """Return true if the class derives from any of the provided classes.

        :param a_class_name: snake case name of the class to test.
        :param classes: iterable of candidate base classes.
        :return: True when the named class is a subclass of at least one of
            ``classes``; False otherwise (including when ``classes`` is empty).
        """
        bases = tuple(classes)
        if not bases:
            # Nothing to compare against; do not even resolve the class.
            return False
        # get_class returns a class object, so the relationship to test is
        # issubclass -- isinstance would ask whether the class object itself
        # is an *instance* of the base, which is never the intended question.
        candidate = self.get_class(self.to_camel_case(a_class_name))
        return issubclass(candidate, bases)

    def find_biolink_leaves(self, biolink_concepts):
        """
        Given a list of biolink concepts, returns the leaves removing any parent concepts.
        :param biolink_concepts: list of biolink concepts
        :return: set of leaf concepts, i.e. the inputs that are neither an
            ancestor nor a mixin of another input, and that the toolkit knows.
        """
        ancestry_set = set()
        all_mixins_in_tree = set()
        all_concepts = set(biolink_concepts)
        # Keep track of things like "MacromolecularMachine" in current datasets
        # @TODO remove this and make nodes as errors
        unknown_elements = set()
        for x in all_concepts:
            current_element = self.toolkit.get_element(x)
            mixins = set()
            if current_element:
                if 'mixins' in current_element and len(
                        current_element['mixins']):
                    for m in current_element['mixins']:
                        mixins.add(self.toolkit.get_element(m).class_uri)
            else:
                unknown_elements.add(x)
            # Non-reflexive: a concept must not eliminate itself.
            ancestors = set(
                self.toolkit.get_ancestors(x, reflexive=False, formatted=True))
            ancestry_set |= ancestors
            all_mixins_in_tree |= mixins
        return all_concepts - ancestry_set - all_mixins_in_tree - unknown_elements

    def get_leaf_class(self, names):
        """Return one leaf class from the provided list of names.

        The leaves come from a set, so when several leaves exist which one
        is returned is not defined. Raises ``IndexError`` if there is none.
        """
        leaves = list(self.find_biolink_leaves(names))
        return leaves[0]
def test_get_all_associations():
    """Check association listing in plain-name and formatted (CURIE) form."""
    tk = Toolkit()

    names = tk.get_all_associations()
    for expected in ('association', 'gene to gene association'):
        assert expected in names
    # A plain entity class must not be reported as an association.
    assert 'named thing' not in names

    curies = tk.get_all_associations(formatted=True)
    for expected in ('biolink:Association', 'biolink:GeneToGeneAssociation'):
        assert expected in curies
def test_get_all_node_properties():
    """Check node property listing in plain-name and formatted form."""
    tk = Toolkit()

    names = tk.get_all_node_properties()
    for expected in ('name', 'category', 'has gene'):
        assert expected in names

    curies = tk.get_all_node_properties(formatted=True)
    for expected in ('biolink:name', 'biolink:category', 'biolink:has_gene'):
        assert expected in curies
def test_get_all_edge_properties():
    """Check edge property listing in plain-name and formatted form."""
    tk = Toolkit()

    names = tk.get_all_edge_properties()
    for expected in ('subject', 'object', 'frequency qualifier'):
        assert expected in names

    curies = tk.get_all_edge_properties(formatted=True)
    for expected in ('biolink:subject', 'biolink:object',
                     'biolink:frequency_qualifier'):
        assert expected in curies
def __init__(self, label_dir):
    """Set up the lookup endpoint, the Biolink 1.6.1 Toolkit and empty caches.

    ``label_dir`` is stored as given; the ancestor/prefix maps, the set of
    ignored prefixes and the extra-label table all start out empty.
    """
    # Alternative endpoint kept for reference:
    # self.url_base = 'http://arrival.edc.renci.org:32511/bl'
    self.url_base = 'https://bl-lookup-sri.renci.org/bl'

    model_yaml = 'https://raw.githubusercontent.com/biolink/biolink-model/1.6.1/biolink-model.yaml'
    self.toolkit = Toolkit(model_yaml)

    self.ancestor_map, self.prefix_map = {}, {}
    self.ignored_prefixes = set()
    self.extra_labels = {}
    self.label_dir = label_dir
def test_get_all_entities():
    """Check entity listing in plain-name and formatted (CURIE) form."""
    tk = Toolkit()

    names = tk.get_all_entities()
    for expected in ('named thing', 'gene', 'disease'):
        assert expected in names
    # Associations and predicates are not entities.
    for unexpected in ('association', 'related to'):
        assert unexpected not in names

    curies = tk.get_all_entities(formatted=True)
    for expected in ('biolink:NamedThing', 'biolink:Gene', 'biolink:Disease'):
        assert expected in curies
def test_get_all_elements():
    """Check that classes, slots and types all show up among the elements."""
    tk = Toolkit()

    names = tk.get_all_elements()
    for expected in ('named thing', 'association', 'related to', 'uriorcurie'):
        assert expected in names

    curies = tk.get_all_elements(formatted=True)
    expected_curies = (
        'biolink:NamedThing',
        'biolink:GeneToGeneAssociation',
        'biolink:related_to',
        'metatype:Uriorcurie',
        'biolink:FrequencyValue',
    )
    for expected in expected_curies:
        assert expected in curies
def test_get_toolkit():
    """
    Test that get_toolkit returns a Toolkit whose model version equals the
    version reported by a freshly constructed default Toolkit.
    """
    obtained = get_toolkit()
    assert isinstance(obtained, Toolkit)

    actual_version = obtained.get_model_version()
    default_version = Toolkit().get_model_version()
    assert actual_version == default_version
def test_get_slot_range():
    """Check slot ranges, optionally with ancestors and formatted output."""
    tk = Toolkit()

    # (expected member, slot name, keyword options for get_slot_range)
    cases = [
        ('disease or phenotypic feature', 'treats', {}),
        ('biological entity', 'treats', {'include_ancestors': True}),
        ('biolink:BiologicalEntity', 'treats',
         {'include_ancestors': True, 'formatted': True}),
        ('label type', 'name', {}),
        ('uriorcurie', 'relation', {}),
        ('metatype:Uriorcurie', 'relation', {'formatted': True}),
    ]
    for expected, slot, options in cases:
        assert expected in tk.get_slot_range(slot, **options)
def test_get_all_predicates_with_class_domain():
    """Check slot lookup by class domain via get_all_slots_with_class_domain.

    Covers a concrete class ('gene') and a mixin-style class
    ('gene or gene product'), each directly, with ancestors, and with
    formatted output.
    """
    toolkit = Toolkit()

    # (expected member, domain class, keyword options)
    # The verbatim-duplicated 'expressed in' check was dropped; one copy
    # exercises exactly the same call.
    cases = [
        ('genetically interacts with', 'gene', {}),
        ('interacts with', 'gene', {'check_ancestors': True}),
        ('biolink:interacts_with', 'gene',
         {'check_ancestors': True, 'formatted': True}),
        ('in complex with', 'gene or gene product', {}),
        ('expressed in', 'gene or gene product', {}),
        ('interacts with', 'gene or gene product', {'check_ancestors': True}),
        ('biolink:interacts_with', 'gene or gene product',
         {'check_ancestors': True, 'formatted': True}),
    ]
    for expected, domain, options in cases:
        assert expected in toolkit.get_all_slots_with_class_domain(
            domain, **options)
class MOCK_GRAPH_ADAPTER:
    """Stand-in graph adapter that records whether run_cypher was invoked."""

    # Flipped to True on the instance once run_cypher has been awaited.
    called = False
    toolkit = Toolkit()

    async def run_cypher(self, cypher):
        """Assert the expected query text was passed and mark the call."""
        expected_query = "SOME CYPHER"
        assert cypher == expected_query
        self.called = True

    @staticmethod
    def convert_to_dict(item):
        """Return a fixed one-element result list, ignoring ``item``."""
        canned = {"trapi": {"compatible_ result"}}
        return [canned]
def get_toolkit(schema: Optional[str] = None) -> Toolkit:
    """
    Return the module-level bmt.Toolkit, creating it on first use.

    On the first call the toolkit is built from ``schema``; when ``schema``
    is falsy, the ``'biolink-model'`` entry of ``get_config()`` is used
    instead. Once a toolkit exists, ``schema`` is ignored.
    """
    global toolkit
    if toolkit is not None:
        return toolkit

    if not schema:
        schema = get_config()['biolink-model']
    toolkit = Toolkit(schema=schema)
    return toolkit
def test_parent():
    """Check get_parent for predicates and for the 'gene' class."""
    tk = Toolkit()

    # (expected parent, element name, keyword options for get_parent)
    cases = [
        ('contributes to', 'causes', {}),
        ('interacts with', 'physically interacts with', {}),
        ('genomic entity', 'gene', {}),
        ('biolink:GenomicEntity', 'gene', {'formatted': True}),
    ]
    for expected, element, options in cases:
        assert expected in tk.get_parent(element, **options)
def test_get_value_type_for_slot():
    """Check the value type reported for a handful of slots."""
    tk = Toolkit()

    # (expected value type, slot name, keyword options)
    cases = [
        ('uriorcurie', 'subject', {}),
        ('uriorcurie', 'object', {}),
        ('string', 'symbol', {}),
        ('biolink:CategoryType', 'category', {'formatted': True}),
    ]
    for expected, slot, options in cases:
        assert expected in tk.get_value_type_for_slot(slot, **options)
def test_get_all_properties_with_class_domain():
    """Check property lookup by class domain for entities and associations."""
    tk = Toolkit()

    # (expected member, domain class, keyword options)
    cases = [
        ('category', 'entity', {}),
        ('category', 'gene', {'check_ancestors': True}),
        ('biolink:category', 'gene',
         {'check_ancestors': True, 'formatted': True}),
        ('subject', 'association', {}),
        ('subject', 'association', {'check_ancestors': True}),
        ('biolink:subject', 'association',
         {'check_ancestors': True, 'formatted': True}),
    ]
    for expected, domain, options in cases:
        assert expected in tk.get_all_properties_with_class_domain(
            domain, **options)
def test_parent():
    """Check get_parent for predicates and for gene-related classes."""
    tk = Toolkit()

    # (expected parent, element name, keyword options for get_parent)
    cases = [
        ('contributes to', 'causes', {}),
        ('interacts with', 'physically interacts with', {}),
        ('gene or gene product', 'gene', {}),
        ('biolink:GeneOrGeneProduct', 'gene or gene product',
         {'formatted': True}),
    ]
    for expected, element, options in cases:
        assert expected in tk.get_parent(element, **options)
def set_toolkit(biolink_version):
    """Select the Biolink version and (re)bind the global ``TOOLKIT``.

    ``BIOLINK_VERSION`` is always updated. For ``'latest'`` a default
    ``Toolkit()`` is created. Otherwise the directory containing the
    ``biolink_schemas`` package is scanned for a file whose base name
    contains ``'biolink'`` and ends in ``-<version>``; the first match is
    loaded. If nothing matches, ``TOOLKIT`` is left as it was and a warning
    is logged so the stale toolkit does not go unnoticed.

    :param biolink_version: ``'latest'`` or a version string such as the
        trailing part of a bundled schema file name.
    """
    global BIOLINK_VERSION
    global TOOLKIT
    BIOLINK_VERSION = biolink_version
    from bmt import Toolkit

    if BIOLINK_VERSION == 'latest':
        logger.info('Using latest PyPI Biolink version.')
        TOOLKIT = Toolkit()
        return

    logger.info('Loading Biolink Version {}'.format(BIOLINK_VERSION))
    schema_dir = os.path.abspath(os.path.dirname(biolink_schemas.__file__))
    for schema in os.listdir(schema_dir):
        # Parse schema filename: "<something-with-biolink>-<version>.<ext>"
        parse_base = os.path.splitext(os.path.basename(schema))[0]
        if 'biolink' not in parse_base:
            continue
        version = parse_base.split('-')[-1]
        if version == BIOLINK_VERSION:
            TOOLKIT = Toolkit(os.path.join(schema_dir, schema))
            break
    else:
        # Loop finished without a match: TOOLKIT was not rebound.
        logger.warning(
            'No bundled Biolink schema found for version {}; '
            'TOOLKIT left unchanged.'.format(BIOLINK_VERSION))
    return
def test_children():
    """Check get_children for predicates and for 'genomic entity'."""
    tk = Toolkit()

    # (expected child, element name, keyword options for get_children)
    cases = [
        ('causes', 'contributes to', {}),
        ('physically interacts with', 'interacts with', {}),
        ('gene', 'genomic entity', {}),
        ('biolink:Gene', 'genomic entity', {'formatted': True}),
    ]
    for expected, element, options in cases:
        assert expected in tk.get_children(element, **options)
def __init__(self):
    """Create the toolkit and precompute slot names and id-prefix lookups.

    ``entity_prefix_mapping`` maps the pascal-case form of each class name
    to that class's ``id_prefixes``; classes the toolkit cannot resolve or
    that have no (or empty) ``id_prefixes`` are left out.
    """
    self.bmt = BMToolkit()
    self.all_slots = self.bmt.get_all_slots()

    formatted_slots = []
    for slot_name in self.all_slots:
        formatted_slots.append("biolink:" + slot_name.replace(" ", "_"))
    self.all_slots_formatted = formatted_slots

    self.prefix = "biolink:"

    prefix_lookup = {}
    for el_name in self.bmt.get_all_classes():
        el = self.bmt.get_element(el_name)
        if el is None:
            continue
        id_prefixes = getattr(el, "id_prefixes", [])
        if not id_prefixes:
            continue
        # Note: `bmt` here is the module, not the toolkit held in self.bmt.
        prefix_lookup[bmt.util.format(el_name, case="pascal")] = id_prefixes
    self.entity_prefix_mapping = prefix_lookup
def test_get_element():
    """Check that aliases and case variants resolve to the same element."""
    tk = Toolkit()

    gene_element = tk.get_element('gene')
    locus_element = tk.get_element('locus')
    assert gene_element == locus_element

    # (name passed to get_element, name of the element expected back)
    alias_cases = (
        ('drug intake', 'drug exposure'),
        ('molecular function', 'molecular activity'),
        ('RNA Product', 'RNA product'),
        ('rna product', 'RNA product'),
    )
    for alias, canonical_name in alias_cases:
        element = tk.get_element(alias)
        assert element and element.name == canonical_name
def get_toolkit() -> Toolkit:
    """
    Return the module-level bmt.Toolkit, instantiating it on first use.

    Returns
    -------
    bmt.Toolkit
        the shared instance of bmt.Toolkit

    """
    global toolkit
    if toolkit is not None:
        return toolkit
    toolkit = Toolkit()
    return toolkit
def generate_bl_map(url=None, version='latest'):
    """Generate map (dict) from BiolinkModel.

    Returns a pair ``(data, uri_map)``: ``data`` holds the per-element
    ancestor/descendant/lineage lists (``'geneology'``) and the raw
    elements (``'raw'``); ``uri_map`` maps each external mapping URI to a
    list of ``{'mapping_type', 'mapping'}`` records.
    """
    get_models()
    if url is None:
        url = models[version]
    wrapper = bmt_wrapper(Toolkit(url))

    elements = (
        wrapper.get_descendants('related to')
        + wrapper.get_descendants('association')
        + wrapper.get_descendants('named thing')
        + ['named thing', 'related to', 'association']
        + get_all_mixins(wrapper)
    )

    geneology = {}
    for entity_type in elements:
        entry_key = key_case(entity_type)
        ancestor_uris = [
            wrapper.name_to_uri(name)
            for name in wrapper.get_ancestors(entity_type)
            if name != entity_type
        ]
        descendant_uris = [
            wrapper.name_to_uri(name)
            for name in wrapper.get_descendants(entity_type)
        ]
        geneology[entry_key] = {
            'ancestors': ancestor_uris,
            'descendants': descendant_uris,
            'lineage': ancestor_uris + descendant_uris,
        }

    raw = {key_case(name): wrapper.get_element(name) for name in elements}
    elements_by_uri = {
        wrapper.name_to_uri(name): wrapper.get_element(name)
        for name in elements
    }

    # (element field, mapping type), in the order records are appended.
    mapping_fields = (
        ('mappings', 'exact'),          # For Versions < 1.4, the term is mappings
        ('exact_mappings', 'exact'),    # For versions >= 1.4.0, plus other kinds
        ('narrow_mappings', 'narrow'),
        ('broad_mappings', 'broad'),
        ('related_mappings', 'related'),
        ('close_mappings', 'close'),
    )
    uri_map = defaultdict(list)
    for element_uri, element in elements_by_uri.items():
        for field, mapping_type in mapping_fields:
            for uri in element.get(field, []):
                uri_map[uri].append(
                    {'mapping_type': mapping_type, 'mapping': element_uri})

    return {'geneology': geneology, 'raw': raw}, uri_map
def load_edges(g: nx.Graph, path: str = 'data/semmeddb_edges.csv'):
    """
    Add one edge to ``g`` per row of the SemMedDB edges CSV.

    http://34.229.55.225/edges_neo4j.csv
    CSV row example:
    SEMMED_PRED,pmids,negated,:TYPE,:START_ID,:END_ID,n_pmids,is_defined_by,relation,provided_by
    AFFECTS,20801151,False,affects,UMLS:C1412045,UMLS:C0023946,1,semmeddb,semmeddb:affects,semmeddb_sulab

    :param g: graph that receives the edges (mutated in place).
    :param path: CSV file to read; defaults to the original hard-coded
        location.

    Each edge runs from ``:START_ID`` to ``:END_ID``. ``:TYPE`` (spaces
    replaced by underscores) becomes the ``edge_label`` when the toolkit
    recognises it as an edge label, otherwise ``'related_to'`` is used.
    """
    # Force pmids to text: a column made only of single numeric ids would
    # otherwise be parsed as a number and could not be split on ';'.
    df = pd.read_csv(path, dtype={'pmids': str})
    toolkit = Toolkit()

    def process_row(row):
        pmids = row['pmids']
        # pandas represents an empty cell as NaN, not None.
        if pmids is None or pd.isna(pmids):
            publications = None
        else:
            publications = ['PMID:' + i for i in pmids.split(';')]

        t = row[':TYPE'].replace(' ', '_')
        edge_label = t if toolkit.is_edgelabel(t) else 'related_to'

        kwargs = dict(
            publications=publications,
            negated=row['negated'],
            edge_label=edge_label,
            defined_by=row['is_defined_by'],
            provided_by=[row['provided_by']],
            relation=row['relation'],
        )
        g.add_edge(row[':START_ID'], row[':END_ID'], **kwargs)

    # Plain loop: the rows are processed for their side effect on g only.
    for _, row in df.iterrows():
        process_row(row)
from bmt import Toolkit from fastapi import Body from fastapi.exceptions import HTTPException from fastapi.responses import JSONResponse import httpx from reasoner_pydantic import Query as ReasonerQuery, Response from starlette.middleware.cors import CORSMiddleware from starlette.requests import Request from .config import settings from .identifiers import map_identifiers from .util import load_example from .trapi import TRAPI BMT = Toolkit() openapi_kwargs = dict( title="ROBOKOP ARA", version="2.6.3", terms_of_service="N/A", translator_component="ARA", translator_teams=["Ranking Agent"], contact={ "name": "Kenneth Morton", "email": "*****@*****.**", "x-id": "kennethmorton", "x-role": "responsible developer", }, openapi_tags=[{ "name": "robokop"
class NodeFactory:
    """Build node dicts with a preferred identifier, equivalents and types.

    Ancestor types come from the Biolink Toolkit, preferred identifier
    prefixes from the bl-lookup web service, and optional extra labels from
    per-prefix ``labels`` files under ``label_dir``. Lookups are memoised on
    the instance.
    """

    def __init__(self, label_dir):
        """Set up the lookup endpoint, the Biolink 1.6.1 Toolkit and empty caches."""
        #self.url_base = 'http://arrival.edc.renci.org:32511/bl'
        self.url_base = 'https://bl-lookup-sri.renci.org/bl'
        self.toolkit = Toolkit(
            'https://raw.githubusercontent.com/biolink/biolink-model/1.6.1/biolink-model.yaml'
        )
        self.ancestor_map = {}
        self.prefix_map = {}
        self.ignored_prefixes = set()
        self.extra_labels = {}
        self.label_dir = label_dir

    def get_ancestors(self, input_type):
        """Return the class URIs of ``input_type``'s ancestors (memoised).

        ``input_type`` itself is put first if it is not already in the list.
        """
        if input_type in self.ancestor_map:
            return self.ancestor_map[input_type]
        a = self.toolkit.get_ancestors(input_type)
        ancs = [self.toolkit.get_element(ai)['class_uri'] for ai in a]
        if input_type not in ancs:
            ancs = [input_type] + ancs
        self.ancestor_map[input_type] = ancs
        return ancs

    def get_prefixes(self, input_type):
        """Return the ordered id prefixes for ``input_type`` (memoised).

        The prefixes are fetched from ``{url_base}/{input_type}``. If the
        response cannot be decoded or has no ``id_prefixes``, a fixed
        fallback list is used. Consecutive duplicates are collapsed.
        """
        if input_type in self.prefix_map:
            return self.prefix_map[input_type]
        url = f'{self.url_base}/{input_type}'
        response = requests.get(url)
        try:
            j = response.json()
            prefs = j['id_prefixes']
        except Exception:
            # Deliberately best-effort, but no longer a bare except, so
            # KeyboardInterrupt/SystemExit are not swallowed.
            #this is a mega hack to deal with the taxon change
            prefs = ['NCBITaxon', 'MESH']
        #The pref are in a particular order, but apparently it can have dups (ugh)
        # The leading '' is a sentinel so newprefs[-1] always exists.
        newprefs = ['']
        for pref in prefs:
            if not pref == newprefs[-1]:
                newprefs.append(pref)
        prefs = newprefs[1:]
        self.prefix_map[input_type] = prefs
        return prefs

    def make_json_id(self, input):
        """Return ``{'identifier': ...}`` for ``input``, adding ``'label'``
        when ``input`` is a LabeledID with a non-empty label."""
        if isinstance(input, LabeledID):
            if input.label is not None and input.label != '':
                return {'identifier': input.identifier, 'label': input.label}
            return {'identifier': input.identifier}
        return {'identifier': input}

    def clean_list(self, input_identifiers):
        """Collapse entries that refer to the same identifier.

        For an identifier that occurs more than once, the first LabeledID
        is kept. If a repeated identifier has no LabeledID at all, the input
        is printed and the process exits.
        """
        #Sometimes we end up with something like [(HP:123,'name'),HP:123,UMLS:3445] Clean up
        cleanup = defaultdict(list)
        for x in list(input_identifiers):
            if isinstance(x, LabeledID):
                cleanup[x.identifier].append(x)
            else:
                cleanup[x].append(x)
        cleaned = []
        for v in cleanup.values():
            if len(v) == 1:
                cleaned.append(v[0])
            else:
                #Originally, we were just trying to get the LabeledID. But sometimes we get more than one, so len(v)
                # can be more than two.
                wrote = False
                for vi in v:
                    if isinstance(vi, LabeledID):
                        cleaned.append(vi)
                        wrote = True
                        break
                if not wrote:
                    print(input_identifiers)
                    exit()
        return cleaned

    def load_extra_labels(self, prefix):
        """Load ``{label_dir}/{prefix}/labels`` into ``extra_labels[prefix]``.

        The file is read as tab-separated ``identifier<TAB>label`` lines.
        Lines without both fields (blank lines, missing label) are skipped.
        A missing file yields an empty table for the prefix.
        """
        labelfname = os.path.join(self.label_dir, prefix, 'labels')
        lbs = {}
        if os.path.exists(labelfname):
            with open(labelfname, 'r') as inf:
                for line in inf:
                    fields = line.strip().split('\t')
                    if len(fields) < 2:
                        # Malformed line; previously raised IndexError.
                        continue
                    lbs[fields[0]] = fields[1]
        self.extra_labels[prefix] = lbs

    def apply_labels(self, input_identifiers, labels):
        """Wrap identifiers that have a known label in LabeledID.

        A label is taken from ``labels`` first, then from the extra labels
        of the identifier's prefix (loaded on demand); identifiers with no
        label are passed through unchanged. Passing a LabeledID in
        ``input_identifiers`` prints a message and exits the process.
        """
        #Originally we needed to clean up the identifer lists, because there would be both labeledids and
        # string ids and we had to reconcile them.
        # But now, we only allow regular ids in the list, and now we need to turn some of them into labeled ids for output
        labeled_list = []
        for iid in input_identifiers:
            if isinstance(iid, LabeledID):
                print('LabeledID dont belong here, pass in labels seperately',
                      iid)
                exit()
            if iid in labels:
                labeled_list.append(
                    LabeledID(identifier=iid, label=labels[iid]))
            else:
                prefix = Text.get_prefix(iid)
                if prefix not in self.extra_labels:
                    self.load_extra_labels(prefix)
                if iid in self.extra_labels[prefix]:
                    labeled_list.append(
                        LabeledID(identifier=iid,
                                  label=self.extra_labels[prefix][iid]))
                else:
                    labeled_list.append(iid)
        return labeled_list

    def create_node(self, input_identifiers, node_type, labels=None):
        """Build the node dict for a set of equivalent identifiers.

        :param input_identifiers: plain identifiers considered equivalent.
        :param node_type: type used to look up ancestors and the preferred
            prefix order.
        :param labels: optional ``{identifier: label}`` mapping.
        :return: ``{'id': {...}, 'equivalent_identifiers': [...], 'type': [...]}``
            or ``None`` when there is no input or no identifier has an
            accepted prefix.
        """
        # None instead of a shared mutable {} default.
        if labels is None:
            labels = {}
        #This is where we will normalize, i.e. choose the best id, and add types in accord with BL.
        #we should also include provenance and version information for the node set build.
        ancestors = self.get_ancestors(node_type)
        #ancestors.reverse()
        prefixes = self.get_prefixes(node_type)
        if len(input_identifiers) == 0:
            return None
        if len(input_identifiers) > 1000:
            print('this seems like a lot')
            print(len(input_identifiers))
        cleaned = self.apply_labels(input_identifiers, labels)
        try:
            idmap = defaultdict(list)
            for i in list(cleaned):
                idmap[Text.get_curie(i).upper()].append(i)
        except AttributeError:
            print('something very bad')
            print(input_identifiers)
            print(len(input_identifiers))
            for i in list(input_identifiers):
                print(i)
                print(type(i))
                print(Text.get_curie(i))
                print(Text.get_curie(i).upper())
            exit()
        identifiers = []
        accepted_ids = set()
        #Converting identifiers from LabeledID to dicts
        #In order to be consistent from run to run, we need to worry about the
        # case where e.g. there are 2 UMLS id's and UMLS is the preferred pref.
        # We're going to choose the canonical ID here just by sorting the N .
        for p in prefixes:
            pupper = p.upper()
            if pupper in idmap:
                newids = []
                for v in idmap[pupper]:
                    newid = Text.recurie(v, p)
                    jid = self.make_json_id(newid)
                    newids.append((jid['identifier'], jid))
                    accepted_ids.add(v)
                # Sort on the identifier only: on a tie, comparing whole
                # tuples would fall through to comparing dicts and raise.
                newids.sort(key=lambda pair: pair[0])
                identifiers += [nid[1] for nid in newids]
        #Warn if we have prefixes that we're ignoring
        for k, vals in idmap.items():
            for v in vals:
                if v not in accepted_ids and (
                        k, node_type) not in self.ignored_prefixes:
                    print(
                        f'Ignoring prefix {k} for type {node_type}, identifier {v}'
                    )
                    self.ignored_prefixes.add((k, node_type))
        if len(identifiers) == 0:
            return None
        best_id = identifiers[0]['identifier']
        # identifiers is in preferred order, so choose the first non-empty label to be the node label
        label = next(
            (entry['label'] for entry in identifiers
             if 'label' in entry and len(entry['label']) > 0),
            None)
        node = {
            'id': {
                'identifier': best_id,
            },
            'equivalent_identifiers': identifiers,
            'type': ancestors
        }
        if label is not None:
            node['id']['label'] = label
        return node