def get_merged_namespace_names(
    locations: Iterable[str],
    check_keywords: bool = True,
) -> Mapping[str, str]:
    """Load many namespaces and combine their names.

    Each location is loaded with ``get_bel_resource`` and the ``Values`` sections are merged
    in iteration order, so a name appearing in several namespaces keeps the value from the
    last location that defines it.

    :param locations: An iterable of URLs or file paths pointing to BEL namespaces.
    :param check_keywords: Should all the keywords be the same? Defaults to ``True``
    :return: A dictionary of {names: labels}
    :raises ValueError: If ``check_keywords`` is true and the loaded namespaces do not share
     exactly one keyword (this is also the case when ``locations`` is empty).

    Example Usage

    >>> from pybel.resources import write_namespace
    >>> from pybel_tools.definition_utils import export_namespace, get_merged_namespace_names
    >>> graph = ...
    >>> original_ns_url = ...
    >>> export_namespace(graph, 'MBS') # Outputs in current directory to MBS.belns
    >>> value_dict = get_merged_namespace_names([original_ns_url, 'MBS.belns'])
    >>> with open('merged_namespace.belns', 'w') as f:
    >>> ...  write_namespace('MyBrokenNamespace', 'MBS', 'Other', 'Charles Hoyt', 'PyBEL Citation', value_dict, file=f)
    """
    resources = {
        location: get_bel_resource(location)
        for location in locations
    }

    if check_keywords:
        resource_keywords = {
            config['Namespace']['Keyword']
            for config in resources.values()
        }
        if 1 != len(resource_keywords):
            raise ValueError('Tried merging namespaces with different keywords: {}'.format(resource_keywords))

    result = {}
    # Iterate over the loaded resources themselves; iterating the dict directly
    # would yield the location strings, which cannot be indexed with 'Values'.
    for resource in resources.values():
        result.update(resource['Values'])
    return result
def get_or_create_annotation(self, url: str) -> Namespace:
    """Return the annotation stored for the given URL, inserting it into the cache if missing.

    :param url: The location of the BEL annotation file
    :return: The existing or newly inserted namespace object flagged as an annotation
    :raises: pybel.resources.exc.ResourceError
    """
    cached = self.get_namespace_by_url(url)
    if cached is not None:
        return cached

    start_time = time.time()

    bel_resource = get_bel_resource(url)
    insert_kwargs = _get_annotation_insert_values(bel_resource)
    annotation = Namespace(url=url, is_annotation=True, **insert_kwargs)

    # Entries with an empty name are skipped.
    entries = []
    for name, label in bel_resource['Values'].items():
        if not name:
            continue
        entries.append(NamespaceEntry(name=name, identifier=label))
    annotation.entries = entries

    self.session.add(annotation)
    self.session.commit()

    term_count = len(bel_resource['Values'])
    elapsed = time.time() - start_time
    logger.info('inserted annotation: %s (%d terms in %.2f seconds)', url, term_count, elapsed)

    return annotation
def __init__(self, annotation_urls):
    """Load the BEL resource for every annotation URL and prepare the failure index.

    :param annotation_urls: A mapping whose values are passed to ``get_bel_resource``;
     the loaded resources are stored under the same keys
    """
    self.resources = {
        key: get_bel_resource(url)
        for key, url in annotation_urls.items()
    }
    # Maps a key to a set of failures; a missing key yields an empty set.
    self.failures = defaultdict(set)
def get_or_create_namespace(self, url: str) -> Namespace:
    """Insert the namespace file at the given location to the cache.

    If a namespace is already stored for the URL, it is returned without downloading anything.
    Otherwise, the BEL resource is loaded, its values are cleaned, and a new namespace with one
    entry per value is added to the session and committed.

    For URLs ending in ``-names.belns``, a companion ``.belns.mapping`` JSON document is
    requested; when available, it is inverted to look up the identifier for each name.

    :param url: The location of the BEL namespace file
    :return: The existing or newly inserted namespace
    :raises: pybel.resources.exc.ResourceError
    """
    result = self.get_namespace_by_url(url)

    if result is not None:
        return result

    t = time.time()

    bel_resource = get_bel_resource(url)
    _clean_bel_namespace_values(bel_resource)

    values = bel_resource['Values']
    namespace_insert_values = _get_namespace_insert_values(bel_resource)

    name_to_id = {}
    if url.endswith('-names.belns'):
        mapping_url = url[:-len('-names.belns')] + '.belns.mapping'
        try:
            res = requests.get(mapping_url)
            res.raise_for_status()
        except requests.exceptions.HTTPError:
            # The mapping is optional, so a failed HTTP status only produces a warning.
            logger.warning('No mappings found for %s', url)
        else:
            # Parse the response body once and reuse it for both logging and inversion.
            mappings = res.json()
            logger.debug('got %d mappings', len(mappings))
            name_to_id.update({v: k for k, v in mappings.items()})

    namespace = Namespace(
        url=url,
        **namespace_insert_values,
    )

    logger.debug('building NamespaceEntry instances')
    namespace.entries = [
        NamespaceEntry(name=name, encoding=encoding, identifier=name_to_id.get(name))
        for name, encoding in values.items()
    ]

    self.session.add(namespace)

    logger.debug('committing namespace')
    self.session.commit()

    logger.info('inserted namespace: %s (%d terms in %.2f seconds)', url, len(values), time.time() - t)
    return namespace
def _ensure_namespace_urls(
    self,
    urls: Iterable[str],
    use_tqdm: bool = True,
    is_annotation: bool = False,
) -> List[Namespace]:
    """Ensure a namespace (or annotation) is stored for each URL, bulk-inserting missing entries.

    URLs that already have a stored namespace are reused. For every other URL, the BEL resource
    is loaded and cleaned, a new namespace row is committed, and all of its entries are appended
    to the entry table in a single ``DataFrame.to_sql`` call.

    :param urls: The locations of the BEL namespace or annotation files
    :param use_tqdm: Should the download and row-building loops be wrapped in progress bars?
    :param is_annotation: Should the URLs be handled as annotations rather than namespaces?
    :return: The stored and newly created namespaces, in the order the URLs were processed
    """
    ext = 'belanno' if is_annotation else 'belns'

    rv = []
    url_to_namespace = {}
    url_to_values = {}
    url_to_name_to_id = {}

    tag = 'annotations' if is_annotation else 'namespaces'

    if use_tqdm:
        urls = tqdm(urls, desc=f'downloading {tag}')

    for url in urls:
        result = self.get_namespace_by_url(url)
        if result:
            rv.append(result)
            continue

        bel_resource = get_bel_resource(url)
        _clean_bel_namespace_values(bel_resource)
        url_to_values[url] = bel_resource['Values']

        if is_annotation:
            namespace_kwargs = _get_annotation_insert_values(bel_resource)
        else:
            namespace_kwargs = _get_namespace_insert_values(bel_resource)

        # NOTE(review): get_or_create_annotation passes is_annotation=True to Namespace
        # explicitly, but this path does not - confirm whether that is intended.
        result = url_to_namespace[url] = Namespace(url=url, **namespace_kwargs)
        rv.append(result)

        if url.endswith(f'-names.{ext}'):
            mapping_url = url[:-len(f'-names.{ext}')] + f'.{ext}.mapping'
            try:
                res = requests.get(mapping_url)
                res.raise_for_status()
            except requests.exceptions.HTTPError:
                # The mapping is optional, so a failed HTTP status only produces a warning.
                logger.warning('No mappings found for %s', url)
            else:
                # Parse the response body once and reuse it for both logging and inversion.
                mappings = res.json()
                logger.debug('got %d mappings', len(mappings))
                url_to_name_to_id[url] = {
                    v: k
                    for k, v in mappings.items()
                }

    self.session.add_all(url_to_namespace.values())
    self.session.commit()

    # Read the ids after the commit, once the new namespaces have been written.
    url_to_id = {
        url: namespace.id
        for url, namespace in url_to_namespace.items()
    }

    # Nothing new was downloaded, so there are no entries to insert.
    if not url_to_values:
        return rv

    rows = []

    it = url_to_values.items()
    if use_tqdm:
        it = tqdm(it, desc=f'making {tag} entry table')

    # Entries with an empty name are skipped in both branches.
    if is_annotation:
        for url, values in it:
            for name, identifier in values.items():
                if not name:
                    continue
                rows.append(
                    (url_to_id[url], name, None, identifier))  # TODO is this a fair assumption?
    else:
        for url, values in it:
            name_to_id = url_to_name_to_id.get(url, {})
            for name, encoding in values.items():
                if not name:
                    continue
                rows.append(
                    (url_to_id[url], name, encoding, name_to_id.get(name)))

    df = pd.DataFrame(
        rows, columns=['namespace_id', 'name', 'encoding', 'identifier'])

    logger.info('preparing sql objects for %s', tag)
    df.to_sql(NamespaceEntry.__tablename__, con=self.engine, if_exists='append', index=False)

    logger.info('committing %s', tag)
    start_commit_time = time.time()
    self.session.commit()
    logger.info('done committing %s after %.2f seconds', tag, time.time() - start_commit_time)

    return rv
def get_nift_labels() -> Set[str]:
    """Get the set of names listed in the values of the BEL resource at ``NIFT_URL``."""
    nift_values = get_bel_resource(NIFT_URL)['Values']
    return set(nift_values)
def get_nift_values() -> Mapping[str, str]:
    """Build a lookup from lower-cased NIFT names to the names as written in the namespace.

    If several names share a lower-cased form, the one iterated last is kept.
    """
    nift_url = 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/nift/NIFT.belns'
    lowered_to_original = {}
    for original_name in get_bel_resource(nift_url)['Values']:
        lowered_to_original[original_name.lower()] = original_name
    return lowered_to_original
def _disease_ontology_dict(ontology: str) -> Mapping[str, str]:
    """Build a lookup from lower-cased ontology names to the names as written in the namespace.

    The lookup is used for mapping HMDB disease names to the disease ontologies.

    :param ontology: A key of ``ONTOLOGY_NAMESPACES`` selecting the namespace to load
    :return: A dictionary of {lower-cased name: original name}
    """
    namespace_location = ONTOLOGY_NAMESPACES[ontology]
    namespace_values = get_bel_resource(namespace_location)['Values']

    lowered_to_original = {}
    for original_name in namespace_values:
        lowered_to_original[original_name.lower()] = original_name
    return lowered_to_original
def test_get_from_url(self):
    """Check that an annotation fetched by URL has the expected content."""
    annotation_url = 'https://example.com/test_an_1.belanno'
    # Only the download itself needs the mocked resources.
    with mock_bel_resources:
        annotation = get_bel_resource(annotation_url)
    self._help_test_annotation(annotation)
def test_get_from_path(self):
    """Check that an annotation loaded from a file path has the expected content."""
    self._help_test_annotation(get_bel_resource(TEST_ANNOTATION_PATH))
def test_raises_on_empty(self):
    """Check that loading an empty resource raises ``EmptyResourceError``."""
    self.assertRaises(EmptyResourceError, get_bel_resource, TEST_NAMESPACE_EMPTY_PATH)