Пример #1
0
def get_merged_namespace_names(
    locations: Iterable[str],
    check_keywords: bool = True,
) -> Mapping[str, str]:
    """Load several BEL namespaces and merge their values into one dictionary.

    Every location is loaded with ``get_bel_resource``. The ``Values`` sections are
    merged in iteration order, so when the same name occurs in more than one
    namespace, the entry from the later location wins.

    :param locations: An iterable of URLs or file paths pointing to BEL namespaces.
    :param check_keywords: Should all the keywords be the same? Defaults to ``True``
    :return: A dictionary of {names: labels}
    :raises ValueError: If ``check_keywords`` is true and the loaded namespaces do not
     share exactly one keyword (this includes the case where no locations are given)

    Example Usage

    >>> from pybel.resources import write_namespace
    >>> from pybel_tools.definition_utils import export_namespace, get_merged_namespace_names
    >>> graph = ...
    >>> original_ns_url = ...
    >>> export_namespace(graph, 'MBS') # Outputs in current directory to MBS.belns
    >>> value_dict = get_merged_namespace_names([original_ns_url, 'MBS.belns'])
    >>> with open('merged_namespace.belns', 'w') as f:
    ...     write_namespace('MyBrokenNamespace', 'MBS', 'Other', 'Charles Hoyt', 'PyBEL Citation', value_dict, file=f)
    """
    resources = {location: get_bel_resource(location) for location in locations}

    if check_keywords:
        resource_keywords = {config['Namespace']['Keyword'] for config in resources.values()}
        if 1 != len(resource_keywords):
            raise ValueError('Tried merging namespaces with different keywords: {}'.format(resource_keywords))

    result = {}
    # Iterate over the loaded resources themselves; iterating over the dictionary
    # directly would yield the location strings, which cannot be indexed by 'Values'.
    for resource in resources.values():
        result.update(resource['Values'])
    return result
Пример #2
0
    def get_or_create_annotation(self, url: str) -> Namespace:
        """Return the annotation stored for a URL, inserting it first when it is missing.

        When ``get_namespace_by_url`` already knows the URL, that object is returned
        unchanged. Otherwise the resource is loaded, a ``Namespace`` flagged with
        ``is_annotation=True`` is built together with one ``NamespaceEntry`` per
        non-empty name, and the result is committed to the session.

        :param url: The location of the BEL annotation resource
        :return: The existing or the newly committed annotation
        :raises: pybel.resources.exc.ResourceError
        """
        cached = self.get_namespace_by_url(url)
        if cached is not None:
            return cached

        start_time = time.time()
        resource = get_bel_resource(url)

        annotation = Namespace(
            url=url,
            is_annotation=True,
            **_get_annotation_insert_values(resource),
        )

        values = resource['Values']
        entries = []
        for name, label in values.items():
            # Entries with an empty name are left out
            if not name:
                continue
            entries.append(NamespaceEntry(name=name, identifier=label))
        annotation.entries = entries

        self.session.add(annotation)
        self.session.commit()

        elapsed = time.time() - start_time
        logger.info('inserted annotation: %s (%d terms in %.2f seconds)', url, len(values), elapsed)

        return annotation
Пример #3
0
    def __init__(self, annotation_urls):
        """Load the BEL resource behind every annotation URL.

        :param annotation_urls: A mapping from a key to the URL of its resource; each URL
         is loaded with ``get_bel_resource`` and stored in ``self.resources`` under the key
        """
        loaded = self.resources = {}
        for annotation_key, location in annotation_urls.items():
            loaded[annotation_key] = get_bel_resource(location)

        # Starts out empty; every key lazily gets its own set on first access
        self.failures = defaultdict(set)
Пример #4
0
    def get_or_create_namespace(self, url: str) -> Namespace:
        """Get the namespace stored for a URL, loading and inserting it when it is missing.

        When ``get_namespace_by_url`` already knows the URL, that object is returned
        unchanged. Otherwise the resource is loaded, cleaned with
        ``_clean_bel_namespace_values``, and committed as a new ``Namespace`` with one
        ``NamespaceEntry`` per value.

        For URLs ending in ``-names.belns``, the companion ``.belns.mapping`` JSON
        document is requested as well. Its key/value pairs are inverted so that each
        entry's ``identifier`` can be looked up by name. An HTTP error status for the
        mapping document is logged and otherwise ignored.

        :param url: The location of the BEL namespace resource
        :return: The existing or the newly committed namespace
        :raises: pybel.resources.exc.ResourceError
        """
        result = self.get_namespace_by_url(url)

        if result is not None:
            return result

        t = time.time()

        bel_resource = get_bel_resource(url)

        _clean_bel_namespace_values(bel_resource)

        values = bel_resource['Values']

        namespace_insert_values = _get_namespace_insert_values(bel_resource)

        name_to_id = {}
        names_suffix = '-names.belns'
        if url.endswith(names_suffix):
            mapping_url = url[:-len(names_suffix)] + '.belns.mapping'
            try:
                res = requests.get(mapping_url)
                res.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.warning('No mappings found for %s', url)
            else:
                # Parse the response body once and reuse the result
                mappings = res.json()
                logger.debug('got %d mappings', len(mappings))
                # Invert the mapping document so identifiers can be looked up by name
                name_to_id = {name: identifier for identifier, name in mappings.items()}

        namespace = Namespace(
            url=url,
            **namespace_insert_values,
        )

        logger.debug('building NamespaceEntry instances')
        namespace.entries = [
            NamespaceEntry(
                name=name,
                encoding=encoding,
                # Stays None when no mapping is known for the name
                identifier=name_to_id.get(name),
            )
            for name, encoding in values.items()
        ]

        self.session.add(namespace)

        logger.debug('committing namespace')
        self.session.commit()

        logger.info('inserted namespace: %s (%d terms in %.2f seconds)', url,
                    len(values),
                    time.time() - t)
        return namespace
Пример #5
0
    def _ensure_namespace_urls(
        self,
        urls: Iterable[str],
        use_tqdm: bool = True,
        is_annotation: bool = False,
    ) -> List[Namespace]:
        """Make sure every URL has a stored namespace (or annotation) and return them.

        URLs already known to ``get_namespace_by_url`` are reused. Every other URL is
        loaded with ``get_bel_resource``, turned into a ``Namespace``, and committed.
        The entries of all new namespaces are then appended to the ``NamespaceEntry``
        table in one batch through ``pandas.DataFrame.to_sql``.

        For URLs ending in ``-names.belns`` (or ``-names.belanno`` for annotations),
        the companion ``.mapping`` JSON document is requested and inverted to look up
        identifiers by name. An HTTP error status for that document is logged and
        otherwise ignored. The looked-up identifiers are only used for namespaces;
        annotation rows store the resource value itself as the identifier.

        :param urls: The locations of the BEL resources to ensure
        :param use_tqdm: Should progress bars be shown while downloading and building rows?
        :param is_annotation: Should the URLs be handled as annotations rather than namespaces?
        :return: One namespace per given URL, in the order the URLs were given
        """
        ext = 'belanno' if is_annotation else 'belns'
        tag = 'annotations' if is_annotation else 'namespaces'
        names_suffix = f'-names.{ext}'

        rv = []
        url_to_namespace = {}
        url_to_values = {}
        url_to_name_to_id = {}

        if use_tqdm:
            urls = tqdm(urls, desc=f'downloading {tag}')
        for url in urls:
            if url in url_to_namespace:
                # The URL was repeated in the input: reuse the object created for it
                # instead of building a second one that would never reach the session
                rv.append(url_to_namespace[url])
                continue
            result = self.get_namespace_by_url(url)
            if result:
                rv.append(result)
                continue
            bel_resource = get_bel_resource(url)
            _clean_bel_namespace_values(bel_resource)
            url_to_values[url] = bel_resource['Values']

            if is_annotation:
                namespace_kwargs = _get_annotation_insert_values(bel_resource)
            else:
                namespace_kwargs = _get_namespace_insert_values(bel_resource)
            result = url_to_namespace[url] = Namespace(url=url,
                                                       **namespace_kwargs)
            rv.append(result)
            if url.endswith(names_suffix):
                mapping_url = url[:-len(names_suffix)] + f'.{ext}.mapping'
                try:
                    res = requests.get(mapping_url)
                    res.raise_for_status()
                except requests.exceptions.HTTPError:
                    logger.warning('No mappings found for %s', url)
                else:
                    # Parse the response body once and reuse the result
                    mappings = res.json()
                    logger.debug('got %d mappings', len(mappings))
                    # Invert the mapping document so identifiers can be looked up by name
                    url_to_name_to_id[url] = {
                        name: identifier
                        for identifier, name in mappings.items()
                    }

        self.session.add_all(url_to_namespace.values())
        self.session.commit()

        if not url_to_values:
            return rv

        # Read the ids after the commit so the entry rows can refer to their namespace
        url_to_id = {
            url: namespace.id
            for url, namespace in url_to_namespace.items()
        }

        rows = []
        it = url_to_values.items()
        if use_tqdm:
            it = tqdm(it, desc=f'making {tag} entry table')
        if is_annotation:
            for url, values in it:
                for name, identifier in values.items():
                    if not name:
                        continue
                    rows.append(
                        (url_to_id[url], name, None,
                         identifier))  # TODO is this a fair assumption?
        else:
            for url, values in it:
                name_to_id = url_to_name_to_id.get(url, {})
                for name, encoding in values.items():
                    if not name:
                        continue
                    rows.append(
                        (url_to_id[url], name, encoding, name_to_id.get(name)))

        df = pd.DataFrame(
            rows, columns=['namespace_id', 'name', 'encoding', 'identifier'])
        logger.info('preparing sql objects for %s', tag)
        df.to_sql(NamespaceEntry.__tablename__,
                  con=self.engine,
                  if_exists='append',
                  index=False)
        logger.info('committing %s', tag)
        start_commit_time = time.time()
        self.session.commit()
        logger.info('done committing %s after %.2f seconds', tag,
                    time.time() - start_commit_time)

        return rv
Пример #6
0
def get_nift_labels() -> Set[str]:
    """Return the distinct entries of the ``Values`` section of the resource at ``NIFT_URL``."""
    return set(get_bel_resource(NIFT_URL)['Values'])
Пример #7
0
def get_nift_values() -> Mapping[str, str]:
    """Build a lookup from the lower-cased form of each NIFT name to its original spelling."""
    nift_resource = get_bel_resource(
        'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/nift/NIFT.belns'
    )
    lowercase_to_original = {}
    for original in nift_resource['Values']:
        # Names that collide after lower-casing keep the one encountered last
        lowercase_to_original[original.lower()] = original
    return lowercase_to_original
Пример #8
0
 def _disease_ontology_dict(ontology: str) -> Mapping[str, str]:
     """Build a lookup from lower-cased ontology values to their original spelling.

     The namespace is loaded from the location registered for ``ontology`` in
     ``ONTOLOGY_NAMESPACES``. The lookup is meant for mapping HMDB disease names
     to the entries of that disease ontology.

     :param ontology: A key of ``ONTOLOGY_NAMESPACES``
     :return: A dictionary of {lower-cased value: original value}
     """
     namespace = get_bel_resource(ONTOLOGY_NAMESPACES[ontology])
     lowercase_to_original = {}
     for original in namespace['Values']:
         lowercase_to_original[original.lower()] = original
     return lowercase_to_original
Пример #9
0
 def test_get_from_url(self):
     """Check that a resource fetched by URL has the expected annotation content."""
     annotation_url = 'https://example.com/test_an_1.belanno'
     # Only the download itself runs inside the mock context
     with mock_bel_resources:
         downloaded = get_bel_resource(annotation_url)
     self._help_test_annotation(downloaded)
Пример #10
0
 def test_get_from_path(self):
     """Check that a resource loaded from a file path has the expected annotation content."""
     self._help_test_annotation(get_bel_resource(TEST_ANNOTATION_PATH))
Пример #11
0
 def test_raises_on_empty(self):
     """Check that loading an empty resource raises ``EmptyResourceError``."""
     self.assertRaises(EmptyResourceError, get_bel_resource, TEST_NAMESPACE_EMPTY_PATH)