def summary_df(self) -> pd.DataFrame:
    """Generate a summary dataframe.

    One row is produced for each prefix returned by ``self.summarize_names()``.
    The alt and definition counts default to ``0`` for prefixes that are absent
    from the respective summary, or when the corresponding summarizer is ``None``.

    :returns: A dataframe with the columns ``prefix``, ``name``, ``homepage``,
        ``example``, ``link``, ``names``, ``alts`` and ``defs``.
    """
    summary_names = self.summarize_names()
    # The alt/definition summarizers are optional; a missing one counts as empty.
    summary_alts = {} if self.summarize_alts is None else self.summarize_alts()
    summary_defs = (
        {} if self.summarize_definitions is None else self.summarize_definitions()
    )

    rows = []
    for prefix, names_count in summary_names.items():
        # Look the example up once and reuse it for both the column and the link.
        example = bioregistry.get_example(prefix)
        rows.append((
            prefix,
            bioregistry.get_name(prefix),
            bioregistry.get_homepage(prefix),
            example,
            bioregistry.get_link(prefix, example),
            names_count,
            summary_alts.get(prefix, 0),
            summary_defs.get(prefix, 0),
        ))

    return pd.DataFrame(
        rows,
        columns=[
            "prefix",
            "name",
            "homepage",
            "example",
            "link",
            "names",
            "alts",
            "defs",
        ],
    )
def summary_df(self) -> pd.DataFrame:
    """Generate a summary dataframe.

    One row is produced for each prefix returned by ``self.summarize_names()``.
    The alt and definition counts default to ``0`` for prefixes that are absent
    from the respective summary, or when the corresponding summarizer is ``None``.

    :returns: A dataframe with the columns ``prefix``, ``name``, ``homepage``,
        ``example``, ``link``, ``names``, ``alts`` and ``defs``.
    """
    names = self.summarize_names()
    # The alt/definition summarizers are optional; a missing one counts as empty.
    alts = self.summarize_alts() if self.summarize_alts is not None else {}
    defs = (
        self.summarize_definitions()
        if self.summarize_definitions is not None
        else {}
    )

    def _row(prefix, names_count):
        """Build the tuple for one prefix, fetching its example only once."""
        example = bioregistry.get_example(prefix)
        return (
            prefix,
            bioregistry.get_name(prefix),
            bioregistry.get_homepage(prefix),
            example,
            bioregistry.get_link(prefix, example),
            names_count,
            alts.get(prefix, 0),
            defs.get(prefix, 0),
        )

    return pd.DataFrame(
        [_row(prefix, names_count) for prefix, names_count in names.items()],
        columns=[
            'prefix',
            'name',
            'homepage',
            'example',
            'link',
            'names',
            'alts',
            'defs',
        ],
    )
def metaresource(metaprefix: str):
    """Serve a Bioregistry registry page.

    Aborts with a 404 when ``bioregistry.get_registry`` has no entry for the
    metaprefix.

    :param metaprefix: The metaprefix of the registry to display.
    :returns: The rendered ``metaresource.html`` template.
    """
    entry = bioregistry.get_registry(metaprefix)
    if entry is None:
        abort(404, f'Invalid metaprefix: {metaprefix}')

    # Everything example-related stays None unless the entry names an example
    # prefix; the CURIE URL additionally needs an example identifier.
    example_prefix = entry.get('example')
    example_identifier = None
    example_prefix_url = None
    example_curie_url = None
    if example_prefix:
        example_identifier = bioregistry.get_example(example_prefix)
        example_prefix_url = bioregistry.get_registry_url(metaprefix, example_prefix)
        if example_identifier:
            example_curie_url = bioregistry.get_registry_resolve_url(
                metaprefix, example_prefix, example_identifier,
            )

    context = {
        'metaprefix': metaprefix,
        'name': bioregistry.get_registry_name(metaprefix),
        'description': bioregistry.get_registry_description(metaprefix),
        'homepage': bioregistry.get_registry_homepage(metaprefix),
        'download': entry.get('download'),
        'formatter': entry.get('formatter'),
        'example_prefix': example_prefix,
        'example_prefix_url': example_prefix_url,
        'example_identifier': example_identifier,
        'example_curie_url': example_curie_url,
        'entry': entry,
    }
    return render_template('metaresource.html', **context)
def resource(prefix: str):
    """Serve a Bioregistry entry page.

    :param prefix: The prefix requested by the client; it is normalized first.
    :returns: The rendered ``resource.html`` template, or the non-string value
        produced by ``_normalize_prefix_or_404``.
    """
    normalized = _normalize_prefix_or_404(prefix, '.' + resource.__name__)
    if not isinstance(normalized, str):
        # Anything other than a string is handed straight back to the caller
        # (presumably a ready-made redirect/error response -- confirm).
        return normalized

    example = bioregistry.get_example(normalized)
    context = {
        'prefix': normalized,
        'name': bioregistry.get_name(normalized),
        'example': example,
        'mappings': _get_resource_mapping_rows(normalized),
        'synonyms': bioregistry.get_synonyms(normalized),
        'homepage': bioregistry.get_homepage(normalized),
        'pattern': bioregistry.get_pattern(normalized),
        'version': bioregistry.get_version(normalized),
        'has_terms': bioregistry.has_terms(normalized),
        'obo_download': bioregistry.get_obo_download(normalized),
        'owl_download': bioregistry.get_owl_download(normalized),
        'namespace_in_lui': bioregistry.namespace_in_lui(normalized),
        'deprecated': bioregistry.is_deprecated(normalized),
        'contact': bioregistry.get_email(normalized),
        'banana': bioregistry.get_banana(normalized),
        'description': bioregistry.get_description(normalized),
    }
    # Providers can only be listed when there is an example to build them from.
    if example is None:
        context['providers'] = None
    else:
        context['providers'] = _get_resource_providers(normalized, example)
    return render_template('resource.html', **context)
def test_url_auto(self):
    """Test formatting URLs.

    For every registry entry that has an example identifier and for which an
    Identifiers.org URL can be built, request that URL without following
    redirects and check that the response status is 302.
    """
    curated_keys = ('example', 'banana', 'pattern')
    for prefix, entry in bioregistry.read_registry().items():
        if prefix in IDOT_BROKEN:
            continue
        identifier = bioregistry.get_example(prefix)
        if identifier is None:
            continue
        # Skip entries that carry none of these keys themselves.
        if not any(key in entry for key in curated_keys):
            continue
        url = get_identifiers_org_url(prefix, identifier)
        if url is None:
            continue

        with self.subTest(prefix=prefix, identifier=identifier):
            # NOTE: checking that the URL starts with
            # ``https://identifiers.org/<miriam prefix>:`` does not work because
            # the CURIE generation often throws away the prefix.
            res = self.session.get(url, allow_redirects=False)
            details = dedent(f'''\
                Prefix: {prefix}
                Identifier: {identifier}
                URL: {url}
                Text: ''')
            self.assertEqual(
                302,
                res.status_code,
                msg='\n' + details + fill(res.text, 70, subsequent_indent=' '),
            )
def test_examples(self):
    """Test that all entries have examples.

    Only entries that define a ``pattern`` are checked; the failure message
    points at the OLS term listing when the entry has an ``ols`` record.
    """
    for prefix, entry in self.registry.items():
        # TODO remove this later
        if 'pattern' in entry:
            with self.subTest(prefix=prefix):
                ols_hint = ''
                if 'ols' in entry:
                    ols_hint = f'\nSee: https://www.ebi.ac.uk/ols/ontologies/{entry["ols"]["prefix"]}/terms'
                self.assertIsNotNone(
                    bioregistry.get_example(prefix),
                    msg=f'{prefix} is missing an example local identifier' + ols_hint,
                )
def resources():
    """Serve the Bioregistry page.

    :returns: The rendered ``resources.html`` template, given one row of
        display fields per prefix in the registry.
    """
    rows = []
    for prefix in bioregistry.read_registry():
        rows.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'example': bioregistry.get_example(prefix),
            'homepage': bioregistry.get_homepage(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'namespace_in_lui': bioregistry.namespace_in_lui(prefix),
            'banana': bioregistry.get_banana(prefix),
            'description': bioregistry.get_description(prefix),
        })
    return render_template('resources.html', rows=rows)
def main(registry):
    """Add examples to the bioregistry from Inspector Javert's Xref Database.

    Each row of the gzipped TSV at ``MAPPINGS_DB_TSV_CACHE`` is read as five
    tab-separated columns: source namespace, source identifier, target
    namespace, target identifier and one ignored column. The first identifier
    seen for a namespace that lacks an example becomes its example.

    :param registry: Mapping from prefix to its entry dictionary. Entries for
        which ``get_example`` returns ``None`` are updated in place.
    :returns: The same ``registry`` object.
    """
    missing = {key for key in registry if get_example(key) is None}
    if not missing:
        # Nothing to fill in, so there is no need to open or scan the database.
        return registry

    with gzip.open(MAPPINGS_DB_TSV_CACHE, 'rt') as file:
        for line in tqdm(file):
            # Strip only the line terminator: a bare strip() would also drop a
            # trailing tab when the last column is empty and break the unpacking.
            source_ns, source_id, target_ns, target_id, _ = (
                line.rstrip('\r\n').split('\t')
            )
            for namespace, identifier in (
                (source_ns, source_id),
                (target_ns, target_id),
            ):
                if namespace in missing:
                    registry[namespace]['example'] = identifier
                    tqdm.write(f'added example {namespace} {identifier}')
                    missing.remove(namespace)
            if not missing:
                # Every gap has been filled; skip the rest of the file.
                break
    return registry
def test_examples_pass_patterns(self):
    """Test that all examples pass the patterns.

    Entries lacking either a pattern or an example are skipped. When the
    namespace is embedded in the local identifier, the example is prefixed with
    the Identifiers.org prefix before it is checked.
    """
    for prefix, entry in self.registry.items():
        pattern = bioregistry.get_pattern_re(prefix)
        example = bioregistry.get_example(prefix)
        if pattern is None or example is None:
            continue

        candidate = example
        if bioregistry.namespace_in_lui(prefix):
            miriam_prefix = get_identifiers_org_prefix(prefix)
            uppercase = entry.get('namespace.capitalized') or 'obofoundry' in entry
            if uppercase:
                miriam_prefix = miriam_prefix.upper()
            candidate = f'{miriam_prefix}:{example}'

        # Only open a subtest for examples that bioregistry itself rejects.
        if not bioregistry.validate(prefix, candidate):
            with self.subTest(prefix=prefix):
                self.assertRegex(candidate, pattern)
def curation():
    """Make curation list.

    Writes ``curation.yml`` under ``DOCS_DATA`` with one key per kind of
    missing information (``wikidata``, ``pattern``, ``formatter``, ``example``).
    Each value is whatever ``_g`` returns for the matching predicate
    (presumably the entries for which the predicate holds -- confirm).
    """
    predicates = {
        'wikidata': lambda prefix, entry: entry.get('wikidata', {}).get('database') is None,
        'pattern': lambda prefix, entry: bioregistry.get_pattern(prefix) is None,
        'formatter': lambda prefix, entry: bioregistry.get_format(prefix) is None,
        'example': lambda prefix, entry: bioregistry.get_example(prefix) is None,
    }
    # Evaluate every category before the output file is opened.
    report = {key: _g(predicate) for key, predicate in predicates.items()}

    path = os.path.join(DOCS_DATA, 'curation.yml')
    with open(path, 'w') as file:
        yaml.safe_dump(report, file)
def test_banana(self):
    """Test that entries curated with a new banana are resolved properly.

    Each entry with a ``banana`` must have an example identifier whose
    Identifiers.org URL answers with a 302 when redirects are not followed.
    """
    for prefix, entry in bioregistry.read_registry().items():
        banana = entry.get('banana')
        # identifiers.org is broken for the prefixes listed in IDOT_BROKEN
        if banana is None or prefix in IDOT_BROKEN:
            continue

        subtest_params = {
            'prefix': prefix,
            'banana': banana,
            'pattern': bioregistry.get_pattern(prefix),
        }
        with self.subTest(**subtest_params):
            identifier = bioregistry.get_example(prefix)
            self.assertIsNotNone(identifier)
            url = bioregistry.resolve_identifier.get_identifiers_org_url(
                prefix, identifier,
            )
            res = self.session.get(url, allow_redirects=False)
            self.assertEqual(
                302, res.status_code, msg=f'failed with URL: {url}',
            )
def main(url: str, local: bool):
    """Test the API.

    Requests ``{url}/{prefix}:{identifier}`` (without following redirects) for
    every registry prefix that has an example identifier, logs each response
    that indicates a problem, and exits the process with status 1 if any
    problem was logged and 0 otherwise.

    :param url: Base URL of the resolution service; trailing slashes are removed.
    :param local: If true, ``http://localhost:5000`` is tested instead of ``url``.
    """
    url = 'http://localhost:5000' if local else url.rstrip('/')
    click.echo(f'Testing resolution API on {url}')

    failure = False
    prefixes = tqdm(bioregistry.read_registry())
    for prefix in prefixes:
        identifier = bioregistry.get_example(prefix)
        if identifier is None:
            continue
        prefixes.set_postfix({'prefix': prefix})
        req_url = f'{url}/{prefix}:{identifier}'
        res = requests.get(req_url, allow_redirects=False)
        log = partial(_log, req_url=req_url)

        if res.status_code == 302:  # redirect
            continue
        elif res.status_code != 404:
            # The message is taken from the ``<p>...</p>`` on the fourth line of
            # the body; fall back to the whole body when it is shorter than that
            # instead of raising IndexError.
            body_lines = res.text.splitlines()
            if len(body_lines) > 3:
                text = body_lines[3][len('<p>'):-len('</p>')]
            else:
                text = res.text.strip()
            log(f'HTTP {res.status_code}: {res.reason} {text}', fg='red')
        elif not bioregistry.get_providers(prefix, identifier):
            # A 404 without any providers is not reported.
            continue
        elif '/' in identifier or SLASH_URL_ENCODED in identifier:
            log('contains slash 🎩 🎸', fg='red')
        elif not bioregistry.validate(prefix, identifier):
            pattern = bioregistry.get_pattern(prefix)
            if bioregistry.get_banana(prefix):
                log(f'banana {pattern} 🍌', fg='red')
            else:
                log(f'invalid example does not match pattern {pattern}', fg='red')
        else:
            log('404 unknown issue', fg='red')

        # NOTE(review): assumes every logged problem marks the run as failed
        # (i.e. this statement sits at loop level) -- confirm.
        failure = True

    sys.exit(1 if failure else 0)