def summary_df(self) -> pd.DataFrame:
    """Build a per-prefix summary table.

    One row is produced for every prefix returned by ``self.summarize_names()``.
    Each row combines Bioregistry metadata for the prefix (name, homepage,
    example identifier, and a link built from that example) with the name
    count, the alt count, and the definition count.

    ``self.summarize_alts`` and ``self.summarize_definitions`` may be ``None``;
    in that case the corresponding column is filled with ``0``.

    :returns: A dataframe with the columns ``prefix``, ``name``, ``homepage``,
        ``example``, ``link``, ``names``, ``alts``, and ``defs``.
    """
    name_counts = self.summarize_names()

    if self.summarize_alts is None:
        alt_counts = {}
    else:
        alt_counts = self.summarize_alts()

    if self.summarize_definitions is None:
        def_counts = {}
    else:
        def_counts = self.summarize_definitions()

    header = [
        "prefix",
        "name",
        "homepage",
        "example",
        "link",
        "names",
        "alts",
        "defs",
    ]

    records = []
    for prefix, names_count in name_counts.items():
        records.append((
            prefix,
            bioregistry.get_name(prefix),
            bioregistry.get_homepage(prefix),
            bioregistry.get_example(prefix),
            bioregistry.get_link(prefix, bioregistry.get_example(prefix)),
            names_count,
            # Prefixes missing from the optional summaries count as zero.
            alt_counts.get(prefix, 0),
            def_counts.get(prefix, 0),
        ))
    return pd.DataFrame(records, columns=header)
def summary_df(self) -> pd.DataFrame:
    """Assemble a dataframe summarizing every prefix that has names.

    The prefixes and their name counts come from ``self.summarize_names()``.
    The alt and definition counts come from ``self.summarize_alts()`` and
    ``self.summarize_definitions()`` when those attributes are not ``None``;
    otherwise every prefix gets ``0`` in that column. The remaining columns
    are looked up from Bioregistry for each prefix.

    :returns: A dataframe with the columns ``prefix``, ``name``, ``homepage``,
        ``example``, ``link``, ``names``, ``alts``, and ``defs``.
    """
    names_by_prefix = self.summarize_names()
    alts_by_prefix = {} if self.summarize_alts is None else self.summarize_alts()
    defs_by_prefix = (
        {} if self.summarize_definitions is None else self.summarize_definitions()
    )

    def _iter_rows():
        # One tuple per prefix, in the iteration order of the names summary.
        for prefix, names_count in names_by_prefix.items():
            yield (
                prefix,
                bioregistry.get_name(prefix),
                bioregistry.get_homepage(prefix),
                bioregistry.get_example(prefix),
                bioregistry.get_link(prefix, bioregistry.get_example(prefix)),
                names_count,
                alts_by_prefix.get(prefix, 0),
                defs_by_prefix.get(prefix, 0),
            )

    return pd.DataFrame(
        list(_iter_rows()),
        columns=[
            'prefix',
            'name',
            'homepage',
            'example',
            'link',
            'names',
            'alts',
            'defs',
        ],
    )
def resource(prefix: str):
    """Serve the Bioregistry entry page for a single resource.

    The incoming prefix is first passed through ``_normalize_prefix_or_404``
    together with this view's endpoint name. If that helper returns anything
    other than a string, the value is returned to the caller unchanged
    (presumably a redirect or error response; confirm against the helper).

    :param prefix: The prefix requested by the client.
    :returns: The rendered ``resource.html`` template, or whatever non-string
        value the normalization helper produced.
    """
    endpoint = '.' + resource.__name__
    prefix = _normalize_prefix_or_404(prefix, endpoint)
    if not isinstance(prefix, str):
        return prefix

    example = bioregistry.get_example(prefix)
    context = {
        'prefix': prefix,
        'name': bioregistry.get_name(prefix),
        'example': example,
        'mappings': _get_resource_mapping_rows(prefix),
        'synonyms': bioregistry.get_synonyms(prefix),
        'homepage': bioregistry.get_homepage(prefix),
        'pattern': bioregistry.get_pattern(prefix),
        'version': bioregistry.get_version(prefix),
        'has_terms': bioregistry.has_terms(prefix),
        'obo_download': bioregistry.get_obo_download(prefix),
        'owl_download': bioregistry.get_owl_download(prefix),
        'namespace_in_lui': bioregistry.namespace_in_lui(prefix),
        'deprecated': bioregistry.is_deprecated(prefix),
        'contact': bioregistry.get_email(prefix),
        'banana': bioregistry.get_banana(prefix),
        'description': bioregistry.get_description(prefix),
    }
    # Providers are only looked up when there is an example identifier to use.
    if example is None:
        context['providers'] = None
    else:
        context['providers'] = _get_resource_providers(prefix, example)
    return render_template('resource.html', **context)
def reference(prefix: str, identifier: str):
    """Serve the Bioregistry page for a single reference (prefix + identifier).

    The prefix is used exactly as given; this function does no normalization.

    :param prefix: The prefix of the reference.
    :param identifier: The identifier within that prefix.
    :returns: The rendered ``reference.html`` template.
    """
    context = {
        'prefix': prefix,
        'name': bioregistry.get_name(prefix),
        'identifier': identifier,
        'providers': _get_resource_providers(prefix, identifier),
    }
    return render_template('reference.html', **context)
def warnings():
    """Collect registry warnings and write them to ``warnings.yml``.

    Three lists are built from ``items`` (an iterable of ``(prefix, entry)``
    pairs defined outside this function) and dumped as YAML into
    ``DOCS_DATA/warnings.yml``:

    - ``wrong_patterns``: entries whose own ``pattern`` differs from the
      pattern stored under their ``miriam`` record.
    - ``embedding_rewrites``: entries that carry a ``namespace.embedded`` key.
    - ``prefix_rewrites``: entries that carry a ``namespace.rewrite`` key.
    """
    wrong_patterns = []
    for prefix, entry in items:
        if 'miriam' not in entry or 'pattern' not in entry:
            continue
        if entry['pattern'] == entry['miriam']['pattern']:
            continue
        wrong_patterns.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'correct': entry['pattern'],
            'miriam': entry['miriam']['pattern'],
        })

    embedding_rewrites = []
    for prefix, entry in items:
        if 'namespace.embedded' not in entry:
            continue
        # NOTE(review): assumes every entry with 'namespace.embedded' also has
        # a 'miriam' record containing 'namespaceEmbeddedInLui' - confirm.
        embedding_rewrites.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'correct': entry['namespace.embedded'],
            'miriam': entry['miriam']['namespaceEmbeddedInLui'],
        })

    # When are namespace rewrites required?
    prefix_rewrites = []
    for prefix, entry in items:
        if 'namespace.rewrite' not in entry:
            continue
        prefix_rewrites.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'correct': entry['namespace.rewrite'],
        })

    payload = {
        'wrong_patterns': wrong_patterns,
        'embedding_rewrites': embedding_rewrites,
        'prefix_rewrites': prefix_rewrites,
    }
    output_path = os.path.join(DOCS_DATA, 'warnings.yml')
    with open(output_path, 'w') as file:
        yaml.safe_dump(payload, file)
def resources():
    """Serve the Bioregistry listing page.

    One row of display metadata is built for every prefix yielded by
    ``bioregistry.read_registry()``, and the rows are handed to the
    ``resources.html`` template.

    :returns: The rendered ``resources.html`` template.
    """
    rows = []
    for prefix in bioregistry.read_registry():
        rows.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'example': bioregistry.get_example(prefix),
            'homepage': bioregistry.get_homepage(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'namespace_in_lui': bioregistry.namespace_in_lui(prefix),
            'banana': bioregistry.get_banana(prefix),
            'description': bioregistry.get_description(prefix),
        })
    return render_template('resources.html', rows=rows)
def test_name_expansions(self):
    """Test that default names are spelled out rather than bare capital acronyms.

    Deprecated prefixes are skipped, as are entries whose record has its own
    ``name`` key. For the remaining prefixes a sub-test failure is reported
    when either:

    - the name is all upper-case and equals the prefix ignoring case, or
    - the prefix contains a ``.`` and the part before the first ``.`` equals
      the lower-cased name.
    """
    for prefix in bioregistry.read_registry():
        if bioregistry.is_deprecated(prefix):
            continue
        if 'name' in bioregistry.get(prefix):
            continue

        name = bioregistry.get_name(prefix)
        lowered = name.lower()
        message = f'{prefix} acronym ({name}) is not expanded'

        is_bare_acronym = prefix == lowered and name.upper() == name
        if is_bare_acronym:
            with self.subTest(prefix=prefix):
                self.fail(msg=message)

        # Sub-namespaced prefixes: compare only the part before the first dot.
        head, dot, _ = prefix.partition('.')
        if dot and head == lowered:
            with self.subTest(prefix=prefix):
                self.fail(msg=message)
def make_resource_node(cx: NiceCXBuilder, prefix: str) -> int:
    """Add a CX node representing a resource and return the node.

    The node is named after the resource and represents
    ``bioregistry.resource:<prefix>``. The homepage, description, and pattern
    are attached as node attributes, each only when Bioregistry returns a
    truthy value for it.

    :param cx: The builder the node and its attributes are added to.
    :param prefix: The prefix of the resource.
    :returns: The value returned by ``cx.add_node``.
    """
    node = cx.add_node(
        name=bioregistry.get_name(prefix),
        represents=f'bioregistry.resource:{prefix}',
    )
    optional_attributes = (
        ('homepage', bioregistry.get_homepage),
        ('description', bioregistry.get_description),
        ('pattern', bioregistry.get_pattern),
    )
    for attribute, getter in optional_attributes:
        value = getter(prefix)
        if value:
            cx.add_node_attribute(node, attribute, value)
    # TODO add more
    return node
def test_no_redundant_acronym(self):
    """Test that there is no redundant acronym in the name.

    For example, "Amazon Standard Identification Number (ASIN)" is a
    problematic name for prefix "asin".

    Deprecated prefixes are skipped, as are entries whose record has its own
    ``name`` key. For the rest, the text after the last ``(`` in the name
    (with any trailing ``)`` removed) is compared to the prefix ignoring case,
    and a sub-test failure is reported on a match. Names without a ``(`` are
    skipped.
    """
    for prefix in bioregistry.read_registry():
        if bioregistry.is_deprecated(prefix):
            continue
        entry = bioregistry.get(prefix)
        if 'name' in entry:
            continue
        name = bioregistry.get_name(prefix)
        try:
            _, rest = name.rstrip(')').rsplit('(', 1)
        except ValueError:
            # No opening parenthesis, so there is no trailing acronym to check.
            continue
        if rest.lower() == prefix.lower():
            with self.subTest(prefix=prefix):
                self.fail(msg=f'{prefix} has redundant acronym in name "{name}"')
def namespaces_table_df(graph: BELGraph, examples: bool = True) -> pd.DataFrame:
    """Create a dataframe describing the namespaces in the graph.

    :param graph: The graph whose nodes are inspected.
    :param examples: If false, return only the namespace counts. If true,
        also include the Bioregistry name for each namespace and one
        randomly chosen example node (or an empty string when none of the
        graph's ``BaseConcept`` nodes belong to that namespace).
    :returns: A dataframe ordered by ``most_common()`` of the namespace
        counter, with columns ``Namespace``/``Count`` when ``examples`` is
        false and ``Prefix``/``Name``/``Count``/``Example`` otherwise.
    """
    nodes_by_namespace = multidict(
        (node.namespace, node)
        for node in graph
        if isinstance(node, BaseConcept)
    )
    ranked = count_namespaces(graph).most_common()

    if not examples:
        return pd.DataFrame(ranked, columns=['Namespace', 'Count'])

    rows = []
    for prefix, count in ranked:
        name = bioregistry.get_name(prefix)
        if prefix in nodes_by_namespace:
            example = random.choice(nodes_by_namespace[prefix])  # noqa:S311
        else:
            example = ''
        rows.append((prefix, name, count, example))
    return pd.DataFrame(rows, columns=['Prefix', 'Name', 'Count', 'Example'])
def __post_init__(self):
    """Validate the instance and fill in defaults after initialization.

    Steps, in order:

    1. Require ``self.ontology`` to equal its Bioregistry-normalized form.
    2. Default ``self.name`` to the Bioregistry name when it is ``None``.
    3. When ``self.data_version`` is falsy, take ``self.static_version`` if
       it is truthy, otherwise the result of ``self._get_version()``.
    4. Unless ``self.dynamic_version`` is truthy, require a data version that
       is not ``None`` and does not contain a slash.
    5. Default ``self.auto_generated_by`` to ``bio2obo:<ontology>`` when it
       is ``None``.

    :raises BioregistryError: If ``self.ontology`` is not in normalized form.
    :raises ValueError: If a non-dynamic version is missing or contains ``/``.
    """
    normalized = bioregistry.normalize_prefix(self.ontology)
    if self.ontology != normalized:
        raise BioregistryError(self.ontology)

    # The type ignores are because of the hack where we override the
    # class variables in the instance
    if self.name is None:
        self.name = bioregistry.get_name(self.ontology)  # type:ignore

    if not self.data_version:
        self.data_version = (
            self.static_version if self.static_version else self._get_version()
        )

    if not self.dynamic_version:
        if self.data_version is None:
            raise ValueError(f"{self.ontology} is missing data_version")
        if "/" in self.data_version:
            raise ValueError(
                f"{self.ontology} has a slash in version: {self.data_version}"
            )

    if self.auto_generated_by is None:
        self.auto_generated_by = f"bio2obo:{self.ontology}"  # type:ignore
def _g(predicate):
    """List the prefix and name of every entry accepted by ``predicate``.

    :param predicate: A callable invoked as ``predicate(prefix, entry)`` for
        each pair in ``items`` (defined outside this function); pairs for
        which it returns a truthy value are kept.
    :returns: A list of dictionaries with the keys ``prefix`` and ``name``,
        in the iteration order of ``items``.
    """
    selected = []
    for bioregistry_id, bioregistry_entry in items:
        if not predicate(bioregistry_id, bioregistry_entry):
            continue
        selected.append({
            'prefix': bioregistry_id,
            'name': bioregistry.get_name(bioregistry_id),
        })
    return selected