def normalize_node(self, node: BaseEntity) -> Optional[BaseEntity]:
    """Normalize a node if possible, otherwise return None.

    Only nodes whose namespace is ``hbp`` or ``conso`` (case-insensitive) are
    handled. The identifier takes precedence: if the node has one, it is
    resolved through ``self.identifier_to_label`` and the node's own name is
    not consulted. Otherwise the name is resolved through
    ``self.label_to_identifier``.

    :param node: The node to normalize
    :return: A new node of the same class carrying the namespace, name, and
     identifier, or None if the namespace is not handled or the lookup fails
    :raises ValueError: If the node has neither an identifier nor a name
    """
    namespace = node.get(NAMESPACE)
    if not namespace or namespace.lower() not in {'hbp', 'conso'}:
        return None

    identifier = node.get(IDENTIFIER)
    name = node.get(NAME)
    if identifier is None and name is None:
        raise ValueError('node has neither an identifier nor a name: {!r}'.format(node))

    if identifier is not None:
        # Keep the looked-up label in its own variable so a failed lookup
        # does not leave a None behind to be used as a key afterwards.
        label = self.identifier_to_label.get(identifier)
        if label is None:
            return None
        return node.__class__(namespace=namespace, name=label, identifier=identifier)

    resolved_identifier = self.label_to_identifier.get(name)
    if resolved_identifier is None:
        return None
    return node.__class__(namespace=namespace, name=name, identifier=resolved_identifier)
def assert_has_edge(self: unittest.TestCase, u: BaseEntity, v: BaseEntity, graph: BELGraph, permissive=True, **kwargs):
    """Check that an edge with the given properties is contained within a graph.

    :param u: The source node of the edge
    :param v: The target node of the edge
    :param graph: The graph that should contain the edge
    :param permissive: If true, the keyword arguments are compared with
     ``any_subdict_matches``; otherwise with ``any_dict_matches``
    :param kwargs: Expected edge properties. If empty, only the existence of
     the edge is checked.
    """
    self.assertIsInstance(u, BaseEntity)
    self.assertIsInstance(v, BaseEntity)

    if not graph.has_edge(u, v):
        # Only enumerate the graph's edges when the check actually fails;
        # doing it on every call is linear in the size of the graph.
        other_edges = '\n'.join(
            '{} {} {}'.format(source.as_bel(), data[RELATION], target.as_bel())
            for source, target, data in graph.edges(data=True)
        )
        self.fail('Edge ({}, {}) not in graph. Other edges:\n{}'.format(u, v, other_edges))

    if not kwargs:
        return

    edge_data = graph[u][v]
    if permissive:
        matches = any_subdict_matches(edge_data, kwargs)
    else:
        matches = any_dict_matches(edge_data, kwargs)

    msg = 'No edge ({}, {}) with correct properties. expected:\n {}\nbut got:\n{}'.format(
        u,
        v,
        dumps(kwargs, indent=2, sort_keys=True),
        str(edge_data),
    )
    self.assertTrue(matches, msg=msg)
def lookup_target(self, node: BaseEntity) -> Optional[Protein]:
    """Look up the protein model for a protein node, if possible.

    Returns None unless the node is a protein with a namespace and a truthy
    identifier. ``hgnc`` identifiers are passed to
    ``get_protein_by_hgnc_id`` and ``uniprot`` identifiers to
    ``get_protein_by_uniprot_id``; the namespace comparison ignores case.
    """
    namespace = node.get(NAMESPACE)
    if node[FUNCTION] != PROTEIN or namespace is None:
        return None

    identifier = node.get(IDENTIFIER)
    prefix = namespace.lower()
    if not identifier:
        return None

    if prefix == 'hgnc':
        return self.get_protein_by_hgnc_id(identifier)
    if prefix == 'uniprot':
        return self.get_protein_by_uniprot_id(identifier)
    return None
def look_up_enzyme(self, node: BaseEntity) -> Optional[Enzyme]:
    """Try to get an enzyme model from the given node.

    Nodes without a namespace, or whose lower-cased namespace is not one of
    ``expasy``, ``ec``, or ``eccode``, yield None. Otherwise the node's name
    is passed to ``get_enzyme_by_id``.
    """
    namespace = node.get(NAMESPACE)
    if namespace is None or namespace.lower() not in {'expasy', 'ec', 'eccode'}:
        return None
    return self.get_enzyme_by_id(node.get(NAME))
def lookup_drug(self, node: BaseEntity) -> Optional[Drug]:
    """Try and look up a drug.

    Only abundance nodes in the ``drugbank`` namespace (case-insensitive) are
    handled. The identifier is preferred; otherwise a name starting with
    ``DB`` is treated as a DrugBank identifier, and any other name is looked
    up with ``get_drug_by_name``.

    :param node: The node to look up
    :return: The result of the matching lookup, or None if the node is not a
     DrugBank abundance or has neither an identifier nor a name
    """
    namespace = node.get(NAMESPACE)
    if node[FUNCTION] != ABUNDANCE or namespace is None:
        return None

    name, identifier = node.get(NAME), node.get(IDENTIFIER)
    if namespace.lower() != 'drugbank':
        return None

    if identifier is not None:
        return self.get_drug_by_drugbank_id(identifier)
    if name is None:
        # Nothing to look up; previously this raised AttributeError on
        # ``None.startswith``.
        return None
    if name.startswith('DB'):
        return self.get_drug_by_drugbank_id(name)
    return self.get_drug_by_name(name)
def lookup_drug(self, node: BaseEntity) -> Optional[Drug]:
    """Try and look up a drug.

    Only abundance nodes with a namespace are considered. For the
    ``drugbank`` namespace (case-insensitive), the identifier is used when
    present; otherwise a name starting with ``DB`` is used as the DrugBank
    identifier. A DrugBank node that matches neither case is logged as a
    warning and None is returned.
    """
    namespace = node.get(NAMESPACE)
    if node[FUNCTION] != ABUNDANCE or namespace is None:
        return None

    name, identifier = node.get(NAME), node.get(IDENTIFIER)
    if namespace.lower() != 'drugbank':
        return None

    if identifier is not None:
        return self.get_drug_by_drugbank_id(identifier)

    name_is_drugbank_id = name is not None and name.startswith('DB')
    if name_is_drugbank_id:
        return self.get_drug_by_drugbank_id(name)

    logging.warning(f'could not normalize {node} ({identifier}:{name})')
    return None
def look_up_node(self, node: BaseEntity) -> Optional[Descriptor]:
    """Look up a descriptor based on a PyBEL node.

    Nodes whose namespace is missing or does not start with ``mesh``
    (case-insensitive) yield None. A truthy identifier is resolved with
    ``get_descriptor_by_ui``; otherwise the name is resolved to a term and
    the descriptor of the term's concept is returned. A warning is logged
    when no term is found.
    """
    namespace = node.get(NAMESPACE)
    if namespace is None:
        return None
    if not namespace.lower().startswith('mesh'):
        return None

    identifier = node.get(IDENTIFIER)
    if identifier:
        return self.get_descriptor_by_ui(identifier)

    term = self.get_term_by_name(node.get(NAME))
    if not term:
        log.warning('Could not map MeSH node: %r', node)
        return None
    return term.concept.descriptor
def is_famplex_node(node: BaseEntity) -> bool:
    """Check if this is a node that can be enriched with FamPlex relations.

    The node qualifies when its namespace, compared case-insensitively, is
    one of ``famplex``, ``fplx``, or ``hgnc``. Nodes without a namespace do
    not qualify.
    """
    namespace = node.get(NAMESPACE)
    if namespace is None:
        return False
    return namespace.lower() in {'famplex', 'fplx', 'hgnc'}
def lookup_node(self, node: BaseEntity) -> Optional[Gene]:
    """Look up a gene from a PyBEL node.

    Returns None when the node has no namespace or when its lower-cased
    namespace is not in ``VALID_ENTREZ_NAMESPACES``. Otherwise the node's
    identifier and name are handed to ``_handle_entrez_node``.
    """
    namespace = node.get(NAMESPACE)
    if namespace is None:
        return None

    name = node.name
    identifier = node.identifier
    if namespace.lower() not in VALID_ENTREZ_NAMESPACES:
        return None
    return self._handle_entrez_node(identifier, name)
def normalize_node(self, node: BaseEntity) -> Optional[BaseEntity]:
    """Normalize a node if possible, otherwise return None.

    Only nodes whose namespace is ``hbp`` or ``conso`` (case-insensitive) are
    handled. Resolution order:

    1. If the node has an identifier, its label is looked up in
       ``self.identifier_to_label``.
    2. Otherwise, if the name starts with ``CONSO`` it is treated as an
       identifier: its label is looked up and the two are swapped.
    3. Otherwise the name is looked up in ``self.label_to_identifier``.

    A warning is logged whenever the applicable lookup fails.

    :param node: The node to normalize
    :return: A new node of the same class with namespace, name, and
     identifier filled in, or None if the namespace is not handled or the
     lookup fails
    :raises ValueError: If the node has neither an identifier nor a name
    """
    namespace = node.get(NAMESPACE)
    if not namespace or namespace.lower() not in {'hbp', 'conso'}:
        return None

    identifier = node.get(IDENTIFIER)
    name = node.get(NAME)
    if identifier is None and name is None:
        raise ValueError('node has neither an identifier nor a name: {!r}'.format(node))

    # NOTE: the warnings use lazy %r formatting. The previous f-strings used
    # the format spec ``{node:r}`` where the conversion ``{node!r}`` was
    # meant, which raises instead of logging.
    if identifier is not None:
        label = self.identifier_to_label.get(identifier)
        if label is not None:
            return node.__class__(namespace=namespace, name=label, identifier=identifier)
        logger.warning('Could not find CONSO name for %r', node)
        return None

    if name.startswith('CONSO'):
        # The "name" is really an identifier: look up its label and flip them.
        label = self.identifier_to_label.get(name)
        if label is not None:
            return node.__class__(namespace=namespace, name=label, identifier=name)
        logger.warning('Could not find CONSO name for %r', node)
        return None

    resolved_identifier = self.label_to_identifier.get(name)
    if resolved_identifier is not None:
        return node.__class__(namespace=namespace, name=name, identifier=resolved_identifier)
    logger.warning('Could not find CONSO identifier for %r', node)
    return None
def get_rat_gene_from_bel(self, node: BaseEntity) -> Optional[RatGene]:
    """Look up the rat gene model for a node in the ``rgd`` or ``rgdid`` namespace.

    For the ``rgdid`` namespace the node's name is used as the RGD
    identifier. For the ``rgd`` namespace the identifier is preferred and the
    name is used as a gene symbol otherwise. Namespaces are compared
    case-insensitively.

    :param node: The node to look up
    :return: The result of the matching lookup, or None if the namespace is
     not handled
    :raises ValueError: If the node has neither an identifier nor a name
    """
    namespace = node.get(NAMESPACE)
    if not namespace or namespace.lower() not in {'rgd', 'rgdid'}:
        return None

    identifier = node.get(IDENTIFIER)
    name = node.get(NAME)
    if identifier is None and name is None:
        raise ValueError('node has neither an identifier nor a name: {!r}'.format(node))

    if namespace.lower() == 'rgdid':
        gene = self.get_gene_by_rgd_id(name)
    elif identifier is not None:
        gene = self.get_gene_by_rgd_id(identifier)
    else:  # the name cannot be None here, see the check above
        gene = self.get_gene_by_rgd_symbol(name)

    # Previously this warning sat after branches that all returned, so it
    # could never fire. Assumes the lookups return None on a miss, as the
    # Optional return annotation suggests.
    if gene is None:
        logger.warning('Could not map RGD node: %r', node)
    return gene
def get_chemical_from_data(self, node: BaseEntity) -> Optional[Chemical]:
    """Look up the chemical model for a node in the ``chebi`` or ``chebiid`` namespace.

    For the ``chebiid`` namespace the node's name is used as the ChEBI
    identifier. For the ``chebi`` namespace the identifier is preferred and
    the name is used as a ChEBI name otherwise. Namespaces are compared
    case-insensitively.

    :param node: The node to look up
    :return: The result of the matching lookup, or None if the namespace is
     not handled
    :raises ValueError: If the node has neither an identifier nor a name
    """
    namespace = node.get(NAMESPACE)
    if not namespace or namespace.lower() not in {'chebi', 'chebiid'}:
        return None

    identifier = node.get(IDENTIFIER)
    name = node.get(NAME)
    if identifier is None and name is None:
        raise ValueError('node has neither an identifier nor a name: {!r}'.format(node))

    if namespace.lower() == 'chebiid':
        chemical = self.get_chemical_by_chebi_id(name)
    elif identifier is not None:
        chemical = self.get_chemical_by_chebi_id(identifier)
    else:  # the name cannot be None here, see the check above
        chemical = self.get_chemical_by_chebi_name(name)

    # Previously this warning sat after branches that all returned, so it
    # could never fire. Assumes the lookups return None on a miss, as the
    # Optional return annotation suggests.
    if chemical is None:
        log.warning('Could not find ChEBI node: %r', node)
    return chemical
def calculate_canonical_cx_identifier(node: BaseEntity) -> str:
    """Calculate the canonical name for a given node.

    - A complex that has a namespace gives ``namespace:name``.
    - A node with variants or a fusion, or any reaction, composite, or other
      complex, gives its BEL string.
    - Any other node gives its name if it has a truthy one, else
      ``namespace:identifier`` if it has a truthy identifier.

    :raises ValueError: If a simple node has neither a name nor an identifier
    """
    function = node[FUNCTION]

    if function == COMPLEX and NAMESPACE in node:
        return '{}:{}'.format(node[NAMESPACE], node[NAME])

    has_modifications = VARIANTS in node or FUSION in node
    if has_modifications or function in {REACTION, COMPOSITE, COMPLEX}:
        return node.as_bel()

    # From here on the node has neither variants nor a fusion, so it should
    # be a simple node.
    namespace = node[NAMESPACE]
    name = node.get(NAME)
    identifier = node.get(IDENTIFIER)

    if name:
        return name
    if identifier:
        return '{}:{}'.format(namespace, identifier)

    raise ValueError('Unexpected node data: {}'.format(node))
def lookup_term(self, node: BaseEntity) -> Optional[Term]:
    """Guess the term from a PyBEL node.

    Returns None when the node has no namespace or its upper-cased namespace
    is not in ``BEL_NAMESPACES``. A truthy identifier is looked up with
    ``get_term_by_id``. Otherwise the name is tried as an identifier first
    and, failing that, looked up with ``get_term_by_name``.
    """
    namespace = node.get(NAMESPACE)
    if namespace is None:
        return None
    if namespace.upper() not in BEL_NAMESPACES:
        return None

    identifier = node.identifier
    if identifier:
        return self.get_term_by_id(identifier)

    # The name may itself be an identifier, so try that before a name search.
    term = self.get_term_by_id(node.name)
    if term is None:
        term = self.get_term_by_name(node.name)
    return term
def assert_has_node(self: unittest.TestCase, node: BaseEntity, graph: BELGraph, **kwargs):
    """Check if a node with the given properties is contained within a graph.

    :param node: The node that should be in the graph
    :param graph: The graph to check
    :param kwargs: Expected entries of the node's data dictionary. Every key
     must be present in ``graph.nodes[node]`` with an equal value.
    """
    self.assertIsInstance(node, BaseEntity)

    if node not in graph:
        # Only enumerate the graph's nodes when the check actually fails;
        # doing it on every call is linear in the size of the graph.
        other_nodes = '\n'.join(n.as_bel() for n in graph)
        self.fail('{} not found in graph. Other nodes:\n{}'.format(node.as_bel(), other_nodes))

    if not kwargs:
        return

    node_data = graph.nodes[node]
    missing = set(kwargs) - set(node_data)
    # Once no key is missing, every key is known to be present, so no
    # separate presence check is needed before comparing values.
    self.assertFalse(missing, msg="Missing {} in node data".format(', '.join(sorted(missing))))
    self.assertEqual(kwargs, {k: node_data[k] for k in kwargs}, msg="Wrong values in node data")
def get_triplet_tuple(a: BaseEntity, b: BaseEntity, c: BaseEntity) -> BELTripleTuple:
    """Get the triple as a flat tuple of BEL strings and hashes.

    The result is ``(bel_a, sha512_a, bel_b, sha512_b, bel_c, sha512_c)``.
    """
    a_bel, a_hash = a.as_bel(), a.sha512
    b_bel, b_hash = b.as_bel(), b.sha512
    c_bel, c_hash = c.as_bel(), c.sha512
    return a_bel, a_hash, b_bel, b_hash, c_bel, c_hash
def look_up(df: pd.DataFrame, node: BaseEntity):
    """Get the subset of a DataFrame relevant to this node.

    Keeps the rows whose column ``1`` or column ``4`` equals the node's name.
    """
    name = node.get(NAME)
    first_matches = df[1] == name
    second_matches = df[4] == name
    return df[first_matches | second_matches]
def node_has_namespaces(node: BaseEntity, namespaces: Set[str]) -> bool:
    """Pass for nodes that have one of the given namespaces.

    Nodes without a namespace never pass. The comparison is case-sensitive.
    """
    ns = node.get(NAMESPACE)
    if ns is None:
        return False
    return ns in namespaces
def node_has_namespace(node: BaseEntity, namespace: str) -> bool:
    """Pass for nodes that have the given namespace.

    Nodes without a namespace never pass. The comparison is case-sensitive.
    """
    ns = node.get(NAMESPACE)
    if ns is None:
        return False
    return ns == namespace
def get_pair_tuple(a: BaseEntity, b: BaseEntity) -> BELPairTuple:
    """Get the pair as a flat tuple of BEL strings and hashes.

    The result is ``(bel_a, sha512_a, bel_b, sha512_b)``.
    """
    a_bel, a_hash = a.as_bel(), a.sha512
    b_bel, b_hash = b.as_bel(), b.sha512
    return a_bel, a_hash, b_bel, b_hash