def _bfs(
    self, kg: KG, entity: Vertex, is_reverse: bool = False
) -> List[Walk]:
    """Extracts the walks of an entity with the Breadth-First Search (BFS)
    algorithm.

    Starting from the single walk ``(entity,)``, every walk is extended by
    one hop per round, for ``self.max_depth`` rounds. A walk that could be
    extended is replaced by its extensions; a walk without any hop is kept
    unchanged.

    Args:
        kg: The Knowledge Graph.
        entity: The root node to extract walks.
        is_reverse: True to get the parent neighbors instead of the child
            neighbors, False otherwise.
            Defaults to False.

    Returns:
        The list of unique walks for the provided entity.

    """
    frontier: Set[Walk] = {(entity,)}
    for _ in range(self.max_depth):
        # Work on a snapshot, as the frontier is mutated while expanding.
        for current in set(frontier):
            if is_reverse:
                # Parents are prepended, so the entity stays at the end.
                neighbours = kg.get_hops(current[0], True)
                longer = [(node, edge) + current for edge, node in neighbours]
            else:
                # Children are appended, so the entity stays at the start.
                neighbours = kg.get_hops(current[-1])
                longer = [current + (edge, node) for edge, node in neighbours]
            frontier.update(longer)
            if len(neighbours) > 0:
                # The shorter walk is superseded by its extensions.
                frontier.remove(current)
    return list(frontier)
def _bfs(
    self, kg: KG, root: Vertex, is_reverse: bool = False
) -> List[Walk]:
    """Extracts walks with Breadth-first search.

    The search runs for ``self.max_depth`` rounds. In each round every walk
    that has at least one hop is replaced by one longer walk per hop; walks
    without hops are left in place.

    Args:
        kg: The Knowledge Graph.
        root: The root node to extract walks.
        is_reverse: True to get the parent neighbors instead of the child
            neighbors, False otherwise.
            Defaults to False.

    Returns:
        The list of walks for the root node.

    """
    paths: Set[Walk] = {(root,)}
    for _ in range(self.max_depth):
        # Iterate over a frozen snapshot: ``paths`` changes inside the loop.
        for path in frozenset(paths):
            hops = (
                kg.get_hops(path[0], True)
                if is_reverse
                else kg.get_hops(path[-1])
            )
            for pred, obj in hops:
                # Reverse walks grow at the front, forward walks at the back.
                paths.add(
                    (obj, pred) + path if is_reverse else path + (pred, obj)
                )
            if len(hops) > 0:
                paths.remove(path)
    return list(paths)
def _extract(
    self, kg: KG, instance: rdflib.URIRef
) -> Dict[Any, Tuple[Tuple[str, ...], ...]]:
    """Extracts the walks rooted at the provided instance and relabels them
    once per Weisfeiler-Lehman iteration.

    In every walk, the root (position 0) and the hops at odd positions are
    kept as their string form; the hops at the remaining even positions are
    replaced by ``self._label_map[hop][n]`` for each ``n`` from 0 up to and
    including ``self.wl_iterations``.

    Args:
        kg: The Knowledge Graph.

            The graph from which the neighborhoods are extracted for the
            provided instance.
        instance: The instance to be extracted from the Knowledge Graph.

    Returns:
        A dictionary with a single entry, mapping the instance to the tuple
        of its unique canonical walks.

    """
    walks = self.extract_random_walks(kg, str(instance))
    for walk in walks:
        # The result is discarded on purpose in the original code.
        # NOTE(review): presumably this warms a hop cache before the
        # relabeling step - confirm against KG.get_hops.
        kg.get_hops(walk[-1])  # type: ignore

    self._weisfeiler_lehman(kg)

    relabelled = set()
    for iteration in range(self.wl_iterations + 1):
        for walk in walks:
            relabelled.add(
                tuple(
                    str(hop)
                    if pos == 0 or pos % 2 == 1
                    else self._label_map[hop][iteration]
                    for pos, hop in enumerate(walk)  # type: ignore
                )
            )
    return {instance: tuple(relabelled)}
def sample_neighbor(self, kg: KG, walk, last):
    """Samples an untagged neighbor of the last vertex of a walk, according
    to the weight of each candidate hop.

    A hop is a candidate when ``(hop, len(walk))`` is not in
    ``self.visited``. When there is no candidate left and the walk has more
    than two elements, the hop that led to the current vertex is tagged in
    ``self.visited`` so that it is not sampled again at that depth.

    Args:
        kg: The Knowledge Graph.
        walk: The walk so far; its last element is the vertex to expand.
        last: True if the sampled hop is the last one of the walk, in which
            case it is tagged in ``self.visited``.

    Returns:
        The sampled hop, or None if every neighbor is already tagged.

    """
    depth = len(walk)
    not_tag_neighbors = [
        x for x in kg.get_hops(walk[-1]) if (x, depth) not in self.visited
    ]

    # If there are no untagged neighbors, then tag this vertex and return
    # None.
    if not not_tag_neighbors:
        if depth > 2:
            self.visited.add(((walk[-2], walk[-1]), depth - 2))
        return None

    weights = [self.get_weight(hop) for hop in not_tag_neighbors]
    if self.inverse:
        # Mirror the weights inside their own range; the bounds are
        # computed once instead of once per element.
        highest, lowest = max(weights), min(weights)
        weights = [highest - (x - lowest) for x in weights]
    if self.split:
        weights = [
            w / self.degrees[v[1]] for w, v in zip(weights, not_tag_neighbors)
        ]

    total = sum(weights)
    if total == 0:
        # All weights are zero: they express no preference, so sample
        # uniformly rather than dividing by zero.
        weights = [1.0 / len(weights)] * len(weights)
    else:
        weights = [x / total for x in weights]

    # Sample a random neighbor and add it to visited if needed.
    rand_ix = np.random.choice(len(not_tag_neighbors), p=weights)
    if last:
        self.visited.add((not_tag_neighbors[rand_ix], depth))
    return not_tag_neighbors[rand_ix]
def _bfs(self, kg: KG, root: Vertex, is_reverse: bool = False) -> List[Walk]:
    """Extracts walks rooted in root with Breadth-first search, with
    optional hops to the community of the visited objects.

    The search runs for ``self.max_depth`` rounds. In each round every walk
    that has at least one hop is replaced by one longer walk per hop. In
    addition, when the object of a hop belongs to a community, with
    probability ``self.hop_prob`` an extra walk is added in which a random
    label of that community is attached directly to the current walk
    (without predicate).

    Args:
        kg: The Knowledge Graph.

            The graph from which the neighborhoods are extracted for the
            provided entities.
        root: The root node to extract walks.
        is_reverse: True to get the parent neighbors instead of the child
            neighbors, False otherwise.
            Defaults to False.

    Returns:
        The list of walks for the root node.

    """
    # One generator for the whole call. Building a new RandomState from
    # the same seed for every draw would return the same number each time,
    # which turns ``hop_prob`` into an all-or-nothing switch.
    # NOTE: with a fixed seed the draws follow the iteration order of the
    # walks and of the hops returned by ``kg.get_hops``.
    rng = np.random.RandomState(self.random_state)

    walks: Set[Walk] = {(root,)}
    for _ in range(self.max_depth):
        # Iterate over a snapshot, as ``walks`` is mutated below.
        for walk in walks.copy():
            if is_reverse:
                hops = kg.get_hops(walk[0], True)
                for pred, obj in hops:
                    walks.add((obj, pred) + walk)
                    if obj in self.communities and rng.random() < self.hop_prob:
                        label = rng.choice(
                            self.labels_per_community[self.communities[obj]]
                        )
                        walks.add((label,) + walk)
            else:
                hops = kg.get_hops(walk[-1])
                for pred, obj in hops:
                    walks.add(walk + (pred, obj))
                    if obj in self.communities and rng.random() < self.hop_prob:
                        label = rng.choice(
                            self.labels_per_community[self.communities[obj]]
                        )
                        walks.add(walk + (label,))
            if len(hops) > 0:
                # The shorter walk is superseded by its extensions.
                walks.remove(walk)
    return list(walks)
def fit(self, kg: KG) -> None:
    """Fits the sampling strategy by running PageRank on a directed graph
    built from the provided KG.

    Every vertex of the KG that is not a predicate becomes a node, and every
    hop of such a vertex becomes an edge towards the object of the hop,
    named after the predicate. The resulting scores are stored in
    ``self._pageranks``, using ``self.alpha`` as damping.

    Args:
        kg: The Knowledge Graph.

    """
    super().fit(kg)

    # Snapshot the entities first, so the KG is not iterated while its hops
    # are being queried.
    entities = tuple(
        vertex for vertex in kg._vertices if not vertex.predicate
    )

    graph = nx.DiGraph()
    for subject in entities:
        graph.add_node(subject.name, vertex=subject)
        for pred, obj in kg.get_hops(subject):
            graph.add_edge(subject.name, obj.name, name=pred.name)

    self._pageranks = nx.pagerank(graph, alpha=self.alpha)
def sample_hop(
    self, kg: KG, walk: Walk, is_last_hop: bool, is_reverse: bool = False
) -> Optional[Hop]:
    """Samples an unvisited random hop in the (predicate, object) form,
    according to the weight of hops for a given walk.

    A hop is a candidate when ``(hop, len(walk))`` is not in
    ``self.visited``. When no candidate is left and the walk has more than
    two elements, the hop that led to the current vertex is tagged in
    ``self.visited``.

    Args:
        kg: The Knowledge Graph.
        walk: The walk with one or several vertices.
        is_last_hop: True if the next hop to be visited is the last one for
            the desired depth, False otherwise.
        is_reverse: True to get the parent neighbors instead of the child
            neighbors, False otherwise.
            Defaults to False.

    Returns:
        An unvisited hop in the (predicate, object) form, or None if every
        hop is already tagged.

    """
    depth = len(walk)
    # Reverse walks grow at the front, forward walks at the back.
    anchor = walk[0] if is_reverse else walk[-1]
    candidates = [
        hop
        for hop in kg.get_hops(anchor, is_reverse)
        if (hop, depth) not in self.visited
    ]

    if not candidates:
        if depth > 2:
            if is_reverse:
                dead_end = (walk[1], walk[0])
            else:
                dead_end = (walk[-2], walk[-1])
            self.visited.add((dead_end, depth - 2))
        return None

    # NOTE(review): a new RandomState is built from self._random_state on
    # every call, so with a fixed seed each call starts from the same
    # generator state - confirm that this is intended.
    rng = np.random.RandomState(self._random_state)
    position = rng.choice(
        range(len(candidates)), p=self.get_weights(candidates)
    )
    chosen = candidates[position]

    if is_last_hop:
        self.visited.add((chosen, depth))
    return chosen
def extract_random_walks_bfs(self, kg: KG, root: str):
    """Breadth-first search to extract all possible walks.

    The search runs for ``self.depth`` rounds. In each round every walk
    whose last vertex has at least one hop is replaced by one longer walk
    per hop; walks without hops are kept unchanged.

    Args:
        kg: The Knowledge Graph.

            The graph from which the neighborhoods are extracted for the
            provided entities.
        root: The root node.

    Returns:
        The list of the walks.

    """
    paths = {(root,)}
    for _ in range(self.depth):
        # Iterate over a snapshot, as ``paths`` is mutated while expanding.
        for path in set(paths):
            neighbours = kg.get_hops(path[-1])
            if len(neighbours) > 0:
                # The shorter walk is superseded by its extensions.
                paths.remove(path)
            paths.update(
                path + (pred, obj) for pred, obj in neighbours
            )  # type: ignore
    return list(paths)