class Walker(metaclass=abc.ABCMeta):
    """Base class for the walking strategies.

    Attributes:
        depth: The depth per entity.
        max_walks: The maximum number of walks per entity.
        sampler: The sampling strategy.
            Defaults to UniformSampler().
        n_jobs: The number of processes to use for multiprocessing. Use -1 to
            allocate as many processes as there are CPU cores available in
            the machine.
            Defaults to 1.
        is_support_remote: If true, indicate that the walking strategy can be
            used to retrieve walks via a SPARQL endpoint server.
            Defaults to True.

    """

    # Class-level placeholder kept for backward compatibility. No method of
    # this class reads it: the worker processes use the module-level ``kg``
    # global that ``_init_worker`` assigns and ``_proc`` reads.
    kg = None

    def __init__(
        self,
        depth: int,
        max_walks: Optional[int] = None,
        sampler: Optional[Sampler] = None,
        n_jobs: int = 1,
        is_support_remote: bool = True,
    ):
        self.depth = depth
        self.is_support_remote = is_support_remote
        # -1 is a shortcut for "one process per available CPU core".
        self.n_jobs = multiprocessing.cpu_count() if n_jobs == -1 else n_jobs
        self.max_walks = max_walks
        self.sampler = sampler if sampler is not None else UniformSampler()

    def extract(
        self, kg: KG, instances: List[rdflib.URIRef], verbose: bool = False
    ) -> Set[Tuple[Any, ...]]:
        """Fits the provided sampling strategy and then calls the private
        _extract method that is implemented for each of the walking
        strategies.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances to be extracted from the Knowledge Graph.
            verbose: If true, display a progress bar for the extraction of the
                walks.

        Returns:
            The set gathering the walks extracted for all the provided
            instances.

        Raises:
            RemoteNotSupported: If the Knowledge Graph is remote and the
                walking strategy does not support remote extraction.

        """
        if kg.is_remote and not self.is_support_remote:
            raise RemoteNotSupported(
                "Invalid walking strategy. Please, choose a walking strategy "
                + "that can retrieve walks via a SPARQL endpoint server."
            )
        self.sampler.fit(kg)

        # To avoid circular imports
        if "CommunityWalker" in str(self):
            self._community_detection(kg)  # type: ignore

        if kg.is_remote:
            # Runs the KG's coroutine to completion before the workers start
            # (presumably pre-fetches the hops of the instances).
            asyncio.run(kg._fill_entity_hops(instances))  # type: ignore

        # Every worker process receives the KG once through _init_worker and
        # then handles the instances one by one through _proc.
        with multiprocessing.Pool(
            self.n_jobs, self._init_worker, [kg]
        ) as pool:
            results = list(
                tqdm(
                    pool.imap_unordered(self._proc, instances),
                    total=len(instances),
                    disable=not verbose,
                )
            )

        # imap_unordered yields the per-instance dictionaries in completion
        # order, so they are merged into a single lookup table first.
        walks_by_instance = {
            key: walks for result in results for key, walks in result.items()
        }  # type: ignore

        canonical_walks = set()
        for instance in instances:
            canonical_walks.update(walks_by_instance[instance])
        return canonical_walks

    @abc.abstractmethod
    def _extract(
        self, kg: KG, instance: rdflib.URIRef
    ) -> Dict[Any, Tuple[Tuple[str, ...], ...]]:
        """Extracts the walks rooted at the provided instance.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided instance.
            instance: The instance to be extracted from the Knowledge Graph.

        Returns:
            A dictionary that must contain the provided instance as key, with
            the walks extracted for this instance as value (``extract`` looks
            the walks up by instance).

        """
        raise NotImplementedError("This must be implemented!")

    def _init_worker(self, init_kg):
        """Initializes each worker process.

        Args:
            init_kg: The Knowledge Graph to provide to each worker process.

        """
        # Stored as a module-level global of the worker process so that _proc
        # can reach it without receiving the KG with every task.
        global kg
        kg = init_kg

    def info(self):
        """Gets information related to a Walker.

        Returns:
            A friendly display of the Walker.

        """
        return (
            f"{type(self).__name__}(depth={self.depth},"
            f"max_walks={self.max_walks},"
            f"sampler={type(self.sampler).__name__},"
            f"n_jobs={self.n_jobs},"
            f"is_support_remote={self.is_support_remote})"
        )

    def print_walks(
        self,
        kg: KG,
        instances: List[rdflib.URIRef],
        filename: str,
    ) -> None:
        """Writes the walks of a Knowledge Graph to a file.

        Each walk is written on its own line, with its hops separated by
        ``-->``, and is followed by an empty line.

        Args:
            kg: The Knowledge Graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances to be extracted from the Knowledge Graph.
            filename: The path of the file the walks are written to. The file
                is created, or overwritten if it already exists.

        """
        walks = self.extract(kg, instances)
        # Every hop keeps a trailing space, which reproduces the historical
        # "a --> b --> c " layout (an empty walk yields an empty line).
        walk_strs = ["--> ".join(f"{hop} " for hop in walk) for walk in walks]

        # Explicit UTF-8 so that non-ASCII hops are written identically on
        # every platform instead of depending on the locale encoding.
        with open(filename, "w", encoding="utf-8") as f:
            for walk_str in walk_strs:
                f.write(walk_str)
                f.write("\n\n")

    def _proc(
        self, instance: rdflib.URIRef
    ) -> Dict[Any, Tuple[Tuple[str, ...], ...]]:
        """Executed by each process.

        Args:
            instance: The instance to be extracted from the Knowledge Graph.

        Returns:
            The extraction of walk by the process.

        """
        # Reads the module-level KG set by _init_worker in this process.
        global kg
        return self._extract(kg, instance)  # type:ignore
class Walker(metaclass=abc.ABCMeta):
    """Base class for the walking strategies.

    Attributes:
        depth: The depth per entity.
        walks_per_graph: The maximum number of walks per entity.
        sampler: The sampling strategy.
            Defaults to UniformSampler().

    """

    def __init__(
        self,
        depth: int,
        walks_per_graph: float,
        sampler: Optional[Sampler] = None,
    ):
        self.depth = depth
        self.walks_per_graph = walks_per_graph
        self.sampler = sampler if sampler is not None else UniformSampler()

    def extract(
        self, kg: KG, instances: List[rdflib.URIRef]
    ) -> Set[Tuple[Any, ...]]:
        """Fits the provided sampling strategy and then calls the private
        _extract method that is implemented for each of the walking
        strategies.

        Args:
            kg: The knowledge graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances to extract from the knowledge graph.

        Returns:
            The walks returned by the _extract method of the walking
            strategy for the provided instances.

        """
        self.sampler.fit(kg)
        return self._extract(kg, instances)

    @abc.abstractmethod
    def _extract(
        self, kg: KG, instances: List[rdflib.URIRef]
    ) -> Set[Tuple[Any, ...]]:
        """Extracts the walks rooted at the provided instances.

        Args:
            kg: The knowledge graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances to extract from the knowledge graph.

        Returns:
            The set of walks extracted for the provided instances, each walk
            being a tuple.

        """
        raise NotImplementedError("This must be implemented!")

    def print_walks(
        self,
        kg: KG,
        instances: List[rdflib.URIRef],
        file_name: str,
    ) -> None:
        """Writes the walks of a knowledge graph to a file.

        Each walk is written on its own line, with its hops separated by
        ``-->``, and is followed by an empty line.

        Args:
            kg: The knowledge graph.

                The graph from which the neighborhoods are extracted for the
                provided instances.
            instances: The instances to extract from the knowledge graph.
            file_name: The path of the file the walks are written to. The
                file is created, or overwritten if it already exists.

        """
        walks = self.extract(kg, instances)
        # Every hop keeps a trailing space, which reproduces the historical
        # "a --> b --> c " layout (an empty walk yields an empty line).
        walk_strs = ["--> ".join(f"{hop} " for hop in walk) for walk in walks]

        # Explicit UTF-8 so that non-ASCII hops are written identically on
        # every platform instead of depending on the locale encoding.
        with open(file_name, "w", encoding="utf-8") as f:
            for walk_str in walk_strs:
                f.write(walk_str)
                f.write("\n\n")