def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding from the symmetric normalized Laplacian.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    edge_node_ids, edge_weights = (
        graph.get_symmetric_normalized_laplacian_coo_matrix()
    )
    number_of_nodes = graph.get_number_of_nodes()
    sources = edge_node_ids[:, 0]
    destinations = edge_node_ids[:, 1]

    # Square sparse matrix with one row and one column per node.
    laplacian = coo_matrix(
        (edge_weights, (sources, destinations)),
        shape=(number_of_nodes, number_of_nodes),
        dtype=np.float32,
    )

    # NOTE(review): one eigenpair more than the embedding size is requested
    # and every returned eigenvector is kept -- confirm this is intended.
    _, eigenvectors = eigsh(
        laplacian,
        k=self._embedding_size + 1,
        which="LM",
        return_eigenvectors=True,
    )

    if return_dataframe:
        eigenvectors = pd.DataFrame(
            eigenvectors,
            index=graph.get_node_names(),
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=eigenvectors,
    )
def _extract_embeddings(
    self,
    graph: Graph,
    model: Model,
    return_dataframe: bool
) -> EmbeddingResult:
    """Return the node and context embeddings stored in the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Model
        The model used to embed the graph.
    return_dataframe: bool
        Whether to return DataFrames (indexed by node names)
        or the raw layer weights.
    """
    # Weights are read in this order: node layer first, context layer second.
    layer_weights = [
        self.get_layer_weights(layer_name, model)
        for layer_name in ("node_embeddings", "context_embeddings")
    ]

    if return_dataframe:
        node_names = graph.get_node_names()
        layer_weights = [
            pd.DataFrame(weights, index=node_names)
            for weights in layer_weights
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=layer_weights,
    )
def _extract_embeddings(
    self,
    graph: Graph,
    model: Union[EntityRelationEmbeddingModel, ERModel],
    return_dataframe: bool
) -> EmbeddingResult:
    """Return node and edge type embeddings stored in the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Union[EntityRelationEmbeddingModel, ERModel]
        The trained model from which the embeddings are read.
    return_dataframe: bool
        Whether to return DataFrames or numpy arrays.

    Raises
    ------------------
    NotImplementedError
        If the model is neither an `EntityRelationEmbeddingModel`
        nor an `ERModel`.
    """
    def as_numpy(representation):
        # Move the weight tensor to the CPU and detach it before converting.
        return representation._embeddings.weight.cpu().detach().numpy()

    if isinstance(model, EntityRelationEmbeddingModel):
        # This model type exposes a single representation per kind.
        node_representations = [model.entity_embeddings]
        edge_type_representations = [model.relation_embeddings]
    elif isinstance(model, ERModel):
        node_representations = model.entity_representations
        edge_type_representations = model.relation_representations
    else:
        raise NotImplementedError(
            f"The provided model has type {type(model)}, which "
            "is not currently supported. The supported types "
            "are `EntityRelationEmbeddingModel` and `ERModel`."
        )

    node_embeddings = [
        as_numpy(representation)
        for representation in node_representations
    ]
    edge_type_embeddings = [
        as_numpy(representation)
        for representation in edge_type_representations
    ]

    if return_dataframe:
        node_embeddings = [
            pd.DataFrame(array, index=graph.get_node_names())
            for array in node_embeddings
        ]
        edge_type_embeddings = [
            pd.DataFrame(array, index=graph.get_unique_edge_type_names())
            for array in edge_type_embeddings
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embeddings,
        edge_type_embeddings=edge_type_embeddings,
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Forwarded to the underlying model.
    """
    model_output = self._model.fit_transform(graph, verbose=verbose)
    # The model output is transposed before being exposed.
    transposed = model_output.T

    if not return_dataframe:
        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=transposed,
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=pd.DataFrame(
            transposed,
            index=graph.get_node_names(),
        ),
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding from the dense modularity matrix.

    The embedding is made of the eigenvectors associated with the
    `embedding_size` largest eigenvalues of the dense modularity matrix.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    number_of_nodes = graph.get_number_of_nodes()

    # `eigh` sorts eigenvalues in ascending order, so the last
    # `embedding_size` indices (inclusive range) select the largest ones.
    # `subset_by_index` replaces the deprecated (and, in recent SciPy
    # releases, removed) `eigvals` keyword with identical semantics.
    embedding = eigh(
        graph.get_dense_modularity_matrix(),
        subset_by_index=(
            number_of_nodes - self._embedding_size,
            number_of_nodes - 1,
        ),
    )[1]

    if return_dataframe:
        node_names = graph.get_node_names()
        embedding = pd.DataFrame(embedding, index=node_names)

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embedding,
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap each embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    fitted = self._model.fit_transform(graph)
    # A single embedding is normalized to a one-element list.
    embeddings = fitted if isinstance(fitted, list) else [fitted]

    if return_dataframe:
        index = graph.get_node_names()
        embeddings = [
            pd.DataFrame(embedding, index=index)
            for embedding in embeddings
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embeddings,
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to return a DataFrame.
    verbose: bool = True
        Whether to show a loading bar.
        Not used by this implementation.

    Raises
    ---------------
    NotImplementedError
        If `_build_model` does not return an `Estimator`, or if the
        model `get_embedding` method does not return a numpy array.
    """
    model = self._build_model()

    if not isinstance(model, Estimator):
        raise NotImplementedError(
            "The model created with the `_build_model` in the child "
            f"class {self.__class__.__name__} for the model {self.model_name()} "
            f"in the library {self.library_name()} did not return a "
            f"Estimator but an object of type {type(model)}. "
            "It is not clear what to do with this object."
        )

    # The estimator is fitted on a networkx version of the graph.
    model.fit(convert_ensmallen_graph_to_networkx_graph(graph))

    node_embeddings = model.get_embedding()

    if not isinstance(node_embeddings, np.ndarray):
        # Report the type of the unexpected embedding object, not of the model.
        raise NotImplementedError(
            "The model created with the `get_embedding` in the child "
            f"class {self.__class__.__name__} for the model {self.model_name()} "
            f"in the library {self.library_name()} did not return a "
            f"Numpy Array but an object of type {type(node_embeddings)}. "
            "It is not clear what to do with this object."
        )

    if return_dataframe:
        node_embeddings = pd.DataFrame(
            node_embeddings,
            index=graph.get_node_names()
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embeddings
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding from the log-normalized co-occurrence matrix.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    edge_node_ids, edge_weights = (
        graph.get_log_normalized_cooccurrence_coo_matrix(
            **self._walk_parameters
        )
    )
    rows, columns = edge_node_ids[:, 0], edge_node_ids[:, 1]

    # Square sparse matrix with one row and one column per node.
    cooccurrence = coo_matrix(
        (edge_weights, (rows, columns)),
        shape=(
            graph.get_number_of_nodes(),
            graph.get_number_of_nodes(),
        ),
        dtype=np.float32,
    )

    svd = TruncatedSVD(
        n_components=self._embedding_size,
        random_state=self._random_state,
    )
    # Fit and projection are kept as two separate steps.
    svd.fit(cooccurrence)
    projected = svd.transform(cooccurrence)

    if return_dataframe:
        projected = pd.DataFrame(projected, index=graph.get_node_names())

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=projected,
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node and edge type embeddings.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embeddings in DataFrames, indexed by node
        names and unique edge type names respectively.
    verbose: bool = True
        Forwarded to the underlying model.
    """
    training_parameters = dict(
        epochs=self._epochs,
        learning_rate=self._learning_rate,
        learning_rate_decay=self._learning_rate_decay,
        verbose=verbose,
    )
    nodes, edge_types = self._model.fit_transform(
        graph,
        **training_parameters
    )

    if return_dataframe:
        nodes = pd.DataFrame(nodes, index=graph.get_node_names())
        edge_types = pd.DataFrame(
            edge_types,
            index=graph.get_unique_edge_type_names(),
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=nodes,
        edge_type_embeddings=edge_types,
    )
def _extract_embeddings(
    self,
    graph: Graph,
    model: Model,
    return_dataframe: bool
) -> EmbeddingResult:
    """Return the node and edge type embeddings stored in the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Model
        The model used to embed the graph.
    return_dataframe: bool
        Whether to return DataFrames or the raw layer weights.
    """
    # Each entry is (layer name, index names, whether to drop the first row).
    # The index names are only requested when DataFrames are to be built.
    if return_dataframe:
        layers = (
            ("node_embeddings", graph.get_node_names(), False),
            (
                "edge_type_embeddings",
                graph.get_unique_edge_type_names(),
                graph.has_unknown_edge_types(),
            ),
        )
    else:
        layers = (
            ("node_embeddings", None, False),
            ("edge_type_embeddings", None, graph.has_unknown_edge_types()),
        )

    result = {}
    for layer_name, index_names, drop_first_row in layers:
        weights = self.get_layer_weights(
            layer_name,
            model,
            drop_first_row=drop_first_row,
        )
        if return_dataframe:
            weights = pd.DataFrame(weights, index=index_names)
        result[layer_name] = weights

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        **result
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return left and right node embeddings from an SVD of the metric matrix.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embeddings in DataFrames indexed by node names.
    verbose: bool = True
        Forwarded to the shared-ancestors metrics.

    Raises
    ---------------
    NotImplementedError
        If the configured metric is not one of the supported ones.
    """
    # Metrics whose graph getter returns an (edges, weights) pair.
    coo_getter_names = {
        "Jaccard": "get_jaccard_coo_matrix",
        "Laplacian": "get_laplacian_coo_matrix",
        "Left Normalized Laplacian":
            "get_left_normalized_laplacian_coo_matrix",
        "Right Normalized Laplacian":
            "get_right_normalized_laplacian_coo_matrix",
        "Symmetric Normalized Laplacian":
            "get_symmetric_normalized_laplacian_coo_matrix",
        "Neighbours Intersection size":
            "get_neighbours_intersection_size_coo_matrix",
        "Adamic-Adar": "get_adamic_adar_coo_matrix",
    }
    # Metrics computed from a breadth first search rooted at the root node.
    ancestors_getter_names = {
        "Ancestors Jaccard": "get_shared_ancestors_jaccard_adjacency_matrix",
        "Ancestors size": "get_shared_ancestors_size_adjacency_matrix",
    }

    metric = self._metric
    dense_matrix = None

    if metric in coo_getter_names:
        edges, weights = getattr(graph, coo_getter_names[metric])()
    elif metric in ancestors_getter_names:
        dense_matrix = getattr(graph, ancestors_getter_names[metric])(
            graph.get_breadth_first_search_from_node_names(
                src_node_name=self._root_node_name,
                compute_predecessors=True,
            ),
            verbose=verbose,
        )
    elif metric == "Modularity":
        dense_matrix = graph.get_dense_modularity_matrix()
    elif metric == "Adjacency":
        # Every directed edge gets unit weight.
        edges = graph.get_directed_edge_node_ids()
        weights = np.ones(graph.get_number_of_directed_edges())
    else:
        raise NotImplementedError(f"The provided metric {self._metric} "
                                  "is not currently supported.")

    # Half of the embedding size goes to each of the two embeddings.
    number_of_components = int(self._embedding_size / 2)

    if dense_matrix is None:
        sparse_matrix = coo_matrix(
            (weights, (edges[:, 0], edges[:, 1])),
            shape=(
                graph.get_number_of_nodes(),
                graph.get_number_of_nodes(),
            ),
            dtype=np.float32,
        )
        U, sigmas, Vt = sparse_svds(sparse_matrix, k=number_of_components)
    else:
        U, sigmas, Vt = randomized_svd(
            dense_matrix,
            n_components=number_of_components,
        )

    # Both sides are scaled by the square root of the singular values.
    root_sigmas = np.diagflat(np.sqrt(sigmas))
    left_embedding = np.dot(U, root_sigmas)
    right_embedding = np.dot(Vt.T, root_sigmas)

    if return_dataframe:
        node_names = graph.get_node_names()
        left_embedding = pd.DataFrame(left_embedding, index=node_names)
        right_embedding = pd.DataFrame(right_embedding, index=node_names)

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=[left_embedding, right_embedding],
    )