示例#1
0
    def _fit_transform(
        self,
        graph: Graph,
        return_dataframe: bool = True,
        verbose: bool = True
    ) -> EmbeddingResult:
        """Return node embedding from the symmetric normalized Laplacian.

        The COO entries returned by the graph are assembled into a square
        float32 sparse matrix, which is decomposed with `eigsh` asking for
        `embedding_size + 1` eigenpairs with `which="LM"`. The resulting
        eigenvectors are used as the node embedding.

        Parameters
        ------------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by
            the node names of the graph.
        verbose: bool = True
            Not used by this implementation.

        Returns
        ------------------
        The embedding result holding the eigenvectors as node embedding.
        """
        entries, values = graph.get_symmetric_normalized_laplacian_coo_matrix()
        number_of_nodes = graph.get_number_of_nodes()

        # The two columns of `entries` are used as row and column indices.
        rows = entries[:, 0]
        columns = entries[:, 1]
        laplacian = coo_matrix(
            (values, (rows, columns)),
            shape=(number_of_nodes, number_of_nodes),
            dtype=np.float32
        )

        # Only the eigenvectors are needed, the eigenvalues are discarded.
        _, eigenvectors = eigsh(
            laplacian,
            k=self._embedding_size + 1,
            which="LM",
            return_eigenvectors=True
        )

        if return_dataframe:
            node_embedding = pd.DataFrame(
                eigenvectors,
                index=graph.get_node_names()
            )
        else:
            node_embedding = eigenvectors

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embedding
        )
    def _extract_embeddings(self, graph: Graph, model: Model,
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns node and context embeddings read from the model layers.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Model
            The model from which the layer weights are read.
        return_dataframe: bool
            Whether to wrap each set of weights in a DataFrame indexed
            by the node names of the graph.

        Returns
        ------------------
        The embedding result whose node embeddings are the list
        `[node_embeddings, context_embeddings]`.
        """
        layer_names = ("node_embeddings", "context_embeddings")
        embeddings = [
            self.get_layer_weights(layer_name, model)
            for layer_name in layer_names
        ]

        if return_dataframe:
            # Both layers share the same index, so the names are read once.
            node_names = graph.get_node_names()
            embeddings = [
                pd.DataFrame(weights, index=node_names)
                for weights in embeddings
            ]

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=embeddings)
    def _extract_embeddings(self, graph: Graph,
                            model: Union[EntityRelationEmbeddingModel,
                                         ERModel],
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns node and edge type embeddings from the model.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Union[EntityRelationEmbeddingModel, ERModel]
            The model used to embed the graph. An
            `EntityRelationEmbeddingModel` exposes a single entity and a
            single relation embedding, while an `ERModel` exposes
            collections of entity and relation representations.
        return_dataframe: bool
            Whether to wrap each embedding in a DataFrame, indexed by
            node names or by unique edge type names respectively.

        Raises
        ------------------
        NotImplementedError
            If the model is neither an `EntityRelationEmbeddingModel`
            nor an `ERModel`.

        Returns
        ------------------
        The embedding result with lists of node embeddings and
        edge type embeddings.
        """
        if not isinstance(model, (EntityRelationEmbeddingModel, ERModel)):
            raise NotImplementedError(
                f"The provided model has type {type(model)}, which "
                "is not currently supported. The supported types "
                "are `EntityRelationEmbeddingModel` and `ERModel`.")

        if isinstance(model, EntityRelationEmbeddingModel):
            entity_representations = [model.entity_embeddings]
            relation_representations = [model.relation_embeddings]
        else:
            entity_representations = model.entity_representations
            relation_representations = model.relation_representations

        def to_numpy(representation):
            """Return the representation weights as a numpy array."""
            weights = representation._embeddings.weight
            # Moved to CPU and detached before the numpy conversion.
            return weights.cpu().detach().numpy()

        node_embeddings = [
            to_numpy(representation)
            for representation in entity_representations
        ]
        edge_type_embeddings = [
            to_numpy(representation)
            for representation in relation_representations
        ]

        if return_dataframe:
            node_embeddings = [
                pd.DataFrame(matrix, index=graph.get_node_names())
                for matrix in node_embeddings
            ]
            edge_type_embeddings = [
                pd.DataFrame(matrix,
                             index=graph.get_unique_edge_type_names())
                for matrix in edge_type_embeddings
            ]

        return EmbeddingResult(embedding_method_name=self.model_name(),
                               node_embeddings=node_embeddings,
                               edge_type_embeddings=edge_type_embeddings)
示例#4
0
 def _fit_transform(
     self,
     graph: Graph,
     return_dataframe: bool = True,
     verbose: bool = True,
 ) -> EmbeddingResult:
     """Return node embedding computed by the wrapped model.

     Parameters
     ---------------
     graph: Graph
         The graph to embed.
     return_dataframe: bool = True
         Whether to wrap the embedding in a DataFrame indexed by
         the node names of the graph.
     verbose: bool = True
         Forwarded to the `fit_transform` method of the wrapped model.

     Returns
     ---------------
     The embedding result holding the transposed model output.
     """
     fitted = self._model.fit_transform(graph, verbose=verbose)
     # The model output is transposed before being used as node embedding.
     node_embedding = fitted.T

     if return_dataframe:
         node_names = graph.get_node_names()
         node_embedding = pd.DataFrame(node_embedding, index=node_names)

     return EmbeddingResult(
         embedding_method_name=self.model_name(),
         node_embeddings=node_embedding,
     )
示例#5
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding from the dense modularity matrix.

        The dense modularity matrix of the graph is decomposed with `eigh`
        and only the eigenvectors at the last `embedding_size` positions
        are retained as node embedding.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by
            the node names of the graph.
        verbose: bool = True
            Not used by this implementation.

        Returns
        ---------------
        The embedding result holding the selected eigenvectors.
        """
        number_of_nodes = graph.get_number_of_nodes()
        # `subset_by_index` replaces the `eigvals` keyword, which was
        # deprecated in SciPy 1.5 and later removed. Both take the same
        # inclusive (start, end) pair of 0-based indices over the
        # eigenvalues in ascending order.
        embedding = eigh(
            graph.get_dense_modularity_matrix(),
            subset_by_index=(number_of_nodes - self._embedding_size,
                             number_of_nodes - 1))[1]

        if return_dataframe:
            node_names = graph.get_node_names()
            embedding = pd.DataFrame(embedding, index=node_names)
        return EmbeddingResult(embedding_method_name=self.model_name(),
                               node_embeddings=embedding)
示例#6
0
 def _fit_transform(
     self,
     graph: Graph,
     return_dataframe: bool = True,
     verbose: bool = True,
 ) -> EmbeddingResult:
     """Return the node embeddings computed by the wrapped model.

     Parameters
     ---------------
     graph: Graph
         The graph to embed.
     return_dataframe: bool = True
         Whether to wrap each embedding in a DataFrame indexed by
         the node names of the graph.
     verbose: bool = True
         Not used by this implementation.

     Returns
     ---------------
     The embedding result, whose node embeddings are always a list.
     """
     fitted = self._model.fit_transform(graph)
     # A single embedding is normalized to a one-element list.
     node_embeddings = fitted if isinstance(fitted, list) else [fitted]

     if return_dataframe:
         node_names = graph.get_node_names()
         node_embeddings = [
             pd.DataFrame(matrix, index=node_names)
             for matrix in node_embeddings
         ]

     return EmbeddingResult(
         embedding_method_name=self.model_name(),
         node_embeddings=node_embeddings,
     )
示例#7
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding.

        The model built by `_build_model` is fitted on the graph, after
        converting it with `convert_ensmallen_graph_to_networkx_graph`,
        and its `get_embedding` output is used as node embedding.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to return a DataFrame.
        verbose: bool = True
            Whether to show a loading bar.
            Not used by this implementation.

        Raises
        ---------------
        NotImplementedError
            If `_build_model` does not return an `Estimator` instance.
        NotImplementedError
            If the `get_embedding` method of the model does not
            return a numpy array.

        Returns
        ---------------
        The embedding result holding the node embedding.
        """
        # `_build_model` is expected to return an instance, not a class.
        model: Estimator = self._build_model()

        if not isinstance(model, Estimator):
            raise NotImplementedError(
                "The model created with the `_build_model` in the child "
                f"class {self.__class__.__name__} for the model {self.model_name()} "
                f"in the library {self.library_name()} did not return a "
                f"Estimator but an object of type {type(model)}. "
                "It is not clear what to do with this object.")

        model.fit(convert_ensmallen_graph_to_networkx_graph(graph))

        node_embeddings: np.ndarray = model.get_embedding()

        if not isinstance(node_embeddings, np.ndarray):
            # Report the type of the unexpected embedding object, which is
            # the offending value here, rather than the type of the model.
            raise NotImplementedError(
                "The model created with the `get_embedding` in the child "
                f"class {self.__class__.__name__} for the model {self.model_name()} "
                f"in the library {self.library_name()} did not return a "
                f"Numpy Array but an object of type {type(node_embeddings)}. "
                "It is not clear what to do with this object.")

        if return_dataframe:
            node_embeddings = pd.DataFrame(
                node_embeddings, index=graph.get_node_names())

        return EmbeddingResult(embedding_method_name=self.model_name(),
                               node_embeddings=node_embeddings)
示例#8
0
    def _fit_transform(
        self,
        graph: Graph,
        return_dataframe: bool = True,
        verbose: bool = True,
    ) -> EmbeddingResult:
        """Return node embedding from the log-normalized co-occurrences.

        The COO entries returned by the graph, computed with the stored
        walk parameters, are assembled into a square float32 sparse
        matrix, which is then reduced to `embedding_size` components
        with a `TruncatedSVD` model.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by
            the node names of the graph.
        verbose: bool = True
            Not used by this implementation.

        Returns
        ---------------
        The embedding result holding the transformed matrix.
        """
        entries, values = graph.get_log_normalized_cooccurrence_coo_matrix(
            **self._walk_parameters)
        number_of_nodes = graph.get_number_of_nodes()

        # The two columns of `entries` are used as row and column indices.
        cooccurrences = coo_matrix(
            (values, (entries[:, 0], entries[:, 1])),
            shape=(number_of_nodes, number_of_nodes),
            dtype=np.float32,
        )

        svd = TruncatedSVD(
            n_components=self._embedding_size,
            random_state=self._random_state,
        )
        svd.fit(cooccurrences)
        node_embedding = svd.transform(cooccurrences)

        if return_dataframe:
            node_embedding = pd.DataFrame(
                node_embedding,
                index=graph.get_node_names(),
            )

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embedding,
        )
示例#9
0
    def _fit_transform(
        self,
        graph: Graph,
        return_dataframe: bool = True,
        verbose: bool = True,
    ) -> EmbeddingResult:
        """Return node and edge type embeddings from the wrapped model.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embeddings in DataFrames, indexed by
            node names and by unique edge type names respectively.
        verbose: bool = True
            Forwarded to the `fit_transform` method of the wrapped model.

        Returns
        ---------------
        The embedding result with node and edge type embeddings.
        """
        node_matrix, edge_type_matrix = self._model.fit_transform(
            graph,
            epochs=self._epochs,
            learning_rate=self._learning_rate,
            learning_rate_decay=self._learning_rate_decay,
            verbose=verbose,
        )

        if not return_dataframe:
            return EmbeddingResult(
                embedding_method_name=self.model_name(),
                node_embeddings=node_matrix,
                edge_type_embeddings=edge_type_matrix,
            )

        node_frame = pd.DataFrame(
            node_matrix,
            index=graph.get_node_names(),
        )
        edge_type_frame = pd.DataFrame(
            edge_type_matrix,
            index=graph.get_unique_edge_type_names(),
        )
        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_frame,
            edge_type_embeddings=edge_type_frame,
        )
示例#10
0
    def _extract_embeddings(self, graph: Graph, model: Model,
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns node and edge type embeddings read from the model layers.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Model
            The model from which the layer weights are read.
        return_dataframe: bool
            Whether to wrap the weights in DataFrames, indexed by
            node names and by unique edge type names respectively.

        Returns
        ------------------
        The embedding result with `node_embeddings` and
        `edge_type_embeddings`.
        """
        if return_dataframe:
            node_names = graph.get_node_names()
            edge_type_names = graph.get_unique_edge_type_names()
        else:
            node_names = edge_type_names = None

        # The first row of the edge type layer is dropped when the graph
        # has unknown edge types.
        # NOTE(review): presumably that row is reserved for the unknown
        # edge type - confirm against `get_layer_weights`.
        layers = (
            ("node_embeddings", node_names, False),
            ("edge_type_embeddings", edge_type_names,
             graph.has_unknown_edge_types()),
        )

        result = {}
        for layer_name, names, drop_first_row in layers:
            weights = self.get_layer_weights(layer_name,
                                             model,
                                             drop_first_row=drop_first_row)
            if return_dataframe:
                weights = pd.DataFrame(weights, index=names)
            result[layer_name] = weights

        return EmbeddingResult(embedding_method_name=self.model_name(),
                               **result)
示例#11
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return left and right node embeddings from an SVD of a metric.

        The matrix associated to the configured metric is decomposed into
        `int(embedding_size / 2)` components: matrices assembled from COO
        entries use `sparse_svds`, while matrices the graph returns directly
        use `randomized_svd`. Both factors are scaled by the square root
        of the singular values.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap both embeddings in DataFrames indexed by
            the node names of the graph.
        verbose: bool = True
            Forwarded to the shared ancestors metrics, unused otherwise.

        Raises
        ---------------
        NotImplementedError
            If the configured metric is not one of the supported ones.

        Returns
        ---------------
        The embedding result whose node embeddings are the list
        `[left_embedding, right_embedding]`.
        """
        # Metrics whose graph method returns the COO entries and values.
        coo_method_names = {
            "Jaccard":
            "get_jaccard_coo_matrix",
            "Laplacian":
            "get_laplacian_coo_matrix",
            "Left Normalized Laplacian":
            "get_left_normalized_laplacian_coo_matrix",
            "Right Normalized Laplacian":
            "get_right_normalized_laplacian_coo_matrix",
            "Symmetric Normalized Laplacian":
            "get_symmetric_normalized_laplacian_coo_matrix",
            "Neighbours Intersection size":
            "get_neighbours_intersection_size_coo_matrix",
            "Adamic-Adar":
            "get_adamic_adar_coo_matrix",
        }
        # Metrics computed from a breadth first search from the root node.
        ancestors_method_names = {
            "Ancestors Jaccard":
            "get_shared_ancestors_jaccard_adjacency_matrix",
            "Ancestors size":
            "get_shared_ancestors_size_adjacency_matrix",
        }

        metric = self._metric
        dense_matrix = None

        if metric in coo_method_names:
            entries, values = getattr(graph, coo_method_names[metric])()
        elif metric == "Adjacency":
            # Every directed edge gets a unitary weight.
            entries = graph.get_directed_edge_node_ids()
            values = np.ones(graph.get_number_of_directed_edges())
        elif metric == "Modularity":
            dense_matrix = graph.get_dense_modularity_matrix()
        elif metric in ancestors_method_names:
            dense_matrix = getattr(graph, ancestors_method_names[metric])(
                graph.get_breadth_first_search_from_node_names(
                    src_node_name=self._root_node_name,
                    compute_predecessors=True),
                verbose=verbose)
        else:
            raise NotImplementedError(f"The provided metric {self._metric} "
                                      "is not currently supported.")

        if dense_matrix is None:
            number_of_nodes = graph.get_number_of_nodes()
            sparse_matrix = coo_matrix(
                (values, (entries[:, 0], entries[:, 1])),
                shape=(number_of_nodes, number_of_nodes),
                dtype=np.float32)
            left, singular_values, right_t = sparse_svds(
                sparse_matrix, k=int(self._embedding_size / 2))
        else:
            left, singular_values, right_t = randomized_svd(
                dense_matrix, n_components=int(self._embedding_size / 2))

        # Diagonal matrix with the square roots of the singular values.
        scaling = np.diagflat(np.sqrt(singular_values))
        left_embedding = np.dot(left, scaling)
        right_embedding = np.dot(right_t.T, scaling)

        if return_dataframe:
            node_names = graph.get_node_names()
            left_embedding = pd.DataFrame(left_embedding, index=node_names)
            right_embedding = pd.DataFrame(right_embedding, index=node_names)

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=[left_embedding, right_embedding])