def _build_model(self, graph: Graph) -> Model:
    """Build and compile the SkipGram model for the provided graph.

    Parameters
    ------------------
    graph: Graph
        The graph whose number of nodes sizes both the node embedding
        layer and the vocabulary of the loss layer.

    Returns
    ------------------
    The compiled model, taking as inputs the contextual terms followed
    by the central terms.
    """
    number_of_nodes = graph.get_number_of_nodes()
    context_size = self._window_size * 2

    # Input with the central terms: a single node ID per sample.
    central_terms = Input((1, ), dtype=tf.int32)

    # Input with the contextual terms: a window on each side of the centre.
    contextual_terms = Input((context_size, ), dtype=tf.int32)

    # Embedding of the central term, flattened to drop the unitary axis.
    node_embedding_layer = Embedding(
        input_dim=number_of_nodes,
        output_dim=self._embedding_size,
        input_length=1,
        name=self.NODE_EMBEDDING,
    )
    central_term_embedding = Flatten()(node_embedding_layer(central_terms))

    # This layer also executes the loss function, which is why the model
    # is compiled below with an optimizer only.
    nce_layer = NoiseContrastiveEstimation(
        vocabulary_size=number_of_nodes,
        embedding_size=self._embedding_size,
        number_of_negative_samples=self._number_of_negative_samples,
        positive_samples=context_size,
    )
    output = nce_layer((central_term_embedding, contextual_terms))

    model = Model(
        inputs=[contextual_terms, central_terms],
        outputs=output,
        name=self.model_name()
    )
    model.compile(optimizer=self._optimizer)
    return model
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding obtained from the graph Laplacian eigenvectors.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    edges, weights = graph.get_symmetric_normalized_laplacian_coo_matrix()
    number_of_nodes = graph.get_number_of_nodes()

    # Sparse (nodes x nodes) symmetric normalized Laplacian.
    laplacian = coo_matrix(
        (weights, (edges[:, 0], edges[:, 1])),
        shape=(number_of_nodes, number_of_nodes),
        dtype=np.float32
    )

    # NOTE(review): `embedding_size + 1` eigenvectors are requested and all
    # of them are returned - confirm whether one was meant to be dropped.
    _, embedding = eigsh(
        laplacian,
        k=self._embedding_size + 1,
        which="LM",
        return_eigenvectors=True
    )

    if return_dataframe:
        embedding = pd.DataFrame(embedding, index=graph.get_node_names())

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embedding
    )
def _fit(
    self,
    graph: Graph,
    support: Optional[Graph] = None,
    node_features: Optional[List[np.ndarray]] = None,
    node_type_features: Optional[List[np.ndarray]] = None,
    edge_features: Optional[List[np.ndarray]] = None,
):
    """Run fitting on the provided graph.

    Parameters
    --------------------
    graph: Graph
        The graph whose edges are used as positive training samples.
    support: Optional[Graph] = None
        The graph describing the topological structure that also
        includes the above graph. When None, `graph` itself is used.
    node_features: Optional[List[np.ndarray]] = None
        The node features to use.
    node_type_features: Optional[List[np.ndarray]] = None
        The node type features to use. Not used by this implementation.
    edge_features: Optional[List[np.ndarray]] = None
        The edge features to use. When edge metrics are enabled, these
        are replaced by the metrics computed on the support graph.
    """
    transformer = EdgePredictionTransformer(
        method=self._edge_embedding_method,
        aligned_node_mapping=True
    )
    transformer.fit(node_features)

    if support is None:
        support = graph

    # Negative edges are sampled proportionally to the positive ones.
    number_of_negative_samples = int(math.ceil(
        graph.get_number_of_edges() * self._training_unbalance_rate
    ))
    heterogeneous_only = self._training_sample_only_edges_with_heterogeneous_node_types
    negative_graph = graph.sample_negative_graph(
        number_of_negative_samples=number_of_negative_samples,
        random_state=self._random_state,
        sample_only_edges_with_heterogeneous_node_types=heterogeneous_only,
        use_zipfian_sampling=self._use_zipfian_sampling
    )

    if self._use_edge_metrics:
        # The support is stored on the instance, then the metrics of the
        # positive edges are stacked on top of those of the negative ones.
        self._support = support
        positive_metrics = support.get_all_edge_metrics(
            normalize=True,
            subgraph=graph,
        )
        negative_metrics = support.get_all_edge_metrics(
            normalize=True,
            subgraph=negative_graph,
        )
        edge_features = np.vstack((positive_metrics, negative_metrics))

    training_arguments = transformer.transform(
        positive_graph=graph,
        negative_graph=negative_graph,
        edge_features=edge_features,
        shuffle=True,
        random_state=self._random_state
    )
    self._model_instance.fit(*training_arguments)
def _build_model(self, graph: Graph) -> Model:
    """Build and compile the CBOW model for the provided graph.

    Parameters
    ------------------
    graph: Graph
        The graph whose number of nodes sizes both the node embedding
        layer and the vocabulary of the loss layer.

    Returns
    ------------------
    The compiled model, taking as inputs the contextual terms followed
    by the central terms.
    """
    number_of_nodes = graph.get_number_of_nodes()
    context_size = self._window_size * 2

    # Input with the central terms: a single node ID per sample.
    central_terms = Input((1, ), dtype=tf.int32)

    # Input with the contextual terms: a window on each side of the centre.
    contextual_terms = Input((context_size, ), dtype=tf.int32)

    # The context is summarised by averaging its node embeddings.
    context_embedding_layer = Embedding(
        input_dim=number_of_nodes,
        output_dim=self._embedding_size,
        input_length=context_size,
        name="node_embedding",
    )
    average_context_embedding = GlobalAveragePooling1D()(
        context_embedding_layer(contextual_terms)
    )

    # This layer also executes the loss function, which is why the model
    # is compiled below with an optimizer only.
    loss_layer = SampledSoftmax(
        vocabulary_size=number_of_nodes,
        embedding_size=self._embedding_size,
        number_of_negative_samples=self._number_of_negative_samples,
    )
    sampled_softmax = loss_layer((average_context_embedding, central_terms))

    model = Model(
        inputs=[contextual_terms, central_terms],
        outputs=sampled_softmax,
        name=self.model_name()
    )
    model.compile(optimizer=self._optimizer)
    return model
def __init__(
    self,
    graph: Graph,
    use_node_types: bool = False,
    use_edge_metrics: bool = False,
    batch_size: int = 2**10,
    negative_samples_rate: float = 0.5,
    avoid_false_negatives: bool = False,
    graph_to_avoid: Graph = None,
    sample_only_edges_with_heterogeneous_node_types: bool = False,
    random_state: int = 42
):
    """Create new EdgePredictionSequence object.

    Parameters
    --------------------------------
    graph: Graph
        The graph from which to sample the edges.
    use_node_types: bool = False
        Whether to return the node types.
    use_edge_metrics: bool = False
        Whether to return the edge metrics.
    batch_size: int = 2**10
        The batch size to use.
    negative_samples_rate: float = 0.5
        Factor of negatives to use in every batch.
        For example, with a batch size of 128 and a rate of 0.5,
        there will be 64 positives and 64 negatives.
    avoid_false_negatives: bool = False
        Whether to filter out false negatives.
        Enabling this will slow down the batch generation while
        (likely) not introducing any significant gain to the
        model performance.
    graph_to_avoid: Graph = None
        Graph to avoid when generating the edges, for instance the
        validation component of the graph.
    sample_only_edges_with_heterogeneous_node_types: bool = False
        Whether to only sample edges between heterogeneous node types.
        This may be useful when training a model to predict between
        two portions in a bipartite graph.
    random_state: int = 42
        The random_state to use to make extraction reproducible.

    Raises
    --------------------------------
    ValueError
        If the provided graph does not have any edge.
    """
    if not graph.has_edges():
        raise ValueError(
            f"An empty instance of graph {graph.get_name()} was provided!")

    # Source graph and what should be returned for each batch.
    self._graph = graph
    self._use_node_types = use_node_types
    self._use_edge_metrics = use_edge_metrics

    # Negative sampling configuration.
    self._negative_samples_rate = negative_samples_rate
    self._avoid_false_negatives = avoid_false_negatives
    self._graph_to_avoid = graph_to_avoid
    self._sample_only_edges_with_heterogeneous_node_types = (
        sample_only_edges_with_heterogeneous_node_types
    )
    self._random_state = random_state

    self._current_index = 0

    super().__init__(
        sample_number=graph.get_number_of_directed_edges(),
        batch_size=batch_size,
    )
def _get_class_weights(self, graph: Graph) -> Dict[int, float]:
    """Return the weight of each node type ID of the provided graph.

    The weight of a node type is the number of nodes divided by the
    count of that node type and then by the number of node types.

    Parameters
    ------------------
    graph: Graph
        The graph whose node type counts are used.
    """
    total_nodes = graph.get_number_of_nodes()
    total_node_types = graph.get_number_of_node_types()
    type_counts = graph.get_node_type_id_counts_hashmap()

    class_weights = {}
    for node_type_id, count in type_counts.items():
        class_weights[node_type_id] = total_nodes / count / total_node_types
    return class_weights
def _get_class_weights(self, graph: Graph) -> Dict[int, float]:
    """Return the weight of each edge type ID of the provided graph.

    The weight of an edge type is the number of directed edges divided
    by the count of that edge type and then by the number of edge types.

    Parameters
    ------------------
    graph: Graph
        The graph whose edge type counts are used.
    """
    total_directed_edges = graph.get_number_of_directed_edges()
    total_edge_types = graph.get_number_of_edge_types()
    type_counts = graph.get_edge_type_id_counts_hashmap()

    class_weights = {}
    for edge_type_id, count in type_counts.items():
        class_weights[edge_type_id] = (
            total_directed_edges / count / total_edge_types
        )
    return class_weights
def _get_model_training_output(
    self,
    graph: Graph,
) -> Optional[np.ndarray]:
    """Return the node-type labels of the graph for the current task.

    Multilabel tasks get the one-hot encoded node types, binary tasks
    the boolean node type IDs, and every other task the single-label
    node type IDs.

    Parameters
    ------------------
    graph: Graph
        The graph from which to extract the node type labels.
    """
    if self.is_multilabel_prediction_task():
        labels = graph.get_one_hot_encoded_node_types()
    elif self.is_binary_prediction_task():
        labels = graph.get_boolean_node_type_ids()
    else:
        labels = graph.get_single_label_node_type_ids()
    return labels
def _extract_embeddings(
    self,
    graph: Graph,
    model: Union[EntityRelationEmbeddingModel, ERModel],
    return_dataframe: bool
) -> EmbeddingResult:
    """Return the node and edge type embeddings held by the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Union[EntityRelationEmbeddingModel, ERModel]
        The model used to embed the graph.
    return_dataframe: bool
        Whether to return DataFrames instead of numpy arrays.

    Raises
    ------------------
    NotImplementedError
        If the model is neither an `EntityRelationEmbeddingModel`
        nor an `ERModel`.
    """
    if isinstance(model, EntityRelationEmbeddingModel):
        # This model kind exposes a single representation per kind.
        entity_representations = [model.entity_embeddings]
        relation_representations = [model.relation_embeddings]
    elif isinstance(model, ERModel):
        entity_representations = model.entity_representations
        relation_representations = model.relation_representations
    else:
        raise NotImplementedError(
            f"The provided model has type {type(model)}, which "
            "is not currently supported. The supported types "
            "are `EntityRelationEmbeddingModel` and `ERModel`.")

    def to_numpy(representation):
        # Move the weights to CPU and detach them before converting.
        return representation._embeddings.weight.cpu().detach().numpy()

    node_embeddings = [
        to_numpy(representation)
        for representation in entity_representations
    ]
    edge_type_embeddings = [
        to_numpy(representation)
        for representation in relation_representations
    ]

    if return_dataframe:
        node_embeddings = [
            pd.DataFrame(weights, index=graph.get_node_names())
            for weights in node_embeddings
        ]
        edge_type_embeddings = [
            pd.DataFrame(weights, index=graph.get_unique_edge_type_names())
            for weights in edge_type_embeddings
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embeddings,
        edge_type_embeddings=edge_type_embeddings
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding from the dense modularity matrix eigenvectors.

    The embedding is made of the eigenvectors associated with the
    `embedding_size` largest eigenvalues of the modularity matrix.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    number_of_nodes = graph.get_number_of_nodes()

    # `eigh` sorts eigenvalues in ascending order, so the last
    # `embedding_size` indices select the largest ones.
    # `subset_by_index` replaces the `eigvals` keyword, which SciPy
    # deprecated in version 1.5.
    _, embedding = eigh(
        graph.get_dense_modularity_matrix(),
        subset_by_index=[
            number_of_nodes - self._embedding_size,
            number_of_nodes - 1
        ]
    )

    if return_dataframe:
        embedding = pd.DataFrame(embedding, index=graph.get_node_names())

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embedding
    )
def _get_model_training_input(
    self,
    graph: Graph,
    support: Graph,
    node_features: Optional[List[np.ndarray]] = None,
    node_type_features: Optional[List[np.ndarray]] = None,
    edge_features: Optional[List[np.ndarray]] = None,
) -> Tuple[Union[np.ndarray, Type[Sequence]]]:
    """Return the tuple of inputs to be used to train the model.

    The tuple contains, in this order and only when available: the
    kernel obtained from the support graph, each of the node features
    and the node IDs of the graph (when node embedding is used).

    Parameters
    ------------------
    graph: Graph
        The graph whose node IDs are used when node embedding is enabled.
    support: Graph
        The graph to be converted into the kernel.
    node_features: Optional[List[np.ndarray]] = None
        The node features to use.
    node_type_features: Optional[List[np.ndarray]] = None
        Not used by this implementation.
    edge_features: Optional[List[np.ndarray]] = None
        Not used by this implementation.
    """
    training_input = []

    kernel = self.convert_graph_to_kernel(support)
    if kernel is not None:
        training_input.append(kernel)

    if node_features is not None:
        training_input.extend(node_features)

    if self._use_node_embedding:
        training_input.append(graph.get_node_ids())

    return tuple(training_input)
def __init__(
    self,
    graph: Graph,
    graph_used_in_training: Graph,
    use_node_types: bool,
    use_edge_metrics: bool,
    batch_size: int = 2**10,
):
    """Create new EdgePredictionSequence object.

    Parameters
    --------------------------------
    graph: Graph
        The graph whose edges are to be predicted.
    graph_used_in_training: Graph
        The graph that was used while training the
        current edge prediction model.
    use_node_types: bool
        Whether to return the node types.
    use_edge_metrics: bool
        Whether to return the edge metrics.
    batch_size: int = 2**10
        The batch size to use.
    """
    # All the parameters are forwarded to the underlying sequence.
    sequence_parameters = dict(
        graph=graph,
        graph_used_in_training=graph_used_in_training,
        use_node_types=use_node_types,
        use_edge_metrics=use_edge_metrics,
        batch_size=batch_size
    )
    self._sequence = GenericEdgePredictionSequence(**sequence_parameters)
    self._current_index = 0

    # One sample per directed edge of the graph to predict.
    number_of_samples = graph.get_number_of_directed_edges()
    super().__init__(
        sample_number=number_of_samples,
        batch_size=batch_size,
    )
def _extract_embeddings(
    self,
    graph: Graph,
    model: Model,
    return_dataframe: bool
) -> EmbeddingResult:
    """Return the node and context embeddings stored in the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Model
        The Keras model used to embed the graph.
    return_dataframe: bool
        Whether to return DataFrames instead of numpy arrays.
    """
    # Node embedding first, context embedding second.
    embeddings = [
        self.get_layer_weights(layer_name, model)
        for layer_name in ("node_embeddings", "context_embeddings")
    ]

    if return_dataframe:
        node_names = graph.get_node_names()
        embeddings = [
            pd.DataFrame(weights, index=node_names)
            for weights in embeddings
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embeddings
    )
def load_graph(self) -> Graph:
    """Load the graph nodes and edges into Ensmallen.

    The graph arguments are obtained from `main_graph_args`. Node and
    edge paths that are URLs are first downloaded into the output
    directory, and the arguments are updated to point to the local copy.

    :return: ensmallen Graph
    :raises FileNotFoundError: if a path is neither a URL nor a valid path
    """
    graph_args = self.main_graph_args()

    for path_key in ('node_path', 'edge_path'):
        location = graph_args[path_key]

        if is_url(location):
            # Derive a file name from the URL, replacing with an
            # underscore every character that is not in VALID_CHARS.
            local_name = ''.join(
                char if char in VALID_CHARS else "_"
                for char in location
            )
            local_path = os.path.join(self.outdir(), local_name)
            download_file(location, local_path)
            graph_args[path_key] = local_path
            continue

        if not is_valid_path(location):
            raise FileNotFoundError(f"Please check path: {location}")

    # Now load the Ensmallen graph
    return Graph.from_csv(**graph_args)
def fit(
    self,
    graph: Graph,
    support: Optional[Graph] = None,
    node_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None,
    node_type_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None,
    edge_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None,
):
    """Fit the edge-label prediction model on the provided graph.

    Before delegating to the parent class, the kind of prediction task
    is determined: binary when the graph has exactly two edge types,
    multilabel when the graph is a multigraph.

    Parameters
    --------------------
    graph: Graph
        The graph to fit the model on.
    support: Optional[Graph] = None
        The graph describing the topological structure that also
        includes the above graph. This parameter is mostly useful for
        topological classifiers such as Graph Convolutional Networks.
    node_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
        The node features to use.
    node_type_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
        The node type features to use. Currently must be None.
    edge_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
        The edge features to use.

    Raises
    --------------------
    NotImplementedError
        If node type features are provided.
    """
    node_type_features_provided = node_type_features is not None
    if node_type_features_provided:
        raise NotImplementedError(
            "Support for node type features is not currently available for any "
            "of the edge-label prediction models.")

    number_of_edge_types = graph.get_number_of_edge_types()
    self._is_binary_prediction_task = number_of_edge_types == 2
    self._is_multilabel_prediction_task = graph.is_multigraph()

    super().fit(
        graph=graph,
        support=support,
        node_features=node_features,
        node_type_features=node_type_features,
        edge_features=edge_features,
    )
def convert_ensmallen_graph_to_networkx_graph(graph: Graph) -> nx.Graph:
    """Return NetworkX graph derived from the provided Ensmallen Graph.

    The resulting graph is a `DiGraph` when the provided graph is
    directed and a `Graph` otherwise. Nodes are identified by their
    node IDs, and edge weights are included when available.

    Parameters
    -----------
    graph: Graph
        The graph to be converted.
    """
    result_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
    result_graph.add_nodes_from(graph.get_node_ids())

    if not graph.has_edge_weights():
        result_graph.add_edges_from(
            graph.get_edge_node_ids(directed=graph.is_directed())
        )
        return result_graph

    # Pair each directed edge with its weight.
    weighted_edges = []
    edges_and_weights = zip(
        graph.get_directed_edge_node_ids(),
        graph.get_edge_weights()
    )
    for (source_id, destination_id), weight in edges_and_weights:
        weighted_edges.append((source_id, destination_id, weight))
    result_graph.add_weighted_edges_from(weighted_edges)

    return result_graph
def _get_steps_per_epoch(self, graph: Graph) -> int:
    """Return the number of steps per epoch, which is at least one.

    Parameters
    ------------------
    graph: Graph
        The graph whose number of directed edges, divided by the
        batch size, determines the number of steps.
    """
    steps = graph.get_number_of_directed_edges() // self._batch_size
    # Graphs with fewer edges than the batch size still get one step.
    return 1 if steps < 1 else steps
def _get_steps_per_epoch(self, graph: Graph) -> int:
    """Return the number of steps per epoch, which is at least one.

    Parameters
    ------------------
    graph: Graph
        The graph whose number of nodes, divided by the batch size,
        determines the number of steps.
    """
    # Graphs with fewer nodes than the batch size still get one step.
    return max(graph.get_number_of_nodes() // self._batch_size, 1)
def __init__(
    self,
    graph: Graph,
    graph_used_in_training: Graph,
    return_node_types: bool,
    return_edge_types: bool,
    use_edge_metrics: bool,
    batch_size: int = 2**10,
):
    """Create new EdgePredictionSequence object.

    Parameters
    --------------------------------
    graph: Graph
        The graph whose edges are to be predicted.
    graph_used_in_training: Graph
        The graph that was used while training the
        current edge prediction model.
    return_node_types: bool
        Whether to return the node types.
    return_edge_types: bool
        Whether to return the edge types.
    use_edge_metrics: bool
        Whether to return the edge metrics.
    batch_size: int = 2**10
        The batch size to use.

    Raises
    --------------------------------
    ValueError
        If either of the provided graphs does not have any edge.
    ValueError
        If the node vocabulary of the graph is not compatible with the
        one of the graph used in training.
    """
    if not graph.has_edges():
        raise ValueError(
            f"An empty instance of graph {graph.get_name()} was provided!")

    # The training graph must be validated too: previously this check
    # tested `graph` a second time, so it could never be triggered.
    if not graph_used_in_training.has_edges():
        raise ValueError(
            f"An empty instance of graph {graph_used_in_training.get_name()} was provided!"
        )

    if not graph.has_compatible_node_vocabularies(graph_used_in_training):
        raise ValueError(
            f"The provided graph {graph.get_name()} does not have a node vocabulary "
            "that is compatible with the provided graph used in training.")

    self._graph = graph
    self._graph_used_in_training = graph_used_in_training
    self._return_node_types = return_node_types
    self._return_edge_types = return_edge_types
    self._use_edge_metrics = use_edge_metrics
    self._batch_size = batch_size
def split_graph_following_evaluation_schema(
    cls,
    graph: Graph,
    evaluation_schema: str,
    random_state: int,
    holdout_number: int,
    number_of_holdouts: int,
    **holdouts_kwargs: Dict[str, Any],
) -> Tuple[Graph]:
    """Return train and test graphs tuple following the provided evaluation schema.

    Parameters
    ----------------------
    graph: Graph
        The graph to split.
    evaluation_schema: str
        The evaluation schema to follow, either
        "Stratified Monte Carlo" or "Stratified Kfold".
    random_state: int
        The random state for the evaluation.
    holdout_number: int
        The current holdout number.
    number_of_holdouts: int
        The number of holdouts that will be generated
        throughout the evaluation.
    holdouts_kwargs: Dict[str, Any]
        The kwargs to be forwarded to the holdout method.
        They are only used by the Monte Carlo schema.

    Raises
    ----------------------
    ValueError
        If the requested evaluation schema is not available.
    """
    if evaluation_schema == "Stratified Monte Carlo":
        # Each holdout uses its own random state, offset by its number.
        holdout_random_state = random_state + holdout_number
        train_and_test = graph.get_edge_label_holdout_graphs(
            **holdouts_kwargs,
            use_stratification=True,
            random_state=holdout_random_state,
        )
    elif evaluation_schema == "Stratified Kfold":
        # All folds share the random state; the holdout number picks the fold.
        train_and_test = graph.get_edge_label_kfold(
            k=number_of_holdouts,
            k_index=holdout_number,
            use_stratification=True,
            random_state=random_state,
        )
    else:
        raise ValueError(
            f"The requested evaluation schema `{evaluation_schema}` "
            "is not available. The available evaluation schemas "
            f"are: {format_list(cls.get_available_evaluation_schemas())}.")
    return train_and_test
def make_link_prediction_data(
    self,
    embedding_file: str,
    training_graph_args: dict,
    pos_validation_args: dict,
    neg_training_args: dict,
    neg_validation_args: dict,
    edge_method: str
) -> Tuple[Tuple, Tuple]:
    """Prepare training and validation data for training link prediction classifiers

    Args:
        embedding_file: path to embedding file for nodes in graph
        training_graph_args: EnsmallenGraph arguments to load training graph
        pos_validation_args: EnsmallenGraph arguments to load positive validation graph
        neg_training_args: EnsmallenGraph arguments to load negative training graph
        neg_validation_args: EnsmallenGraph arguments to load negative validation graph
        edge_method: edge embedding method to use (average, L1, L2, etc)

    Returns:
        A tuple of tuples: (train_edges, train_labels) and
        (valid_edges, valid_labels)
    """
    embedding = pd.read_csv(embedding_file, index_col=0, header=None)

    # The remaining graphs are loaded with the training graph arguments,
    # overridden by their own specific arguments.
    graphs = {'pos_training': Graph.from_csv(**training_graph_args)}
    specific_arguments = (
        ('pos_validation', pos_validation_args),
        ('neg_training', neg_training_args),
        ('neg_validation', neg_validation_args),
    )
    for graph_name, overrides in specific_arguments:
        loading_arguments = copy.deepcopy(training_graph_args)
        loading_arguments.update(overrides)
        graphs[graph_name] = Graph.from_csv(**loading_arguments)

    # The transformer turns the node embedding into edge embeddings.
    transformer = LinkPredictionTransformer(method=edge_method)
    transformer.fit(embedding)

    training_data = transformer.transform(
        positive_graph=graphs['pos_training'],
        negative_graph=graphs['neg_training'])
    train_edges, train_labels = training_data

    validation_data = transformer.transform(
        positive_graph=graphs['pos_validation'],
        negative_graph=graphs['neg_validation'])
    valid_edges, valid_labels = validation_data

    return (train_edges, train_labels), (valid_edges, valid_labels)
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding obtained by a truncated SVD of the co-occurrence matrix.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    edges, weights = graph.get_log_normalized_cooccurrence_coo_matrix(
        **self._walk_parameters
    )
    number_of_nodes = graph.get_number_of_nodes()

    # Sparse (nodes x nodes) log-normalized co-occurrence matrix.
    cooccurrence = coo_matrix(
        (weights, (edges[:, 0], edges[:, 1])),
        shape=(number_of_nodes, number_of_nodes),
        dtype=np.float32
    )

    decomposition = TruncatedSVD(
        n_components=self._embedding_size,
        random_state=self._random_state
    )
    decomposition.fit(cooccurrence)
    embedding = decomposition.transform(cooccurrence)

    if return_dataframe:
        embedding = pd.DataFrame(embedding, index=graph.get_node_names())

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embedding
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node and edge type embeddings computed by the underlying model.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embeddings in DataFrames, indexed by node
        names and unique edge type names respectively.
    verbose: bool = True
        Forwarded to the underlying model.
    """
    training_parameters = dict(
        epochs=self._epochs,
        learning_rate=self._learning_rate,
        learning_rate_decay=self._learning_rate_decay,
        verbose=verbose,
    )
    embeddings = self._model.fit_transform(graph, **training_parameters)
    node_embedding, edge_type_embedding = embeddings

    if return_dataframe:
        node_embedding = pd.DataFrame(
            node_embedding,
            index=graph.get_node_names()
        )
        edge_type_embedding = pd.DataFrame(
            edge_type_embedding,
            index=graph.get_unique_edge_type_names()
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embedding,
        edge_type_embeddings=edge_type_embedding,
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding computed by the underlying model.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap the embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Forwarded to the underlying model.
    """
    raw_embedding = self._model.fit_transform(
        graph,
        verbose=verbose,
    )
    # The embedding produced by the model is transposed before use.
    node_embedding = raw_embedding.T

    if return_dataframe:
        node_names = graph.get_node_names()
        node_embedding = pd.DataFrame(node_embedding, index=node_names)

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embedding
    )
def _build_edge_prediction_based_model(
    self,
    graph: Graph,
    sources: tf.Tensor,
    destinations: tf.Tensor
) -> Union[List[tf.Tensor], tf.Tensor]:
    """Return the model implementation.

    The output is the activation of the dot product between the node
    embedding of the sources and the context embedding of the
    destinations.

    Parameters
    -------------------
    graph: Graph
        The graph whose number of nodes sizes the embedding layers.
    sources: tf.Tensor
        The source nodes to be used in the model.
    destinations: tf.Tensor
        The destinations nodes to be used in the model.
    """
    # The two embedding layers differ only by their name.
    shared_parameters = dict(
        input_dim=graph.get_number_of_nodes(),
        output_dim=self._embedding_size,
        input_length=1,
    )
    node_embedding = Embedding(name="node_embeddings", **shared_parameters)
    context_embedding = Embedding(
        name="context_embeddings",
        **shared_parameters
    )

    source_embedding = Flatten()(node_embedding(sources))
    destination_embedding = Flatten()(context_embedding(destinations))

    similarity = Dot(axes=-1)([source_embedding, destination_embedding])
    return Activation(self._activation)(similarity)
def _extract_embeddings(
    self,
    graph: Graph,
    model: Model,
    return_dataframe: bool
) -> EmbeddingResult:
    """Return the node and edge type embeddings stored in the model.

    Parameters
    ------------------
    graph: Graph
        The graph that was embedded.
    model: Model
        The Keras model used to embed the graph.
    return_dataframe: bool
        Whether to return DataFrames instead of numpy arrays.
    """
    # The index names are only needed when DataFrames are requested.
    if return_dataframe:
        index_names = {
            "node_embeddings": graph.get_node_names(),
            "edge_type_embeddings": graph.get_unique_edge_type_names(),
        }
    else:
        index_names = None

    # The first row of the edge type embedding is dropped when the graph
    # has unknown edge types.
    layers = (
        ("node_embeddings", False),
        ("edge_type_embeddings", graph.has_unknown_edge_types()),
    )

    result = {}
    for layer_name, drop_first_row in layers:
        weights = self.get_layer_weights(
            layer_name,
            model,
            drop_first_row=drop_first_row
        )
        if index_names is not None:
            weights = pd.DataFrame(weights, index=index_names[layer_name])
        result[layer_name] = weights

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        **result
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return the list of node embeddings computed by the underlying model.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to wrap each embedding in a DataFrame indexed by node names.
    verbose: bool = True
        Not used by this implementation.
    """
    embeddings = self._model.fit_transform(graph)

    # A model returning a single embedding is normalised to a list.
    if not isinstance(embeddings, list):
        embeddings = [embeddings]

    if return_dataframe:
        node_names = graph.get_node_names()
        embeddings = [
            pd.DataFrame(embedding, index=node_names)
            for embedding in embeddings
        ]

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=embeddings
    )
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> Union[np.ndarray, pd.DataFrame, Dict[str, np.ndarray], Dict[
        str, pd.DataFrame]]:
    """Train the PyKEEN model on the graph and return its embedding.

    Parameters
    ------------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to return DataFrames instead of numpy arrays.
    verbose: bool = True
        Whether to show the loading bars.

    Raises
    ------------------
    NotImplementedError
        If `_build_model` did not return a PyKEEN model.
    """
    device = torch.device(self._device)

    # The directed edge triples of the graph, as a 32-bit integer tensor.
    triples = torch.IntTensor(
        graph.get_directed_edge_triples_ids().astype(np.int32)
    )
    triples_factory = CoreTriplesFactory(
        triples,
        num_entities=graph.get_number_of_nodes(),
        num_relations=graph.get_number_of_edge_types(),
        entity_ids=graph.get_node_ids(),
        relation_ids=graph.get_unique_edge_type_ids(),
        create_inverse_triples=False,
    )

    # The batch size cannot exceed the number of available triples.
    batch_size = min(self._batch_size, graph.get_number_of_directed_edges())

    model = self._build_model(triples_factory)

    if not isinstance(model, Model):
        raise NotImplementedError(
            "The model created with the `_build_model` in the child "
            f"class {self.__class__.__name__} for the model {self.model_name()} "
            f"in the library {self.library_name()} did not return a "
            f"PyKeen model but an object of type {type(model)}.")

    # Move the model to the requested device.
    model.to(device)

    training_loop = SLCWATrainingLoop(
        model=model,
        triples_factory=triples_factory,
    )
    training_loop.train(
        triples_factory=triples_factory,
        num_epochs=self._epochs,
        batch_size=batch_size,
        use_tqdm=True,
        use_tqdm_batch=True,
        tqdm_kwargs={"disable": not verbose}
    )

    return self._extract_embeddings(
        graph,
        model,
        return_dataframe=return_dataframe
    )
def convert_graph_to_kernel(self, graph: Graph) -> Optional[tf.SparseTensor]:
    """Return provided graph converted to a sparse Tensor.

    Parameters
    ---------------------------
    graph: Graph
        The graph to convert.

    Implementation details
    ---------------------------
    When the model does not have convolutional layers this method
    returns None, so as to avoid allocating the kernel for no reason.
    Edge weights are only used when the graph has them and the
    symmetric normalized Laplacian is not requested.
    """
    if self.has_convolutional_layers():
        use_laplacian = self._use_simmetric_normalized_laplacian
        use_weights = graph.has_edge_weights() and not use_laplacian
        return graph_to_sparse_tensor(
            graph,
            use_weights=use_weights,
            use_simmetric_normalized_laplacian=use_laplacian,
            handling_multi_graph=self._handling_multi_graph
        )
    return None
def _fit_transform(
    self,
    graph: Graph,
    return_dataframe: bool = True,
    verbose: bool = True
) -> EmbeddingResult:
    """Return node embedding.

    The graph is converted to a NetworkX graph, on which the Estimator
    created by `_build_model` is fitted.

    Parameters
    ---------------
    graph: Graph
        The graph to embed.
    return_dataframe: bool = True
        Whether to return a DataFrame.
    verbose: bool = True
        Not used by this implementation.

    Raises
    ---------------
    NotImplementedError
        If `_build_model` does not return an Estimator, or if the
        embedding returned by the model is not a numpy array.
    """
    model: Estimator = self._build_model()

    if not isinstance(model, Estimator):
        raise NotImplementedError(
            "The model created with the `_build_model` in the child "
            f"class {self.__class__.__name__} for the model {self.model_name()} "
            f"in the library {self.library_name()} did not return a "
            f"Estimator but an object of type {type(model)}. "
            "It is not clear what to do with this object.")

    model.fit(convert_ensmallen_graph_to_networkx_graph(graph))

    node_embeddings = model.get_embedding()

    if not isinstance(node_embeddings, np.ndarray):
        # Report the type of the embedding: the model type was already
        # validated above and would be misleading here.
        raise NotImplementedError(
            "The model created with the `get_embedding` in the child "
            f"class {self.__class__.__name__} for the model {self.model_name()} "
            f"in the library {self.library_name()} did not return a "
            f"Numpy Array but an object of type {type(node_embeddings)}. "
            "It is not clear what to do with this object.")

    if return_dataframe:
        node_embeddings = pd.DataFrame(
            node_embeddings,
            index=graph.get_node_names()
        )

    return EmbeddingResult(
        embedding_method_name=self.model_name(),
        node_embeddings=node_embeddings
    )