Exemplo n.º 1
0
    def _build_model(self, graph: Graph) -> Model:
        """Build and compile the SkipGram model for the provided graph.

        Parameters
        ------------------
        graph: Graph
            The graph whose number of nodes is used as vocabulary size.

        Returns
        ------------------
        The compiled model, whose inputs are, in order, the contextual
        terms and the central terms.
        """
        number_of_nodes = graph.get_number_of_nodes()
        context_size = self._window_size * 2

        # Input carrying the single central term of each sample.
        central_terms = Input((1, ), dtype=tf.int32)

        # Input carrying the contextual terms around the central term.
        contextual_terms = Input((context_size, ), dtype=tf.int32)

        # Embedding of the central term, flattened to drop the
        # sequence axis of length one.
        node_embedding_layer = Embedding(
            input_dim=number_of_nodes,
            output_dim=self._embedding_size,
            input_length=1,
            name=self.NODE_EMBEDDING,
        )
        central_term_embedding = Flatten()(
            node_embedding_layer(central_terms))

        # The NCE layer also takes care of the loss function, which is
        # why the model is compiled below without an explicit loss.
        nce_layer = NoiseContrastiveEstimation(
            vocabulary_size=number_of_nodes,
            embedding_size=self._embedding_size,
            number_of_negative_samples=self._number_of_negative_samples,
            positive_samples=context_size,
        )
        output = nce_layer((central_term_embedding, contextual_terms))

        model = Model(
            inputs=[contextual_terms, central_terms],
            outputs=output,
            name=self.model_name(),
        )
        model.compile(optimizer=self._optimizer)
        return model
Exemplo n.º 2
0
    def _fit_transform(
        self,
        graph: Graph,
        return_dataframe: bool = True,
        verbose: bool = True
    ) -> EmbeddingResult:
        """Return node embedding computed from the graph Laplacian.

        The symmetric normalized Laplacian of the graph is assembled as a
        sparse COO matrix and the eigenvectors returned by `eigsh` for the
        `embedding_size + 1` eigenvalues of largest magnitude are used
        as node embedding.

        Parameters
        ------------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by node name.
        verbose: bool = True
            Not used by this implementation.
        """
        edges, weights = graph.get_symmetric_normalized_laplacian_coo_matrix()
        number_of_nodes = graph.get_number_of_nodes()

        sources, destinations = edges[:, 0], edges[:, 1]
        laplacian = coo_matrix(
            (weights, (sources, destinations)),
            shape=(number_of_nodes, number_of_nodes),
            dtype=np.float32,
        )

        # `eigsh` returns (eigenvalues, eigenvectors): only the
        # eigenvectors are of interest here.
        _, eigenvectors = eigsh(
            laplacian,
            k=self._embedding_size + 1,
            which="LM",
            return_eigenvectors=True,
        )

        node_embeddings = eigenvectors
        if return_dataframe:
            node_embeddings = pd.DataFrame(
                eigenvectors,
                index=graph.get_node_names(),
            )

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embeddings,
        )
    def _fit(
        self,
        graph: Graph,
        support: Optional[Graph] = None,
        node_features: Optional[List[np.ndarray]] = None,
        node_type_features: Optional[List[np.ndarray]] = None,
        edge_features: Optional[List[np.ndarray]] = None,
    ):
        """Run fitting on the provided graph.

        The edges of the provided graph are used as positive samples, while
        the negative samples are drawn with `sample_negative_graph`.

        Parameters
        --------------------
        graph: Graph
            The graph to fit the model on.
        support: Optional[Graph] = None
            The graph describing the topological structure that
            includes also the above graph. This parameter
            is mostly useful for topological classifiers
            such as Graph Convolutional Networks.
            When None, the provided graph is used.
        node_features: Optional[List[np.ndarray]] = None
            The node features to use.
        node_type_features: Optional[List[np.ndarray]] = None
            The node type features to use.
            They are not used within this method.
        edge_features: Optional[List[np.ndarray]] = None
            The edge features to use. When edge metrics are enabled,
            these are replaced by the edge metrics computed on the support.
        """
        transformer = EdgePredictionTransformer(
            method=self._edge_embedding_method,
            aligned_node_mapping=True,
        )
        transformer.fit(node_features)

        if support is None:
            support = graph

        # The number of negatives is proportional to the number of edges.
        number_of_negative_samples = int(
            math.ceil(
                graph.get_number_of_edges() * self._training_unbalance_rate
            )
        )
        negative_graph = graph.sample_negative_graph(
            number_of_negative_samples=number_of_negative_samples,
            random_state=self._random_state,
            sample_only_edges_with_heterogeneous_node_types=(
                self._training_sample_only_edges_with_heterogeneous_node_types
            ),
            use_zipfian_sampling=self._use_zipfian_sampling,
        )

        if self._use_edge_metrics:
            self._support = support
            # Positive edge metrics come first, then the negative ones.
            positive_edge_metrics = support.get_all_edge_metrics(
                normalize=True,
                subgraph=graph,
            )
            negative_edge_metrics = support.get_all_edge_metrics(
                normalize=True,
                subgraph=negative_graph,
            )
            edge_features = np.vstack(
                (positive_edge_metrics, negative_edge_metrics))

        training_arguments = transformer.transform(
            positive_graph=graph,
            negative_graph=negative_graph,
            edge_features=edge_features,
            shuffle=True,
            random_state=self._random_state,
        )
        self._model_instance.fit(*training_arguments)
Exemplo n.º 4
0
    def _build_model(self, graph: Graph) -> Model:
        """Build and compile the CBOW model for the provided graph.

        Parameters
        ------------------
        graph: Graph
            The graph whose number of nodes is used as vocabulary size.

        Returns
        ------------------
        The compiled model, whose inputs are, in order, the contextual
        terms and the central terms.
        """
        number_of_nodes = graph.get_number_of_nodes()
        context_size = self._window_size * 2

        # Input carrying the single central term of each sample.
        central_terms = Input((1, ), dtype=tf.int32)

        # Input carrying the contextual terms around the central term.
        contextual_terms = Input((context_size, ), dtype=tf.int32)

        # The context is represented by the average of the embedding
        # of its terms.
        node_embedding_layer = Embedding(
            input_dim=number_of_nodes,
            output_dim=self._embedding_size,
            input_length=context_size,
            name="node_embedding",
        )
        average_context_embedding = GlobalAveragePooling1D()(
            node_embedding_layer(contextual_terms))

        # The sampled softmax layer also takes care of the loss function,
        # which is why the model is compiled below without an explicit loss.
        sampled_softmax_layer = SampledSoftmax(
            vocabulary_size=number_of_nodes,
            embedding_size=self._embedding_size,
            number_of_negative_samples=self._number_of_negative_samples,
        )
        sampled_softmax = sampled_softmax_layer(
            (average_context_embedding, central_terms))

        model = Model(
            inputs=[contextual_terms, central_terms],
            outputs=sampled_softmax,
            name=self.model_name(),
        )
        model.compile(optimizer=self._optimizer)
        return model
    def __init__(self,
                 graph: Graph,
                 use_node_types: bool = False,
                 use_edge_metrics: bool = False,
                 batch_size: int = 2**10,
                 negative_samples_rate: float = 0.5,
                 avoid_false_negatives: bool = False,
                 graph_to_avoid: Graph = None,
                 sample_only_edges_with_heterogeneous_node_types: bool = False,
                 random_state: int = 42):
        """Create new EdgePredictionSequence object.

        Parameters
        --------------------------------
        graph: Graph,
            The graph from which to sample the edges.
        use_node_types: bool = False,
            Whether to return the node types.
        use_edge_metrics: bool = False,
            Whether to return the edge metrics.
        batch_size: int = 2**10,
            The batch size to use.
        negative_samples_rate: float = 0.5,
            Fraction of each batch to be made of negative samples.
            For example, with a batch size of 128 and a
            negative_samples_rate of 0.5, a batch contains
            64 positives and 64 negatives.
        avoid_false_negatives: bool = False,
            Whether to filter out false negatives.
            Enabling this will slow down the batch generation while (likely)
            not introducing any significant gain to the model performance.
        graph_to_avoid: Graph = None,
            Graph to avoid when generating the edges.
            This can be the validation component of the graph, for example.
            More information on how to generate the holdouts is available
            in the Graph package.
        sample_only_edges_with_heterogeneous_node_types: bool = False
            Whether to only sample edges between heterogeneous node types.
            This may be useful when training a model to predict between
            two portions in a bipartite graph.
        random_state: int = 42,
            The random_state to use to make extraction reproducible.

        Raises
        --------------------------------
        ValueError
            If the provided graph does not have any edge.
        """
        if not graph.has_edges():
            raise ValueError(
                f"An empty instance of graph {graph.get_name()} was provided!")

        # Graphs involved in the sampling.
        self._graph = graph
        self._graph_to_avoid = graph_to_avoid

        # What to return for each sampled edge.
        self._use_node_types = use_node_types
        self._use_edge_metrics = use_edge_metrics

        # Sampling parameters.
        self._negative_samples_rate = negative_samples_rate
        self._avoid_false_negatives = avoid_false_negatives
        self._sample_only_edges_with_heterogeneous_node_types = (
            sample_only_edges_with_heterogeneous_node_types
        )
        self._random_state = random_state

        self._current_index = 0

        super().__init__(
            sample_number=graph.get_number_of_directed_edges(),
            batch_size=batch_size,
        )
Exemplo n.º 6
0
 def _get_class_weights(self, graph: Graph) -> Dict[int, float]:
     """Returns dictionary mapping each node type ID to its class weight.

     The weight of a node type is the number of nodes divided by the
     count of that node type and then by the number of node types.

     Parameters
     ------------------
     graph: Graph
         The graph from which to compute the node type class weights.
     """
     total_nodes = graph.get_number_of_nodes()
     total_node_types = graph.get_number_of_node_types()
     class_weights = {}
     for node_type_id, count in graph.get_node_type_id_counts_hashmap().items():
         class_weights[node_type_id] = total_nodes / count / total_node_types
     return class_weights
Exemplo n.º 7
0
 def _get_class_weights(self, graph: Graph) -> Dict[int, float]:
     """Returns dictionary mapping each edge type ID to its class weight.

     The weight of an edge type is the number of directed edges divided
     by the count of that edge type and then by the number of edge types.

     Parameters
     ------------------
     graph: Graph
         The graph from which to compute the edge type class weights.
     """
     total_directed_edges = graph.get_number_of_directed_edges()
     total_edge_types = graph.get_number_of_edge_types()
     class_weights = {}
     for edge_type_id, count in graph.get_edge_type_id_counts_hashmap().items():
         class_weights[edge_type_id] = (
             total_directed_edges / count / total_edge_types
         )
     return class_weights
Exemplo n.º 8
0
 def _get_model_training_output(
     self,
     graph: Graph,
 ) -> Optional[np.ndarray]:
     """Returns the node type labels to use as training output.

     The encoding of the labels depends on the prediction task:
     one-hot encoded for multilabel tasks, boolean for binary tasks
     and single-label node type IDs otherwise.

     Parameters
     ------------------
     graph: Graph
         The graph from which to extract the node type labels.
     """
     if self.is_multilabel_prediction_task():
         labels = graph.get_one_hot_encoded_node_types()
     elif self.is_binary_prediction_task():
         labels = graph.get_boolean_node_type_ids()
     else:
         labels = graph.get_single_label_node_type_ids()
     return labels
    def _extract_embeddings(self, graph: Graph,
                            model: Union[EntityRelationEmbeddingModel,
                                         ERModel],
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns embedding from the model.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Union[EntityRelationEmbeddingModel, ERModel]
            The model used to embed the graph.
        return_dataframe: bool
            Whether to return dataframes or numpy arrays.

        Raises
        ------------------
        NotImplementedError
            If the provided model is neither an
            `EntityRelationEmbeddingModel` nor an `ERModel`.
        """
        if not isinstance(model, (EntityRelationEmbeddingModel, ERModel)):
            raise NotImplementedError(
                f"The provided model has type {type(model)}, which "
                "is not currently supported. The supported types "
                "are `EntityRelationEmbeddingModel` and `ERModel`.")

        if isinstance(model, EntityRelationEmbeddingModel):
            # This model type exposes a single representation per kind.
            entity_representations = [model.entity_embeddings]
            relation_representations = [model.relation_embeddings]
        else:
            entity_representations = model.entity_representations
            relation_representations = model.relation_representations

        def to_numpy(representation):
            """Return the weights of the representation as a numpy array."""
            return representation._embeddings.weight.cpu().detach().numpy()

        node_embeddings = [
            to_numpy(representation)
            for representation in entity_representations
        ]
        edge_type_embeddings = [
            to_numpy(representation)
            for representation in relation_representations
        ]

        if return_dataframe:
            node_embeddings = [
                pd.DataFrame(weights, index=graph.get_node_names())
                for weights in node_embeddings
            ]
            edge_type_embeddings = [
                pd.DataFrame(
                    weights,
                    index=graph.get_unique_edge_type_names(),
                )
                for weights in edge_type_embeddings
            ]

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embeddings,
            edge_type_embeddings=edge_type_embeddings,
        )
Exemplo n.º 10
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding computed from the modularity matrix.

        The embedding is composed of the eigenvectors of the dense
        modularity matrix of the graph associated with its
        `embedding_size` largest eigenvalues.

        Parameters
        ------------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by node name.
        verbose: bool = True
            Not used by this implementation.
        """
        number_of_nodes = graph.get_number_of_nodes()

        # `eigh` sorts the eigenvalues in ascending order, so the last
        # `embedding_size` indices select the largest ones.
        # `subset_by_index` replaces the `eigvals` keyword, which is
        # deprecated and no longer accepted by recent SciPy releases.
        _, embedding = eigh(
            graph.get_dense_modularity_matrix(),
            subset_by_index=(
                number_of_nodes - self._embedding_size,
                number_of_nodes - 1,
            ),
        )

        if return_dataframe:
            embedding = pd.DataFrame(
                embedding,
                index=graph.get_node_names(),
            )

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=embedding,
        )
Exemplo n.º 11
0
 def _get_model_training_input(
     self,
     graph: Graph,
     support: Graph,
     node_features: Optional[List[np.ndarray]] = None,
     node_type_features: Optional[List[np.ndarray]] = None,
     edge_features: Optional[List[np.ndarray]] = None,
 ) -> Tuple[Union[np.ndarray, Type[Sequence]]]:
     """Returns training input tuple.

     The tuple contains, in order and only when available: the kernel
     obtained from the support graph, the node features and, when the
     model uses a node embedding, the node IDs of the graph.

     Parameters
     ------------------
     graph: Graph
         The graph whose node IDs are used when a node embedding is used.
     support: Graph
         The graph to be converted into the kernel.
     node_features: Optional[List[np.ndarray]] = None
         The node features to use.
     node_type_features: Optional[List[np.ndarray]] = None
         Not used by this implementation.
     edge_features: Optional[List[np.ndarray]] = None
         Not used by this implementation.
     """
     training_input = []

     kernel = self.convert_graph_to_kernel(support)
     if kernel is not None:
         training_input.append(kernel)

     if node_features is not None:
         training_input.extend(node_features)

     if self._use_node_embedding:
         training_input.append(graph.get_node_ids())

     return tuple(training_input)
Exemplo n.º 12
0
    def __init__(
        self,
        graph: Graph,
        graph_used_in_training: Graph,
        use_node_types: bool,
        use_edge_metrics: bool,
        batch_size: int = 2**10,
    ):
        """Create new EdgePredictionSequence object.

        The batches are produced by an inner GenericEdgePredictionSequence
        built with the same parameters.

        Parameters
        --------------------------------
        graph: Graph
            The graph whose edges are to be predicted.
        graph_used_in_training: Graph
            The graph that was used while training the current
            edge prediction model.
        use_node_types: bool
            Whether to return the node types.
        use_edge_metrics: bool
            Whether to return the edge metrics.
        batch_size: int = 2**10,
            The batch size to use.
        """
        sequence_parameters = dict(
            graph=graph,
            graph_used_in_training=graph_used_in_training,
            use_node_types=use_node_types,
            use_edge_metrics=use_edge_metrics,
            batch_size=batch_size,
        )
        self._sequence = GenericEdgePredictionSequence(**sequence_parameters)
        self._current_index = 0

        number_of_samples = graph.get_number_of_directed_edges()
        super().__init__(
            sample_number=number_of_samples,
            batch_size=batch_size,
        )
    def _extract_embeddings(self, graph: Graph, model: Model,
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns node and context embeddings from the model.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Model
            The Keras model used to embed the graph.
        return_dataframe: bool
            Whether to return dataframes or numpy arrays.
        """
        # The node embedding comes first, followed by the context embedding.
        embeddings = [
            self.get_layer_weights(layer_name, model)
            for layer_name in ("node_embeddings", "context_embeddings")
        ]

        if return_dataframe:
            node_names = graph.get_node_names()
            embeddings = [
                pd.DataFrame(weights, index=node_names)
                for weights in embeddings
            ]

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=embeddings,
        )
Exemplo n.º 14
0
    def load_graph(self) -> Graph:
        """
        Loads graph nodes and edges into Ensmallen.

        The node and edge paths found in the main graph arguments may be
        URLs: in that case the files are first downloaded into the output
        directory and the arguments are updated to the downloaded files.

        :raises FileNotFoundError: when a path is neither a URL nor valid
        :return: ensmallen Graph
        """
        graph_args = self.main_graph_args()

        for path_key in ('node_path', 'edge_path'):
            location = graph_args[path_key]

            if not is_url(location):
                if not is_valid_path(location):
                    raise FileNotFoundError(f"Please check path: {location}")
                continue

            # Build a file name from the URL, replacing with an underscore
            # every character that is not among the valid ones.
            sanitized_name = ''.join(
                char if char in VALID_CHARS else "_" for char in location)
            destination = os.path.join(self.outdir(), sanitized_name)
            download_file(location, destination)
            graph_args[path_key] = destination

        # Now load the Ensmallen graph
        return Graph.from_csv(**graph_args)
Exemplo n.º 15
0
    def fit(
        self,
        graph: Graph,
        support: Optional[Graph] = None,
        node_features: Optional[Union[pd.DataFrame, np.ndarray,
                                      List[Union[pd.DataFrame,
                                                 np.ndarray]]]] = None,
        node_type_features: Optional[Union[pd.DataFrame, np.ndarray,
                                           List[Union[pd.DataFrame,
                                                      np.ndarray]]]] = None,
        edge_features: Optional[Union[pd.DataFrame, np.ndarray,
                                      List[Union[pd.DataFrame,
                                                 np.ndarray]]]] = None,
    ):
        """Fit the model on the provided graph.

        Parameters
        --------------------
        graph: Graph
            The graph to fit the model on.
        support: Optional[Graph] = None
            The graph describing the topological structure that
            includes also the above graph. This parameter
            is mostly useful for topological classifiers
            such as Graph Convolutional Networks.
        node_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
            The node features to use.
        node_type_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
            The node type features to use.
            Currently these are not supported and must be None.
        edge_features: Optional[Union[pd.DataFrame, np.ndarray, List[Union[pd.DataFrame, np.ndarray]]]] = None
            The edge features to use.

        Raises
        --------------------
        NotImplementedError
            If node type features are provided.
        """
        if node_type_features is not None:
            raise NotImplementedError(
                "Support for node type features is not currently available for any "
                "of the edge-label prediction models.")

        # The kind of prediction task is determined by the graph itself.
        number_of_edge_types = graph.get_number_of_edge_types()
        self._is_binary_prediction_task = number_of_edge_types == 2
        self._is_multilabel_prediction_task = graph.is_multigraph()

        super().fit(
            graph=graph,
            support=support,
            node_features=node_features,
            node_type_features=node_type_features,
            edge_features=edge_features,
        )
Exemplo n.º 16
0
def convert_ensmallen_graph_to_networkx_graph(graph: Graph) -> nx.Graph:
    """Return networkX graph derived from the provided Ensmallen Graph.

    The nodes of the returned graph are the node IDs of the provided graph.
    A `DiGraph` is returned when the provided graph is directed.

    Parameters
    -----------
    graph: Graph
        The graph to be converted.
    """
    is_directed = graph.is_directed()
    result_graph = nx.DiGraph() if is_directed else nx.Graph()
    result_graph.add_nodes_from(graph.get_node_ids())

    if not graph.has_edge_weights():
        result_graph.add_edges_from(
            graph.get_edge_node_ids(directed=is_directed))
        return result_graph

    # Pair each directed edge with its weight.
    weighted_edges = []
    for (src_id, dst_id), edge_weight in zip(
            graph.get_directed_edge_node_ids(), graph.get_edge_weights()):
        weighted_edges.append((src_id, dst_id, edge_weight))
    result_graph.add_weighted_edges_from(weighted_edges)

    return result_graph
    def _get_steps_per_epoch(self, graph: Graph) -> int:
        """Returns number of steps per epoch.

        This is the number of whole batches that fit in the number of
        directed edges of the graph, and it is never less than one.

        Parameters
        ------------------
        graph: Graph
            The graph to compute the number of steps.
        """
        number_of_directed_edges = graph.get_number_of_directed_edges()
        whole_batches = number_of_directed_edges // self._batch_size
        return max(whole_batches, 1)
Exemplo n.º 18
0
    def _get_steps_per_epoch(self, graph: Graph) -> int:
        """Returns number of steps per epoch.

        This is the number of whole batches that fit in the number of
        nodes of the graph, and it is never less than one.

        Parameters
        ------------------
        graph: Graph
            The graph to compute the number of steps.
        """
        return max(graph.get_number_of_nodes() // self._batch_size, 1)
    def __init__(
        self,
        graph: Graph,
        graph_used_in_training: Graph,
        return_node_types: bool,
        return_edge_types: bool,
        use_edge_metrics: bool,
        batch_size: int = 2**10,
    ):
        """Create new EdgePredictionSequence object.

        Parameters
        --------------------------------
        graph: Graph
            The graph whose edges are to be predicted.
        graph_used_in_training: Graph
            The graph that was used while training the current
            edge prediction model.
        return_node_types: bool
            Whether to return the node types.
        return_edge_types: bool
            Whether to return the edge types.
        use_edge_metrics: bool
            Whether to return the edge metrics.
        batch_size: int = 2**10,
            The batch size to use.

        Raises
        --------------------------------
        ValueError
            If the provided graph does not have any edge.
        ValueError
            If the graph used in training does not have any edge.
        ValueError
            If the node vocabulary of the provided graph is not compatible
            with the one of the graph used in training.
        """
        if not graph.has_edges():
            raise ValueError(
                f"An empty instance of graph {graph.get_name()} was provided!")
        # The emptiness of the training graph must be checked on the
        # training graph itself, not on the graph to be predicted.
        if not graph_used_in_training.has_edges():
            raise ValueError(
                f"An empty instance of graph {graph_used_in_training.get_name()} was provided!"
            )
        if not graph.has_compatible_node_vocabularies(graph_used_in_training):
            raise ValueError(
                f"The provided graph {graph.get_name()} does not have a node vocabulary "
                "that is compatible with the provided graph used in training.")
        self._graph = graph
        self._graph_used_in_training = graph_used_in_training
        self._return_node_types = return_node_types
        self._return_edge_types = return_edge_types
        self._use_edge_metrics = use_edge_metrics
        self._batch_size = batch_size
Exemplo n.º 20
0
    def split_graph_following_evaluation_schema(
        cls,
        graph: Graph,
        evaluation_schema: str,
        random_state: int,
        holdout_number: int,
        number_of_holdouts: int,
        **holdouts_kwargs: Dict[str, Any],
    ) -> Tuple[Graph]:
        """Return train and test graphs tuple following the provided evaluation schema.

        Parameters
        ----------------------
        graph: Graph
            The graph to split.
        evaluation_schema: str
            The evaluation schema to follow.
        random_state: int
            The random state for the evaluation.
        holdout_number: int
            The current holdout number.
        number_of_holdouts: int
            The number of holdouts that will be generated throughout the evaluation.
        holdouts_kwargs: Dict[str, Any]
            The kwargs to be forwarded to the holdout method.
            They are only forwarded in the Stratified Monte Carlo schema.

        Raises
        ----------------------
        ValueError
            If the requested evaluation schema is not available.
        """
        is_monte_carlo = evaluation_schema == "Stratified Monte Carlo"
        is_kfold = evaluation_schema == "Stratified Kfold"

        if not (is_monte_carlo or is_kfold):
            raise ValueError(
                f"The requested evaluation schema `{evaluation_schema}` "
                "is not available. The available evaluation schemas "
                f"are: {format_list(cls.get_available_evaluation_schemas())}.")

        if is_monte_carlo:
            # Each holdout uses its own random state.
            holdout_random_state = random_state + holdout_number
            return graph.get_edge_label_holdout_graphs(
                **holdouts_kwargs,
                use_stratification=True,
                random_state=holdout_random_state,
            )

        # In the k-fold schema the random state is shared across the folds,
        # and the holdout number selects the fold.
        return graph.get_edge_label_kfold(
            k=number_of_holdouts,
            k_index=holdout_number,
            use_stratification=True,
            random_state=random_state,
        )
Exemplo n.º 21
0
    def make_link_prediction_data(self, embedding_file: str,
                                  training_graph_args: dict,
                                  pos_validation_args: dict,
                                  neg_training_args: dict,
                                  neg_validation_args: dict,
                                  edge_method: str) -> Tuple[Tuple, Tuple]:
        """Prepare training and validation data for training link prediction classifiers

        The positive validation, negative training and negative validation
        graphs are loaded using a copy of the training graph arguments
        updated with their own arguments.

        Args:
            embedding_file: path to embedding file for nodes in graph
            training_graph_args: EnsmallenGraph arguments to load training graph
            pos_validation_args: EnsmallenGraph arguments to load positive validation graph
            neg_training_args: EnsmallenGraph arguments to load negative training graph
            neg_validation_args: EnsmallenGraph arguments to load negative validation graph
            edge_method: edge embedding method to use (average, L1, L2, etc)
        Returns:
            A tuple of tuples: (training edges, training labels) and
            (validation edges, validation labels)

        """
        embedding = pd.read_csv(embedding_file, index_col=0, header=None)

        # Load the positive training graph first, then the other graphs
        # using the training arguments as defaults.
        graphs = {'pos_training': Graph.from_csv(**training_graph_args)}
        overrides_by_name = (
            ('pos_validation', pos_validation_args),
            ('neg_training', neg_training_args),
            ('neg_validation', neg_validation_args),
        )
        for name, overrides in overrides_by_name:
            merged_args = copy.deepcopy(training_graph_args)
            merged_args.update(overrides)
            graphs[name] = Graph.from_csv(**merged_args)

        # The transformer converts graphs into edge embeddings, using
        # the node embeddings it is fitted with.
        transformer = LinkPredictionTransformer(method=edge_method)
        transformer.fit(embedding)

        training_data = transformer.transform(
            positive_graph=graphs['pos_training'],
            negative_graph=graphs['neg_training'],
        )
        validation_data = transformer.transform(
            positive_graph=graphs['pos_validation'],
            negative_graph=graphs['neg_validation'],
        )

        train_edges, train_labels = training_data
        valid_edges, valid_labels = validation_data
        return (train_edges, train_labels), (valid_edges, valid_labels)
Exemplo n.º 22
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding computed from the co-occurrence matrix.

        The log-normalized co-occurrence matrix of the graph is assembled
        as a sparse COO matrix and reduced with a truncated SVD.

        Parameters
        ------------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embedding in a DataFrame indexed by node name.
        verbose: bool = True
            Not used by this implementation.
        """
        edges, weights = graph.get_log_normalized_cooccurrence_coo_matrix(
            **self._walk_parameters)
        number_of_nodes = graph.get_number_of_nodes()

        sources, destinations = edges[:, 0], edges[:, 1]
        cooccurrence = coo_matrix(
            (weights, (sources, destinations)),
            shape=(number_of_nodes, number_of_nodes),
            dtype=np.float32,
        )

        svd = TruncatedSVD(
            n_components=self._embedding_size,
            random_state=self._random_state,
        )
        # Fitting and transforming are deliberately kept as separate steps.
        svd.fit(cooccurrence)
        node_embeddings = svd.transform(cooccurrence)

        if return_dataframe:
            node_embeddings = pd.DataFrame(
                node_embeddings,
                index=graph.get_node_names(),
            )

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embeddings,
        )
Exemplo n.º 23
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node and edge type embeddings of the provided graph.

        Parameters
        ------------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to wrap the embeddings in DataFrames, indexed
            respectively by node names and unique edge type names.
        verbose: bool = True
            Forwarded to the underlying model.
        """
        embeddings = self._model.fit_transform(
            graph,
            epochs=self._epochs,
            learning_rate=self._learning_rate,
            learning_rate_decay=self._learning_rate_decay,
            verbose=verbose,
        )
        # The underlying model returns the node embedding first,
        # followed by the edge type embedding.
        node_embedding, edge_type_embedding = embeddings

        if return_dataframe:
            node_names = graph.get_node_names()
            node_embedding = pd.DataFrame(node_embedding, index=node_names)

            edge_type_names = graph.get_unique_edge_type_names()
            edge_type_embedding = pd.DataFrame(
                edge_type_embedding,
                index=edge_type_names,
            )

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            node_embeddings=node_embedding,
            edge_type_embeddings=edge_type_embedding,
        )
Exemplo n.º 24
0
 def _fit_transform(self,
                    graph: Graph,
                    return_dataframe: bool = True,
                    verbose: bool = True) -> EmbeddingResult:
     """Return node embedding of the provided graph.

     Parameters
     ------------------
     graph: Graph
         The graph to embed.
     return_dataframe: bool = True
         Whether to wrap the embedding in a DataFrame indexed by node name.
     verbose: bool = True
         Forwarded to the underlying model.
     """
     raw_embedding = self._model.fit_transform(
         graph,
         verbose=verbose,
     )
     # The output of the underlying model is transposed before use.
     node_embedding = raw_embedding.T

     if return_dataframe:
         node_names = graph.get_node_names()
         node_embedding = pd.DataFrame(node_embedding, index=node_names)

     return EmbeddingResult(
         embedding_method_name=self.model_name(),
         node_embeddings=node_embedding,
     )
    def _build_edge_prediction_based_model(
            self, graph: Graph, sources: tf.Tensor,
            destinations: tf.Tensor) -> Union[List[tf.Tensor], tf.Tensor]:
        """Return the model implementation.

        The output is the activation of the dot product between the
        node embedding of the sources and the context embedding
        of the destinations.

        Parameters
        -------------------
        graph: Graph
            The graph whose number of nodes defines the embedding size.
        sources: tf.Tensor
            The source nodes to be used in the model.
        destinations: tf.Tensor
            The destinations nodes to be used in the model.
        """
        number_of_nodes = graph.get_number_of_nodes()

        # Sources and destinations use two distinct embedding layers.
        node_embedding = Embedding(
            input_dim=number_of_nodes,
            output_dim=self._embedding_size,
            input_length=1,
            name="node_embeddings",
        )
        context_embedding = Embedding(
            input_dim=number_of_nodes,
            output_dim=self._embedding_size,
            input_length=1,
            name="context_embeddings",
        )

        source_embedding = Flatten()(node_embedding(sources))
        destination_embedding = Flatten()(context_embedding(destinations))

        similarity = Dot(axes=-1)([source_embedding, destination_embedding])
        return Activation(self._activation)(similarity)
Exemplo n.º 26
0
    def _extract_embeddings(self, graph: Graph, model: Model,
                            return_dataframe: bool) -> EmbeddingResult:
        """Returns node and edge type embeddings from the model.

        Parameters
        ------------------
        graph: Graph
            The graph that was embedded.
        model: Model
            The Keras model used to embed the graph.
        return_dataframe: bool
            Whether to return dataframes or numpy arrays.
        """
        # The first row of the edge type embedding is dropped when the
        # graph has unknown edge types.
        drop_first_edge_type_row = graph.has_unknown_edge_types()

        embeddings = {
            "node_embeddings": self.get_layer_weights(
                "node_embeddings",
                model,
                drop_first_row=False,
            ),
            "edge_type_embeddings": self.get_layer_weights(
                "edge_type_embeddings",
                model,
                drop_first_row=drop_first_edge_type_row,
            ),
        }

        if return_dataframe:
            indices = {
                "node_embeddings": graph.get_node_names(),
                "edge_type_embeddings": graph.get_unique_edge_type_names(),
            }
            embeddings = {
                layer_name: pd.DataFrame(weights, index=indices[layer_name])
                for layer_name, weights in embeddings.items()
            }

        return EmbeddingResult(
            embedding_method_name=self.model_name(),
            **embeddings,
        )
Exemplo n.º 27
0
 def _fit_transform(self,
                    graph: Graph,
                    return_dataframe: bool = True,
                    verbose: bool = True) -> EmbeddingResult:
     """Return node embeddings of the provided graph as a list.

     Parameters
     ------------------
     graph: Graph
         The graph to embed.
     return_dataframe: bool = True
         Whether to wrap each embedding in a DataFrame indexed by node name.
     verbose: bool = True
         Not used by this implementation.
     """
     embeddings = self._model.fit_transform(graph)

     # A single embedding is wrapped so that a list is always handled.
     node_embeddings = (
         embeddings if isinstance(embeddings, list) else [embeddings]
     )

     if return_dataframe:
         node_names = graph.get_node_names()
         node_embeddings = [
             pd.DataFrame(embedding, index=node_names)
             for embedding in node_embeddings
         ]

     return EmbeddingResult(
         embedding_method_name=self.model_name(),
         node_embeddings=node_embeddings,
     )
Exemplo n.º 28
0
    def _fit_transform(
        self,
        graph: Graph,
        return_dataframe: bool = True,
        verbose: bool = True
    ) -> Union[np.ndarray, pd.DataFrame, Dict[str, np.ndarray], Dict[
            str, pd.DataFrame]]:
        """Train a PyKeen model on the graph and return its embeddings.

        The directed edge triples of the graph are wrapped into a
        CoreTriplesFactory, the model built by `_build_model` is trained
        with an SLCWATrainingLoop and the result is delegated to
        `_extract_embeddings`.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Forwarded to `_extract_embeddings`.
        verbose: bool = True
            Whether to show the training progress bars; when False the
            tqdm bars are disabled.

        Raises
        ---------------
        NotImplementedError
            If `_build_model` does not return a PyKeen model.
        """
        device = torch.device(self._device)

        # PyKeen expects the triples as an integer tensor.
        triples = graph.get_directed_edge_triples_ids().astype(np.int32)
        factory = CoreTriplesFactory(
            torch.IntTensor(triples),
            num_entities=graph.get_number_of_nodes(),
            num_relations=graph.get_number_of_edge_types(),
            entity_ids=graph.get_node_ids(),
            relation_ids=graph.get_unique_edge_type_ids(),
            create_inverse_triples=False,
        )

        # A batch can never be larger than the number of available edges.
        number_of_edges = graph.get_number_of_directed_edges()
        effective_batch_size = min(self._batch_size, number_of_edges)

        model = self._build_model(factory)

        if not isinstance(model, Model):
            raise NotImplementedError(
                "The model created with the `_build_model` in the child "
                f"class {self.__class__.__name__} for the model {self.model_name()} "
                f"in the library {self.library_name()} did not return a "
                f"PyKeen model but an object of type {type(model)}.")

        # Move the model onto the configured device before training.
        model.to(device)

        loop = SLCWATrainingLoop(
            model=model,
            triples_factory=factory,
        )

        # The bars are always requested and silenced through tqdm itself.
        progress_bar_options = {"disable": not verbose}
        loop.train(triples_factory=factory,
                   num_epochs=self._epochs,
                   batch_size=effective_batch_size,
                   use_tqdm=True,
                   use_tqdm_batch=True,
                   tqdm_kwargs=progress_bar_options)

        return self._extract_embeddings(graph,
                                        model,
                                        return_dataframe=return_dataframe)
Exemplo n.º 29
0
 def convert_graph_to_kernel(self,
                             graph: Graph) -> Optional[tf.SparseTensor]:
     """Return the provided graph converted to a sparse tensor, if needed.

     Parameters
     ---------------------------
     graph: Graph
         The graph to convert.

     Implementation details
     ---------------------------
     When the model has no convolutional layers, None is returned
     instead, so that the sparse tensor is not allocated for no reason.
     Edge weights are only used when the graph has them and the
     symmetric normalized Laplacian is not requested.
     """
     if self.has_convolutional_layers():
         use_laplacian = self._use_simmetric_normalized_laplacian
         use_weights = graph.has_edge_weights() and not use_laplacian
         return graph_to_sparse_tensor(
             graph,
             use_weights=use_weights,
             use_simmetric_normalized_laplacian=use_laplacian,
             handling_multi_graph=self._handling_multi_graph)

     return None
Exemplo n.º 30
0
    def _fit_transform(self,
                       graph: Graph,
                       return_dataframe: bool = True,
                       verbose: bool = True) -> EmbeddingResult:
        """Return node embedding.

        The estimator built by `_build_model` is fitted on the NetworkX
        conversion of the graph, and its embedding is then retrieved
        through `get_embedding`.

        Parameters
        ---------------
        graph: Graph
            The graph to embed.
        return_dataframe: bool = True
            Whether to return a DataFrame indexed by the node names
            instead of the raw numpy array.
        verbose: bool = True
            Whether to show a loading bar.
            Not used by this implementation.

        Raises
        ---------------
        NotImplementedError
            If `_build_model` does not return an Estimator.
        NotImplementedError
            If the estimator's `get_embedding` does not return a
            numpy array.
        """
        # `_build_model` returns an estimator instance, not a class.
        model: Estimator = self._build_model()

        if not isinstance(model, Estimator):
            raise NotImplementedError(
                "The model created with the `_build_model` in the child "
                f"class {self.__class__.__name__} for the model {self.model_name()} "
                f"in the library {self.library_name()} did not return a "
                f"Estimator but an object of type {type(model)}. "
                "It is not clear what to do with this object.")

        model.fit(convert_ensmallen_graph_to_networkx_graph(graph))

        node_embeddings = model.get_embedding()

        if not isinstance(node_embeddings, np.ndarray):
            # Report the type of the unexpected embedding, not of the model.
            raise NotImplementedError(
                "The model created with the `get_embedding` in the child "
                f"class {self.__class__.__name__} for the model {self.model_name()} "
                f"in the library {self.library_name()} did not return a "
                f"Numpy Array but an object of type {type(node_embeddings)}. "
                "It is not clear what to do with this object.")

        if return_dataframe:
            node_embeddings = pd.DataFrame(node_embeddings,
                                           index=graph.get_node_names())

        return EmbeddingResult(embedding_method_name=self.model_name(),
                               node_embeddings=node_embeddings)