Пример #1
0
    def pyg_to_graphs(dataset,
                      verbose: bool = False,
                      fixed_split: bool = False) -> List[Graph]:
        r"""
        Convert the items of a PyG dataset into DeepSNAP graph objects.

        Args:
            dataset: an iterable of PyG data objects
                (a torch_geometric.data.Dataset).
            verbose: forwarded to :meth:`Graph.pyg_to_graph`.
            fixed_split: when set, every item is converted with
                ``fixed_split=True`` and only the conversion result of the
                first item is used; each of its elements is wrapped in its
                own single-element list.

        Returns:
            list: one :class:`deepsnap.graph.Graph` per dataset item, or, when
            ``fixed_split`` is set, a list of single-element lists.
        """
        if not fixed_split:
            converted = []
            for data in dataset:
                converted.append(Graph.pyg_to_graph(data, verbose=verbose))
            return converted

        per_item = []
        for data in dataset:
            per_item.append(
                Graph.pyg_to_graph(data, verbose=verbose, fixed_split=True))
        # Only the result for the first dataset item is returned.
        first_item = per_item[0]
        return [[graph] for graph in first_item]
Пример #2
0
    def test_split_edge_case(self):
        """Verify the split sizes for the node, edge and link_pred tasks."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = \
            simple_networkx_graph()
        dg = Graph(G)

        # Node task: split called without arguments.
        node_parts = dg.split()
        reduced_nodes = dg.num_nodes - 3
        node_head = int(reduced_nodes * 0.8)
        node_mid = int(reduced_nodes * 0.1)
        self.assertEqual(node_parts[0].node_label_index.shape[0],
                         1 + node_head)
        self.assertEqual(node_parts[1].node_label_index.shape[0],
                         1 + node_mid)
        self.assertEqual(node_parts[2].node_label_index.shape[0],
                         dg.num_nodes - 2 - node_head - node_mid)

        # The edge and link_pred tasks are checked against the same sizes.
        for task in ('edge', 'link_pred'):
            edge_parts = dg.split(task=task)
            reduced_edges = dg.num_edges - 3
            edge_0 = 1 + int(reduced_edges * 0.8)
            edge_1 = 1 + int(reduced_edges * 0.1)
            edge_2 = dg.num_edges - edge_0 - edge_1
            for position, expected in enumerate((edge_0, edge_1, edge_2)):
                self.assertEqual(
                    edge_parts[position].edge_label_index.shape[1], expected)
Пример #3
0
    def test_graph_property_edge_case(self):
        """Check feature/label counts on graphs carrying only some attributes."""
        G_1, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph())
        Graph.add_node_attr(G_1, "node_feature", x)
        dg_1 = Graph(G_1)
        self.assertEqual(dg_1.num_nodes, G_1.number_of_nodes())
        self.assertEqual(dg_1.num_edges, G_1.number_of_edges())
        # Only node features were attached, so every other count is zero.
        expected_counts = [
            ("num_node_features", 2),
            ("num_edge_features", 0),
            ("num_graph_features", 0),
            ("num_node_labels", 0),
            ("num_edge_labels", 0),
            ("num_graph_labels", 0),
        ]
        for attr_name, expected in expected_counts:
            self.assertEqual(getattr(dg_1, attr_name), expected)

        G_2, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph())
        float_type = torch.FloatTensor
        Graph.add_edge_attr(G_2, "edge_label", edge_y.type(float_type))
        Graph.add_node_attr(G_2, "node_label", y.type(float_type))
        Graph.add_graph_attr(G_2, "graph_label", graph_y.type(float_type))

        # With float labels each label count is expected to be 1.
        dg_2 = Graph(G_2)
        for attr_name in ("num_node_labels", "num_edge_labels",
                          "num_graph_labels"):
            self.assertEqual(getattr(dg_2, attr_name), 1)
Пример #4
0
    def test_unbatch_nested(self):
        """Batch graphs holding a nested property dict and unbatch them again."""
        dims = [2, 3]
        G_sizes = [10, 5]
        G_list = []
        for i, size in enumerate(G_sizes):
            G = Graph()
            G.G = nx.complete_graph(i + 1)
            prop0 = torch.ones(size, dims[0]) * i
            prop1 = torch.ones(size, dims[1]) * i
            G.node_property = {"node_prop0": prop0, "node_prop1": prop1}
            G_list.append(G)

        batch = Batch.from_data_list(G_list)

        # Reconstruct the graph list and check the per-graph tensor shapes.
        G_list_recon = batch.to_data_list()
        shape_checks = [
            (0, "node_prop0", 0, 10),
            (0, "node_prop0", 1, 2),
            (1, "node_prop1", 0, 5),
            (1, "node_prop1", 1, 3),
        ]
        for graph_pos, prop_name, axis, expected in shape_checks:
            recovered = G_list_recon[graph_pos].node_property[prop_name]
            self.assertEqual(recovered.size(axis), expected)
Пример #5
0
    def __getitem__(self, idx: int) -> Union[Graph, List[Graph]]:
        r"""
        Return the graph at an integer index, or a subset for any other index.

        When the task is ``link_pred`` and negative resampling is enabled,
        negative samples are re-drawn on every stored graph before the lookup.
        When no graphs are stored, a graph is produced by ``self.generator``.

        Args:
            idx: index to be selected from graphs.

        Returns:
            Union[:class:`deepsnap.graph.Graph`, List[:class:`deepsnap.graph.Graph`]]: A single
            :class:`deepsnap.graph.Graph` object or subset of :class:`deepsnap.graph.Graph` objects.
        """
        if self.task == 'link_pred' and self._resample_negatives:
            # Re-draw the negative examples of every stored graph.
            for graph in self.graphs:
                graph_cls = type(graph)
                if graph_cls is Graph:
                    graph._create_neg_sampling(
                        self.edge_negative_sampling_ratio, resample=True)
                elif graph_cls is HeteroGraph:
                    graph._create_neg_sampling(
                        self.edge_negative_sampling_ratio,
                        split_types=self._split_types,
                        resample=True)

        # TODO: add the hetero graph equivalent of these functions ?
        if self.graphs is not None:
            if isinstance(idx, int):
                return self.graphs[idx]
            return self._index_select(idx)

        # No stored graphs: generate one on the fly.
        graph = self.generator.generate()
        if not isinstance(graph, Graph):
            # The generator returned a raw graph object; wrap it.
            graph = Graph(graph)
        if self.otf_device is not None:
            graph.to(self.otf_device)
        return graph
Пример #6
0
def concatenate_citeseer_cora(cora_pyg, citeseer_pyg):
    """Merge the Cora and CiteSeer graphs into one typed graph.

    Both PyG objects are converted with ``Graph.pyg_to_graph``. Every node
    and edge is tagged with a ``node_type`` / ``edge_type`` attribute naming
    its source dataset. The CiteSeer nodes are then appended to a deep copy
    of the Cora graph, with their ids shifted by the number of Cora nodes.

    Args:
        cora_pyg: PyG data object for Cora.
        citeseer_pyg: PyG data object for CiteSeer.

    Returns:
        The combined graph (a deep copy of the Cora graph extended with the
        shifted CiteSeer nodes and edges).
    """
    cora_g = Graph.pyg_to_graph(cora_pyg).G
    citeseer_g = Graph.pyg_to_graph(citeseer_pyg).G
    nx.set_node_attributes(cora_g, 'cora_node', name='node_type')
    nx.set_edge_attributes(cora_g, 'cora_edge', name='edge_type')
    nx.set_node_attributes(citeseer_g, 'citeseer_node', name='node_type')
    nx.set_edge_attributes(citeseer_g, 'citeseer_edge', name='edge_type')

    G = deepcopy(cora_g)
    # NOTE(review): shifting by the Cora node count avoids id collisions only
    # if the Cora node ids are 0..n-1 - confirm against pyg_to_graph.
    offset = cora_g.number_of_nodes()
    for node, attrs in citeseer_g.nodes(data=True):
        shifted = node + offset
        G.add_node(shifted, **attrs)
        # Sanity-check against the actual source node id rather than the
        # enumeration position, so the check also holds for non-contiguous ids.
        assert G.nodes[shifted]['node_label'] == \
            citeseer_g.nodes[node]['node_label']
        assert G.nodes[shifted]['node_type'] == \
            citeseer_g.nodes[node]['node_type']
    assert G.number_of_nodes(
    ) == cora_g.number_of_nodes() + citeseer_g.number_of_nodes()

    for src, dst, attrs in citeseer_g.edges(data=True):
        u = src + offset
        v = dst + offset
        G.add_edge(u, v, **attrs)
        assert G.edges[(u, v)]['edge_type'] == \
            citeseer_g.edges[(src, dst)]['edge_type']
    assert G.number_of_edges(
    ) == cora_g.number_of_edges() + citeseer_g.number_of_edges()
    return G
Пример #7
0
 def _custom_split_link_pred_disjoint(self, graph_train):
     """Build the disjoint training graph from a custom objective-edge list.

     The edges in ``graph_train.custom_disjoint_split`` are treated as
     objective edges; all remaining edges of ``graph_train.G`` form the
     message-passing graph. Link-prediction labels are then created on the
     new graph from the objective edges.

     Args:
         graph_train: graph providing ``G`` and ``custom_disjoint_split``.

     Returns:
         A new ``Graph`` built from the message edges.
     """
     objective_edges = graph_train.custom_disjoint_split
     all_edges = set(graph_train.G.edges)
     message_edges = list(all_edges - set(objective_edges))
     message_subgraph = graph_train._edge_subgraph_with_isonodes(
         graph_train.G,
         message_edges,
     )
     graph_train = Graph(message_subgraph)
     graph_train._create_label_link_pred(graph_train, objective_edges)
     return graph_train
Пример #8
0
    def test_split_edge_case(self):
        """Verify split sizes on a small labelled graph for all three tasks."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph())
        Graph.add_node_attr(G, "node_label", y)
        Graph.add_edge_attr(G, "edge_label", edge_y)
        dg = Graph(G)

        def expect_sizes(parts, index_attr, axis, sizes):
            # Compare the size of each split's label index with its target.
            for position, size in enumerate(sizes):
                label_index = getattr(parts[position], index_attr)
                self.assertEqual(label_index.shape[axis], size)

        node_parts = dg.split()
        total_nodes = dg.num_nodes
        node_0 = int(total_nodes * 0.8)
        node_1 = int(total_nodes * 0.1)
        node_2 = total_nodes - node_0 - node_1
        expect_sizes(node_parts, "node_label_index", 0,
                     (node_0, node_1, node_2))

        edge_parts = dg.split(task="edge")
        total_edges = dg.num_edges
        edge_0 = int(total_edges * 0.8)
        edge_1 = int(total_edges * 0.1)
        edge_2 = total_edges - edge_0 - edge_1
        expect_sizes(edge_parts, "edge_label_index", 1,
                     (edge_0, edge_1, edge_2))

        link_parts = dg.split(task="link_pred")
        edge_0 = int(total_edges * 0.8)
        edge_1 = int(total_edges * 0.1)
        edge_2 = dg.num_edges - edge_0 - edge_1
        expect_sizes(link_parts, "edge_label_index", 1,
                     (edge_0, edge_1, edge_2))
Пример #9
0
    def test_split_edge_case(self):
        """Verify split sizes for a directed graph built from tensors."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph()
        )

        dg = Graph(node_label=y, edge_label=edge_y, edge_index=edge_index,
                   directed=True)

        def expect_sizes(parts, index_attr, axis, sizes):
            # Compare the size of each split's label index with its target.
            for position, size in enumerate(sizes):
                label_index = getattr(parts[position], index_attr)
                self.assertEqual(label_index.shape[axis], size)

        node_parts = dg.split()
        total_nodes = dg.num_nodes
        node_0 = int(total_nodes * 0.8)
        node_1 = int(total_nodes * 0.1)
        node_2 = dg.num_nodes - node_0 - node_1
        expect_sizes(node_parts, "node_label_index", 0,
                     (node_0, node_1, node_2))

        # The edge and link_pred tasks are checked against the same sizes.
        for task in ("edge", "link_pred"):
            edge_parts = dg.split(task=task)
            total_edges = dg.num_edges
            edge_0 = int(total_edges * 0.8)
            edge_1 = int(total_edges * 0.1)
            edge_2 = dg.num_edges - edge_0 - edge_1
            expect_sizes(edge_parts, "edge_label_index", 1,
                         (edge_0, edge_1, edge_2))
Пример #10
0
    def test_transform(self):
        """Apply successive tensor transforms and check the stored features."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph()
        )
        dg = Graph(
            node_feature=x, node_label=y, edge_index=edge_index,
            edge_feature=edge_x, edge_label=edge_y,
            graph_feature=graph_x, graph_label=graph_y, directed=True
        )

        names = ("edge_feature", "node_feature", "graph_feature")
        # Snapshots of the features before any transform is applied.
        before = {name: getattr(dg, name).clone() for name in names}

        # The identity transform leaves every feature unchanged.
        dg.apply_tensor(lambda t: t, *names)
        for name in names:
            self.assertTrue(torch.all(before[name].eq(getattr(dg, name))))

        # Adding a constant makes the features differ from the snapshot.
        dg.apply_tensor(lambda t: t + 10, *names)
        for name in names:
            self.assertFalse(torch.all(before[name].eq(getattr(dg, name))))

        # Transforms accumulate: +10 followed by +100.
        dg.apply_tensor(lambda t: t + 100, *names)
        for name in names:
            self.assertTrue(
                torch.all(getattr(dg, name).eq(before[name] + 10 + 100))
            )

        # A final doubling is applied on top of the accumulated shift.
        dg.apply_tensor(lambda t: t * 2, *names)
        for name in names:
            self.assertTrue(
                torch.all(getattr(dg, name).eq((before[name] + 10 + 100) * 2))
            )
Пример #11
0
    def test_pyg_to_graph_global(self):
        """Check that the backend selected via deepsnap.use drives conversion."""
        import deepsnap

        def converted_with(backend):
            # Convert the PyG data and report whether the internal graph is
            # an instance of the given backend's Graph class.
            converted = Graph.pyg_to_graph(pyg_data)
            return isinstance(converted.G, backend.Graph)

        deepsnap.use(nx)
        pyg_data = Planetoid('./planetoid', "Cora")[0]
        self.assertTrue(converted_with(nx))

        deepsnap.use(sx)
        self.assertTrue(converted_with(sx))
Пример #12
0
    def test_split(self):
        """Verify node and link_pred split sizes on an undirected Cora graph."""
        pyg_dataset = Planetoid("./cora", "Cora")

        x = pyg_dataset[0].x
        y = pyg_dataset[0].y
        edge_index = pyg_dataset[0].edge_index

        # Keep each edge once (row < col), then append the flipped copy so
        # that both directions are present.
        row, col = copy.deepcopy(edge_index)
        keep = row < col
        row, col = row[keep], col[keep]
        one_way = torch.stack([row, col], dim=0)
        edge_index = torch.cat([one_way, torch.flip(one_way, [0])], dim=1)

        dg = Graph(node_feature=x, node_label=y, edge_index=edge_index,
                   directed=False)

        node_parts = dg.split()
        total_nodes = dg.num_nodes
        node_0 = int(total_nodes * 0.8)
        node_1 = int(total_nodes * 0.1)
        node_2 = total_nodes - node_0 - node_1
        for position, expected in enumerate((node_0, node_1, node_2)):
            self.assertEqual(
                node_parts[position].node_label_index.shape[0], expected)

        for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
            link_parts = dg.split(task="link_pred", split_ratio=split_ratio)
            total_edges = dg.num_edges
            first = int(split_ratio[0] * total_edges)
            second = int(split_ratio[1] * total_edges)
            # Each expected size is twice the per-split edge count.
            expected_sizes = (
                2 * first,
                2 * second,
                2 * (total_edges - first - second),
            )
            for position, expected in enumerate(expected_sizes):
                self.assertEqual(
                    link_parts[position].edge_label_index.shape[1], expected)
Пример #13
0
    def __getitem__(self, idx: int) -> Union[Graph, List[Graph]]:
        r"""
        Return the graph at an integer index, or a subset for any other index.

        When no graphs are stored, a graph is produced by ``self.generator``
        (wrapped in :class:`deepsnap.graph.Graph` if needed and moved to
        ``self.otf_device`` when one is set). For the ``link_pred`` task with
        negative resampling enabled, negative samples are re-drawn on the
        single graph being returned.

        A non-integer index returns the result of ``_index_select`` directly.
        Negative resampling applies to single graphs only; previously a subset
        fell through to the resampling type check and raised ``TypeError``.

        Args:
            idx: index to be selected from graphs.

        Returns:
            Union[:class:`deepsnap.graph.Graph`, List[:class:`deepsnap.graph.Graph`]]: A single
            :class:`deepsnap.graph.Graph` object or subset of :class:`deepsnap.graph.Graph` objects.

        Raises:
            TypeError: if the selected element is not a
                :class:`deepsnap.graph.Graph` while resampling is enabled.
            NotImplementedError: for heterogeneous graphs with the
                ``"custom"`` negative edges mode.
        """
        # TODO: add the hetero graph equivalent of these functions ?
        if self.graphs is None:
            graph = self.generator.generate()
            if not isinstance(graph, Graph):
                # The generator returned a raw graph object; wrap it.
                graph = Graph(graph)
            if self.otf_device is not None:
                graph.to(self.otf_device)
        elif isinstance(idx, int):
            graph = self.graphs[idx]
        else:
            # Subset selection: nothing to resample on a collection.
            return self._index_select(idx)

        if not (self.task == "link_pred" and self._resample_negatives):
            return graph

        if not isinstance(graph, Graph):
            raise TypeError("element in self.graphs of unexpected type.")

        # Resample negative examples; HeteroGraph is checked first because
        # it also passes the Graph isinstance check above.
        mode = self.negative_edges_mode
        if isinstance(graph, HeteroGraph):
            if mode == "random":
                graph._create_neg_sampling(
                    self.edge_negative_sampling_ratio,
                    split_types=self._split_types,
                    resample=True)
            elif mode == "custom":
                raise NotImplementedError()
        elif mode == "random":
            graph._create_neg_sampling(
                self.edge_negative_sampling_ratio, resample=True)
        elif mode == "custom":
            graph._custom_create_neg_sampling(
                self.edge_negative_sampling_ratio, resample=True)
        return graph
Пример #14
0
    def apply_transform_multi(self,
                              transform,
                              update_tensors: bool = True,
                              update_graphs: bool = False,
                              deep_copy: bool = False,
                              **kwargs):
        r"""
        Variant of apply_transform for transforms that return several graphs.

        Each entry of ``self.G`` is wrapped in a Graph and transformed; the
        per-graph results are then regrouped by position and re-batched.

        Args:
            transform: (Multiple return value) transformation function
                applied to each graph object. It needs to return a tuple of
                Graph objects or internal .G (NetworkX) objects.
            update_tensors: forwarded to ``Graph.apply_transform_multi``.
            update_graphs: forwarded to ``Graph.apply_transform_multi``.
            deep_copy: forwarded to ``Graph.apply_transform_multi``.
            kwargs: extra keyword arguments forwarded as well.

        Returns:
            a generator of batch objects. The i-th batch object contains the
            i-th return value of the transform function applied to all graphs
            in the batch.
        """
        per_graph_results = [
            Graph(graph).apply_transform_multi(
                transform,
                update_tensors,
                update_graphs,
                deep_copy,
                **kwargs,
            ) for graph in self.G
        ]
        # Regroup: the i-th group gathers the i-th output of every graph.
        grouped = zip(*per_graph_results)
        return (self.from_data_list(group) for group in grouped)
Пример #15
0
    def apply_transform(self,
                        transform,
                        update_tensor: bool = True,
                        update_graph: bool = False,
                        deep_copy: bool = False,
                        **kwargs):
        r"""
        Transform every graph of the batch and re-batch the results.

        Each entry of ``self.G`` is wrapped in a
        :class:`deepsnap.graph.Graph`, the transform is applied through
        ``Graph.apply_transform``, and the transformed graphs are combined
        with ``from_data_list``.

        Args:
            transform: Transformation function applied to each graph object.
            update_tensor: Whether use nx graph to update tensor attributes.
            update_graph: Whether use tensor attributes to update nx graphs.
            deep_copy: :obj:`True` if a new deep copy of batch is returned.
                This option allows modifying the batch of graphs without
                changing the graphs in the original dataset.
            kwargs: Parameters used in transform function in :class:`deepsnap.graph.Graph` objects.

        Returns:
            a batch object containing all transformed graph objects.

        """
        # TODO: transductive setting, assert update_tensor == True
        rebatch = self.from_data_list
        transformed = []
        for graph in self.G:
            wrapped = Graph(graph)
            transformed.append(
                wrapped.apply_transform(transform, update_tensor,
                                        update_graph, deep_copy, **kwargs))
        return rebatch(transformed)
Пример #16
0
 def _dict_list_to_tensor(dict_of_list, graph):
     r"""Replace, in place, each list value by a single tensor.

     ``dict_of_list`` is either a dict or an object exposing ``keys`` as an
     attribute. Nested dict values are converted recursively. Lists of
     tensors are stacked or concatenated; lists of Python numbers become a
     tensor; any other value is left untouched.
     """
     if isinstance(dict_of_list, dict):
         keys = dict_of_list.keys()
     else:
         keys = dict_of_list.keys
     for key in keys:
         values = dict_of_list[key]
         if isinstance(values, dict):
             # Nested dictionary: convert its lists recursively.
             Batch._dict_list_to_tensor(values, graph)
             continue

         first = values[0]
         if not torch.is_tensor(first):
             if isinstance(first, (float, int)):
                 dict_of_list[key] = torch.tensor(values)
             continue

         # Special case: non-long 1D tensors of a graph-level attribute whose
         # key contains "feature" are stacked along a new first dimension.
         stack_first_dim = (Graph._is_graph_attribute(key)
                            and first.ndim == 1
                            and first.dtype != torch.long
                            and "feature" in key)
         if stack_first_dim:
             dict_of_list[key] = torch.stack(values, dim=0)
         else:
             # Everything else is concatenated along the graph's __cat_dim__.
             cat_dim = graph.__cat_dim__(key, first)
             dict_of_list[key] = torch.cat(values, dim=cat_dim)
Пример #17
0
    def test_split(self):
        """Verify node and link_pred split sizes on the converted Cora graph."""
        pyg_dataset = Planetoid("./cora", "Cora")
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        node_parts = dg.split()
        total_nodes = dg.num_nodes
        node_0 = int(0.8 * total_nodes)
        node_1 = int(0.1 * total_nodes)
        node_2 = total_nodes - node_0 - node_1
        for position, expected in enumerate((node_0, node_1, node_2)):
            self.assertEqual(
                node_parts[position].node_label_index.shape[0], expected)

        for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
            link_parts = dg.split(task="link_pred", split_ratio=split_ratio)
            total_edges = dg.num_edges
            first = int(split_ratio[0] * total_edges)
            second = int(split_ratio[1] * total_edges)
            # Each expected size is twice the per-split edge count.
            expected_sizes = (
                2 * first,
                2 * second,
                2 * (total_edges - first - second),
            )
            for position, expected in enumerate(expected_sizes):
                self.assertEqual(
                    link_parts[position].edge_label_index.shape[1], expected)
    def test_resample_disjoint(self):
        """Fetch the training graph twice and compare the edge labels."""
        pyg_dataset = Planetoid("./cora", "Cora")
        converted = GraphDataset.pyg_to_graphs(pyg_dataset)
        source = converted[0]
        # Rebuild the graph from its tensors as an undirected graph.
        graph = Graph(node_label=source.node_label,
                      node_feature=source.node_feature,
                      edge_index=source.edge_index,
                      edge_feature=source.edge_feature,
                      directed=False)
        dataset = GraphDataset([graph],
                               task="link_pred",
                               edge_train_mode="disjoint",
                               edge_message_ratio=0.8,
                               resample_disjoint=True,
                               resample_disjoint_period=1)
        dataset_train, _, _ = dataset.split(split_ratio=[0.5, 0.2, 0.3])
        first_fetch = dataset_train[0]
        second_fetch = dataset_train[0]

        # Both fetches must agree in label-index width and label values.
        self.assertEqual(first_fetch.edge_label_index.shape[1],
                         second_fetch.edge_label_index.shape[1])
        self.assertTrue(
            torch.equal(first_fetch.edge_label, second_fetch.edge_label))
Пример #19
0
    def test_ensemble_generator(self):
        """Draw one graph from an ensemble of two generators and check its size."""
        pyg_dataset = Planetoid("./cora", "Cora")
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        num_nodes = 500
        sizes = [2, 3]

        # NOTE(review): both generators return the `sizes` list from __len__;
        # calling len() on them would raise TypeError - confirm this is intended.
        class NeighborGenerator1(Generator):
            def __len__(self):
                return sizes

            def generate(self):
                return Graph(gen_graph(num_nodes, dg.G))

        class NeighborGenerator2(Generator):
            def __len__(self):
                return sizes

            def generate(self):
                return Graph(gen_graph(num_nodes, dg.G))

        members = [NeighborGenerator1(sizes), NeighborGenerator2(sizes)]
        ensemble_generator = EnsembleGenerator(members)
        dataset = GraphDataset(None, generator=ensemble_generator)
        sampled = dataset[0]
        self.assertTrue(sampled.node_feature.shape[0] == num_nodes)
    def test_dataset_property(self):
        """Check the label/feature counts reported by a two-graph dataset."""
        _, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph())
        G = Graph(node_feature=x, node_label=y, edge_index=edge_index,
                  edge_feature=edge_x, edge_label=edge_y,
                  graph_feature=graph_x, graph_label=graph_y,
                  directed=True)

        # Second graph: a copy with a different graph label.
        H = deepcopy(G)
        H.graph_label = torch.tensor([1])

        graphs = [G, H]
        dataset = GraphDataset(graphs)
        expected_counts = [
            ("num_node_labels", 5),
            ("num_node_features", 2),
            ("num_edge_labels", 4),
            ("num_edge_features", 2),
            ("num_graph_labels", 1),
            ("num_graph_features", 2),
            ("num_labels", 5),  # node task
        ]
        for prop, expected in expected_counts:
            self.assertEqual(getattr(dataset, prop), expected)

        # num_labels depends on the task the dataset is built for.
        for task, expected in (("edge", 4), ("link_pred", 5), ("graph", 1)):
            dataset = GraphDataset(graphs, task=task)
            self.assertEqual(dataset.num_labels, expected)
Пример #21
0
    def test_graph_basics(self):
        """Check which keys a fully specified tensor-backed graph exposes."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph()
        )

        dg = Graph(
            node_feature=x, node_label=y, edge_index=edge_index,
            edge_feature=edge_x, edge_label=edge_y,
            graph_feature=graph_x, graph_label=graph_y, directed=True
        )

        # "is_train" is deliberately not part of the expected keys here.
        expected_keys = (
            "directed",
            "node_feature",
            "node_label",
            "edge_feature",
            "edge_label",
            "graph_feature",
            "graph_label",
            "edge_index",
            "edge_label_index",
            "node_label_index",
        )
        for key in expected_keys:
            self.assertEqual(key in dg, True)
        # Iterating over the graph yields exactly ten keys.
        self.assertEqual(sum(1 for _ in dg), 10)
Пример #22
0
    def test_graph_property_general(self):
        """Check keys, sizes and feature/label counts of a full graph."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph())

        dg = Graph(node_feature=x, node_label=y, edge_index=edge_index,
                   edge_feature=edge_x, edge_label=edge_y,
                   graph_feature=graph_x, graph_label=graph_y,
                   directed=True)

        expected_keys = [
            "directed", "edge_feature", "edge_index", "edge_label",
            "edge_label_index", "graph_feature", "graph_label", "is_train",
            "node_feature", "node_label", "node_label_index"
        ]
        self.assertEqual(sorted(dg.keys), expected_keys)
        self.assertEqual(dg.num_nodes, G.number_of_nodes())
        self.assertEqual(dg.num_edges, G.number_of_edges())
        for prop in ("num_node_features", "num_edge_features",
                     "num_graph_features"):
            self.assertEqual(getattr(dg, prop), 2)
        # Label counts are compared against the largest label value plus one.
        label_checks = (
            ("num_node_labels", y),
            ("num_edge_labels", edge_y),
            ("num_graph_labels", graph_y),
        )
        for prop, labels in label_checks:
            self.assertEqual(getattr(dg, prop),
                             np.max(labels.data.numpy()) + 1)
Пример #23
0
    def test_collate_batch_nested(self):
        """Batch graphs holding a nested property dict and check the result."""
        dims = [2, 3]
        G_sizes = [10, 5]
        G_list = []
        for i, size in enumerate(G_sizes):
            G = Graph()
            G.G = nx.complete_graph(i + 1)
            prop0 = torch.ones(size, dims[0]) * i
            prop1 = torch.ones(size, dims[1]) * i
            G.node_property = {'node_prop0': prop0, 'node_prop1': prop1}
            G_list.append(G)
        batch = Batch.from_data_list(G_list)

        self.assertEqual(batch.num_graphs, 2)
        # The nested tensors of both graphs end up in one tensor.
        batched_prop0 = batch.node_property['node_prop0']
        self.assertEqual(batched_prop0.size(0), sum(G_sizes))
Пример #24
0
 def _orig_features(graph, key):
     """Normalise ``graph[key]`` in place into a tensor of at most two dims.

     Non-tensor values are converted with ``torch.tensor``. For regression
     tasks, values whose key contains 'label' are cast to float. A 1D node
     attribute gets a trailing dimension added.
     """
     current = graph[key]
     if not isinstance(current, torch.Tensor):
         graph[key] = torch.tensor(current)
     is_label_key = 'label' in key
     if cfg.dataset.task_type == 'regression' and is_label_key:
         graph[key] = graph[key].float()
     assert graph[key].ndim <= 2
     needs_column = graph[key].ndim == 1 and Graph._is_node_attribute(key)
     if needs_column:
         # n-by-1 tensor for node attributes
         graph[key] = graph[key].unsqueeze(-1)
Пример #25
0
    def test_clone(self):
        """Check that a clone matches the original without sharing edge_index."""
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph()
        )

        dg = Graph(
            node_feature=x, node_label=y, edge_index=edge_index,
            edge_feature=edge_x, edge_label=edge_y,
            graph_feature=graph_x, graph_label=graph_y, directed=True
        )
        dg1 = dg.clone()
        matching_props = (
            "num_nodes",
            "num_edges",
            "num_node_features",
            "num_edge_features",
            "num_node_labels",
            "num_edge_labels",
        )
        for prop in matching_props:
            self.assertEqual(getattr(dg, prop), getattr(dg1, prop))
        # The clone must hold its own edge_index object.
        self.assertTrue(id(dg.edge_index) != id(dg1.edge_index))
        self.assertTrue(tuple(dg.keys) == tuple(dg1.keys))
Пример #26
0
    def test_specify_graph_backend_init(self):
        """Build a Graph with an explicit backend and check its index shapes."""

        def check_backend(netlib):
            # 100 nodes and 3 edges; the edge index is expected to have
            # 6 columns.
            G = netlib.Graph()
            G.add_nodes_from(range(100))
            G.add_edges_from([[0, 4], [1, 5], [2, 6]])
            graph = Graph(G, netlib=netlib)
            self.assertTrue(isinstance(graph.G, netlib.Graph))
            self.assertEqual(list(graph.edge_index.shape), [2, 6])
            self.assertEqual(list(graph.edge_label_index.shape), [2, 6])
            self.assertEqual(list(graph.node_label_index.shape), [100])

        check_backend(sx)

        import networkx as nx
        check_backend(nx)
Пример #27
0
    def test_split(self):
        """Verify split sizes on Cora for node, edge and link_pred tasks.

        Also checks the label-index width after ``resample_disjoint`` for
        several message ratios, and the link_pred split sizes for several
        custom split ratios.
        """
        pyg_dataset = Planetoid('./cora', 'Cora')
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        # Node task: the first two splits are expected to hold one element
        # plus their share of the remaining (num_nodes - 3) nodes; the last
        # split gets the rest.
        dg_node = dg.split()
        dg_num_nodes_reduced = dg.num_nodes - 3
        self.assertEqual(
            dg_node[0].node_label_index.shape[0], 1 + int(dg_num_nodes_reduced * 0.8))
        self.assertEqual(
            dg_node[1].node_label_index.shape[0], 1 + int(dg_num_nodes_reduced * 0.1))
        self.assertEqual(
            dg_node[2].node_label_index.shape[0], dg.num_nodes - 2 -
            int(dg_num_nodes_reduced * 0.8) - int(dg_num_nodes_reduced * 0.1))

        # Edge task: same scheme on edges, with every expected size doubled
        # (presumably because each undirected edge is stored in both
        # directions - confirm against Graph.split).
        dg_edge = dg.split(task='edge')
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
        edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(dg_edge[0].edge_label_index.shape[1], edge_0)
        self.assertEqual(dg_edge[1].edge_label_index.shape[1], edge_1)
        self.assertEqual(dg_edge[2].edge_label_index.shape[1], edge_2)

        # link_pred task: checked against the same sizes as the edge task.
        dg_link = dg.split(task='link_pred')
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
        edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
        self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
        self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)

        # Resampling the disjoint split on a clone of the training graph:
        # the expected width is derived from half of the training graph's
        # label-index width and the message ratio.
        for message_ratio in [0.1, 0.2, 0.4, 0.8]:
            dg_link_resample = dg_link[0].clone().\
                resample_disjoint(message_ratio=message_ratio)
            positive_edge_num = \
                int(0.5 * dg_link[0].clone().edge_label_index.shape[1])
            self.assertEqual(dg_link_resample.edge_label_index.shape[1],
                             2 * (positive_edge_num - 1 - int(message_ratio * (positive_edge_num - 2))))

        # Custom split ratios for link_pred: edge_1 is computed as the
        # doubled cumulative size of the first two splits minus edge_0, and
        # edge_2 is whatever remains of the doubled edge count.
        for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
            dg_link_custom = \
                dg.split(task='link_pred', split_ratio=split_ratio)
            dg_num_edges_reduced = dg.num_edges - 3
            edge_0 = 2 * (1 + int(dg_num_edges_reduced * split_ratio[0]))
            self.assertEqual(dg_link_custom[0].
                             edge_label_index.shape[1], edge_0)
            edge_1 = \
                (1 + int(split_ratio[0] * dg_num_edges_reduced) + 1 + int(split_ratio[1] * dg_num_edges_reduced)) * 2 - edge_0
            self.assertEqual(dg_link_custom[1].edge_label_index.shape[1],
                             edge_1)
            edge_2 = dg.num_edges * 2 - edge_0 - edge_1
            self.assertEqual(dg_link_custom[2].edge_label_index.shape[1],
                             edge_2)
Пример #28
0
    def list_to_graphs(G_list) -> List[Graph]:
        r"""
        Wrap every element of a list in a :class:`deepsnap.graph.Graph`.

        Args:
            G_list: a list of networkx data object.

        Returns:
            list: A list of :class:`deepsnap.graph.Graph` object, in the same
            order as the input.
        """
        wrapped = []
        for G in G_list:
            wrapped.append(Graph(G))
        return wrapped
Пример #29
0
def preprocess(G, node_label_index, method="louvain"):
    r"""
    Split a graph into community subgraphs that contain labeled nodes.

    `G` is partitioned into communities with the selected algorithm. Every
    community with more than 10 nodes that contains at least one labeled node
    is relabeled to consecutive integer node ids starting at 0, wrapped in a
    `Graph` object, and given a `node_label_index` holding the relabeled ids
    of its labeled nodes.

    Args:
        G: the networkx graph to partition.
        node_label_index: an object exposing `tolist()` (e.g. a tensor) that
            yields the ids of the labeled nodes of `G`.
        method: community detection algorithm, one of `"louvain"`,
            `"bisection"` or `"greedy"`.

    Returns:
        list: A list of `Graph` objects, one per retained community.

    Raises:
        ValueError: if `method` is not one of the supported algorithms.
    """
    supported_methods = ("louvain", "bisection", "greedy")
    if method not in supported_methods:
        # Fail fast with a clear message: an unknown method used to fall
        # through every branch and crash later with an UnboundLocalError on
        # `communities`.
        raise ValueError(
            "Unsupported community detection method {!r}; "
            "expected one of {}".format(method, supported_methods)
        )

    labeled_nodes = set(node_label_index.tolist())

    if method == "louvain":
        # best_partition gives a node -> community id mapping; invert it into
        # one set of nodes per community.
        community_mapping = community_louvain.best_partition(G, resolution=10)
        grouped = {}
        for node, comm in community_mapping.items():
            grouped.setdefault(comm, set()).add(node)
        communities = grouped.values()
    elif method == "bisection":
        communities = nx.algorithms.community.kernighan_lin_bisection(G)
    else:  # method == "greedy"
        communities = nx.algorithms.community.greedy_modularity_communities(G)

    graphs = []
    for community in communities:
        subgraph = G.subgraph(set(community))
        # Make sure each subgraph has more than 10 nodes
        if subgraph.number_of_nodes() <= 10:
            continue

        # Relabel nodes to 0..n-1 so that indices are local to the subgraph
        node_mapping = {node: i for i, node in enumerate(subgraph.nodes())}
        subgraph = nx.relabel_nodes(subgraph, node_mapping)

        # Relabeled ids of the training set labeled nodes in this subgraph
        train_label_index = [
            node_mapping[node]
            for node in labeled_nodes
            if node in node_mapping
        ]

        # Make sure the subgraph contains at least one training set labeled node
        if not train_label_index:
            continue

        dg = Graph(subgraph)
        # Update node_label_index
        dg.node_label_index = torch.tensor(train_label_index,
                                           dtype=torch.long)
        graphs.append(dg)
    return graphs
Пример #30
0
    def test_graph_property_edge_case(self):
        """Check the label-count properties of a Graph built with float labels."""
        # Only the tensors returned by the fixture are used below; the
        # networkx graph G itself is not passed to Graph here.
        G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
            simple_networkx_graph()
        )

        # Build the Graph directly from tensors, casting every label tensor
        # (node, edge and graph level) to torch.FloatTensor.
        dg = Graph(
            node_feature=x, node_label=y.type(torch.FloatTensor),
            edge_index=edge_index, edge_label=edge_y.type(torch.FloatTensor),
            graph_label=graph_y.type(torch.FloatTensor), directed=True
        )
        # With float-typed labels each num_*_labels property is expected to
        # be 1 (presumably float labels are treated as a single continuous
        # target rather than distinct classes -- confirm against Graph).
        self.assertEqual(dg.num_node_labels, 1)
        self.assertEqual(dg.num_edge_labels, 1)
        self.assertEqual(dg.num_graph_labels, 1)