def pyg_to_graphs(dataset, verbose: bool = False, fixed_split: bool = False) -> List[Graph]:
    r"""Transform a torch_geometric.data.Dataset object to a list of
    Graph object.

    Args:
        dataset: a torch_geometric.data.Dataset object.
        verbose: if print verbose warning
        fixed_split: if load fixed data split from PyG dataset

    Returns:
        list: A list of :class:`deepsnap.graph.Graph` object.
    """
    if not fixed_split:
        return [Graph.pyg_to_graph(data, verbose=verbose) for data in dataset]
    # With a fixed split, the first converted entry holds the
    # train/val/test graphs; wrap each one in its own list.
    converted = [
        Graph.pyg_to_graph(data, verbose=verbose, fixed_split=True)
        for data in dataset
    ]
    return [[split_graph] for split_graph in converted[0]]
def test_split_edge_case(self):
    # Small-graph split: the expected sizes follow a "reduced total"
    # pattern (num - 3, then 1 + int(reduced * ratio) per split), which
    # appears to reserve at least one element for each of the three
    # splits — TODO confirm against Graph.split's implementation.
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = \
        simple_networkx_graph()
    dg = Graph(G)

    # Node task with the default split ratio [0.8, 0.1, 0.1].
    dg_node = dg.split()
    dg_num_nodes_reduced = dg.num_nodes - 3
    self.assertEqual(
        dg_node[0].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.8))
    self.assertEqual(
        dg_node[1].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.1))
    # Last split takes whatever remains after the first two.
    self.assertEqual(
        dg_node[2].node_label_index.shape[0],
        dg.num_nodes - 2
        - int(dg_num_nodes_reduced * 0.8)
        - int(dg_num_nodes_reduced * 0.1))

    # Edge task: same arithmetic, applied to edge counts.
    dg_edge = dg.split(task='edge')
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 1 + int(dg_num_edges_reduced * 0.8)
    edge_1 = 1 + int(dg_num_edges_reduced * 0.1)
    edge_2 = dg.num_edges - edge_0 - edge_1
    self.assertEqual(dg_edge[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_edge[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_edge[2].edge_label_index.shape[1], edge_2)

    # Link prediction task: split sizes match the edge task here.
    dg_link = dg.split(task='link_pred')
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 1 + int(dg_num_edges_reduced * 0.8)
    edge_1 = 1 + int(dg_num_edges_reduced * 0.1)
    edge_2 = dg.num_edges - edge_0 - edge_1
    self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)
def test_graph_property_edge_case(self):
    # Features only: counts mirror the networkx graph, no labels present.
    G_feat, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph())
    Graph.add_node_attr(G_feat, "node_feature", x)
    graph_feat = Graph(G_feat)
    self.assertEqual(graph_feat.num_nodes, G_feat.number_of_nodes())
    self.assertEqual(graph_feat.num_edges, G_feat.number_of_edges())
    self.assertEqual(graph_feat.num_node_features, 2)
    self.assertEqual(graph_feat.num_edge_features, 0)
    self.assertEqual(graph_feat.num_graph_features, 0)
    self.assertEqual(graph_feat.num_node_labels, 0)
    self.assertEqual(graph_feat.num_edge_labels, 0)
    self.assertEqual(graph_feat.num_graph_labels, 0)

    # Float-typed labels at every level each report a label count of 1.
    G_lab, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph())
    Graph.add_edge_attr(G_lab, "edge_label", edge_y.type(torch.FloatTensor))
    Graph.add_node_attr(G_lab, "node_label", y.type(torch.FloatTensor))
    Graph.add_graph_attr(G_lab, "graph_label", graph_y.type(torch.FloatTensor))
    graph_lab = Graph(G_lab)
    self.assertEqual(graph_lab.num_node_labels, 1)
    self.assertEqual(graph_lab.num_edge_labels, 1)
    self.assertEqual(graph_lab.num_graph_labels, 1)
def test_unbatch_nested(self):
    # Two graphs with nested node_property dicts of different sizes;
    # unbatching must restore each graph's original tensor shapes.
    prop_dims = [2, 3]
    nodes_per_graph = [10, 5]
    graphs = []
    for idx, n_nodes in enumerate(nodes_per_graph):
        g = Graph()
        g.G = nx.complete_graph(idx + 1)
        g.node_property = {
            "node_prop0": torch.ones(n_nodes, prop_dims[0]) * idx,
            "node_prop1": torch.ones(n_nodes, prop_dims[1]) * idx,
        }
        graphs.append(g)
    # Round-trip: batch, then reconstruct the graph list.
    recon = Batch.from_data_list(graphs).to_data_list()
    self.assertEqual(recon[0].node_property["node_prop0"].size(0), 10)
    self.assertEqual(recon[0].node_property["node_prop0"].size(1), 2)
    self.assertEqual(recon[1].node_property["node_prop1"].size(0), 5)
    self.assertEqual(recon[1].node_property["node_prop1"].size(1), 3)
def __getitem__(self, idx: int) -> Union[Graph, List[Graph]]:
    r"""
    Takes in an integer (or a list of integers)
    returns a single Graph object (a subset of graphs).

    Args:
        idx: index to be selected from graphs.

    Returns:
        Union[:class:`deepsnap.graph.Graph`, List[:class:`deepsnap.graph.Graph`]]: A single
        :class:`deepsnap.graph.Graph` object or subset of
        :class:`deepsnap.graph.Graph` objects.
    """
    # Fix: only resample over stored graphs — when self.graphs is None
    # (generator-backed dataset) the old code iterated None and crashed.
    if (
        self.task == 'link_pred'
        and self._resample_negatives
        and self.graphs is not None
    ):
        # resample negative examples
        for graph in self.graphs:
            # Check HeteroGraph before Graph: HeteroGraph instances would
            # otherwise match the plain-Graph branch (isinstance honors
            # subclassing; the old `type(...) ==` checks did not).
            if isinstance(graph, HeteroGraph):
                graph._create_neg_sampling(
                    self.edge_negative_sampling_ratio,
                    split_types=self._split_types, resample=True)
            elif isinstance(graph, Graph):
                graph._create_neg_sampling(
                    self.edge_negative_sampling_ratio, resample=True)
    # TODO: add the hetero graph equivalent of these functions ?
    if self.graphs is None:
        # On-the-fly dataset: ask the generator for a fresh graph.
        graph = self.generator.generate()
        if not isinstance(graph, Graph):
            graph = Graph(graph)
        # generated an networkx graph
        if self.otf_device is not None:
            graph.to(self.otf_device)
        return graph
    elif isinstance(idx, int):
        return self.graphs[idx]
    else:
        return self._index_select(idx)
def concatenate_citeseer_cora(cora_pyg, citeseer_pyg):
    # Convert both PyG datasets and tag every node/edge with its origin.
    cora_g = Graph.pyg_to_graph(cora_pyg).G
    citeseer_g = Graph.pyg_to_graph(citeseer_pyg).G
    nx.set_node_attributes(cora_g, 'cora_node', name='node_type')
    nx.set_edge_attributes(cora_g, 'cora_edge', name='edge_type')
    nx.set_node_attributes(citeseer_g, 'citeseer_node', name='node_type')
    nx.set_edge_attributes(citeseer_g, 'citeseer_edge', name='edge_type')

    merged = deepcopy(cora_g)
    offset = cora_g.number_of_nodes()

    # Copy citeseer nodes, shifting ids past the cora ids.
    for i, (node_id, attrs) in enumerate(citeseer_g.nodes(data=True)):
        merged.add_node(node_id + offset, **attrs)
        assert merged.nodes[offset + i]['node_label'] == \
            citeseer_g.nodes[i]['node_label']
        assert merged.nodes[offset + i]['node_type'] == \
            citeseer_g.nodes[i]['node_type']
    assert merged.number_of_nodes() == \
        cora_g.number_of_nodes() + citeseer_g.number_of_nodes()

    # Copy citeseer edges with the same id shift.
    for src, dst, attrs in citeseer_g.edges(data=True):
        u = src + offset
        v = dst + offset
        merged.add_edge(u, v, **attrs)
        assert merged.edges[(u, v)]['edge_type'] == \
            citeseer_g.edges[(src, dst)]['edge_type']
    assert merged.number_of_edges() == \
        cora_g.number_of_edges() + citeseer_g.number_of_edges()
    return merged
def _custom_split_link_pred_disjoint(self, graph_train):
    # Supervision (objective) edges come from the user-provided custom
    # disjoint split; every remaining edge carries messages only.
    objective_edges = graph_train.custom_disjoint_split
    message_edges = list(set(graph_train.G.edges) - set(objective_edges))

    # Rebuild the training graph over message edges, keeping all nodes.
    message_subgraph = graph_train._edge_subgraph_with_isonodes(
        graph_train.G,
        message_edges,
    )
    split_graph = Graph(message_subgraph)
    # Attach link-prediction labels derived from the objective edges.
    split_graph._create_label_link_pred(split_graph, objective_edges)
    return split_graph
def test_split_edge_case(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph())
    Graph.add_node_attr(G, "node_label", y)
    Graph.add_edge_attr(G, "edge_label", edge_y)
    dg = Graph(G)

    def expected_sizes(total):
        # Default split ratio is [0.8, 0.1, 0.1]; the last split
        # collects the rounding remainder.
        first = int(total * 0.8)
        second = int(total * 0.1)
        return first, second, total - first - second

    # Node task.
    for split, size in zip(dg.split(), expected_sizes(dg.num_nodes)):
        self.assertEqual(split.node_label_index.shape[0], size)

    # Edge task.
    for split, size in zip(
        dg.split(task="edge"), expected_sizes(dg.num_edges)
    ):
        self.assertEqual(split.edge_label_index.shape[1], size)

    # Link prediction task: same sizes as the edge task here.
    for split, size in zip(
        dg.split(task="link_pred"), expected_sizes(dg.num_edges)
    ):
        self.assertEqual(split.edge_label_index.shape[1], size)
def test_split_edge_case(self):
    # Tensor-backed (no networkx G) directed graph: split sizes are
    # plain int(total * ratio) with the default ratio [0.8, 0.1, 0.1];
    # the last split takes the rounding remainder.
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph()
    )
    dg = Graph(
        node_label=y,
        edge_label=edge_y,
        edge_index=edge_index,
        directed=True
    )

    # Node task.
    dg_node = dg.split()
    dg_num_nodes = dg.num_nodes
    self.assertEqual(
        dg_node[0].node_label_index.shape[0],
        int(dg_num_nodes * 0.8),
    )
    self.assertEqual(
        dg_node[1].node_label_index.shape[0],
        int(dg_num_nodes * 0.1),
    )
    self.assertEqual(
        dg_node[2].node_label_index.shape[0],
        dg.num_nodes
        - int(dg_num_nodes * 0.8)
        - int(dg_num_nodes * 0.1)
    )

    # Edge task.
    dg_edge = dg.split(task="edge")
    dg_num_edges = dg.num_edges
    edge_0 = int(dg_num_edges * 0.8)
    edge_1 = int(dg_num_edges * 0.1)
    edge_2 = dg.num_edges - edge_0 - edge_1
    self.assertEqual(
        dg_edge[0].edge_label_index.shape[1], edge_0
    )
    self.assertEqual(
        dg_edge[1].edge_label_index.shape[1], edge_1
    )
    self.assertEqual(
        dg_edge[2].edge_label_index.shape[1], edge_2
    )

    # Link prediction task: same sizes as the edge task here.
    dg_link = dg.split(task="link_pred")
    dg_num_edges = dg.num_edges
    edge_0 = int(dg_num_edges * 0.8)
    edge_1 = int(dg_num_edges * 0.1)
    edge_2 = dg.num_edges - edge_0 - edge_1
    self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)
def test_transform(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph()
    )
    dg = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        edge_feature=edge_x, edge_label=edge_y,
        graph_feature=graph_x, graph_label=graph_y, directed=True
    )
    feature_keys = ("edge_feature", "node_feature", "graph_feature")
    originals = {key: getattr(dg, key).clone() for key in feature_keys}

    # Identity transform leaves every feature tensor unchanged.
    dg.apply_tensor(lambda t: t, *feature_keys)
    for key in feature_keys:
        self.assertTrue(torch.all(originals[key].eq(getattr(dg, key))))

    # Shifting by 10 changes every feature tensor.
    dg.apply_tensor(lambda t: t + 10, *feature_keys)
    for key in feature_keys:
        self.assertFalse(torch.all(originals[key].eq(getattr(dg, key))))

    # Transforms compose in place: +10 then +100 ...
    dg.apply_tensor(lambda t: t + 100, *feature_keys)
    for key in feature_keys:
        self.assertTrue(
            torch.all(getattr(dg, key).eq(originals[key] + 10 + 100))
        )

    # ... then * 2.
    dg.apply_tensor(lambda t: t * 2, *feature_keys)
    for key in feature_keys:
        self.assertTrue(
            torch.all(getattr(dg, key).eq((originals[key] + 10 + 100) * 2))
        )
def test_pyg_to_graph_global(self):
    import deepsnap
    pyg_data = Planetoid('./planetoid', "Cora")[0]
    # Conversion must honor the globally selected graph backend,
    # in the same order the original test used: nx first, then sx.
    for backend in (nx, sx):
        deepsnap.use(backend)
        graph = Graph.pyg_to_graph(pyg_data)
        self.assertTrue(isinstance(graph.G, backend.Graph))
def test_split(self):
    # Build an undirected Cora graph from raw tensors: keep a single
    # direction of each edge (row < col), then append the flipped
    # copies so both directions are explicitly present.
    pyg_dataset = Planetoid("./cora", "Cora")
    x = pyg_dataset[0].x
    y = pyg_dataset[0].y
    edge_index = pyg_dataset[0].edge_index
    row, col = copy.deepcopy(edge_index)
    mask = row < col
    row, col = row[mask], col[mask]
    edge_index = torch.stack([row, col], dim=0)
    edge_index = torch.cat(
        [edge_index, torch.flip(edge_index, [0])], dim=1)
    dg = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        directed=False)

    # Node task with the default split ratio [0.8, 0.1, 0.1];
    # the last split collects the rounding remainder.
    dg_node = dg.split()
    dg_num_nodes = dg.num_nodes
    node_0 = int(dg_num_nodes * 0.8)
    node_1 = int(dg_num_nodes * 0.1)
    node_2 = dg_num_nodes - node_0 - node_1
    self.assertEqual(dg_node[0].node_label_index.shape[0], node_0)
    self.assertEqual(dg_node[1].node_label_index.shape[0], node_1)
    self.assertEqual(dg_node[2].node_label_index.shape[0], node_2)

    # Link prediction with custom ratios: undirected edges count once
    # in num_edges but are materialized in both directions, hence the
    # factor of 2 on every expected size.
    for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
        dg_link_custom = (dg.split(task="link_pred",
                                   split_ratio=split_ratio))
        dg_num_edges = dg.num_edges
        edge_0 = 2 * int(split_ratio[0] * dg_num_edges)
        edge_1 = 2 * int(split_ratio[1] * dg_num_edges)
        edge_2 = 2 * (dg_num_edges
                      - int(split_ratio[0] * dg_num_edges)
                      - int(split_ratio[1] * dg_num_edges))
        self.assertEqual(
            dg_link_custom[0].edge_label_index.shape[1],
            edge_0,
        )
        self.assertEqual(
            dg_link_custom[1].edge_label_index.shape[1],
            edge_1,
        )
        self.assertEqual(
            dg_link_custom[2].edge_label_index.shape[1],
            edge_2,
        )
def __getitem__(self, idx: int) -> Union[Graph, List[Graph]]: r""" Takes in an integer (or a list of integers) returns a single Graph object (a subset of graphs). Args: idx: index to be selected from graphs. Returns: Union[:class:`deepsnap.graph.Graph`, List[:class:`deepsnap.graph.Graph`]]: A single :class:`deepsnap.graph.Graph` object or subset of :class:`deepsnap.graph.Graph` objects. """ # TODO: add the hetero graph equivalent of these functions ? if self.graphs is None: graph = self.generator.generate() if not isinstance(graph, Graph): graph = Graph(graph) # generated an networkx graph if self.otf_device is not None: graph.to(self.otf_device) # return graph elif isinstance(idx, int): graph = self.graphs[idx] else: graph = self._index_select(idx) if self.task == "link_pred" and self._resample_negatives: # resample negative examples if isinstance(graph, Graph): if isinstance(graph, HeteroGraph): if self.negative_edges_mode == "random": graph._create_neg_sampling( self.edge_negative_sampling_ratio, split_types=self._split_types, resample=True) elif self.negative_edges_mode == "custom": raise NotImplementedError() else: if self.negative_edges_mode == "random": graph._create_neg_sampling( self.edge_negative_sampling_ratio, resample=True) elif self.negative_edges_mode == "custom": graph._custom_create_neg_sampling( self.edge_negative_sampling_ratio, resample=True) else: raise TypeError("element in self.graphs of unexpected type.") return graph
def apply_transform_multi(self, transform, update_tensors: bool = True,
                          update_graphs: bool = False,
                          deep_copy: bool = False, **kwargs):
    r""" Comparison to apply_transform, this allows multiple graph objects
    to be returned by the supplied transform function.

    Args:
        transform: (Multiple return value) tranformation function
            applied to each graph object. It needs to return a tuple of
            Graph objects or internal .G (NetworkX) objects.

    Returns:
        a tuple of batch objects. The i-th batch object contains the i-th
        return value of the transform function applied to all graphs
        in the batch.
    """
    # Apply the multi-valued transform to each member graph, giving one
    # tuple of results per graph.
    per_graph_results = [
        Graph(graph).apply_transform_multi(
            transform,
            update_tensors,
            update_graphs,
            deep_copy,
            **kwargs,
        )
        for graph in self.G
    ]
    # Regroup so each group holds one return-position across all graphs,
    # then rebatch every group.
    grouped = zip(*per_graph_results)
    return (self.from_data_list(group) for group in grouped)
def apply_transform(self, transform, update_tensor: bool = True,
                    update_graph: bool = False,
                    deep_copy: bool = False, **kwargs):
    r""" Applies a transformation to each graph object in parallel by first
    calling `to_data_list`, applying the transform, and then perform
    re-batching again to a `Batch`.

    A transform should edit the graph object, including changing the graph
    structure, and adding node/edge/graph attributes. The rest are
    automatically handled by the :class:`deepsnap.graph.Graph` object,
    including everything ended with index.

    Args:
        transform: Transformation function applied to each graph object.
        update_tensor: Whether use nx graph to update tensor attributes.
        update_graph: Whether use tensor attributes to update nx graphs.
        deep_copy: :obj:`True` if a new deep copy of batch is returned.
            This option allows modifying the batch of graphs without
            changing the graphs in the original dataset.
        kwargs: Parameters used in transform function in
            :class:`deepsnap.graph.Graph` objects.

    Returns:
        a batch object containing all transformed graph objects.
    """
    # TODO: transductive setting, assert update_tensor == True
    transformed = [
        Graph(graph).apply_transform(
            transform, update_tensor, update_graph, deep_copy, **kwargs
        )
        for graph in self.G
    ]
    return self.from_data_list(transformed)
def _dict_list_to_tensor(dict_of_list, graph):
    r"""Convert a dict/Graph with list as values to a dict/Graph with
    concatenated/stacked tensor as values.
    """
    # A plain dict exposes .keys() as a method; a Graph-like object is
    # expected to expose .keys as a property.
    if isinstance(dict_of_list, dict):
        keys = dict_of_list.keys()
    else:
        keys = dict_of_list.keys
    for key in keys:
        if isinstance(dict_of_list[key], dict):
            # recursively convert the dictionary of list to dict of tensor
            Batch._dict_list_to_tensor(dict_of_list[key], graph)
            continue
        # Inspect the first element to decide how to combine the list.
        item = dict_of_list[key][0]
        if torch.is_tensor(item):
            if (Graph._is_graph_attribute(key)
                    and item.ndim == 1
                    and (not item.dtype == torch.long)
                    and "feature" in key):
                # special consideration: 1D tensor for graph attribute
                # (classification)
                # named as: "graph_xx_feature"
                # batch by stacking the first dim
                dict_of_list[key] = torch.stack(dict_of_list[key], dim=0)
            else:
                # concat at the __cat_dim__ (per-key concat dimension
                # supplied by the graph object)
                dict_of_list[key] = torch.cat(
                    dict_of_list[key],
                    dim=graph.__cat_dim__(key, item))
        elif isinstance(item, (float, int)):
            # Scalars batch into a 1-D tensor.
            dict_of_list[key] = torch.tensor(dict_of_list[key])
def test_split(self):
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # Default node split ratio [0.8, 0.1, 0.1]; last split takes the
    # rounding remainder.
    num_nodes = dg.num_nodes
    train_n = int(0.8 * num_nodes)
    val_n = int(0.1 * num_nodes)
    expected_nodes = (train_n, val_n, num_nodes - train_n - val_n)
    for split, size in zip(dg.split(), expected_nodes):
        self.assertEqual(split.node_label_index.shape[0], size)

    # Link prediction with custom ratios: every expected count is
    # doubled because each undirected edge appears in both directions.
    num_edges = dg.num_edges
    for split_ratio in ([0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]):
        splits = dg.split(task="link_pred", split_ratio=split_ratio)
        train_e = int(split_ratio[0] * num_edges)
        val_e = int(split_ratio[1] * num_edges)
        expected_edges = (
            2 * train_e,
            2 * val_e,
            2 * (num_edges - train_e - val_e),
        )
        for split, size in zip(splits, expected_edges):
            self.assertEqual(split.edge_label_index.shape[1], size)
def test_resample_disjoint(self):
    pyg_dataset = Planetoid("./cora", "Cora")
    base = GraphDataset.pyg_to_graphs(pyg_dataset)[0]
    # Rebuild a tensor-backed copy of the first Cora graph.
    graph = Graph(
        node_label=base.node_label,
        node_feature=base.node_feature,
        edge_index=base.edge_index,
        edge_feature=base.edge_feature,
        directed=False,
    )
    dataset = GraphDataset(
        [graph],
        task="link_pred",
        edge_train_mode="disjoint",
        edge_message_ratio=0.8,
        resample_disjoint=True,
        resample_disjoint_period=1,
    )
    dataset_train, _, _ = dataset.split(split_ratio=[0.5, 0.2, 0.3])
    # Reading the same item twice triggers disjoint resampling; the
    # label shapes and values must still agree across reads.
    first = dataset_train[0]
    second = dataset_train[0]
    self.assertEqual(
        first.edge_label_index.shape[1],
        second.edge_label_index.shape[1],
    )
    self.assertTrue(torch.equal(first.edge_label, second.edge_label))
def test_ensemble_generator(self):
    # Generator-backed dataset (graphs=None): indexing the dataset
    # triggers on-the-fly generation via the ensemble.
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])
    num_nodes = 500
    sizes = [2, 3]

    class NeighborGenerator1(Generator):
        # NOTE(review): __len__ returns the `sizes` list rather than an
        # int — calling len() on an instance would raise TypeError.
        # Apparently never invoked along this test's path; confirm
        # against the Generator base class before relying on it.
        def __len__(self):
            return sizes

        def generate(self):
            graph = Graph(gen_graph(num_nodes, dg.G))
            return graph

    class NeighborGenerator2(Generator):
        def __len__(self):
            return sizes

        def generate(self):
            graph = Graph(gen_graph(num_nodes, dg.G))
            return graph

    ensemble_generator = (
        EnsembleGenerator(
            [
                NeighborGenerator1(sizes),
                NeighborGenerator2(sizes),
            ]
        )
    )
    dataset = GraphDataset(None, generator=ensemble_generator)
    # Whichever generator is picked, the produced graph has num_nodes nodes.
    self.assertTrue(dataset[0].node_feature.shape[0] == num_nodes)
def test_dataset_property(self):
    _, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph())
    G = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        edge_feature=edge_x, edge_label=edge_y,
        graph_feature=graph_x, graph_label=graph_y, directed=True
    )
    # Second graph differs only in its graph label.
    H = deepcopy(G)
    H.graph_label = torch.tensor([1])
    graphs = [G, H]

    # Default (node) task: check every aggregate property.
    dataset = GraphDataset(graphs)
    self.assertEqual(dataset.num_node_labels, 5)
    self.assertEqual(dataset.num_node_features, 2)
    self.assertEqual(dataset.num_edge_labels, 4)
    self.assertEqual(dataset.num_edge_features, 2)
    self.assertEqual(dataset.num_graph_labels, 1)
    self.assertEqual(dataset.num_graph_features, 2)
    self.assertEqual(dataset.num_labels, 5)
    # num_labels tracks the task the dataset was built for.
    for task, expected_num_labels in (
        ("edge", 4),
        ("link_pred", 5),
        ("graph", 1),
    ):
        dataset = GraphDataset(graphs, task=task)
        self.assertEqual(dataset.num_labels, expected_num_labels)
def test_graph_basics(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph()
    )
    dg = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        edge_feature=edge_x, edge_label=edge_y,
        graph_feature=graph_x, graph_label=graph_y, directed=True
    )
    # Every expected attribute key must be present on the graph.
    # ("is_train" is intentionally excluded from this check.)
    expected_keys = (
        "directed", "node_feature", "node_label", "edge_feature",
        "edge_label", "graph_feature", "graph_label", "edge_index",
        "edge_label_index", "node_label_index",
    )
    for key in expected_keys:
        self.assertEqual(key in dg, True)
    # Iterating the graph yields exactly these 10 keys
    # (11 once "is_train" is set).
    self.assertEqual(len([key for key in dg]), 10)
def test_graph_property_general(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph())
    dg = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        edge_feature=edge_x, edge_label=edge_y,
        graph_feature=graph_x, graph_label=graph_y, directed=True)

    # Keys are exposed sorted and include the derived index attributes
    # plus the "is_train" flag.
    expected_keys = [
        "directed", "edge_feature", "edge_index", "edge_label",
        "edge_label_index", "graph_feature", "graph_label", "is_train",
        "node_feature", "node_label", "node_label_index"
    ]
    self.assertEqual(sorted(dg.keys), expected_keys)

    # Structural counts mirror the networkx graph.
    self.assertEqual(dg.num_nodes, G.number_of_nodes())
    self.assertEqual(dg.num_edges, G.number_of_edges())

    # Feature widths come straight from the supplied tensors.
    self.assertEqual(dg.num_node_features, 2)
    self.assertEqual(dg.num_edge_features, 2)
    self.assertEqual(dg.num_graph_features, 2)

    # Label counts equal max class id + 1 for integer labels.
    self.assertEqual(dg.num_node_labels, np.max(y.data.numpy()) + 1)
    self.assertEqual(dg.num_edge_labels, np.max(edge_y.data.numpy()) + 1)
    self.assertEqual(dg.num_graph_labels, np.max(graph_y.data.numpy()) + 1)
def test_collate_batch_nested(self):
    # Two graphs carrying nested node_property dicts; batching should
    # concatenate each nested tensor along the node dimension.
    prop_dims = [2, 3]
    nodes_per_graph = [10, 5]
    graphs = []
    for idx, n_nodes in enumerate(nodes_per_graph):
        g = Graph()
        g.G = nx.complete_graph(idx + 1)
        g.node_property = {
            'node_prop0': torch.ones(n_nodes, prop_dims[0]) * idx,
            'node_prop1': torch.ones(n_nodes, prop_dims[1]) * idx
        }
        graphs.append(g)
    batch = Batch.from_data_list(graphs)
    self.assertEqual(batch.num_graphs, 2)
    self.assertEqual(
        batch.node_property['node_prop0'].size(0), sum(nodes_per_graph)
    )
def _orig_features(graph, key):
    # Coerce the attribute to a tensor before any dtype/shape fixes.
    if not isinstance(graph[key], torch.Tensor):
        graph[key] = torch.tensor(graph[key])
    # Regression labels are stored as floats.
    if cfg.dataset.task_type == 'regression' and 'label' in key:
        graph[key] = graph[key].float()
    assert graph[key].ndim <= 2
    # n-by-1 tensor for node attributes
    if graph[key].ndim == 1 and Graph._is_node_attribute(key):
        graph[key] = graph[key].unsqueeze(-1)
def test_clone(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph()
    )
    original = Graph(
        node_feature=x, node_label=y, edge_index=edge_index,
        edge_feature=edge_x, edge_label=edge_y,
        graph_feature=graph_x, graph_label=graph_y, directed=True
    )
    cloned = original.clone()
    # A clone preserves every aggregate property ...
    for prop in (
        "num_nodes", "num_edges", "num_node_features",
        "num_edge_features", "num_node_labels", "num_edge_labels",
    ):
        self.assertEqual(getattr(original, prop), getattr(cloned, prop))
    # ... but holds its own tensor objects, with the same key set.
    self.assertTrue(not id(original.edge_index) == id(cloned.edge_index))
    self.assertTrue(tuple(original.keys) == tuple(cloned.keys))
def test_specify_graph_backend_init(self):
    import networkx as nx
    # Same checks for both backends, sx first then networkx, matching
    # the original ordering.
    for netlib in (sx, nx):
        G = netlib.Graph()
        G.add_nodes_from(range(100))
        G.add_edges_from([[0, 4], [1, 5], [2, 6]])
        graph = Graph(G, netlib=netlib)
        self.assertTrue(isinstance(graph.G, netlib.Graph))
        # 3 undirected edges -> 6 directed entries in edge_index.
        self.assertEqual(list(graph.edge_index.shape), [2, 6])
        self.assertEqual(list(graph.edge_label_index.shape), [2, 6])
        self.assertEqual(list(graph.node_label_index.shape), [100])
def test_split(self):
    pyg_dataset = Planetoid('./cora', 'Cora')
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # Node task: expected sizes follow the "reduced total" pattern
    # (num - 3, then 1 + int(reduced * ratio) per split), which appears
    # to reserve at least one element per split — TODO confirm against
    # Graph.split.
    dg_node = dg.split()
    dg_num_nodes_reduced = dg.num_nodes - 3
    self.assertEqual(
        dg_node[0].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.8))
    self.assertEqual(
        dg_node[1].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.1))
    self.assertEqual(
        dg_node[2].node_label_index.shape[0],
        dg.num_nodes - 2
        - int(dg_num_nodes_reduced * 0.8)
        - int(dg_num_nodes_reduced * 0.1))

    # Edge task: same arithmetic, doubled because each undirected edge
    # is materialized in both directions.
    dg_edge = dg.split(task='edge')
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
    edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
    edge_2 = dg.num_edges * 2 - edge_0 - edge_1
    self.assertEqual(dg_edge[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_edge[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_edge[2].edge_label_index.shape[1], edge_2)

    # Link prediction task: same doubled sizes.
    dg_link = dg.split(task='link_pred')
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
    edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
    edge_2 = dg.num_edges * 2 - edge_0 - edge_1
    self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)

    # Disjoint resampling on the train split: positive count is half
    # the (bidirectional) edge_label_index; message edges are removed
    # from the supervision set according to message_ratio.
    for message_ratio in [0.1, 0.2, 0.4, 0.8]:
        dg_link_resample = dg_link[0].clone().\
            resample_disjoint(message_ratio=message_ratio)
        positive_edge_num = \
            int(0.5 * dg_link[0].clone().edge_label_index.shape[1])
        self.assertEqual(
            dg_link_resample.edge_label_index.shape[1],
            2 * (positive_edge_num - 1
                 - int(message_ratio * (positive_edge_num - 2))))

    # Custom split ratios for link prediction; the middle split is
    # expressed as (train + val running total) minus the train size.
    for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
        dg_link_custom = \
            dg.split(task='link_pred', split_ratio=split_ratio)
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * split_ratio[0]))
        self.assertEqual(dg_link_custom[0].
                         edge_label_index.shape[1], edge_0)
        edge_1 = \
            (1 + int(split_ratio[0] * dg_num_edges_reduced)
             + 1 + int(split_ratio[1] * dg_num_edges_reduced)) * 2 - edge_0
        self.assertEqual(dg_link_custom[1].edge_label_index.shape[1],
                         edge_1)
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(dg_link_custom[2].edge_label_index.shape[1],
                         edge_2)
def list_to_graphs(G_list) -> List[Graph]:
    r"""Transform a list of networkx data object to a list of Graph object.

    Args:
        G_list: a list of networkx data object.

    Returns:
        list: A list of :class:`deepsnap.graph.Graph` object.
    """
    # Wrap each networkx graph in a deepsnap Graph, preserving order.
    return list(map(Graph, G_list))
def preprocess(G, node_label_index, method="louvain"):
    r"""Partition ``G`` into community subgraphs.

    Communities with more than 10 nodes that contain at least one
    labeled training node are converted into relabeled
    :class:`deepsnap.graph.Graph` objects whose ``node_label_index``
    points at the relabeled training nodes.

    Args:
        G: a networkx graph.
        node_label_index: tensor of training-set labeled node ids in ``G``.
        method: community detection method, one of ``"louvain"``,
            ``"bisection"`` or ``"greedy"``.

    Returns:
        list: A list of :class:`deepsnap.graph.Graph` objects.

    Raises:
        ValueError: if ``method`` is not a supported detection method.
    """
    graphs = []
    labeled_nodes = set(node_label_index.tolist())
    if method == "louvain":
        community_mapping = community_louvain.best_partition(G, resolution=10)
        # Invert node -> community into community -> node set.
        communities = {}
        for node, comm in community_mapping.items():
            communities.setdefault(comm, set()).add(node)
        communities = communities.values()
    elif method == "bisection":
        communities = nx.algorithms.community.kernighan_lin_bisection(G)
    elif method == "greedy":
        communities = nx.algorithms.community.greedy_modularity_communities(G)
    else:
        # Fix: previously an unknown method fell through with
        # `communities` unbound, raising a confusing NameError below.
        raise ValueError(f"Unknown community detection method: {method!r}")
    for community in communities:
        nodes = set(community)
        subgraph = G.subgraph(nodes)
        # Make sure each subgraph has more than 10 nodes
        if subgraph.number_of_nodes() > 10:
            node_mapping = {
                node: i for i, node in enumerate(subgraph.nodes())
            }
            subgraph = nx.relabel_nodes(subgraph, node_mapping)
            # Get the id of the training set labeled node in the new graph
            train_label_index = [
                node_mapping[node]
                for node in labeled_nodes
                if node in node_mapping
            ]
            # Keep only subgraphs containing at least one training-set
            # labeled node.
            if len(train_label_index) > 0:
                dg = Graph(subgraph)
                # Update node_label_index to the relabeled ids.
                dg.node_label_index = torch.tensor(
                    train_label_index, dtype=torch.long
                )
                graphs.append(dg)
    return graphs
def test_graph_property_edge_case(self):
    G, x, y, edge_x, edge_y, edge_index, graph_x, graph_y = (
        simple_networkx_graph()
    )
    # Tensor-backed graph with float-typed labels at every level:
    # each label count reports as 1.
    float_graph = Graph(
        node_feature=x,
        node_label=y.type(torch.FloatTensor),
        edge_index=edge_index,
        edge_label=edge_y.type(torch.FloatTensor),
        graph_label=graph_y.type(torch.FloatTensor),
        directed=True
    )
    for prop in ("num_node_labels", "num_edge_labels", "num_graph_labels"):
        self.assertEqual(getattr(float_graph, prop), 1)