import os
import shutil
import time

import numpy as np
import yaml

# load_dataset / load_sparse_dataset are assumed to be provided by the
# project's dataset module (the other scripts use GNN.dataset).

def part_graph(dataset, nparts, output_path, sparse):
    # Remove any existing output directory (including its contents) and recreate it.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.mkdir(output_path)

    start = time.time()
    if sparse:
        graph, idx_max = load_sparse_dataset(dataset)
    else:
        graph = load_dataset(dataset)
    print("step1: load_dataset complete, time cost {:.3f}s".format(time.time() - start))

    start = time.time()
    subgraphs, edge_index, edges = graph.part_graph(nparts)
    print("step2: partition graph complete, time cost {:.3f}s".format(time.time() - start))

    start = time.time()
    for i in range(nparts):
        part_dir = os.path.join(output_path, "part{}".format(i))
        os.mkdir(part_dir)
        edge_path = os.path.join(part_dir, "edge.npz")
        data_path = os.path.join(part_dir, "data.npz")
        # For partition i, collect the edges associated with each partition j
        # (as indexed by edge_index[i][j]).
        all_edges = {}
        for j in range(nparts):
            index = edge_index[i][j]
            all_edges["edge_" + str(j)] = (edges[0][index], edges[1][index])
        with open(edge_path, 'wb') as f:
            np.savez(file=f, **all_edges)
        with open(data_path, 'wb') as f:
            np.savez(file=f, x=subgraphs[i].x, y=subgraphs[i].y)
    print("step3: save partitioned graph, time cost {:.3f}s".format(time.time() - start))

    partition = {
        "nodes": [g.num_nodes for g in subgraphs],
        "edges": [g.num_edges for g in subgraphs],
    }
    meta = {
        "name": dataset,
        "node": graph.num_nodes,
        "edge": graph.num_edges,
        "feature": graph.num_features,
        "class": graph.num_classes,
        "num_part": nparts,
        "partition": partition,
    }
    if sparse:
        meta["idx_max"] = idx_max
    meta_path = os.path.join(output_path, "meta.yml")
    with open(meta_path, 'w') as f:
        yaml.dump(meta, f, sort_keys=False)
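# Hypothetical command-line entry point for part_graph (a sketch, not part of
# the original tool): the flag names and defaults below are assumptions.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Partition a GNN dataset into nparts pieces")
    parser.add_argument("--dataset", default="Cora")
    parser.add_argument("--nparts", type=int, default=4)
    parser.add_argument("--output", default="./partitioned")
    parser.add_argument("--sparse", action="store_true")
    cli_args = parser.parse_args()
    part_graph(cli_args.dataset, cli_args.nparts, cli_args.output, cli_args.sparse)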
def worker():
    # One sampling worker in the parameter-server setup. ps, DistributedDemo,
    # load_dataset and the parsed `args` (along with os) are assumed to be
    # imported/defined earlier in this script.
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ps.ps_init(rank, nrank)
    # Rank 0 owns the output file and records the sampling trace into it.
    if rank == 0:
        f = open(args.output, "w")
        ps.ps_set_trace(f)
    graph = load_dataset(args.dataset)
    walk_length = int(args.walk)
    num_head = int(graph.num_nodes / nrank / walk_length / 10)
    with DistributedDemo(graph, num_head, walk_length, rank=rank, nrank=nrank) as sampler:
        # Rank 0 keeps sampling until the recorded trace reaches the requested length.
        while rank == 0 and ps.PS.trace_len < int(args.length):
            g = sampler.sample()
            print("{}/{}".format(ps.PS.trace_len, args.length))
        # All workers synchronize before tearing the sampler down.
        ps.ps_get_worker_communicator().Barrier_Worker()
    if rank == 0:
        f.close()
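# Minimal single-process sketch (an assumption, not the original launcher):
# worker() reads WORKER_ID / DMLC_NUM_WORKER from the environment, so a dry
# run could set them explicitly, provided the PS scheduler/servers are up and
# this script's own argument parsing has populated `args`.
if __name__ == "__main__":
    os.environ.setdefault("WORKER_ID", "0")
    os.environ.setdefault("DMLC_NUM_WORKER", "1")
    worker()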
import numpy as np

from hetu import initializers
from hetu import ndarray
from hetu import gpu_ops as ad
from hetu import optimizer
import time

# load_dataset, split_training_set and mp_matrix are assumed to come from the
# project's GNN.dataset / GNN.graph modules, as in the other scripts.

def convert_to_one_hot(vals, max_val=0):
    """Helper method to convert label array to one-hot array."""
    if max_val == 0:
        max_val = vals.max() + 1
    one_hot_vals = np.zeros((vals.size, max_val))
    one_hot_vals[np.arange(vals.size), vals] = 1
    return one_hot_vals

graph_full = load_dataset("Reddit")
# graph_full = shuffle(graph_full)
train_split = int(0.8 * graph_full.num_nodes)
graph_full.add_self_loop()
graph = split_training_set(graph_full, train_split)

def transform(graph):
    # Build the message-passing matrix for this (sub)graph on GPU 0.
    mp_val = mp_matrix(graph, ndarray.gpu(0))
    return graph, mp_val

hidden_layer_size = 128

def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    # Placeholders for node features, labels and the training mask.
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")
import numpy as np
from GNN.dataset import load_dataset
from GNN.graph import *
import torch
import torch_geometric as geo
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import time

graph_full = load_dataset("Cora")
train_split = int(0.8 * graph_full.num_nodes)
graph_full.add_self_loop()
graph = split_training_set(graph_full, train_split)
num_features = graph_full.num_features  # = 1433 for Cora
num_classes = graph_full.num_classes    # = 7 for Cora

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(num_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x
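# Hedged sketch of a training loop for the Net above, not the original script's
# loop. It assumes graph.x, graph.y and graph.edge_index are numpy arrays on
# the GNN.graph object (x and y appear in the partitioning tool above;
# edge_index is an assumption) and that the first train_split nodes are the
# training set.
def train_pyg(num_epoch):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net().to(device)
    x = torch.from_numpy(graph.x).float().to(device)
    y = torch.from_numpy(graph.y).long().to(device)
    edge_index = torch.from_numpy(graph.edge_index).long().to(device)
    opt = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    model.train()
    for epoch in range(num_epoch):
        opt.zero_grad()
        out = model(x, edge_index)
        loss = F.cross_entropy(out[:train_split], y[:train_split])
        loss.backward()
        opt.step()
        print("epoch {}: loss {:.4f}".format(epoch, loss.item()))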
import numpy as np

from hetu import ndarray
from hetu import gpu_ops as ad
from hetu import optimizer
import time

# load_dataset is assumed to come from the project's GNN.dataset module,
# as in the other scripts.

def convert_to_one_hot(vals, max_val=0):
    """Helper method to convert label array to one-hot array."""
    if max_val == 0:
        max_val = vals.max() + 1
    one_hot_vals = np.zeros((vals.size, max_val))
    one_hot_vals[np.arange(vals.size), vals] = 1
    return one_hot_vals

graph = load_dataset("Cora")
train_split = graph.num_nodes // 10

# Dual-mode message passing: use dense matmul for a subgraph whose
# dense_efficient exceeds dense_threshold; bisection profiling is required
# to decide this value.
dense_threshold = 1

def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    feed_dict = {}
    nparts = 4
    graph.add_self_loop()
    norm = graph.gcn_norm(True)
import numpy as np
from GNN.dataset import load_dataset
from GNN.layer import GCN
from GNN.graph import *
import torch
import dgl
from dgl.nn import GraphConv
import torch.nn.functional as F
import time

graph = load_dataset("Reddit")
graph.add_self_loop()
num_features = graph.num_features
num_classes = graph.num_classes

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GraphConv(num_features, 128, norm='both', weight=True, bias=True)
        self.conv2 = GraphConv(128, num_classes, norm='both', weight=True, bias=True)