Example #1
import os
import shutil
import time

import numpy as np
import yaml

# load_dataset is imported this way in the later examples; load_sparse_dataset
# is assumed to live in the same module.
from GNN.dataset import load_dataset, load_sparse_dataset


def part_graph(dataset, nparts, output_path, sparse):
    # Recreate the output directory from scratch (os.rmdir only removes empty
    # directories, so delete the whole tree).
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.mkdir(output_path)
    start = time.time()
    if sparse:
        graph, idx_max = load_sparse_dataset(dataset)
    else:
        graph = load_dataset(dataset)
    print("step1: load_dataset complete, time cost {:.3f}s".format(time.time()-start))
    start = time.time()
    subgraphs, edge_index, edges = graph.part_graph(nparts)
    print("step2: partition graph complete, time cost {:.3f}s".format(time.time()-start))
    start = time.time()
    for i in range(nparts):
        part_dir = os.path.join(output_path, "part{}".format(i))
        os.mkdir(part_dir)
        edge_path = os.path.join(part_dir, "edge.npz")
        data_path = os.path.join(part_dir, "data.npz")
        # Collect, for part i, the edges shared with every part j;
        # edge_index[i][j] selects them from the global src/dst arrays.
        all_edges = {}
        for j in range(nparts):
            index = edge_index[i][j]
            all_edges["edge_" + str(j)] = (edges[0][index], edges[1][index])

        with open(edge_path, 'wb') as f:
            np.savez(file=f, **all_edges)
        with open(data_path, 'wb') as f:
            np.savez(file=f, x=subgraphs[i].x, y=subgraphs[i].y)
    print("step3: save partitioned graph, time cost {:.3f}s".format(time.time()-start))
    partition = {
        "nodes": [g.num_nodes for g in subgraphs],
        "edges": [g.num_edges for g in subgraphs],
    }
    meta = {
        "name": dataset,
        "node": graph.num_nodes,
        "edge": graph.num_edges,
        "feature": graph.num_features,
        "class": graph.num_classes,
        "num_part": nparts,
        "partition": parititon,
    }
    if sparse:
        meta["idx_max"] = idx_max
    meta_path = os.path.join(output_path, "meta.yml")
    with open(meta_path, 'w') as f:
        yaml.dump(meta, f, sort_keys=False)
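
A minimal driver sketch for this function (the dataset name and output path below are placeholders, not taken from the original script):

if __name__ == "__main__":
    # Partition Cora into 4 parts under ./cora_parts (hypothetical paths).
    part_graph("Cora", nparts=4, output_path="./cora_parts", sparse=False)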
Example #2
import os

# `ps` (Hetu's parameter-server bindings), `DistributedDemo`, and `args` (the
# parsed command-line arguments) are defined elsewhere in the original script.


def worker():
    rank = int(os.environ["WORKER_ID"])
    nrank = int(os.environ["DMLC_NUM_WORKER"])
    ps.ps_init(rank, nrank)
    # Rank 0 records the sampling trace to the output file.
    if rank == 0:
        f = open(args.output, "w")
        ps.ps_set_trace(f)

    graph = load_dataset(args.dataset)
    walk_length = int(args.walk)
    # Heads per worker, sized so that one round of walks across all workers
    # touches roughly a tenth of the graph's nodes.
    num_head = int(graph.num_nodes / nrank / walk_length / 10)
    with DistributedDemo(graph, num_head, walk_length, rank=rank, nrank=nrank) as sampler:
        # Only rank 0 drives this loop, sampling until the requested trace
        # length is reached; the other ranks fall through to the barrier.
        while rank == 0 and ps.PS.trace_len < int(args.length):
            g = sampler.sample()
            print("{}/{}".format(ps.PS.trace_len, args.length))
        ps.ps_get_worker_communicator().Barrier_Worker()
    if rank == 0:
        f.close()
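
The worker reads its identity from the environment, so a single-process smoke test could be launched like the sketch below (in a real run the DMLC launcher sets these variables, and `args` must already be parsed):

# Hypothetical single-worker launch for local testing.
os.environ["WORKER_ID"] = "0"
os.environ["DMLC_NUM_WORKER"] = "1"
worker()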
Example #3
import numpy as np

from hetu import initializers
from hetu import ndarray
from hetu import gpu_ops as ad
from hetu import optimizer

# load_dataset and split_training_set come from the project's GNN package, as
# in the other examples; mp_matrix is assumed to be exported by GNN.graph.
from GNN.dataset import load_dataset
from GNN.graph import *

import time

def convert_to_one_hot(vals, max_val = 0):
    """Helper method to convert label array to one-hot array."""
    if max_val == 0:
        max_val = vals.max() + 1
    one_hot_vals = np.zeros((vals.size, max_val))
    one_hot_vals[np.arange(vals.size), vals] = 1
    return one_hot_vals
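# For reference, a quick check of what this helper produces (illustrative):
#   convert_to_one_hot(np.array([0, 2, 1]))
#   -> array([[1., 0., 0.],
#             [0., 0., 1.],
#             [0., 1., 0.]])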

graph_full = load_dataset("Reddit")
#graph_full = shuffle(graph_full)
train_split = int(0.8 * graph_full.num_nodes)
graph_full.add_self_loop()
graph = split_training_set(graph_full, train_split)
def transform(graph):
    # Build the message-passing matrix for the graph on GPU 0.
    mp_val = mp_matrix(graph, ndarray.gpu(0))
    return graph, mp_val
hidden_layer_size = 128

def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)

    # Symbolic graph inputs for the features, labels and training mask.
    x_ = ad.Variable(name="x_")
    y_ = ad.Variable(name="y_")
    mask_ = ad.Variable(name="mask_")
Example #4
import numpy as np
from GNN.dataset import load_dataset
from GNN.graph import *

import torch
import torch_geometric as geo
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

import time

graph_full = load_dataset("Cora")
train_split = int(0.8 * graph_full.num_nodes)
graph_full.add_self_loop()
graph = split_training_set(graph_full, train_split)
num_features = graph_full.num_features  # =1433
num_classes = graph_full.num_classes  # =7


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(num_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x
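
To run this example end to end, a training loop in the usual PyTorch Geometric style could follow the class definition. The sketch below is an outline under stated assumptions: the attribute names graph.x, graph.y, graph.edge_index and all hyperparameters are placeholders, not taken from the original script.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
# Assumed: the GNN.graph object exposes NumPy arrays x (features), y (labels)
# and edge_index (2 x num_edges); adapt these lines to the real API.
x = torch.tensor(graph.x, dtype=torch.float, device=device)
y = torch.tensor(graph.y, dtype=torch.long, device=device)
edge_index = torch.tensor(graph.edge_index, dtype=torch.long, device=device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(x, edge_index)
    # Assumed: the first train_split nodes form the training set.
    loss = F.cross_entropy(out[:train_split], y[:train_split])
    loss.backward()
    optimizer.step()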
Example #5
import numpy as np

from hetu import ndarray
from hetu import gpu_ops as ad
from hetu import optimizer

# load_dataset comes from the project's GNN package, as in the other examples.
from GNN.dataset import load_dataset

import time

def convert_to_one_hot(vals, max_val=0):
    """Helper method to convert label array to one-hot array."""
    if max_val == 0:
        max_val = vals.max() + 1
    one_hot_vals = np.zeros((vals.size, max_val))
    one_hot_vals[np.arange(vals.size), vals] = 1
    return one_hot_vals


graph = load_dataset("Cora")

train_split = graph.num_nodes // 10

# Dual-mode message passing: use a dense matmul for any subgraph whose
# dense_efficient score exceeds dense_threshold. The threshold should be
# chosen by bisection profiling.
dense_threshold = 1
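# Illustrative dispatch implied by the comment above (all names hypothetical):
#   def message_passing(subgraph, h):
#       if subgraph.dense_efficient > dense_threshold:
#           return dense_matmul(dense_adj(subgraph), h)  # dense path
#       return spmm(sparse_adj(subgraph), h)             # sparse path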


def train_hetu(num_epoch):
    ctx = ndarray.gpu(0)
    feed_dict = {}
    nparts = 4
    graph.add_self_loop()
    norm = graph.gcn_norm(True)
Example #6
import numpy as np
from GNN.dataset import load_dataset
from GNN.layer import GCN
from GNN.graph import *

import torch
import dgl
from dgl.nn import GraphConv
import torch.nn.functional as F

import time

graph = load_dataset("Reddit")
graph.add_self_loop()
num_features = graph.num_features  # =602 for Reddit
num_classes = graph.num_classes  # =41 for Reddit


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GraphConv(num_features,
                               128,
                               norm='both',
                               weight=True,
                               bias=True)
        self.conv2 = GraphConv(128,
                               num_classes,
                               norm='both',
                               weight=True,
                               bias=True)
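
    # The original snippet is truncated here. By analogy with Example #4 and
    # DGL's GraphConv call convention conv(graph, features), an assumed
    # completion of the forward pass:
    def forward(self, g, x):
        x = self.conv1(g, x)
        x = F.relu(x)
        x = self.conv2(g, x)
        return x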