Example #1
def main(args):
    if args.gpu < 0:
        device = "cpu"
    else:
        device = f"cuda:{args.gpu}"

    g = load_data(args).graph
    n_nodes = g.number_of_nodes()

    # build sparse matrix
    src, dst = g.all_edges()
    adj = torch.sparse_coo_tensor(torch.stack([dst, src]),
                                  torch.ones(src.shape),
                                  torch.Size([n_nodes, n_nodes]))
    adj = adj.coalesce().to(device)

    # generate features
    features = torch.randn(n_nodes, args.n_hidden).to(device)

    # warm up
    for _ in range(args.n_repeat):
        x = torch.spmm(adj, features)

    if device != "cpu":
        torch.cuda.synchronize()

    start = time.time()
    for _ in range(args.n_repeat):
        x = torch.spmm(adj, features)
    if device != "cpu":
        torch.cuda.synchronize()
    end = time.time()
    print("Time (ms): {:.3f}".format((end - start) * 1e3 / args.n_repeat))
Example #2
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    G = data.graph
    fun_decompose_graph_central_rectangle.main_of_decompose(G, args.dataset)
    print('done')
    exit(0)
Example #3
File: gat_batch.py Project: cclauss/dgl
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    mask = mx.nd.array(data.train_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    # MXNet arrays live in a context; the original mistakenly used torch's .cuda()
    if args.gpu < 0:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(args.gpu)
        features = features.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        mask = mask.as_in_context(ctx)

    # create DGL graph
    g = DGLGraph(data.graph)

    # create model
    model = GAT(g,
                args.num_layers,
                in_feats,
                args.num_hidden,
                n_classes,
                args.num_heads,
                elu,
                args.in_drop,
                args.attn_drop,
                args.residual)

    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': args.lr})

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits, labels)

        loss.backward()
        trainer.step(features.shape[0])

        if epoch >= 3:
            dur.append(time.time() - t0)
            print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
                epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))
Example #4
def emb_dataloader(args):
    # load and preprocess dataset
    data = load_data(args)
    normal_class = get_normal_class(args)
    labels, train_mask, val_mask, test_mask = one_class_processing(data, normal_class, args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(labels)
    train_mask = torch.BoolTensor(train_mask)
    val_mask = torch.BoolTensor(val_mask)
    test_mask = torch.BoolTensor(test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
              train_mask.sum().item(),
              val_mask.sum().item(),
              test_mask.sum().item()))

    g = data.graph


    datadict = {'g': g, 'features': features, 'labels': labels,
                'train_mask': train_mask, 'val_mask': val_mask, 'test_mask': test_mask,
                'in_feats': in_feats, 'n_classes': n_classes, 'n_edges': n_edges}

    return datadict
Example #5
def load_dataset(dataset="cora"):
    args = namedtuple("args", ["dataset"])
    dataset = load_data(args(dataset))

    params = {}
    params['infeats'] = dataset.features.astype(
        'float32')  # Only support float32 as feature for now

    # Remove self-loops to avoid duplicate passing of a node's feature to itself
    g = dataset.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g.add_edges_from(zip(g.nodes, g.nodes))

    # Generate adjacency matrix
    adjacency = nx.to_scipy_sparse_matrix(g)
    params['data'] = adjacency.data.astype('float32')
    params['indices'] = adjacency.indices.astype('int32')
    params['indptr'] = adjacency.indptr.astype('int32')

    # Normalization w.r.t. node degrees
    degs = [g.in_degree[i] for i in range(g.number_of_nodes())]
    params['norm'] = np.power(degs, -0.5).astype('float32')
    params['norm'] = params['norm'].reshape((params['norm'].shape[0], 1))

    return params
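For intuition, the pieces returned above are enough to run one step of the symmetric GCN propagation D^{-1/2} A D^{-1/2} X without any framework. A minimal SciPy sketch (the propagate name is illustrative, not from the original):

# Sketch: rebuild the CSR matrix from the returned params and apply one
# normalized propagation step. Assumes the shapes produced above.
import numpy as np
import scipy.sparse as sp

def propagate(params):
    n = params['indptr'].shape[0] - 1
    adj = sp.csr_matrix((params['data'], params['indices'], params['indptr']),
                        shape=(n, n))
    norm = params['norm']              # (n, 1) column of degree^-0.5
    return norm * (adj @ (norm * params['infeats']))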
Example #6
    def worker(self, args):
        """User-defined worker function
        """
        # Start sender
        namebook = { 0:args.ip }
        sender = dgl.contrib.sampling.SamplerSender(namebook)

        # load and preprocess dataset
        data = load_data(args)

        ctx = mx.cpu()

        if args.self_loop and not args.dataset.startswith('reddit'):
            data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])

        train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(np.int64).as_in_context(ctx)
        test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(np.int64).as_in_context(ctx)

        # create GCN model
        g = DGLGraph(data.graph, readonly=True)

        while True:
            idx = 0
            for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size,
                                                           args.num_neighbors,
                                                           neighbor_type='in',
                                                           shuffle=True,
                                                           num_hops=args.n_layers+1,
                                                           seed_nodes=train_nid):
                print("send train nodeflow: %d" %(idx))
                sender.send(nf, 0)
                idx += 1
            sender.signal(0)
Example #7
def get_data(args):
    """
    Data loader. For now, just a test sample
    """
    args.syn_train_ratio = 0.1
    args.syn_val_ratio = 0.1
    args.syn_test_ratio = 0.8
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    args.in_feats = features.shape[1]
    args.classes = data.num_labels
    args.n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (args.n_edges, args.classes, train_mask.sum().item(),
           val_mask.sum().item(), test_mask.sum().item()))

    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()
    stop_number = int(np.round(len(labels) * 0.1))
    attacker_mask = torch.ByteTensor(
        sample_mask(range(stop_number), labels.shape[0]))
    target_mask = torch.ByteTensor(
        sample_mask(range(stop_number), labels.shape[0]))
    return features, labels, train_mask, val_mask, test_mask, data
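The sample_mask helper is assumed here; the version from the classic GCN reference code builds a boolean indicator vector, along these lines:

# Assumed sample_mask helper: True at the given indices, length l.
import numpy as np

def sample_mask(idx, l):
    mask = np.zeros(l, dtype=bool)
    mask[np.asarray(idx)] = True
    return mask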
Example #8
def load_cls_data(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    row = g.edges()[0]
    col = g.edges()[1]
    g = dgl.graph((row, col))

    return g, features, labels, n_classes, train_mask, val_mask, test_mask
Example #9
def load(args, save_file=".npy"):
    save_file = args.dataset + save_file
    if os.path.exists(save_file):
        return np.load(save_file, allow_pickle=True).tolist()
    else:
        datas = load_data(args)
        np.save(save_file, datas)
        return datas
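np.save wraps an arbitrary Python object in a 0-d object array, which is why the .tolist() round-trip works; since NumPy 1.16.3, loading such arrays also requires allow_pickle=True. A self-contained sketch of the round-trip:

# Round-trip sketch: .tolist() on a loaded 0-d object array unwraps the
# original Python object.
import numpy as np

np.save('cache.npy', {'features': [1, 2, 3]})
restored = np.load('cache.npy', allow_pickle=True).tolist()
assert restored['features'] == [1, 2, 3]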
Example #10
def main(args):
    data = load_data(args)
    g = data.graph
    if isinstance(g, dgl.DGLGraph):
        csr = g.adjacency_matrix_scipy(transpose=True)
    else:
        csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')

    graph_io.save_graph(args.out, csr)
Example #11
def load_dataset(dataset="cora"):
    args = namedtuple("args", ["dataset"])
    data = load_data(args(dataset))

    # Remove self-loops to avoid duplicate passing of a node's feature to itself
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g.add_edges_from(zip(g.nodes, g.nodes))

    return g, data
Example #12
File: gat.py Project: zqxyz73/dgl
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    mask = torch.ByteTensor(data.train_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        mask = mask.cuda()

    # create DGL graph
    g = DGLGraph(data.graph)

    # create model
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                args.num_heads, F.elu, args.in_drop, args.attn_drop,
                args.residual)

    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # initialize graph
    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        logp = F.log_softmax(logits, 1)
        loss = F.nll_loss(logp, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
            print(
                "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
                .format(epoch, loss.item(), np.mean(dur),
                        n_edges / np.mean(dur) / 1000))
Example #13
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.gpu >= 0:
        ctx = mx.gpu(args.gpu)
    else:
        ctx = mx.cpu()

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = mx.nd.array(np.nonzero(data.train_mask)[0]).astype(
        np.int64).as_in_context(ctx)
    test_nid = mx.nd.array(np.nonzero(data.test_mask)[0]).astype(
        np.int64).as_in_context(ctx)

    features = mx.nd.array(data.features).as_in_context(ctx)
    labels = mx.nd.array(data.labels).as_in_context(ctx)
    train_mask = mx.nd.array(data.train_mask).as_in_context(ctx)
    val_mask = mx.nd.array(data.val_mask).as_in_context(ctx)
    test_mask = mx.nd.array(data.test_mask).as_in_context(ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    g.ndata['features'] = features
    g.ndata['labels'] = labels

    if args.model == "gcn_ns":
        gcn_ns_train(g, ctx, args, n_classes, train_nid, test_nid,
                     n_test_samples)
    elif args.model == "gcn_cv":
        gcn_cv_train(g, ctx, args, n_classes, train_nid, test_nid,
                     n_test_samples)
    elif args.model == "graphsage_cv":
        graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid,
                           n_test_samples)
    else:
        print("unknown model. Please choose from gcn_ns, gcn_cv, graphsage_cv")
Example #14
def main(args):
    if args.dataset == 'segtree':
        g = build_segtree(batch_size=32, seq_len=512)
        print('#Nodes: %d #Edges: %d' % (g.number_of_nodes(), g.number_of_edges()))
        csr = g.adjacency_matrix_scipy(fmt='csr')        
    else:
        data = load_data(args)
        g = data.graph
        csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')

    graph_io.save_graph(args.out, csr)
Example #15
def load(kwargs, save_file=".pkl"):
    kwarg_nt = namedtuple('kwarg', kwargs.keys())(*kwargs.values())

    save_file = 'data/' + kwarg_nt.dataset + save_file 
    if os.path.exists(save_file):
        with open(save_file, "rb") as f:
            return pkl.load(f)
    else:
        datas = load_data(kwarg_nt)
        with open(save_file, "wb") as f:
            pkl.dump(datas, f)
        return datas
Example #16
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = VGAE(in_feats, [32, 16], zdim=10, device=device)
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features

    n_epochs = 500
    losses = []
    loss = 0.0
    print('Training Start')
    t = trange(n_epochs, desc="Loss: 0.0", leave=True)
    for epoch in t:
        g.ndata['h'] = features

        t.set_description("Loss: {}".format(loss))
        t.refresh()
        # normalization
        adj = g.adjacency_matrix().to_dense()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
        #g.ndata['norm'] = norm.unsqueeze(1)

        pos_weight = torch.Tensor([
            float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        ]).to(device)

        z, adj_logits = model.forward(g)

        loss = model.compute_loss(z, adj_logits, adj, norm, pos_weight)

        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        #print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss))

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
Example #17
def load_citation(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features).to(device)
    labels = torch.LongTensor(data.labels).to(device)
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    valid_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)
    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    return g, features, labels, train_mask, valid_mask, test_mask
Example #18
def main(args):
    # load and preprocess dataset
    if args.graph_file != '':
        csr = mx.nd.load(args.graph_file)[0]
        n_edges = csr.shape[0]
        graph_name = os.path.basename(args.graph_file)
        data = GraphData(csr, args.num_feats, graph_name)
        csr = None
    else:
        data = load_data(args)
        n_edges = data.graph.number_of_edges()
        graph_name = args.dataset

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    mem_ctx = mx.cpu()

    features = mx.nd.array(data.features, ctx=mem_ctx)
    labels = mx.nd.array(data.labels, ctx=mem_ctx)
    train_mask = mx.nd.array(data.train_mask, ctx=mem_ctx)
    val_mask = mx.nd.array(data.val_mask, ctx=mem_ctx)
    test_mask = mx.nd.array(data.test_mask, ctx=mem_ctx)
    n_classes = data.num_labels

    n_train_samples = train_mask.sum().asscalar()
    n_val_samples = val_mask.sum().asscalar()
    n_test_samples = test_mask.sum().asscalar()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    print('graph name: ' + graph_name)
    g = dgl.contrib.graph_store.create_graph_store_server(data.graph,
                                                          graph_name,
                                                          "shared_mem",
                                                          args.num_workers,
                                                          False,
                                                          edge_dir='in')
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    g.ndata['val_mask'] = val_mask
    g.ndata['test_mask'] = test_mask
    g.run()
Example #19
def generate_data(args):
    data = load_data(args)
    labels = torch.LongTensor(data.labels)
    features = torch.FloatTensor(data.features)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # build the weighted NetworkX graph on CPU; the final graph moves to GPU below
    netg = nx.from_numpy_matrix(g.adjacency_matrix().to_dense().numpy(),
                                create_using=nx.DiGraph)
    print(netg)
    g = dgl.from_networkx(netg, edge_attrs=['weight']).to("cuda:0")
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    print("train_mask-shape", train_mask)
    return g, num_feats, n_classes, heads, cuda, features, labels, train_mask, val_mask, test_mask
Example #20
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = GAE(in_feats, [32,16])
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features


    n_epochs = 500
    losses = []
    print('Training Start')
    for epoch in tqdm(range(n_epochs)):
        g.ndata['h'] = features
        # normalization
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        adj = g.adjacency_matrix().to_dense()
        pos_weight = torch.Tensor([float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])
        
        
        adj_logits = model.forward(g)

        loss = loss_function(adj_logits, adj, pos_weight=pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss))
        
    
    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
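The per-node vector norm = deg^-0.5 computed in the loop above is the usual symmetric normalization; applied on both sides it turns A into D^{-1/2} A D^{-1/2}. A dense toy sketch of the equivalence (for intuition only):

# Dense sketch: scaling rows and columns of A by deg^-0.5 yields
# A_hat[i, j] = A[i, j] / sqrt(deg[i] * deg[j]).
import torch

A = torch.tensor([[0., 1., 1.],
                  [1., 0., 0.],
                  [1., 0., 0.]])
deg = A.sum(dim=1)
norm = deg.pow(-0.5)
norm[torch.isinf(norm)] = 0
A_hat = norm.unsqueeze(1) * A * norm.unsqueeze(0)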
Example #21
def main(args):
  data = load_data(args)

  if args.self_loop and not args.dataset.startswith('reddit'):
    data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])

  features = torch.FloatTensor(data.features)
  labels = torch.LongTensor(data.labels)
  train_mask = torch.ByteTensor(data.train_mask)
  val_mask = torch.ByteTensor(data.val_mask)
  test_mask = torch.ByteTensor(data.test_mask)
  in_feats = features.shape[1]
  n_classes = data.num_labels
  n_edges = data.graph.number_of_edges()

  n_train_samples = train_mask.sum().item()
  n_val_samples = val_mask.sum().item()
  n_test_samples = test_mask.sum().item()

  graph_name = args.dataset

  print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
        (n_edges, n_classes,
            n_train_samples,
            n_val_samples,
            n_test_samples))
  
  g = dgl.contrib.graph_store.create_graph_store_server(
        data.graph, graph_name,
        'shared_mem', args.num_workers, 
        False, edge_dir='in')
  dgl_g = DGLGraph(data.graph, readonly=True)
  norm = 1. / dgl_g.in_degrees().float().unsqueeze(1)
  del dgl_g
  g.ndata['norm'] = norm
  g.ndata['features'] = features
  g.ndata['labels'] = labels
  g.ndata['train_mask'] = train_mask
  g.ndata['val_mask'] = val_mask
  g.ndata['test_mask'] = test_mask
  print('start running graph server on dataset: {}'.format(graph_name))
  g.run()
Example #22
def main(args):
    if args.dataset == 'segtree':
        g = build_segtree(batch_size=32, seq_len=512)
        print('#Nodes: %d #Edges: %d' %
              (g.number_of_nodes(), g.number_of_edges()))
        csr = g.adjacency_matrix_scipy(fmt='csr')
        n, m = 32 * 512, 32 * 512
    else:
        data = load_data(args)
        g = data.graph
        if isinstance(g, dgl.DGLGraph):
            csr = g.adjacency_matrix_scipy(transpose=True)
        else:
            csr = nx.to_scipy_sparse_matrix(g, weight=None, format='csr')
        n, m = csr.indptr.shape[0] - 1, csr.indptr.shape[0] - 1

    graph_io.save_graph(args.out, csr, n, m)
Example #23
def main():
    n_users = 100
    n_relationships = 10

    data = load_data(args)
    gl = DGLGraph(data.graph)
    features, labels = data.features, data.labels
    G = gl.to_networkx()
    #print(gl.nodes().tolist())
    sampled_nodes = random_walk(G)
    print(' features:', len(features))
    print(' labels:', len(labels))
    print(' nodes:', len(gl.nodes()))
    gl, features, labels = sample_data(gl, features, labels, sampled_nodes)
    print(' features:', len(features))
    print(' labels:', len(labels))
    print(' nodes:', len(gl.nodes()))
Example #24
def main(args):
    #print('in main...')
    percents = [10, 20, 30, 40]  # alternatives tried: 5..70 in steps of 5
    criteria = ['random']  # alternatives: 'closeness', 'rank', 'betweenness', 'katz'
    degrees = [0, 1, 2, 3, 4, 5, 6, 7]
    folds = range(160, 162)
    epocs = [600]  # alternative: 200

    # load and preprocess dataset
    data = load_data(args)
    dgl_g = DGLGraph(data.graph)
    features, labels = data.features, data.labels

    #####################  MY WORK ################
    #g = DGLGraph(data.graph)
    #print(g.ndata['norm'] )
    #sys.exit()
    print(np.shape(data.features))
    print(np.shape(data.labels))
    #print(type(data.graph))
    
    node_list = dgl_g.nodes().tolist()
    G, temp_G = get_weak_ties_network(dgl_g, node_list)
    

    for p in percents:
        for c in criteria:
            G_ = G.copy()
            g, remaining_nodes = na.network_preprocess(G_, temp_G, node_list, args.dir_, args.dataset, p, c)
            for f in folds:
                features2, labels2, train_mask, val_mask, test_mask = na.get_model_parameters(remaining_nodes, features, labels)
                #print('number of nodes in strong tie network:',dgl_g.number_of_nodes())
                #print('number of nodes in weak tie network:',G.number_of_nodes())
                #print('number of nodes in undirected weak tie network:',temp_G.number_of_nodes())
    
                #print('number of nodes in weak tie network after %s removal:'%p,len(remaining_nodes))
                #print('number of nodes in weak tie network after %s removal:'%p,g.number_of_nodes())
                #print('number of features in weak tie network after %s removal:'%p,len(features2))

                for d in degrees:
                    print('******************* degree %s ******************' % d)
                    for e in epocs:
                        print(args.dataset, 'criteria:', c, 'percent:', p, 'degree:', d, 'fold:', f, 'epoch:', e)
                        run_(data, args, g, features2, labels2, train_mask, val_mask, test_mask, p, c, f, d, e)
Example #25
def load_cls_data(args):
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    g = DGLGraph(data.graph)
    g.add_edges(g.nodes(), g.nodes())

    return g, features, labels, n_classes, train_mask, val_mask, test_mask
Example #26
    def worker(self, args):

        number_hops = 1
        if args.model == "gcn_ns":
            number_hops = args.n_layers + 1
        elif args.model == "gcn_cv":
            number_hops = args.n_layers
        else:
            print("unknown model. Please choose from gcn_ns and gcn_cv")

        # Start sender
        namebook = {0: args.ip}
        sender = dgl.contrib.sampling.SamplerSender(namebook)

        # load and preprocess dataset
        data = load_data(args)

        if args.self_loop and not args.dataset.startswith('reddit'):
            data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

        train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
        test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

        # create GCN model
        g = DGLGraph(data.graph, readonly=True)

        while True:
            idx = 0
            for nf in dgl.contrib.sampling.NeighborSampler(
                    g,
                    args.batch_size,
                    args.num_neighbors,
                    neighbor_type='in',
                    shuffle=True,
                    num_workers=32,
                    num_hops=number_hops,
                    seed_nodes=train_nid):
                print("send train nodeflow: %d" % (idx))
                sender.send(nf, 0)
                idx += 1
            sender.signal(0)
Example #27
def main(args):
    # load and preprocess dataset
    train_acc_list = []
    test_acc_list = []
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
        print('>> not using GPU')
    else:
        cuda = True
        print('>> using GPU ...')
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())

    # create SGC model
    model = SGConv(in_feats, n_classes, k=2, cached=True, bias=args.bias)

    if cuda: model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    # loss_fcn = FocalLoss(gamma=0)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_mask)
        test_acc = evaluate(model, g, features, labels, test_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.6f} | Val accuracy {:.6f} | Test accuracy {:.6f} | ETputs(KTEPS) {:.2f}"
            .format(epoch, np.mean(dur), loss.item(), acc, test_acc,
                    n_edges / np.mean(dur) / 1000))
        train_acc_list.append(acc)
        test_acc_list.append(test_acc)
    plot_curve(train_acc_list, test_acc_list, args.dataset)
Example #28
File: train.py Project: wangdomg/dgl
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create TAGCN model
    model = TAGCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                  args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #29
def main(args):
    # load and preprocess dataset
    args.dataset = "reddit-self-loop"
    data = load_data(args)
    g = data.graph
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, g.ndata['train_mask'].int().sum().item(),
           g.ndata['val_mask'].int().sum().item(),
           g.ndata['test_mask'].int().sum().item()))

    # graph preprocess and calculate normalization factor
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    # create SGC model
    model = SGConv(in_feats,
                   n_classes,
                   k=2,
                   cached=True,
                   bias=True,
                   norm=normalize)
    if args.gpu >= 0:
        model = model.cuda()

    # use optimizer
    optimizer = torch.optim.LBFGS(model.parameters())

    # define loss closure
    def closure():
        optimizer.zero_grad()
        output = model(g, features)[train_mask]
        loss_train = F.cross_entropy(output, labels[train_mask])
        loss_train.backward()
        return loss_train

    # initialize graph
    for epoch in range(args.n_epochs):
        model.train()
        optimizer.step(closure)

    acc = evaluate(model, features, g, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #30
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item(),
        )
    )

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    # graph preprocess and calculate normalization factor
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # create GraphSAGE model
    model = GraphSAGE(
        g,
        in_feats,
        args.n_hidden,
        n_classes,
        args.n_layers,
        F.relu,
        args.dropout,
        args.aggregator_type,
    )

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        accuracy, precision, recall, fscore, _ = evaluate(
            model, features, labels, val_mask
        )
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()
        print("=" * 80)
        print()

    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask
    )
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)