Example #1: APPNP node classification with Euler (tf_euler), driven by a TensorFlow Estimator
def main(_):
    flags_obj = tf.flags.FLAGS
    euler_graph = tf_euler.dataset.get_dataset(flags_obj.dataset)
    euler_graph.load_graph()

    dims = [flags_obj.hidden_dim, flags_obj.hidden_dim]
    if flags_obj.run_mode == 'train':
        metapath = [euler_graph.train_edge_type]
    else:
        metapath = [euler_graph.all_edge_type]
    num_steps = int((euler_graph.total_size + 1) // flags_obj.batch_size *
                    flags_obj.num_epochs)

    model = APPNP(dims,
                  metapath,
                  euler_graph.feature_idx,
                  euler_graph.feature_dim,
                  euler_graph.label_idx,
                  euler_graph.label_dim,
                  K=flags_obj.layers,
                  alpha=flags_obj.alpha)

    params = {
        'train_node_type': euler_graph.train_node_type[0],
        'batch_size': flags_obj.batch_size,
        'optimizer': flags_obj.optimizer,
        'learning_rate': flags_obj.learning_rate,
        'log_steps': flags_obj.log_steps,
        'model_dir': flags_obj.model_dir,
        'id_file': euler_graph.id_file,
        'infer_dir': flags_obj.model_dir,
        'total_step': num_steps
    }
    config = tf.estimator.RunConfig(log_step_count_steps=None)
    model_estimator = NodeEstimator(model, params, config)

    if flags_obj.run_mode == 'train':
        model_estimator.train()
    elif flags_obj.run_mode == 'evaluate':
        model_estimator.evaluate()
    elif flags_obj.run_mode == 'infer':
        model_estimator.infer()
    else:
        raise ValueError('Unknown run mode: {}'.format(flags_obj.run_mode))
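
The main() above assumes the usual tf_euler example scaffolding: flag definitions and a tf.app entry point. Below is a minimal, hypothetical sketch of that scaffolding; the flag names are inferred from the flags_obj.* accesses in main(), and the defaults are illustrative assumptions rather than values from Euler itself.

import tensorflow as tf
import tf_euler

# Hypothetical flag definitions matching the attributes read in main().
tf.flags.DEFINE_string('dataset', 'cora', 'Built-in tf_euler dataset name.')
tf.flags.DEFINE_string('run_mode', 'train', 'One of: train, evaluate, infer.')
tf.flags.DEFINE_integer('hidden_dim', 16, 'Hidden layer dimension.')
tf.flags.DEFINE_integer('layers', 2, 'Number of propagation steps K.')
tf.flags.DEFINE_float('alpha', 0.1, 'APPNP teleport probability.')
tf.flags.DEFINE_integer('batch_size', 32, 'Mini-batch size.')
tf.flags.DEFINE_integer('num_epochs', 10, 'Number of training epochs.')
tf.flags.DEFINE_string('optimizer', 'adam', 'Optimizer name.')
tf.flags.DEFINE_float('learning_rate', 0.01, 'Learning rate.')
tf.flags.DEFINE_integer('log_steps', 20, 'Logging interval in steps.')
tf.flags.DEFINE_string('model_dir', 'ckpt', 'Checkpoint / inference directory.')

if __name__ == '__main__':
    tf.app.run(main)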
Example #2: APPNP on DGL's built-in Cora / Citeseer / Pubmed datasets (current DGL API)
def main(args):
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # add self-loops so each node also aggregates its own features
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # create APPNP model
    model = APPNP(g,
                  in_feats,
                  args.hidden_sizes,
                  n_classes,
                  F.relu,
                  args.in_drop,
                  args.edge_drop,
                  args.alpha,
                  args.k)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #3: APPNP with the legacy DGLGraph API and the generic load_data loader
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    if args.gpu >= 0:
        g = g.to(args.gpu)

    # create APPNP model
    model = APPNP(g,
                  in_feats,
                  args.hidden_sizes,
                  n_classes,
                  F.relu,
                  args.in_drop,
                  args.edge_drop,
                  args.alpha,
                  args.k)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Example #4: APPNP benchmark that loads edges/features from .npy files and reports epoch time and peak GPU memory
def main(args):
    # load and preprocess dataset

    # first, check the dataset
    path = './dataset/' + str(args.dataset) + '/'
    # Alternative text-based loading, kept for reference:
    # edges = np.loadtxt(path + 'edges.txt').astype(int)
    # features = np.loadtxt(path + 'features.txt')
    # train_mask = np.loadtxt(path + 'train_mask.txt').astype(int)
    # labels = np.loadtxt(path + 'labels.txt').astype(int)
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    n_classes = max(labels) - min(labels) + 1

    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)

    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)

    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    u = edges[:, 0]
    v = edges[:, 1]

    #initialize a DGL graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # graph preprocess and calculate normalization factor

    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # create APPNP model
    model = APPNP(g, num_feats, args.hidden_sizes, n_classes, F.relu,
                  args.in_drop, args.edge_drop, args.alpha, args.k)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    Used_memory = 0

    for epoch in range(args.num_epochs):
        if cuda:
            # finish pending kernels so the timer measures this epoch only
            torch.cuda.synchronize()
        model.train()
        t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0) if cuda else 0
        Used_memory = max(now_mem, Used_memory)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        run_time_this_epoch = t2 - t0

        if epoch >= 3:
            dur.append(run_time_this_epoch)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))

    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, np.mean(dur)))
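
Example #4 depends on an accuracy() helper that is not shown; a minimal sketch matching the call site:

def accuracy(logits, labels):
    # fraction of predictions whose argmax matches the label
    _, indices = torch.max(logits, dim=1)
    return torch.sum(indices == labels).item() * 1.0 / len(labels)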
Example #5: APPNP variant that fuses raw features with pretrained structure and attribute embeddings, averaged over 10 runs
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    ##
    structure_features = np.load('../../pretrained/' + args.dataset +
                                 '_structure_32d.npy')
    attr_features = np.load('../../pretrained/' + args.dataset +
                            '_attr_32d.npy')

    galpha = args.galpha
    gbeta = args.gbeta

    structure_features = preprocessing.scale(structure_features,
                                             axis=1,
                                             with_mean=True,
                                             with_std=True,
                                             copy=True)
    #structure_features = preprocessing.scale(structure_features, axis=0, with_mean=True,with_std=True,copy=True)
    # defer .cuda() until we know a GPU was requested (handled below)
    structure_features = torch.FloatTensor(structure_features)

    attr_features = preprocessing.scale(attr_features,
                                        axis=1,
                                        with_mean=True,
                                        with_std=True,
                                        copy=True)
    #attr_features = preprocessing.scale(attr_features, axis=0, with_mean=True,with_std=True,copy=True)
    attr_features = torch.FloatTensor(attr_features)

    in_feats2 = structure_features.shape[1]
    in_feats3 = attr_features.shape[1]
    print(structure_features.shape, attr_features.shape)

    ##
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats1 = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        structure_features = structure_features.cuda()
        attr_features = attr_features.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create APPNP model
    #alpha2_set = [0,0.001,0.002,0.004,0.006,0.008,0.01,0.02,0.03,0.04,0.05]
    #alpha3_set = [0,0.001,0.002,0.004,0.006,0.008,0.01,0.02,0.03,0.04,0.05]

    result = []
    for _ in range(10):
        model = APPNP(g, in_feats1, in_feats2, in_feats3, args.hidden_sizes,
                      n_classes, F.relu, args.in_drop, args.edge_drop,
                      args.alpha, args.k, 1, galpha, gbeta)

        if cuda:
            model.cuda()
        loss_fcn = torch.nn.CrossEntropyLoss()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

        # initialize graph
        dur = []
        best_val_acc = 0
        best_test_acc = 0
        for epoch in range(args.n_epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, structure_features, attr_features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)

            val_acc = evaluate(model, features, structure_features,
                               attr_features, labels, val_mask)
            if val_acc >= best_val_acc:
                best_val_acc = val_acc
                best_test_acc = evaluate(model, features, structure_features,
                                         attr_features, labels, test_mask)
        result.append(best_test_acc)
    print('average result of 10 experiments:', np.average(result))
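
Example #5 calls a three-input evaluate() that is not shown. A minimal sketch consistent with the call sites: masked accuracy over logits computed from the three feature views.

def evaluate(model, features, structure_features, attr_features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features, structure_features, attr_features)
        _, indices = torch.max(logits[mask], dim=1)
        correct = torch.sum(indices == labels[mask])
        return correct.item() * 1.0 / mask.int().sum().item()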
Example #6: APPNP with precision / recall / F-score evaluation
def main(args):
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item(),
        )
    )

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # create APPNP model
    model = APPNP(
        g,
        in_feats,
        args.hidden_sizes,
        n_classes,
        F.relu,
        args.in_drop,
        args.edge_drop,
        args.alpha,
        args.k,
    )

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        accuracy, precision, recall, fscore, _ = evaluate(
            model, features, labels, val_mask
        )
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()
        print("=" * 80)
        print()

    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask
    )
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)
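
Example #6 expects evaluate() to return accuracy, precision, recall, F-score, and a per-class report. The original helper is not shown; the sketch below is an assumption built on sklearn.metrics, chosen because the printed outputs line up with what those functions provide.

from sklearn.metrics import classification_report, precision_recall_fscore_support

def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        preds = logits[mask].argmax(dim=1).cpu().numpy()
        truth = labels[mask].cpu().numpy()
        accuracy = float((preds == truth).mean())
        # macro averaging is an assumption; the original may weight classes
        precision, recall, fscore, _ = precision_recall_fscore_support(
            truth, preds, average='macro', zero_division=0)
        report = classification_report(truth, preds, zero_division=0)
        return accuracy, precision, recall, fscore, report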