def main(args):
    """Train and evaluate an APPNP model on a standard citation dataset.

    Uses the newer DGL dataset classes (CoraGraphDataset etc.).

    Args:
        args: parsed CLI namespace; fields read here: dataset, gpu,
            hidden_sizes, in_drop, edge_drop, alpha, k, lr,
            weight_decay, n_epochs.

    Raises:
        ValueError: if ``args.dataset`` is not one of cora/citeseer/pubmed.
    """
    # Load and preprocess the requested citation-graph dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    # FIX: dropped a stray quote character the original banner printed.
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    n_edges = g.number_of_edges()
    # Add self loops; remove any existing ones first to avoid duplicates.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # Create the APPNP model.
    model = APPNP(g, in_feats, args.hidden_sizes, n_classes, F.relu,
                  args.in_drop, args.edge_drop, args.alpha, args.k)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Training loop; the first 3 epochs are excluded from timing (warm-up).
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        # FIX: np.mean([]) during the warm-up epochs emitted a
        # RuntimeWarning; compute the mean once and substitute nan quietly.
        avg_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, avg_dur, loss.item(), acc,
                                            n_edges / avg_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train and evaluate an APPNP model via the legacy DGL ``load_data`` API.

    Args:
        args: parsed CLI namespace; fields read here: gpu, hidden_sizes,
            in_drop, edge_drop, alpha, k, lr, weight_decay, n_epochs
            (plus whatever ``load_data`` consumes).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor only exists on newer PyTorch; fall back to ByteTensor masks.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    # FIX: dropped a stray quote character the original banner printed.
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    if args.gpu >= 0:
        g = g.to(args.gpu)

    # create APPNP model
    model = APPNP(g, in_feats, args.hidden_sizes, n_classes, F.relu,
                  args.in_drop, args.edge_drop, args.alpha, args.k)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Training loop; the first 3 epochs are excluded from timing (warm-up).
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, features, labels, val_mask)
        # FIX: np.mean([]) during warm-up epochs emitted a RuntimeWarning;
        # compute the mean once and substitute nan quietly.
        avg_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, avg_dur, loss.item(), acc,
                                            n_edges / avg_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train APPNP augmented with pretrained structure/attribute embeddings.

    Runs 10 independent training runs and reports the average test accuracy
    at the epoch with the best validation accuracy.

    Args:
        args: parsed CLI namespace; fields read here: dataset, gpu, galpha,
            gbeta, hidden_sizes, in_drop, edge_drop, alpha, k, lr,
            weight_decay, n_epochs.
    """
    # load and preprocess dataset
    data = load_data(args)

    # Pretrained 32-d structure / attribute embeddings for this dataset.
    structure_features = np.load('../../pretrained/' + args.dataset + '_structure_32d.npy')
    attr_features = np.load('../../pretrained/' + args.dataset + '_attr_32d.npy')
    galpha = args.galpha
    gbeta = args.gbeta
    # Row-wise (per-node) standardization of both embedding matrices.
    structure_features = preprocessing.scale(structure_features, axis=1,
                                             with_mean=True, with_std=True, copy=True)
    structure_features = torch.FloatTensor(structure_features)
    attr_features = preprocessing.scale(attr_features, axis=1,
                                        with_mean=True, with_std=True, copy=True)
    attr_features = torch.FloatTensor(attr_features)
    in_feats2 = structure_features.shape[1]
    in_feats3 = attr_features.shape[1]
    print(structure_features.shape, attr_features.shape)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks are deprecated for indexing on newer
    # PyTorch; kept as-is for parity with the original pipeline.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats1 = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    # FIX: dropped a stray quote character the original banner printed.
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().item(),
           val_mask.sum().item(),
           test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        # FIX: the pretrained embeddings were moved to GPU unconditionally
        # before the args.gpu check, crashing CPU-only runs (args.gpu < 0).
        structure_features = structure_features.cuda()
        attr_features = attr_features.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # Symmetric normalization: D^{-1/2}; isolated nodes get 0 instead of inf.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    result = []
    # FIX: loop variable renamed from `iter`, which shadowed the builtin.
    for run in range(10):
        model = APPNP(g, in_feats1, in_feats2, in_feats3, args.hidden_sizes,
                      n_classes, F.relu, args.in_drop, args.edge_drop,
                      args.alpha, args.k, 1, galpha, gbeta)
        if cuda:
            model.cuda()
        loss_fcn = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)

        dur = []
        best_val_acc = 0
        best_test_acc = 0
        for epoch in range(args.n_epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, structure_features, attr_features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
            # Model selection: keep the test accuracy observed at the
            # epoch with the best (>=, i.e. latest-best) validation accuracy.
            val_acc = evaluate(model, features, structure_features,
                               attr_features, labels, val_mask)
            if val_acc >= best_val_acc:
                best_val_acc = val_acc
                best_test_acc = evaluate(model, features, structure_features,
                                         attr_features, labels, test_mask)
        result.append(best_test_acc)

    print('average result of 10 experiments:', np.average(result))
def main(args):
    """Benchmark APPNP training time and GPU memory on a .npy dataset.

    Loads edges/features/train_mask/labels from ``./dataset/<name>/`` as
    dense numpy arrays, trains for ``args.num_epochs`` epochs, and prints
    per-epoch time, train accuracy and peak memory, plus a final summary.

    Args:
        args: parsed CLI namespace; fields read here: dataset, gpu,
            hidden_sizes, in_drop, edge_drop, alpha, k, lr,
            weight_decay, num_epochs.
    """
    # load and preprocess dataset
    path = './dataset/' + str(args.dataset) + '/'
    edges = np.load(path + 'edges.npy')
    features = np.load(path + 'features.npy')
    train_mask = np.load(path + 'train_mask.npy')
    labels = np.load(path + 'labels.npy')

    num_edges = edges.shape[0]
    num_nodes = features.shape[0]
    num_feats = features.shape[1]
    # Labels are assumed to be a contiguous integer range.
    n_classes = max(labels) - min(labels) + 1
    assert train_mask.shape[0] == num_nodes

    print('dataset {}'.format(args.dataset))
    print('# of edges : {}'.format(num_edges))
    print('# of nodes : {}'.format(num_nodes))
    print('# of features : {}'.format(num_feats))

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    # BoolTensor only exists on newer PyTorch; fall back to ByteTensor.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(train_mask)
    else:
        train_mask = torch.ByteTensor(train_mask)

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()

    # initialize a DGL graph from the edge list (u -> v).
    u = edges[:, 0]
    v = edges[:, 1]
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(u, v)

    # graph preprocess and calculate normalization factor
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # create APPNP model
    model = APPNP(g, num_feats, args.hidden_sizes, n_classes, F.relu,
                  args.in_drop, args.edge_drop, args.alpha, args.k)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    dur = []
    Used_memory = 0
    for epoch in range(args.num_epochs):
        # FIX: torch.cuda.synchronize() / max_memory_allocated() were called
        # unconditionally and crashed CPU-only runs (args.gpu < 0);
        # guard them and report 0 memory on CPU.
        if cuda:
            torch.cuda.synchronize()
        model.train()
        t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        now_mem = torch.cuda.max_memory_allocated(0) if cuda else 0
        Used_memory = max(now_mem, Used_memory)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        run_time_this_epoch = t2 - t0

        # First 3 epochs excluded from the averaged timing (warm-up).
        if epoch >= 3:
            dur.append(run_time_this_epoch)

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        print(
            'Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb'
            .format(epoch, run_time_this_epoch, train_acc,
                    (now_mem * 1.0 / (1024**2))))

    Used_memory /= (1024**3)
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, np.mean(dur)))
def main(args):
    """Train APPNP and report accuracy, precision, recall and F-score.

    Per-epoch metrics are computed on the validation set; the final report
    (including a class-based breakdown) is computed on the test set.

    Args:
        args: parsed CLI namespace; fields read here: gpu, hidden_sizes,
            in_drop, edge_drop, alpha, k, lr, weight_decay, n_epochs
            (plus whatever ``load_data`` consumes).
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor only exists on newer PyTorch; fall back to ByteTensor masks.
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    # FIX: dropped a stray quote character the original banner printed.
    print(
        """----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item(),
        )
    )

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # create APPNP model
    model = APPNP(
        g,
        in_feats,
        args.hidden_sizes,
        n_classes,
        F.relu,
        args.in_drop,
        args.edge_drop,
        args.alpha,
        args.k,
    )
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Training loop.
    # FIX: removed dead timing code (t0/dur were accumulated but never used).
    for epoch in range(args.n_epochs):
        model.train()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accuracy, precision, recall, fscore, _ = evaluate(
            model, features, labels, val_mask
        )
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()

    print("=" * 80)
    print()
    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask
    )
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)