def main():
    # load data
    dataset = CiteseerGraphDataset()
    g = dataset[0]
    node_features = g.ndata['feat']
    node_labels = g.ndata['label']
    n_features = node_features.shape[1]
    n_labels = dataset.num_classes

    # get split masks
    train_mask = g.ndata['train_mask']
    valid_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    model = SAGEFull(in_feats=n_features, hid_feats=100, out_feats=n_labels)
    opt = optim.Adam(model.parameters())

    for epoch in range(30):
        model.train()
        # forward propagation using all nodes
        logits = model(g, node_features)
        # compute loss
        loss = F.cross_entropy(logits[train_mask], node_labels[train_mask])
        # compute validation accuracy
        acc = accuracy(logits[valid_mask], node_labels[valid_mask])
        # backward propagation
        opt.zero_grad()
        loss.backward()
        opt.step()
        print('Epoch {:d} | Loss {:.4f} | Accuracy {:.2%}'.format(
            epoch + 1, loss.item(), acc))

    acc = accuracy(model(g, node_features)[test_mask], node_labels[test_mask])
    print('Test accuracy {:.4f}'.format(acc))

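# SAGEFull is defined elsewhere in this repo. Purely as a hypothetical sketch,
# a minimal two-layer GraphSAGE stack built on dgl.nn.SAGEConv could look like
# this (the repo's actual model may differ):
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import SAGEConv

class SAGEFull(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, hid_feats, aggregator_type='mean')
        self.conv2 = SAGEConv(hid_feats, out_feats, aggregator_type='mean')

    def forward(self, g, x):
        # full-graph message passing over all nodes
        h = F.relu(self.conv1(g, x))
        return self.conv2(g, h)
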
def train(args):
    data = load_citation_dataset(args.dataset)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    # add self-loops
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    model = GCN(features.shape[1], args.num_hidden, data.num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    for epoch in range(args.epochs):
        model.train()
        logits = model(g, features)
        loss = criterion(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = accuracy(logits[val_mask], labels[val_mask])
        print('Epoch {:05d} | Loss {:.4f} | ValAcc {:.4f}'.format(
            epoch, loss.item(), acc))

    acc = accuracy(model(g, features)[test_mask], labels[test_mask])
    print('Test Accuracy {:.4f}'.format(acc))

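# GCN is likewise defined elsewhere. As a sketch only, the standard two-layer
# model on top of dgl.nn.GraphConv (assumed, not necessarily the repo's code):
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GraphConv

class GCN(nn.Module):
    def __init__(self, in_feats, hid_feats, num_classes):
        super().__init__()
        self.conv1 = GraphConv(in_feats, hid_feats)
        self.conv2 = GraphConv(hid_feats, num_classes)

    def forward(self, g, x):
        h = F.relu(self.conv1(g, x))
        return self.conv2(g, h)
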
def train(args):
    data = load_rdf_dataset(args.dataset)
    g = data[0]
    category = data.predict_category
    num_classes = data.num_classes
    labels = g.nodes[category].data['labels']
    train_mask = g.nodes[category].data['train_mask'].bool()
    test_mask = g.nodes[category].data['test_mask'].bool()

    model = EntityClassification(
        {ntype: g.num_nodes(ntype) for ntype in g.ntypes},
        args.num_hidden, num_classes, list(set(g.etypes)),
        args.num_hidden_layers, args.num_bases, args.self_loop, args.dropout
    )
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        model.train()
        logits = model(g)[category]
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        print('Epoch {:04d} | Loss {:.4f} | Train Acc {:.4f}'.format(
            epoch, loss.item(), train_acc))

    test_acc = accuracy(model(g)[category][test_mask], labels[test_mask])
    print('Test Accuracy {:.4f}'.format(test_acc))

def train(args):
    data = load_citation_dataset(args.dataset)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    # add self-loops
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    num_heads = [args.num_heads] * (args.num_layers - 1) + [args.num_out_heads]
    model = GAT(features.shape[1], args.num_hidden, data.num_classes, num_heads, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        model.train()
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, g, features, labels, val_mask)
        print('Epoch {:04d} | Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}'.format(
            epoch, loss.item(), train_acc, val_acc))
    print()

    acc = evaluate(model, g, features, labels, test_mask)
    print('Test Accuracy {:.4f}'.format(acc))

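# The GAT model consumes a per-layer head list (num_heads heads on hidden
# layers, num_out_heads on the output layer). A hypothetical sketch using
# dgl.nn.GATConv, concatenating heads on hidden layers and averaging them on
# the output layer (the repo's actual model may differ):
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GATConv

class GAT(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, num_heads, dropout):
        super().__init__()
        # input dim of layer i is the concatenated output of layer i-1
        dims = [in_feats] + [hid_feats * h for h in num_heads[:-1]]
        outs = [hid_feats] * (len(num_heads) - 1) + [out_feats]
        self.layers = nn.ModuleList(
            GATConv(dims[i], outs[i], h, feat_drop=dropout, attn_drop=dropout)
            for i, h in enumerate(num_heads)
        )

    def forward(self, g, x):
        h = x
        for i, layer in enumerate(self.layers):
            h = layer(g, h)  # (N, num_heads, out_dim)
            if i < len(self.layers) - 1:
                h = F.elu(h.flatten(start_dim=1))  # concatenate heads
            else:
                h = h.mean(dim=1)  # average heads at the output layer
        return h
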
def train(args):
    set_random_seed(args.seed)
    device = f'cuda:{args.device}' if torch.cuda.is_available() and args.device >= 0 else 'cpu'
    device = torch.device(device)

    g, labels, num_classes, train_idx, val_idx, test_idx = load_data(
        args.dataset, args.ogb_root, args.seed, device)
    features = g.ndata['feat']

    model = SuperGAT(
        features.shape[1], args.num_hidden, num_classes, args.num_heads,
        args.attn_type, args.neg_sample_ratio, args.dropout, args.dropout
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        model.train()
        logits, attn_loss = model(g, features)
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss += args.attn_loss_weight * attn_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_idx], labels[train_idx])
        val_acc = evaluate(model, g, features, labels, val_idx)
        print('Epoch {:04d} | Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}'.format(
            epoch, loss.item(), train_acc, val_acc))

    acc = evaluate(model, g, features, labels, test_idx)
    print('Test Accuracy {:.4f}'.format(acc))

def train(args):
    set_random_seed(args.seed)
    g, labels, num_classes, train_idx, val_idx, test_idx = load_data(args.dataset, args.ogb_root)

    print('Precomputing aggregated neighbor features...')
    features = preprocess(g, g.ndata['feat'], args.num_hops)  # List[tensor(N, d_in)] of length r+1
    train_feats = [feat[train_idx] for feat in features]
    val_feats = [feat[val_idx] for feat in features]
    test_feats = [feat[test_idx] for feat in features]

    model = SIGN(
        g.ndata['feat'].shape[1], args.num_hidden, num_classes,
        args.num_hops, args.num_layers, args.dropout
    )
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        model.train()
        logits = model(train_feats)
        loss = F.cross_entropy(logits, labels[train_idx])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits, labels[train_idx])
        val_acc = evaluate(model, val_feats, labels[val_idx])
        print('Epoch {:d} | Train Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}'.format(
            epoch, loss.item(), train_acc, val_acc))

    test_acc = evaluate(model, test_feats, labels[test_idx])
    print('Test Acc {:.4f}'.format(test_acc))

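# SIGN's one-off preprocessing step materializes [X, ÂX, Â²X, ..., Â^r X] so
# training needs no message passing. A minimal sketch of what `preprocess`
# might look like, assuming Â = D^{-1/2} A D^{-1/2} and DGL message passing
# (the repo's actual helper may differ):
import torch
import dgl.function as fn

def preprocess(g, feats, num_hops):
    # returns a list of r+1 tensors: [X, ÂX, Â²X, ..., Â^r X]
    with torch.no_grad():
        deg = g.in_degrees().float().clamp(min=1)
        norm = deg.pow(-0.5).unsqueeze(1)  # D^{-1/2}
        result = [feats]
        for _ in range(num_hops):
            g.ndata['h'] = result[-1] * norm
            g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            result.append(g.ndata.pop('h') * norm)
    return result
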
def train(args):
    data = load_citation_dataset(args.dataset)
    g = data[0]
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    test_mask = g.ndata['test_mask']
    g = dgl.add_self_loop(dgl.remove_self_loop(g))

    lp = LabelPropagation(args.num_layers, args.alpha)
    logits = lp(g, labels, train_mask)
    test_acc = accuracy(logits[test_mask], labels[test_mask])
    print('Test Accuracy {:.4f}'.format(test_acc))

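# LabelPropagation is parameter-free: it iterates Y ← α·Â·Y + (1-α)·Y⁰ and
# clamps the known training labels after every step. A hypothetical sketch of
# this common formulation (not necessarily the repo's exact module):
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn

class LabelPropagation(nn.Module):
    def __init__(self, num_layers, alpha):
        super().__init__()
        self.num_layers = num_layers
        self.alpha = alpha

    @torch.no_grad()
    def forward(self, g, labels, mask):
        y0 = F.one_hot(labels).float()
        y0[~mask] = 0.0  # unlabeled nodes start from zero
        y = y0
        deg = g.in_degrees().float().clamp(min=1)
        norm = deg.pow(-0.5).unsqueeze(1)  # D^{-1/2}
        for _ in range(self.num_layers):
            g.ndata['h'] = y * norm
            g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            y = self.alpha * g.ndata.pop('h') * norm + (1 - self.alpha) * y0
            y[mask] = y0[mask]  # clamp the known labels after every step
        return y
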
def correct_and_smooth(base_model, g, feats, labels, train_idx, val_idx, test_idx, args):
    print('C&S')
    base_model.eval()
    base_pred = base_model(feats).softmax(dim=1)  # note: softmax is required

    cs = CorrectAndSmooth(
        args.num_correct_layers, args.correct_alpha, args.correct_norm,
        args.num_smooth_layers, args.smooth_alpha, args.smooth_norm, args.scale
    )
    mask = torch.cat([train_idx, val_idx])
    logits = cs(g, F.one_hot(labels).float(), base_pred, mask)
    test_acc = accuracy(logits[test_idx], labels[test_idx])
    print('Test Acc {:.4f}'.format(test_acc))

def train_base_model(base_model, feats, labels, train_idx, val_idx, test_idx, args):
    print(f'Base model {args.base_model}')
    optimizer = optim.Adam(base_model.parameters(), lr=args.lr)

    for epoch in range(args.epochs):
        base_model.train()
        logits = base_model(feats)
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_idx], labels[train_idx])
        val_acc = evaluate(base_model, feats, labels, val_idx)
        print('Epoch {:d} | Train Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}'.format(
            epoch, loss.item(), train_acc, val_acc))

    test_acc = evaluate(base_model, feats, labels, test_idx)
    print('Test Acc {:.4f}'.format(test_acc))

def node_clf(embeds, labels, num_classes, train_mask, test_mask):
    clf = nn.Linear(embeds.shape[1], num_classes)
    optimizer = optim.Adam(clf.parameters(), lr=0.05)
    best_acc, best_logits = 0, None

    for epoch in range(200):
        clf.train()
        logits = clf(embeds[train_mask])
        loss = F.cross_entropy(logits, labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            clf.eval()
            test_logits = clf(embeds[test_mask])
            test_acc = accuracy(test_logits, labels[test_mask])
            if test_acc > best_acc:
                # fix: track the best accuracy too, otherwise best_logits is
                # overwritten every epoch and "best" degenerates to "last"
                best_acc, best_logits = test_acc, test_logits

    micro_f1, macro_f1 = micro_macro_f1_score(best_logits, labels[test_mask])
    y_score = best_logits.softmax(dim=1).numpy()
    auc = roc_auc_score(labels[test_mask].numpy(), y_score, multi_class='ovr')
    return micro_f1, macro_f1, auc

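# micro_macro_f1_score is a small helper used above; a hypothetical sketch
# built on scikit-learn's f1_score (the repo's actual helper may differ):
from sklearn.metrics import f1_score

def micro_macro_f1_score(logits, labels):
    pred = logits.argmax(dim=1).cpu().numpy()
    y = labels.cpu().numpy()
    return f1_score(y, pred, average='micro'), f1_score(y, pred, average='macro')
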
def evaluate(model, g, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    return accuracy(logits[mask], labels[mask])

def evaluate(model, feats, labels, mask):
    model.eval()
    with torch.no_grad():  # disable autograd during evaluation, as in the other variants
        logits = model(feats)
    return accuracy(logits[mask], labels[mask])

def evaluate(model, feats, labels):
    model.eval()
    with torch.no_grad():
        logits = model(feats)
    return accuracy(logits, labels)

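# All of the functions above also rely on an `accuracy` helper. A minimal
# sketch consistent with its usage here (hypothetical; the real helper may differ):
def accuracy(logits, labels):
    # fraction of samples whose argmax prediction matches the ground-truth label
    return (logits.argmax(dim=1) == labels).float().mean().item()
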