def main():
    parser = argparse.ArgumentParser(description='OGBG-MolHiv')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=5)
    parser.add_argument('--emb_dim', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--eval', action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = DglGraphPropPredDataset(name='ogbg-molhiv')
    split_idx = dataset.get_idx_split()
    evaluator = Evaluator(name='ogbg-molhiv')

    train_loader = GraphDataLoader(dataset[split_idx["train"]],
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    val_loader = GraphDataLoader(dataset[split_idx["valid"]],
                                 batch_size=args.eval_batch_size,
                                 shuffle=True, num_workers=0)
    test_loader = GraphDataLoader(dataset[split_idx["test"]],
                                  batch_size=args.eval_batch_size,
                                  shuffle=True, num_workers=0)

    model = GCN(args.emb_dim,
                num_classes=dataset.num_tasks,
                num_layers=args.num_layers,
                dropout=args.dropout).to(device)

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, args.epochs + 1):
            t0 = time.time()
            loss = train(model, device, train_loader, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            val_rocauc = test(model, device, val_loader,
                              evaluator)[dataset.eval_metric]
            test_rocauc = test(model, device, test_loader,
                               evaluator)[dataset.eval_metric]
            # No train metric is tracked; log a placeholder instead.
            logger.add_result(run, (0.0, val_rocauc, test_rocauc))

            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Valid: {val_rocauc:.4f} '
                      f'Test: {test_rocauc:.4f}')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
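# NOTE: `train`, `test`, `GCN` and `Logger` are helpers defined elsewhere in
# this repo. A minimal sketch of what `train` and `test` might look like for
# this binary graph-property task, assuming the loader yields
# (batched_graph, labels) pairs and the model takes a batched DGLGraph with
# node features in g.ndata['feat']:

def train(model, device, loader, optimizer):
    model.train()
    loss_fn = torch.nn.BCEWithLogitsLoss()
    total_loss, n_graphs = 0.0, 0
    for g, labels in loader:
        g, labels = g.to(device), labels.to(device)
        logits = model(g, g.ndata['feat'])
        loss = loss_fn(logits, labels.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * labels.size(0)
        n_graphs += labels.size(0)
    return total_loss / n_graphs


@torch.no_grad()
def test(model, device, loader, evaluator):
    model.eval()
    y_true, y_pred = [], []
    for g, labels in loader:
        g = g.to(device)
        y_pred.append(model(g, g.ndata['feat']).cpu())
        y_true.append(labels)
    # The OGB evaluator consumes stacked labels/predictions and returns a
    # dict keyed by the dataset's eval metric (here 'rocauc').
    return evaluator.eval({'y_true': torch.cat(y_true, dim=0),
                           'y_pred': torch.cat(y_pred, dim=0)})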
def main():
    parser = argparse.ArgumentParser(description='GraphSAGE')
    parser.add_argument("--dataset", type=str, default='reddit')
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--dropout", type=float, default=0.5,
                        help="dropout probability")
    parser.add_argument("--lr", type=float, default=1e-2,
                        help="learning rate")
    parser.add_argument("--epochs", type=int, default=200,
                        help="number of training epochs")
    parser.add_argument("--n-hidden", type=int, default=16,
                        help="number of hidden gcn units")
    parser.add_argument("--aggr", type=str, choices=['sum', 'mean'],
                        default='mean', help='Aggregation for messages')
    parser.add_argument("--weight-decay", type=float, default=5e-4,
                        help="Weight for L2 loss")
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # torch.BoolTensor is only available in newer PyTorch versions; fall
    # back to ByteTensor masks otherwise.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # In PyG, removing duplicate edges is a default pre-processing step for
    # Reddit, see
    # https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/datasets/reddit.py#L58
    g = data.graph
    g = g.int().to(device)

    # create GraphSAGE model
    model = GraphSAGE(g, in_feats, args.n_hidden, n_classes, args.aggr,
                      F.relu, args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(args.epochs):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(
                model, features, labels, train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))
            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | "
                "Val {:.4f} | Test {:.4f}".format(
                    run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
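# NOTE: `evaluate` is an assumed helper shared by the full-batch scripts; a
# minimal sketch, assuming the model is bound to the graph and takes only
# the feature matrix, as in the call above:

@torch.no_grad()
def evaluate(model, features, labels, train_mask, val_mask, test_mask):
    model.eval()
    preds = model(features).argmax(dim=-1)
    accs = []
    for mask in (train_mask, val_mask, test_mask):
        accs.append((preds[mask] == labels[mask]).float().mean().item())
    return tuple(accs)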
def main():
    parser = argparse.ArgumentParser(
        description='OGBN-Arxiv (GraphSAGE Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()

    g, labels = dataset[0]
    feats = g.ndata['feat']
    # ogbn-arxiv is a directed citation graph; make it bidirected so that
    # messages flow both ways. (Extract the features first, since
    # to_bidirected does not keep node data.)
    g = dgl.to_bidirected(g)
    g = g.int().to(device)
    feats, labels = feats.to(device), labels.to(device)
    train_idx = split_idx['train'].to(device)

    model = GraphSAGE(in_feats=feats.size(-1),
                      hidden_feats=args.hidden_channels,
                      out_feats=dataset.num_classes,
                      num_layers=args.num_layers,
                      dropout=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, g, feats, labels, train_idx, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            result = test(model, g, feats, labels, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
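# NOTE: `train` and `test` above are assumed helpers; a minimal sketch using
# the OGB Evaluator, assuming the model takes (graph, features). Labels have
# shape [N, 1], and the evaluator expects class indices of the same shape:

def train(model, g, feats, labels, train_idx, optimizer):
    model.train()
    logits = model(g, feats)
    loss = F.cross_entropy(logits[train_idx], labels.squeeze(1)[train_idx])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test(model, g, feats, labels, split_idx, evaluator):
    model.eval()
    y_pred = model(g, feats).argmax(dim=-1, keepdim=True)
    accs = []
    for split in ('train', 'valid', 'test'):
        idx = split_idx[split].to(labels.device)
        accs.append(evaluator.eval({'y_true': labels[idx],
                                    'y_pred': y_pred[idx]})['acc'])
    return tuple(accs)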
def main(): parser = argparse.ArgumentParser(description='GAT') parser.add_argument("--dataset", type=str) parser.add_argument("--device", type=int, default=0) parser.add_argument("--num-layers", type=int, default=3, help="number of hidden layers") parser.add_argument("--lr", type=float, default=0.005, help="learning rate") parser.add_argument('--weight-decay', type=float, default=5e-4, help="weight decay") parser.add_argument("--num-hidden", type=int, default=8, help="number of hidden units") parser.add_argument("--dropout", type=float, default=.6, help="Dropout to use") parser.add_argument('--epochs', type=int, default=200) parser.add_argument("--eval", action='store_true', help='If not set, we will only do the training part.') parser.add_argument("--runs", type=int, default=10) args = parser.parse_args() print(args) device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' device = torch.device(device) path = osp.join('dataset', args.dataset) dataset = Planetoid(path, args.dataset, transform=T.NormalizeFeatures()) data = dataset[0] features = data.x.to(device) labels = data.y.to(device) edge_index = data.edge_index.to(device) adj = SparseTensor(row=edge_index[0], col=edge_index[1]) train_mask = torch.BoolTensor(data.train_mask).to(device) val_mask = torch.BoolTensor(data.val_mask).to(device) test_mask = torch.BoolTensor(data.test_mask).to(device) model = GAT(num_layers=args.num_layers, in_feats=features.size(-1), num_hidden=args.num_hidden, num_classes=dataset.num_classes, heads=[8, 8, 1], dropout=args.dropout).to(device) loss_fcn = nn.CrossEntropyLoss() logger = Logger(args.runs, args) dur = [] for run in range(args.runs): model.reset_parameters() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) for epoch in range(1, args.epochs + 1): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(features, adj) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: dur.append(time.time() - t0) print('Training time/epoch {}'.format(np.mean(dur))) if not args.eval: continue train_acc, val_acc, test_acc = evaluate(model, features, adj, labels, train_mask, val_mask, test_mask) logger.add_result(run, (train_acc, val_acc, test_acc)) print( "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}" .format(run, epoch, loss.item(), train_acc, val_acc, test_acc)) if args.eval: logger.print_statistics(run) if args.eval: logger.print_statistics()
def main(args):
    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', 'Reddit')
    dataset = Reddit(path)
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GraphSAGE(dataset.num_features, args.n_hidden,
                      dataset.num_classes, args.aggr, F.relu,
                      args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            model.train()
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, adj)
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            train_acc, val_acc, test_acc = evaluate(
                model, features, adj, labels, train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))
            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | "
                "Val {:.4f} | Test {:.4f}".format(
                    run, epoch, loss.item(), train_acc, val_acc, test_acc))

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def main():
    parser = argparse.ArgumentParser(description='ENZYMES')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=4)
    parser.add_argument('--hidden_size', type=int, default=128)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--eval', action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = LegacyTUDataset('ENZYMES')

    # 80/10/10 train/valid/test split with a fixed shuffling seed.
    num_samples = len(dataset)
    indices = np.arange(num_samples)
    np.random.seed(42)
    np.random.shuffle(indices)
    train_set = dgl.data.utils.Subset(dataset,
                                      indices[:int(num_samples * 0.8)])
    val_set = dgl.data.utils.Subset(
        dataset, indices[int(num_samples * 0.8):int(num_samples * 0.9)])
    test_set = dgl.data.utils.Subset(
        dataset, indices[int(num_samples * 0.9):int(num_samples)])

    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    val_loader = GraphDataLoader(val_set, batch_size=args.eval_batch_size,
                                 shuffle=True, num_workers=0)
    test_loader = GraphDataLoader(test_set, batch_size=args.eval_batch_size,
                                  shuffle=True, num_workers=0)

    # ENZYMES node attributes are 18-dimensional.
    model = GCN(18, args.hidden_size,
                num_classes=int(dataset.num_labels),
                num_layers=args.num_layers,
                dropout=args.dropout).to(device)

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, args.epochs + 1):
            t0 = time.time()
            loss = train(model, device, train_loader, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            val_acc = test(model, device, val_loader)
            test_acc = test(model, device, test_loader)
            # No train metric is tracked; log a placeholder instead.
            logger.add_result(run, (0.0, val_acc, test_acc))

            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Valid: {val_acc * 100:.4f}% '
                      f'Test: {test_acc * 100:.4f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
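# NOTE: `test` here is an assumed helper; a minimal sketch for this
# multi-class graph-classification setting, assuming the loader yields
# (graph, label) pairs with node features in g.ndata['feat']. The matching
# `train` would mirror the ogbg-molhiv sketch above, with F.cross_entropy
# as the loss.

@torch.no_grad()
def test(model, device, loader):
    model.eval()
    correct, total = 0, 0
    for g, labels in loader:
        g, labels = g.to(device), labels.to(device)
        preds = model(g, g.ndata['feat']).argmax(dim=-1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return correct / total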
def main():
    parser = argparse.ArgumentParser(
        description='OGBN-Proteins (RGCN Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--num-layers', type=int, default=3)
    parser.add_argument('--hidden-feats', type=int, default=32)
    parser.add_argument('--dropout', type=float, default=0.)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    # Full-batch training runs out of memory on a 16GB GPU, so use the CPU.
    device = 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-proteins')
    data = dataset[0]

    y_true = data.y.to(device)
    # ogbn-proteins has no input node features; use a constant scalar
    # feature per node instead.
    node_feats = torch.ones((y_true.shape[0], 1)).to(device)
    edge_index = data.edge_index.to(device)
    edge_weights = data.edge_attr.to(device)

    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)

    # Treat each edge-feature channel as its own relation.
    adjs = []
    for t in range(edge_weights.shape[-1]):
        adjs.append(SparseTensor(row=edge_index[0], col=edge_index[1],
                                 value=edge_weights[:, t]))

    model = RGCN(num_layers=args.num_layers,
                 in_feats=node_feats.shape[-1],
                 hidden_feats=args.hidden_feats,
                 out_feats=y_true.shape[-1],
                 num_relations=edge_weights.shape[-1],
                 dropout=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-proteins')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, node_feats, adjs, y_true, train_idx,
                         optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            if epoch % args.eval_steps == 0:
                result = test(model, node_feats, adjs, y_true, split_idx,
                              evaluator)
                logger.add_result(run, result)

                if epoch % args.log_steps == 0:
                    train_rocauc, valid_rocauc, test_rocauc = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_rocauc:.2f}%, '
                          f'Valid: {100 * valid_rocauc:.2f}% '
                          f'Test: {100 * test_rocauc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
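# NOTE: `train` is an assumed helper; a minimal sketch for this multi-label
# task (112 binary targets per node, hence BCE with logits), assuming the
# model takes the node features plus the list of per-relation adjacencies:

def train(model, node_feats, adjs, y_true, train_idx, optimizer):
    model.train()
    logits = model(node_feats, adjs)
    loss = F.binary_cross_entropy_with_logits(logits[train_idx],
                                              y_true[train_idx].float())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()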
def main(): parser = argparse.ArgumentParser(description='GAT') parser.add_argument("--dataset", type=str, default='reddit') parser.add_argument("--device", type=int, default=0) parser.add_argument("--num-layers", type=int, default=3, help="number of hidden layers") parser.add_argument("--lr", type=float, default=0.0029739421726400865, help="learning rate") parser.add_argument('--weight-decay', type=float, default=2.4222556964495987e-05, help="weight decay") parser.add_argument("--num-hidden", type=int, default=16, help="number of hidden units") parser.add_argument("--dropout", type=float, default=0.18074706609292976, help="Dropout to use") parser.add_argument('--epochs', type=int, default=500) parser.add_argument("--eval", action='store_true', help='If not set, we will only do the training part.') parser.add_argument("--runs", type=int, default=10) args = parser.parse_args() print(args) # load and preprocess dataset data = load_data(args) features = torch.FloatTensor(data.features) labels = torch.LongTensor(data.labels) if hasattr(torch, 'BoolTensor'): train_mask = torch.BoolTensor(data.train_mask) val_mask = torch.BoolTensor(data.val_mask) test_mask = torch.BoolTensor(data.test_mask) else: train_mask = torch.ByteTensor(data.train_mask) val_mask = torch.ByteTensor(data.val_mask) test_mask = torch.ByteTensor(data.test_mask) in_feats = features.shape[1] n_classes = data.num_labels n_edges = data.graph.number_of_edges() print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % (n_edges, n_classes, train_mask.int().sum().item(), val_mask.int().sum().item(), test_mask.int().sum().item())) device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu' device = torch.device(device) # Remove duplicate edges # In PyG, this is a default pre-processing step for Reddit, see # https://github.com/rusty1s/pytorch_geometric/blob/master/torch_geometric/datasets/reddit.py#L58 g = data.graph g = dgl.add_self_loop(g) g = g.int().to(device) features, labels = features.to(device), labels.to(device) model = GAT(g=g, num_layers=args.num_layers, in_feats=in_feats, num_hidden=args.num_hidden, num_classes=n_classes, heads=[1, 1, 1], feat_drop=args.dropout, attn_drop=args.dropout) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() logger = Logger(args.runs, args) dur = [] for run in range(args.runs): model.reset_parameters() optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) for epoch in range(args.epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: dur.append(time.time() - t0) print('Training time/epoch {}'.format(np.mean(dur))) if not args.eval: continue train_acc, val_acc, test_acc = evaluate(model, features, labels, train_mask, val_mask, test_mask) logger.add_result(run, (train_acc, val_acc, test_acc)) print( "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}" .format(run, epoch, loss.item(), train_acc, val_acc, test_acc)) if args.eval: logger.print_statistics(run) if args.eval: logger.print_statistics()
def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (GAT Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument("--num-layers", type=int, default=3,
                        help="number of hidden layers")
    parser.add_argument("--lr", type=float, default=0.0029739421726400865,
                        help="learning rate")
    parser.add_argument('--weight-decay', type=float,
                        default=2.4222556964495987e-05, help="weight decay")
    parser.add_argument("--num-hidden", type=int, default=16,
                        help="number of hidden units")
    parser.add_argument("--dropout", type=float,
                        default=0.18074706609292976, help="Dropout to use")
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()

    g, labels = dataset[0]
    feats = g.ndata['feat'].to(device)
    labels = labels.to(device)
    train_idx = split_idx['train'].to(device)

    # Make the citation graph bidirected and add self-loops so that every
    # node has at least one incoming edge to attend over.
    g = dgl.to_bidirected(g)
    g = dgl.add_self_loop(g)
    g = g.int().to(device)
    print(g)

    model = GAT(num_layers=args.num_layers,
                in_feats=feats.size(-1),
                num_hidden=args.num_hidden,
                num_classes=dataset.num_classes,
                heads=[4, 4, 4],
                feat_drop=args.dropout,
                attn_drop=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, g, feats, labels, train_idx, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            result = test(model, g, feats, labels, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def main():
    parser = argparse.ArgumentParser(
        description='OGBN-Arxiv (GraphSAGE Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()

    g, labels = dataset[0]
    feats = jax.device_put(g.ndata['feat'], jax.devices()[0])
    # to_bidirected runs on CPU; move the graph back to the default device
    # afterwards.
    g = g.to(jax.devices("cpu")[0])
    g = dgl.to_bidirected(g)
    g = g.int()
    g = g.to(jax.devices()[0])
    train_idx = split_idx['train'].numpy()

    # This script is written against the old flax.nn Module API.
    _model = GraphSAGE.partial(in_feats=feats.shape[-1],
                               hidden_feats=args.hidden_channels,
                               out_feats=dataset.num_classes,
                               num_layers=args.num_layers,
                               dropout=args.dropout)
    _, initial_params = _model.init(jax.random.PRNGKey(0), g, feats)
    model = nn.Model(_model, initial_params)

    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        # Re-initialize the parameters at the start of each run.
        _, initial_params = _model.init(jax.random.PRNGKey(0), g, feats)
        model = nn.Model(_model, initial_params)
        optimizer = flax.optim.Adam(args.lr).create(model)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            optimizer, loss = train(model, g, feats, labels, train_idx,
                                    optimizer)
            # optimizer.target holds the model with the updated parameters;
            # keep `model` in sync so evaluation sees the trained weights.
            model = optimizer.target
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            result = test(model, g, feats, labels, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
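# NOTE: `train` is an assumed helper. A minimal sketch against the same
# (now-deprecated) flax.optim API used above, assuming `jnp` is jax.numpy
# and `labels` is still a torch tensor of shape [N, 1]:

def train(model, g, feats, labels, train_idx, optimizer):
    y = labels.squeeze(1).numpy()[train_idx]

    def loss_fn(m):
        # softmax cross-entropy on the training nodes
        log_probs = jax.nn.log_softmax(m(g, feats)[train_idx])
        one_hot = jax.nn.one_hot(y, log_probs.shape[-1])
        return -jnp.mean(jnp.sum(one_hot * log_probs, axis=-1))

    loss, grad = jax.value_and_grad(loss_fn)(optimizer.target)
    optimizer = optimizer.apply_gradient(grad)
    return optimizer, loss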
def main():
    parser = argparse.ArgumentParser(description='ENZYMES')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=4)
    parser.add_argument('--hidden_size', type=int, default=128)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--eval', action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = TUDataset('dataset', name='ENZYMES', use_node_attr=True)
    dataset = dataset.shuffle()

    # 80/10/10 train/valid/test split.
    train_loader = DataLoader(dataset[:len(dataset) // 10 * 8],
                              batch_size=args.batch_size, shuffle=True,
                              num_workers=args.num_workers)
    val_loader = DataLoader(dataset[len(dataset) // 10 * 8:
                                    len(dataset) // 10 * 9],
                            batch_size=args.eval_batch_size, shuffle=False,
                            num_workers=0)
    test_loader = DataLoader(dataset[len(dataset) // 10 * 9:],
                             batch_size=args.eval_batch_size, shuffle=False,
                             num_workers=0)

    model = GCN(dataset.num_features, args.hidden_size,
                num_classes=dataset.num_classes,
                num_layers=args.num_layers,
                dropout=args.dropout).to(device)

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, args.epochs + 1):
            t0 = time.time()
            loss = train(model, device, train_loader, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            val_acc = test(model, device, val_loader)
            test_acc = test(model, device, test_loader)
            # No train metric is tracked; log a placeholder instead.
            logger.add_result(run, (0.0, val_acc, test_acc))

            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Valid: {val_acc * 100:.2f}% '
                      f'Test: {test_acc * 100:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def main():
    parser = argparse.ArgumentParser(
        description='OGBN-Proteins (RGCN Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--num-layers', type=int, default=3)
    parser.add_argument('--hidden-feats', type=int, default=32)
    parser.add_argument('--dropout', type=float, default=0.)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = DglNodePropPredDataset(name='ogbn-proteins')

    graph, y_true = dataset[0]
    graph = graph.int().to(device)
    y_true = y_true.to(device)
    # ogbn-proteins has no input node features; use a constant scalar
    # feature per node instead.
    node_feats = torch.ones((graph.number_of_nodes(), 1)).to(device)

    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)

    # Treat each edge-feature channel as its own relation.
    edge_weights = []
    for t in range(graph.edata['feat'].shape[-1]):
        edge_weights.append(graph.edata['feat'][:, t:t + 1].to(device))

    model = RGCN(num_layers=args.num_layers,
                 in_feats=node_feats.shape[-1],
                 hidden_feats=args.hidden_feats,
                 out_feats=y_true.shape[-1],
                 num_relations=len(edge_weights),
                 dropout=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-proteins')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, graph, node_feats, edge_weights, y_true,
                         train_idx, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            if epoch % args.eval_steps == 0:
                result = test(model, graph, node_feats, edge_weights, y_true,
                              split_idx, evaluator)
                logger.add_result(run, result)

                if epoch % args.log_steps == 0:
                    train_rocauc, valid_rocauc, test_rocauc = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_rocauc:.2f}%, '
                          f'Valid: {100 * valid_rocauc:.2f}% '
                          f'Test: {100 * test_rocauc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
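# NOTE: `Logger` is an assumed helper shared by all of the scripts above; a
# minimal sketch of the interface they rely on (per-epoch add_result plus
# per-run and overall statistics), loosely modeled on the OGB examples:

class Logger:
    def __init__(self, runs, args=None):
        self.args = args
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        # `result` is a (train, valid, test) metric triple for one epoch.
        self.results[run].append(result)

    def print_statistics(self, run=None):
        if run is not None:
            result = 100 * torch.tensor(self.results[run])
            best_epoch = result[:, 1].argmax().item()
            print(f'Run {run + 1:02d}: '
                  f'Highest Valid: {result[:, 1].max():.2f}, '
                  f'Final Test: {result[best_epoch, 2]:.2f}')
        else:
            result = 100 * torch.tensor(self.results)
            best_valid = result[:, :, 1].max(dim=1).values
            print(f'All runs: Highest Valid: '
                  f'{best_valid.mean():.2f} ± {best_valid.std():.2f}')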