def __init__(self, config, device=None):
    """Build model, optimizer, loss, LR scheduler and HighD dataloaders.

    Args:
        config: dict of hyper-parameters (num_feats, hidden_size, lr, ...).
        device: optional explicit torch.device; when None, auto-select
            CUDA if available.
    """
    if device is None:
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
    else:
        # BUG FIX: the original ignored the explicit `device` argument and
        # read config['device'] instead — honor the argument when given.
        self.device = device
    self.model = MyModel(num_feats=config['num_feats'],
                         output_dim=config['num_feats'],
                         hidden_size=config['hidden_size'],
                         num_layers=config['num_layers'],
                         seq_len=config['X_len'],
                         horizon=config['Y_len'],
                         device=self.device,
                         bidirectional=bool(config['bidirectional'])).to(
                             self.device)
    self.optimizer = torch.optim.AdamW(self.model.parameters(),
                                       lr=config["lr"])
    self.criterion = MyLoss(num_feats=config['num_feats'],
                            loss_type=config['loss_type']).to(self.device)
    # LR schedule (cosine annealing), cf. https://arxiv.org/pdf/1812.01187.pdf
    epochs = config['epochs']
    self.scheduler = lr_scheduler.CosineAnnealingLR(self.optimizer, epochs,
                                                    eta_min=0, last_epoch=-1)
    self.epoch = 0
    self.best_loss = 99999
    HighD_dataset = HighD_Dataset(X_len=config['X_len'],
                                  X_step=config['X_step'],
                                  Y_len=config['Y_len'],
                                  Y_step=config['Y_step'],
                                  diff=config['diff'],
                                  name='data_01',
                                  raw_dir='./dataset/',
                                  preprocess_all=True,
                                  device=self.device)
    n_val = int(len(HighD_dataset) * config['val_percent'])
    n_train = len(HighD_dataset) - n_val
    # Fixed seed keeps the train/val split reproducible across runs.
    train_dataset, val_dataset = random_split(
        HighD_dataset, [n_train, n_val],
        generator=torch.Generator().manual_seed(2021))
    # BUG FIX: `self.device == "cuda"` compared a torch.device to a str and
    # could silently disable pin_memory; inspect the device type instead
    # (works whether self.device is a str or a torch.device).
    use_pin = torch.device(self.device).type == 'cuda'
    self.train_dataloader = GraphDataLoader(
        train_dataset, batch_size=32, shuffle=True, pin_memory=use_pin)
    self.val_dataloader = GraphDataLoader(
        val_dataset, batch_size=32, shuffle=False, pin_memory=use_pin)
    print("Dataset Ready!")
def __init__(self, dataset, batch_size, device, collate_fn=None, seed=0, shuffle=True, split_name='fold10', fold_idx=0, split_ratio=0.7):
    """Split *dataset* into train/validation subsets and build both loaders.

    `split_name` selects the strategy: 'fold10' (k-fold by fold_idx) or
    'rand' (single random split by split_ratio).
    """
    self.shuffle = shuffle
    self.seed = seed
    # Pin host memory only when batches are destined for a CUDA device.
    self.kwargs = {'pin_memory': True} if 'cuda' in device.type else {}
    labels = [label for _, label in dataset]
    if split_name == 'fold10':
        train_idx, valid_idx = self._split_fold10(labels, fold_idx, seed,
                                                  shuffle)
    elif split_name == 'rand':
        train_idx, valid_idx = self._split_rand(labels, split_ratio, seed,
                                                shuffle)
    else:
        raise NotImplementedError()
    # Both loaders share the dataset; only the index sampler differs.
    self.train_loader, self.valid_loader = (
        GraphDataLoader(dataset,
                        sampler=SubsetRandomSampler(indices),
                        batch_size=batch_size,
                        collate_fn=collate_fn,
                        **self.kwargs)
        for indices in (train_idx, valid_idx))
def main(args):
    """Train a SAG-pool graph classifier with early stopping on val loss.

    Returns (final_test_acc, mean_train_time_per_epoch).
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ===
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)
    # add self loop. "add_self_loop" does not support batched graphs, so we
    # add it per graph here.
    for idx in range(len(dataset)):
        dataset.graph_lists[idx] = dgl.add_self_loop(dataset.graph_lists[idx])
    n_train = int(len(dataset) * 0.8)
    n_val = int(len(dataset) * 0.1)
    n_test = len(dataset) - n_val - n_train
    train_set, val_set, test_set = random_split(dataset,
                                                [n_train, n_val, n_test])
    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True, num_workers=6)
    val_loader = GraphDataLoader(val_set, batch_size=args.batch_size,
                                 num_workers=2)
    test_loader = GraphDataLoader(test_set, batch_size=args.batch_size,
                                  num_workers=2)
    device = torch.device(args.device)
    # Step 2: Create model ===================================================
    num_feature, num_classes, _ = dataset.statistics()
    model_op = get_sag_network(args.architecture)
    model = model_op(in_dim=num_feature, hid_dim=args.hid_dim,
                     out_dim=num_classes, num_convs=args.conv_layers,
                     pool_ratio=args.pool_ratio,
                     dropout=args.dropout).to(device)
    args.num_feature = int(num_feature)
    args.num_classes = int(num_classes)
    # Step 3: Create training components =====================================
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # Step 4: training epochs ================================================
    patience_counter = 0
    best_val_loss = float("inf")
    final_test_acc = 0.
    best_epoch = 0
    train_times = []
    for e in range(args.epochs):
        tic = time()
        train_loss = train(model, optimizer, train_loader, device)
        train_times.append(time() - tic)
        val_acc, val_loss = test(model, val_loader, device)
        test_acc, _ = test(model, test_loader, device)
        if val_loss < best_val_loss:
            # New best validation loss: keep the matching test accuracy.
            best_val_loss = val_loss
            final_test_acc = test_acc
            patience_counter = 0
            best_epoch = e + 1
        else:
            patience_counter += 1
            if patience_counter >= args.patience:
                break
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, val_acc, final_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(best_epoch,
                                                        final_test_acc))
    return final_test_acc, sum(train_times) / len(train_times)
def main(args):
    """Train a graph classifier with degree/label node features.

    Returns (best_test_acc, mean_train_time_per_epoch).
    """
    # Step 1: Prepare graph data ============================================
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)
    # "add_self_loop" does not support batched graphs; normalize each graph
    # to exactly one self loop per node.
    for idx in range(len(dataset)):
        g = dgl.remove_self_loop(dataset.graph_lists[idx])
        dataset.graph_lists[idx] = dgl.add_self_loop(g)
    # preprocess: use node degree/label as node feature
    if args.degree_as_feature:
        dataset = degree_as_feature(dataset)
        mode = "concat"
    else:
        mode = "replace"
    dataset = node_label_as_feature(dataset, mode=mode)
    num_training = int(len(dataset) * 0.9)
    train_set, test_set = random_split(
        dataset, [num_training, len(dataset) - num_training])
    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True, num_workers=1)
    test_loader = GraphDataLoader(test_set, batch_size=args.batch_size,
                                  num_workers=1)
    device = torch.device(args.device)
    # Step 2: Create model ===================================================
    num_feature, num_classes, _ = dataset.statistics()
    args.in_dim = int(num_feature)
    args.out_dim = int(num_classes)
    args.edge_feat_dim = 0  # No edge feature in datasets that we use.
    model = GraphClassifier(args).to(device)
    # Step 3: Create training components =====================================
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 amsgrad=True,
                                 weight_decay=args.weight_decay)
    # Step 4: training epochs — keep the best test accuracy seen so far =====
    best_test_acc, best_epoch = 0.0, -1
    train_times = []
    for e in range(args.epochs):
        tic = time()
        train_loss = train(model, optimizer, train_loader, device, e,
                           args.epochs)
        train_times.append(time() - tic)
        test_acc = test(model, test_loader, device)
        if test_acc > best_test_acc:
            best_test_acc, best_epoch = test_acc, e + 1
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, test_acc={:.4f}, best_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, test_acc,
                                    best_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(best_epoch,
                                                        best_test_acc))
    return best_test_acc, sum(train_times) / len(train_times)
def load_ogbg(name, device=th.device('cpu'), root='/home/eva_share_users/zhuyu'):
    """Load an OGB graph-property-prediction dataset and build dataloaders.

    Augments each graph's node features with the mean of its incoming edge
    features, then returns
    (train_loader, valid_loader, test_loader, in_channels, out_channels).
    """
    from ogb.graphproppred import DglGraphPropPredDataset
    from tqdm import tqdm
    print('load', name)
    data = DglGraphPropPredDataset(name=name, root=root)
    out_channels = 0
    in_channels = 0
    for graph in tqdm(data):
        g = graph[0]
        if name == 'ogbg-ppa':
            # ppa graphs have no node features; use mean incoming edge feats.
            g.ndata['feat'] = dgl.ops.copy_e_mean(g, g.edata['feat'])
        else:
            # Append per-node mean of incoming edge features.
            # PERF FIX: replaces the original O(N*E) per-node mask/matmul
            # loop with a single O(E) index_add_; the per-node result
            # (sum of edge feats with dst == i, divided by in-degree) is
            # identical. Feature width taken from the data instead of the
            # hard-coded 3.
            ef = g.edata['feat'].float()
            dst = g.edges()[1]
            H = th.zeros(g.num_nodes(), ef.shape[1])
            H.index_add_(0, dst, ef)
            H /= g.in_degrees().unsqueeze(1).float()
            g.ndata['feat'] = th.cat((g.ndata['feat'], H), dim=1)
        in_channels = g.ndata['feat'].shape[1]
        try:
            out_channels = max(out_channels, int(graph[1]))
        except (TypeError, ValueError) as err:
            # BUG FIX: was a bare `except:` that dropped into an IPython
            # shell (debug leftover); fail loudly instead.
            raise ValueError(
                'cannot interpret label of {} as an int'.format(name)) from err
    split_idx = data.get_idx_split()
    print('finish loading', name)
    from dgl.dataloading import GraphDataLoader
    loaders = {
        split: GraphDataLoader(data[split_idx[split]], batch_size=256,
                               shuffle=True)
        for split in ('train', 'valid', 'test')
    }
    # Labels are 0-based class ids, so class count is max label + 1.
    return (loaders['train'], loaders['valid'], loaders['test'],
            in_channels, out_channels + 1)
def get_dataloaders(dataset, seed, batch_size=32):
    """Build 80/10/10 train/val/test loaders; the train loader is DDP-aware."""
    # Use a 80:10:10 train-val-test split
    subsets = split_dataset(dataset, frac_list=[0.8, 0.1, 0.1], shuffle=True,
                            random_state=seed)
    train_set, val_set, test_set = subsets
    train_loader = GraphDataLoader(train_set, use_ddp=True,
                                   batch_size=batch_size, shuffle=True)
    val_loader = GraphDataLoader(val_set, batch_size=batch_size)
    test_loader = GraphDataLoader(test_set, batch_size=batch_size)
    return train_loader, val_loader, test_loader
def test_all(self, dataset: AllDataset, output_dir: str = "test_result"):
    """Run the model over dataset.test and dump every track DataFrame to CSV.

    Output files are numbered sequentially (1.csv, 2.csv, ...) in output_dir.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(
            f"make new dir {os.path.abspath(output_dir)}, and write files into it."
        )
    else:
        print(f'output dir {os.path.abspath(output_dir)} exists !')
    self.load()
    self.eval()
    loader = GraphDataLoader(dataset.test,
                             collate_fn=collate,
                             batch_size=10,
                             shuffle=False,
                             drop_last=False)
    start_time = time.time()
    csv_index = 1
    for bhg, info in loader:
        self.forward(bhg)
        # Walk each graph of the batch alongside its metadata record.
        for component_graph, component_info in zip(dgl.unbatch(bhg), info):
            frames = graph_and_info_to_df_list(component_graph,
                                               component_info)
            # todo
            # pd.set_option('display.max_columns', 10000)
            # print(frames[0])
            for frame in frames:
                frame.to_csv(os.path.join(output_dir,
                                          str(csv_index) + ".csv"),
                             index=False)
                csv_index += 1
    self.train()
    print(
        f"test time is :{time.time() - start_time:6.2f} s | num_samples : {len(dataset.test)}"
    )
def test_model(self, dataset: AllDataset, output_dir: str = "test_result"):
    """Run the model on dataset.test and write one CSV per sample.

    Each CSV contains the original AGENT track rows plus 30 predicted
    (X, Y) points at 0.1 s intervals after the split time.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(
            f"make new dir {os.path.abspath(output_dir)}, and write files into it."
        )
    else:
        print(f'output dir {os.path.abspath(output_dir)} exists !')
    self.load()
    self.eval()
    data_loader = GraphDataLoader(dataset.test,
                                  collate_fn=collate,
                                  batch_size=10,
                                  shuffle=False,
                                  drop_last=False)
    start_time = time.time()
    for i, (bhg, info) in enumerate(data_loader):
        batch_size = len(info)
        self.forward(bhg)
        y_pred: torch.FloatTensor = bhg.nodes['agent'].data['predict']
        assert batch_size == y_pred.shape[0]
        print(f"\rprocessed {i+1}/{len(data_loader)} ", end="")
        for n, d in enumerate(info):
            st = float(d['split_time'])
            # radix holds the coordinate origin the predictions are
            # relative to — presumably; TODO confirm against the dataset.
            x, y = d['radix']['x'], d['radix']['y']
            # BUG FIX: np.float was removed in NumPy 1.24; use the builtin.
            timestamp = pd.Series(np.linspace(st + 0.1, st + 3.0, 30,
                                              dtype=float),
                                  name="TIMESTAMP")
            track_id = pd.Series([d['agent_track_id'] for _ in range(30)],
                                 name="TRACK_ID")
            object_type = pd.Series(["AGENT" for _ in range(30)],
                                    name="OBJECT_TYPE")
            x = pd.Series(y_pred[n, :, 0] + x, name="X")
            y = pd.Series(y_pred[n, :, 1] + y, name="Y")
            city_name = pd.Series([d['city'] for _ in range(30)],
                                  name="CITY_NAME")
            this_df = pd.DataFrame(
                list(zip(timestamp, track_id, object_type, x, y, city_name)),
                columns=("TIMESTAMP", "TRACK_ID", "OBJECT_TYPE", "X", "Y",
                         "CITY_NAME"))
            stack_df = pd.concat(objs=[d['df'], this_df])
            # keep only the AGENT object rows
            stack_df = stack_df[stack_df["OBJECT_TYPE"] == "AGENT"]
            stack_df.to_csv(os.path.join(output_dir,
                                         d['filename'] + ".csv"),
                            index=False)
            # pd.set_option('display.max_columns', 1000)
            # print(this_df)
    self.train()
    print(
        f"test time is :{time.time() - start_time:6.2f} s | num_samples : {len(dataset.test)}"
    )
def main():
    """Repeat-train GIN on a fixed 8:1:1 split and report mean/std accuracy."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--repeat", type=int, default=10)
    parser.add_argument('--dataset', type=str,
                        choices=['MUTAG', 'COLLAB', 'IMDBBINARY',
                                 'IMDBMULTI', 'NCI1', 'PROTEINS', 'PTC',
                                 'REDDITBINARY', 'REDDITMULTI5K'],
                        default='MUTAG')
    args = parser.parse_args()
    device = torch.device('cuda')
    dataset_ = GINDataset(args.dataset, False)
    dataset = DatasetAbstraction([g[0] for g in dataset_],
                                 [g[1] for g in dataset_])
    # 1. split dataset [fix split]
    indices = list(range(len(dataset)))
    random.seed(2021)
    random.shuffle(indices)
    fold = int(len(dataset) * 0.1)
    train_dataset = dataset[indices[:fold * 8]]
    val_dataset = dataset[indices[fold * 8:fold * 9]]
    test_dataset = dataset[indices[fold * 9:]]
    trainloader = GraphDataLoader(train_dataset, batch_size=32, shuffle=True)
    valloader = GraphDataLoader(val_dataset, batch_size=32, shuffle=False)
    testloader = GraphDataLoader(test_dataset, batch_size=32, shuffle=False)
    accs = []
    for seed in tqdm(range(args.repeat)):
        # set up seeds, args.seed supported
        set_seed(seed)
        model = GIN(5, 2, dataset_.dim_nfeats, 64, dataset_.gclasses, 0.5,
                    False, "sum", "sum").to(device)
        criterion = nn.CrossEntropyLoss()  # default reduction is mean
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
        model = train(model, trainloader, valloader, optimizer, criterion,
                      100, device)
        accs.append(eval_net(model, testloader, device))
    print('{:.2f} ~ {:.2f}'.format(np.mean(accs) * 100, np.std(accs) * 100))
def test_gcnnet_batched_graph(small_dataset):
    """Smoke test: GCNNet with 'flatten' readout forward-passes a batch."""
    net_params = NetParams.from_file("../graph_conn/configs/test_gcn.json")
    net_params.readout = 'flatten'
    model = GCNNet(net_params=net_params)
    loader = GraphDataLoader(small_dataset, batch_size=3)
    batched_graph, labels = next(iter(loader))
    node_feats = batched_graph.ndata['feat']
    edge_feats = batched_graph.edata['weight']
    out = model(batched_graph, node_feats, edge_feats)
    assert True  # reaching here means the forward pass did not raise
def make_loader(self, dataset):
    """
    Build train/test/val GraphDataLoaders from a conn_dataset split.

    Args:
        dataset: dataset instance from conn_dataset

    Returns:
        (train_loader, test_loader, val_loader)
    """
    split = dataset.get_split_idx(test_size=self.net_params.test_size,
                                  val_size=self.net_params.val_size)
    test_dataset = dataset[split['test']]
    train_dataset = dataset[split['train']]
    # BUG FIX: the validation set was built from split['train'], so it was a
    # copy of the training data and validation metrics were meaningless.
    # Use the validation indices instead. (Assumes get_split_idx returns a
    # 'val' key alongside 'train'/'test', matching its val_size argument —
    # TODO confirm.)
    val_dataset = dataset[split['val']]
    batch_size = self.net_params.batch_size
    train_loader = GraphDataLoader(train_dataset, batch_size=batch_size)
    test_loader = GraphDataLoader(test_dataset, batch_size=batch_size)
    val_loader = GraphDataLoader(val_dataset, batch_size=batch_size)
    return train_loader, test_loader, val_loader
def get_ppi():
    """Return batched train+val PPI graphs plus fresh train/valid/test sets.

    Every node is tagged with the index of the graph it came from via
    ndata['batch'] before batching.
    """
    train_dataset = PPIDataset(mode='train')
    val_dataset = PPIDataset(mode='valid')
    test_dataset = PPIDataset(mode='test')
    train_val_dataset = list(train_dataset) + list(val_dataset)
    for idx, data in enumerate(train_val_dataset):
        graph_ids = torch.zeros(data.number_of_nodes()) + idx
        data.ndata['batch'] = graph_ids.long()
    g = list(GraphDataLoader(train_val_dataset, batch_size=22, shuffle=True))
    return (g, PPIDataset(mode='train'), PPIDataset(mode='valid'),
            test_dataset)
def main():
    """Train HeteroClassifier on the user-item dataset for 5 epochs,
    printing the last batch loss per epoch."""
    dataset = UserItemDataset()
    loader = GraphDataLoader(dataset, batch_size=32, shuffle=True)
    model = HeteroClassifier(dataset.n_features, 20, dataset.num_classes,
                             dataset[0][0].etypes)
    opt = optim.Adam(model.parameters())
    for _epoch in range(5):
        for batched_graph, labels in loader:
            loss = F.cross_entropy(model(batched_graph), labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
        print(loss.item())
def main():
    """Train a graph classifier on MUTAG for 5 epochs, printing the last
    batch loss per epoch."""
    dataset = dgl.data.GINDataset('MUTAG', False)
    loader = GraphDataLoader(dataset, batch_size=32, shuffle=True)
    model = Model(dataset.dim_nfeats, 20, dataset.gclasses)
    opt = optim.Adam(model.parameters())
    for _epoch in range(5):
        for batched_graph, labels in loader:
            node_feats = batched_graph.ndata['attr'].float()
            loss = F.cross_entropy(model(batched_graph, node_feats), labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
        print(loss.item())
def predict(self):
    """Evaluate self.model on the full validation dataset.

    Returns:
        float: classification accuracy over all validation samples.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # NOTE(review): the sampler spans all indices, so this merely shuffles
    # the validation set rather than sub-sampling it — confirm intended.
    idx = torch.randperm(len(self.validationDataset))
    num_train = int(len(self.validationDataset))
    sampler = SubsetRandomSampler(idx[:num_train])
    dataloader = GraphDataLoader(self.validationDataset,
                                 sampler=sampler,
                                 batch_size=1,
                                 drop_last=False)
    num_correct = 0
    num_tests = 0
    for batched_graph, labels in dataloader:
        # NOTE(review): only the prediction is moved to `device`; the model
        # and inputs stay on their original device — confirm intended.
        pred = self.model(
            batched_graph,
            batched_graph.ndata[self.node_attr_key].float()).to(device)
        num_correct += (pred.argmax(1) == labels).sum().item()
        num_tests += len(labels)
    # (Removed the unused `predicted_labels` local from the original.)
    accuracy = num_correct / num_tests
    return accuracy
def val_model(self, dataset: AllDataset, return_to_plot=False):
    # Evaluate the model on dataset.val and print the mean Euclidean
    # displacement error (in meters) over all agent trajectory points.
    # When return_to_plot is True, batches of 1 are used and each batch is
    # also rendered via val_plot.
    if not self.training:
        # Not inside a training run: restore weights from checkpoint first.
        self.load()
    self.eval()
    data_loader = GraphDataLoader(
        dataset.val,
        collate_fn=collate,
        # Batch of 1 when plotting so each figure shows a single sample.
        batch_size=int(10 if not return_to_plot else 1),
        shuffle=False,
        drop_last=False)
    start_time = time.time()
    real_queue = deque()
    for i, (bhg, info) in enumerate(data_loader):
        print(
            f"\r {i+1}/{len(data_loader)} | elapse time: {time.time() - start_time}",
            end="")
        self.forward(bhg)
        agent_pred = bhg.nodes['agent'].data['predict']
        # Ground truth: agent state from timestep 20 onward.
        # NOTE(review): assumes 'state' is (agents, T, 2) with the
        # prediction horizon starting at t=20 — confirm against dataset.
        agent_true = bhg.nodes['agent'].data['state'][:, 20:, :]
        # Per-point L2 displacement, then averaged over all points.
        real_lose = torch.square(agent_pred - agent_true).flatten().view(
            -1, 2)
        real_lose = torch.sum(real_lose, dim=1)
        real_lose = torch.sqrt(real_lose)
        real_lose = torch.mean(real_lose)
        real_queue.append(real_lose)
        if return_to_plot:
            val_plot(bhg)
    print(
        "-------------------------------------evaluation---------------------------------------------"
    )
    print(
        f"val total time elapse: {time.time() - start_time:6.2f} s| #samples : {len(dataset.val)}"
        f" loss : {sum(real_queue) / len(real_queue):6.4f} m")
    print(
        "--------------------------------------------------------------------------------------------"
    )
    # Restore training mode for the caller.
    self.train()
def validate(self):
    """Retrain on 100% of the training data, stopping early once validation
    accuracy beats training accuracy and exceeds 0.6, then save the model.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Set training to 100% of the data, validate, and save a final model
    idx = torch.randperm(len(self.trainingDataset))
    num_train = int(len(self.trainingDataset))
    sampler = SubsetRandomSampler(idx[:num_train])
    dataloader = GraphDataLoader(self.trainingDataset,
                                 sampler=sampler,
                                 batch_size=self.hparams.batch_size,
                                 drop_last=False)
    # Once a model is chosen, train on all the data and save
    for e in range(self.hparams.epochs):
        num_correct = 0
        num_tests = 0
        for batched_graph, labels in dataloader:
            pred = self.model(
                batched_graph,
                batched_graph.ndata[self.node_attr_key].float()).to(device)
            if self.hparams.loss_function == "Negative Log Likelihood":
                logp = F.log_softmax(pred, 1)
                loss = F.nll_loss(logp, labels)
            elif self.hparams.loss_function == "Cross Entropy":
                loss = F.cross_entropy(pred, labels)
            else:
                # BUG FIX: an unrecognized loss_function previously fell
                # through to loss.backward() with `loss` undefined, raising
                # an opaque UnboundLocalError; fail with a clear message.
                raise ValueError("Unsupported loss function: " +
                                 str(self.hparams.loss_function))
            num_correct += (pred.argmax(1) == labels).sum().item()
            num_tests += len(labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        training_accuracy = num_correct / num_tests
        validation_accuracy = self.predict()
        if validation_accuracy >= training_accuracy and validation_accuracy > 0.6:
            break
    print("Validation - Stopped at Epoch:", e + 1)
    if self.hparams.checkpoint_path is not None:
        # Save the entire model
        torch.save(self.model, self.hparams.checkpoint_path)
def main(args):
    """Train ARMA4NC on PPI with early stopping on validation F1.

    Returns the test F1 of the best validation model.
    """
    # Step 1: Prepare graph data =============================================
    train_dataset = PPIDataset(mode='train')
    valid_dataset = PPIDataset(mode='valid')
    test_dataset = PPIDataset(mode='test')
    train_loader = GraphDataLoader(train_dataset,
                                   batch_size=args.batch_size, shuffle=True)
    valid_loader = GraphDataLoader(valid_dataset,
                                   batch_size=args.batch_size, shuffle=False)
    test_loader = GraphDataLoader(test_dataset,
                                  batch_size=args.batch_size, shuffle=False)
    graph = train_dataset[0]
    # check cuda
    device = (f'cuda:{args.gpu}'
              if args.gpu >= 0 and torch.cuda.is_available() else 'cpu')
    n_classes = train_dataset.num_labels
    n_features = graph.ndata['feat'].shape[1]
    # Step 2: Create model ===================================================
    model = ARMA4NC(in_dim=n_features,
                    hid_dim=args.hid_dim,
                    out_dim=n_classes,
                    num_stacks=args.num_stacks,
                    num_layers=args.num_layers,
                    activation=nn.ReLU(),
                    dropout=args.dropout).to(device)
    best_model = copy.deepcopy(model)
    # Step 3: Create training components =====================================
    loss_fn = nn.BCEWithLogitsLoss()
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lamb)
    # Step 4: training epochs ================================================
    f1 = 0
    no_improvement = 0
    epochs = trange(args.epochs, desc='F1 & Loss')
    for _ in epochs:
        train_loss, train_f1 = train(device, model, opt, loss_fn,
                                     train_loader)
        valid_loss, valid_f1 = evaluate(device, model, loss_fn, valid_loader)
        epochs.set_description(f'Train Loss {train_loss:.4f} | Train F1 {train_f1:.4f} | Valid Loss {valid_loss:.4f} | Valid F1 {valid_f1:.4f}')
        if valid_f1 < f1:
            no_improvement += 1
            if no_improvement == args.early_stopping:
                print('Early stop.')
                break
        else:
            # Validation F1 did not drop: reset patience, keep this model.
            no_improvement = 0
            f1 = valid_f1
            best_model = copy.deepcopy(model)
    _, test_f1 = evaluate(device, best_model, loss_fn, test_loader)
    print(f'Test F1 {test_f1:.4f}')
    return test_f1
def learn(model_params, experiment_number, dataset):
    # Train a Classifier on *dataset* with the hyper-parameters in
    # model_params, then evaluate on the held-out tail split.
    # NOTE(review): relies on module-level globals `log` (open file handle)
    # and `num_examples` (dataset size) — confirm they are defined before
    # calling.
    split_rate = model_params['split_rate']
    epochs = model_params['epochs']
    lr = model_params['lr']
    batch_size = model_params['batch_size']
    # Echo hyper-parameters to stdout and to the experiment log.
    print('-' * 50)
    print(f'Model Hyper-parameters')
    print('-' * 50)
    print(f'Epochs: {epochs}')
    print(f'Split Rate: {split_rate}')
    print(f'Learning Rate: {lr}')
    print(f'Batch Size: {batch_size}')
    print('-' * 50)
    log.write('-' * 100 + '\n')
    log.write(f'Experiment #{experiment_number}\n')
    log.write(f'Model Hyper-parameters\n')
    log.write('-' * 100 + '\n')
    log.write(f'Epochs: {epochs}\n')
    log.write(f'Split Rate: {split_rate}\n')
    log.write(f'Learning Rate: {lr}\n')
    log.write(f'Batch Size: {batch_size}\n')
    log.write('-' * 100 + '\n')
    log.flush()
    # Cap worker count at 80% of CPUs and never above the batch size.
    workers_count = min(int(multiprocessing.cpu_count() * 0.8), batch_size)
    # Contiguous head/tail split: first num_train indices train, rest test.
    num_train = int(num_examples * split_rate)
    train_sampler = SubsetRandomSampler(torch.arange(num_train))
    test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))
    train_dataloader = GraphDataLoader(dataset,
                                       sampler=train_sampler,
                                       batch_size=batch_size,
                                       drop_last=False,
                                       num_workers=workers_count)
    test_dataloader = GraphDataLoader(dataset,
                                      sampler=test_sampler,
                                      batch_size=batch_size,
                                      drop_last=False,
                                      num_workers=workers_count)
    # Model setup
    model = Classifier(1, 256, 5)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    # Training progress bar
    tqdm_train_descr_format = "Training GNN Feed-Forward model: Epoch Accuracy = {:02.4f}%, Loss = {:.8f}"
    tqdm_train_descr = tqdm_train_descr_format.format(0, float('inf'))
    tqdm_train_obj = tqdm(range(epochs), desc=tqdm_train_descr)
    # Training
    train_losses = []
    train_accuracy = []
    print(f'Training Starting...')
    log.write(f'Training Starting...' + '\n')
    log.flush()
    for i in tqdm_train_obj:
        epoch_corr = 0
        epoch_loss = 0
        total_samples = 0
        for b, (X_train, y_train) in enumerate(train_dataloader):
            y_prediction = model(X_train)
            loss = loss_func(y_prediction, y_train)
            # argmax over class logits gives the predicted class id.
            predicted = torch.max(y_prediction.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            epoch_corr += batch_corr.detach().item()
            epoch_loss += loss.detach().item()
            total_samples += y_prediction.shape[0]
            # Update parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_accuracy = epoch_corr * 100 / total_samples
        epoch_loss = epoch_loss / total_samples
        print(f'Epoch {i}, accuracy: {epoch_accuracy}, loss: {epoch_loss}')
        log.write(
            f'Epoch {i}, accuracy: {epoch_accuracy}, loss: {epoch_loss}\n')
        log.flush()
        train_losses.append(epoch_loss)
        train_accuracy.append(epoch_accuracy)
        tqdm_descr = tqdm_train_descr_format.format(epoch_accuracy,
                                                    epoch_loss)
        tqdm_train_obj.set_description(tqdm_descr)
    # Testing progress bar
    print(f'Testing Starting...')
    log.write(f'Testing Starting...' + '\n')
    log.flush()
    tqdm_test_descr_format = "Testing GNN Feed-Forward model: Batch Accuracy = {:02.4f}%"
    tqdm_test_descr = tqdm_test_descr_format.format(0)
    tqdm_test_obj = tqdm(test_dataloader, desc=tqdm_test_descr)
    num_of_batches = len(test_dataloader)
    model.eval()
    # Testing: report accuracy both for sampled predictions (multinomial
    # draw from the softmax) and for argmax predictions.
    total_test_sample = 0
    total_sampled_test_acc = 0
    total_argmax_test_acc = 0
    with torch.no_grad():
        for b, (X_test, y_test) in enumerate(tqdm_test_obj):
            predictions = model(X_test)
            y_test = torch.tensor(y_test).float().view(-1, 1)
            y_predicted = torch.softmax(predictions, 1)
            y_sampled = torch.multinomial(y_predicted, 1)
            y_argmax = torch.max(y_predicted, 1)[1].view(-1, 1)
            total_sampled_test_acc += (
                y_test == y_sampled.float()).sum().item()
            total_argmax_test_acc += (y_test == y_argmax.float()).sum().item()
            total_test_sample += predictions.shape[0]
            # tqdm_descr = tqdm_train_descr_format.format(total_sampled_test_acc)
            # tqdm_train_obj.set_description(tqdm_descr)
    print(f'The total number of test dataset: {total_test_sample}')
    print('Accuracy of sampled predictions on the test set: {:.4f}%'.format(
        total_sampled_test_acc * 100 / total_test_sample))
    print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
        total_argmax_test_acc * 100 / total_test_sample))
    log.write('-' * 100 + '\n')
    log.write(f'The total number of test dataset: {total_test_sample}\n')
    log.write(
        'Accuracy of sampled predictions on the test set: {:.4f}%\n'.format(
            total_sampled_test_acc * 100 / total_test_sample))
    log.write(
        'Accuracy of argmax predictions on the test set: {:4f}%\n'.format(
            total_argmax_test_acc * 100 / total_test_sample))
    log.write('-' * 100 + '\n')
    log.flush()
def main(args, print_fn=print):
    # SEAL link-prediction pipeline: build labeled subgraph datasets from an
    # OGB link dataset, train a GCN/DGCNN scorer, and report hits@k on the
    # validation and test splits every args.eval_steps epochs.
    print_fn("Experiment arguments: {}".format(args))
    if args.random_seed:
        torch.manual_seed(args.random_seed)
    else:
        torch.manual_seed(123)
    # Load dataset
    if args.dataset.startswith('ogbl'):
        graph, split_edge = load_ogb_dataset(args.dataset)
    else:
        raise NotImplementedError
    num_nodes = graph.num_nodes()
    # set gpu
    if args.gpu_id >= 0 and torch.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu_id)
    else:
        device = 'cpu'
    if args.dataset == 'ogbl-collab':
        # ogbl-collab dataset is multi-edge graph
        use_coalesce = True
    else:
        use_coalesce = False
    # Generate positive and negative edges and corresponding labels
    # Sampling subgraphs and generate node labeling features
    seal_data = SEALData(g=graph, split_edge=split_edge, hop=args.hop,
                         neg_samples=args.neg_samples,
                         subsample_ratio=args.subsample_ratio,
                         use_coalesce=use_coalesce, prefix=args.dataset,
                         save_dir=args.save_dir,
                         num_workers=args.num_workers, print_fn=print_fn)
    node_attribute = seal_data.ndata['feat']
    edge_weight = seal_data.edata['weight'].float()
    train_data = seal_data('train')
    val_data = seal_data('valid')
    test_data = seal_data('test')
    train_graphs = len(train_data.graph_list)
    # Set data loader
    train_loader = GraphDataLoader(train_data, batch_size=args.batch_size,
                                   num_workers=args.num_workers)
    val_loader = GraphDataLoader(val_data, batch_size=args.batch_size,
                                 num_workers=args.num_workers)
    test_loader = GraphDataLoader(test_data, batch_size=args.batch_size,
                                  num_workers=args.num_workers)
    # set model
    if args.model == 'gcn':
        model = GCN(num_layers=args.num_layers,
                    hidden_units=args.hidden_units,
                    gcn_type=args.gcn_type,
                    pooling_type=args.pooling,
                    node_attributes=node_attribute,
                    edge_weights=edge_weight,
                    node_embedding=None,
                    use_embedding=True,
                    num_nodes=num_nodes,
                    dropout=args.dropout)
    elif args.model == 'dgcnn':
        model = DGCNN(num_layers=args.num_layers,
                      hidden_units=args.hidden_units,
                      k=args.sort_k,
                      gcn_type=args.gcn_type,
                      node_attributes=node_attribute,
                      edge_weights=edge_weight,
                      node_embedding=None,
                      use_embedding=True,
                      num_nodes=num_nodes,
                      dropout=args.dropout)
    else:
        raise ValueError('Model error')
    model = model.to(device)
    parameters = model.parameters()
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    loss_fn = BCEWithLogitsLoss()
    print_fn("Total parameters: {}".format(
        sum([p.numel() for p in model.parameters()])))
    # train and evaluate loop
    summary_val = []
    summary_test = []
    for epoch in range(args.epochs):
        start_time = time.time()
        loss = train(model=model, dataloader=train_loader, loss_fn=loss_fn,
                     optimizer=optimizer, device=device,
                     num_graphs=args.batch_size, total_graphs=train_graphs)
        train_time = time.time()
        # Evaluate only every args.eval_steps epochs to save time.
        if epoch % args.eval_steps == 0:
            val_pos_pred, val_neg_pred = evaluate(model=model,
                                                  dataloader=val_loader,
                                                  device=device)
            test_pos_pred, test_neg_pred = evaluate(model=model,
                                                    dataloader=test_loader,
                                                    device=device)
            val_metric = evaluate_hits(args.dataset, val_pos_pred,
                                       val_neg_pred, args.hits_k)
            test_metric = evaluate_hits(args.dataset, test_pos_pred,
                                        test_neg_pred, args.hits_k)
            evaluate_time = time.time()
            print_fn("Epoch-{}, train loss: {:.4f}, hits@{}: val-{:.4f}, test-{:.4f}, "
                     "cost time: train-{:.1f}s, total-{:.1f}s".format(
                         epoch, loss, args.hits_k, val_metric, test_metric,
                         train_time - start_time,
                         evaluate_time - start_time))
            summary_val.append(val_metric)
            summary_test.append(test_metric)
    # Report the best test metric over all evaluated epochs.
    summary_test = np.array(summary_test)
    print_fn("Experiment Results:")
    print_fn("Best hits@{}: {:.4f}, epoch: {}".format(
        args.hits_k, np.max(summary_test), np.argmax(summary_test)))
def train(self):
    # K-fold cross-validated training over self.trainingDataset; per-fold,
    # per-epoch accuracy and loss are accumulated into the self.*_list
    # attributes, and (optionally) a checkpoint is saved per fold.
    # The number of folds (This should come from the hparams)
    k_folds = self.hparams.k_folds
    # Init the loss and accuracy reporting lists
    self.training_accuracy_list = []
    self.training_loss_list = []
    self.testing_accuracy_list = []
    self.testing_loss_list = []
    # Set fixed random number seed
    torch.manual_seed(42)
    # Define the K-fold Cross Validator
    kfold = KFold(n_splits=k_folds, shuffle=True)
    # K-fold Cross-validation model evaluation
    for fold, (train_ids, test_ids) in enumerate(
            kfold.split(self.trainingDataset)):
        epoch_training_loss_list = []
        epoch_training_accuracy_list = []
        epoch_testing_loss_list = []
        epoch_testing_accuracy_list = []
        # Sample elements randomly from a given list of ids, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
        # Define data loaders for training and testing data in this fold
        self.train_dataloader = GraphDataLoader(
            self.trainingDataset,
            sampler=train_subsampler,
            batch_size=self.hparams.batch_size,
            drop_last=False)
        self.test_dataloader = GraphDataLoader(
            self.trainingDataset,
            sampler=test_subsampler,
            batch_size=self.hparams.batch_size,
            drop_last=False)
        # Init the neural network (fresh weights for every fold)
        self.model.apply(reset_weights)
        # Run the training loop for defined number of epochs
        for _ in range(self.hparams.epochs):
            num_correct = 0
            num_tests = 0
            training_temp_loss_list = []
            # Iterate over the DataLoader for training data
            for batched_graph, labels in self.train_dataloader:
                # Zero the gradients
                self.optimizer.zero_grad()
                # Perform forward pass
                pred = self.model(
                    batched_graph,
                    batched_graph.ndata[self.node_attr_key].float())
                # Compute loss
                # NOTE(review): if loss_function matches neither string,
                # `loss` is unbound and loss.backward() below raises an
                # UnboundLocalError — consider an explicit else/raise.
                if self.hparams.loss_function == "Negative Log Likelihood":
                    logp = F.log_softmax(pred, 1)
                    loss = F.nll_loss(logp, labels)
                elif self.hparams.loss_function == "Cross Entropy":
                    loss = F.cross_entropy(pred, labels)
                # Save loss information for reporting
                training_temp_loss_list.append(loss.item())
                num_correct += (pred.argmax(1) == labels).sum().item()
                num_tests += len(labels)
                # Perform backward pass
                loss.backward()
                # Perform optimization
                self.optimizer.step()
            self.training_accuracy = num_correct / num_tests
            epoch_training_accuracy_list.append(self.training_accuracy)
            epoch_training_loss_list.append(
                sum(training_temp_loss_list) / len(training_temp_loss_list))
            # Evaluate on this fold's held-out split (sets
            # self.testing_accuracy / self.testing_loss).
            self.test()
            epoch_testing_accuracy_list.append(self.testing_accuracy)
            epoch_testing_loss_list.append(self.testing_loss)
        if self.hparams.checkpoint_path is not None:
            # Save the entire model, one checkpoint per fold
            torch.save(self.model,
                       self.hparams.checkpoint_path + "-fold_" + str(fold))
        self.training_accuracy_list.append(epoch_training_accuracy_list)
        self.training_loss_list.append(epoch_training_loss_list)
        self.testing_accuracy_list.append(epoch_testing_accuracy_list)
        self.testing_loss_list.append(epoch_testing_loss_list)
# `torch.utils.data.sampler <https://pytorch.org/docs/stable/data.html#data-loading-order-and-sampler>`__. # For example, this tutorial creates a training ``GraphDataLoader`` and # test ``GraphDataLoader``, using ``SubsetRandomSampler`` to tell PyTorch # to sample from only a subset of the dataset. # from dgl.dataloading import GraphDataLoader from torch.utils.data.sampler import SubsetRandomSampler num_examples = len(dataset) num_train = int(num_examples * 0.8) train_sampler = SubsetRandomSampler(torch.arange(num_train)) test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples)) train_dataloader = GraphDataLoader( dataset, sampler=train_sampler, batch_size=5, drop_last=False) test_dataloader = GraphDataLoader( dataset, sampler=test_sampler, batch_size=5, drop_last=False) ###################################################################### # You can try to iterate over the created ``GraphDataLoader`` and see what it # gives: # it = iter(train_dataloader) batch = next(it) print(batch) ######################################################################
def main(args):
    """Train and evaluate a GAT model on the PPI multi-label dataset.

    Trains with BCE-with-logits loss, validates every 5 epochs with
    micro-F1, early-stops after ``args.patience`` non-improving
    validation rounds, then reports the mean test F1 score.
    """
    # Select the device; a negative --gpu value means CPU.
    if args.gpu<0:
        device = torch.device("cpu")
    else:
        # NOTE(review): no torch.cuda.is_available() guard here -- this
        # branch fails on CUDA-less machines; confirm intended.
        device = torch.device("cuda:" + str(args.gpu))
    batch_size = args.batch_size
    cur_step = 0            # validation rounds since the last improvement
    patience = args.patience
    best_score = -1         # best validation micro-F1 seen so far
    best_loss = 10000       # lowest validation loss seen so far
    # define loss function (multi-label targets -> BCE with logits)
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    train_dataset = PPIDataset(mode='train')
    valid_dataset = PPIDataset(mode='valid')
    test_dataset = PPIDataset(mode='test')
    train_dataloader = GraphDataLoader(train_dataset, batch_size=batch_size)
    valid_dataloader = GraphDataLoader(valid_dataset, batch_size=batch_size)
    test_dataloader = GraphDataLoader(test_dataset, batch_size=batch_size)
    g = train_dataset[0]
    n_classes = train_dataset.num_labels
    num_feats = g.ndata['feat'].shape[1]
    g = g.int().to(device)
    # One head count per hidden layer; the output layer may use a
    # different number of heads.
    heads = ([args.num_heads] * (args.num_layers-1)) + [args.num_out_heads]
    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)
    for epoch in range(args.epochs):
        model.train()
        loss_list = []
        for batch, subgraph in enumerate(train_dataloader):
            subgraph = subgraph.to(device)
            # The model and its layers cache the graph object; repoint
            # them at the current batched subgraph before the forward pass.
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(subgraph.ndata['feat'].float())
            loss = loss_fcn(logits, subgraph.ndata['label'])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        # Validate every 5 epochs.
        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, subgraph in enumerate(valid_dataloader):
                subgraph = subgraph.to(device)
                score, val_loss = evaluate(subgraph.ndata['feat'], model,
                                           subgraph,
                                           subgraph.ndata['label'],
                                           loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("Val F1-Score: {:.4f} ".format(mean_score))
            # early stop: patience resets whenever EITHER metric improves.
            if mean_score > best_score or best_loss > mean_val_loss:
                # Record the point where BOTH metrics improved together.
                # NOTE(review): val_early_loss / val_early_score are
                # written here but never read in this function.
                if mean_score > best_score and best_loss > mean_val_loss:
                    val_early_loss = mean_val_loss
                    val_early_score = mean_score
                best_score = np.max((mean_score, best_score))
                best_loss = np.min((best_loss, mean_val_loss))
                cur_step = 0
            else:
                cur_step += 1
                if cur_step == patience:
                    break
    # Final evaluation on the held-out test split.
    test_score_list = []
    for batch, subgraph in enumerate(test_dataloader):
        subgraph = subgraph.to(device)
        score, test_loss = evaluate(subgraph.ndata['feat'], model, subgraph,
                                    subgraph.ndata['label'], loss_fcn)
        test_score_list.append(score)
    print("Test F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
if __name__ == "__main__": from dgl.dataloading import GraphDataLoader import networkx as nx import matplotlib.pyplot as plt from tqdm import tqdm HighD_dataset = HighD_Dataset(X_len=20, X_step=1, Y_len=20, Y_step=2, diff=5, name='data_22', raw_dir='./') HighD_dataloader = GraphDataLoader(HighD_dataset, batch_size=1, shuffle=True) print("Dataset Ready!") with tqdm(total=len(HighD_dataloader)) as pbar: for i, (graph, X, Y, mask) in enumerate(HighD_dataloader): # if i==1: nx.draw(graph.to_networkx(), with_labels=True) pbar.set_postfix({"mask_shape": mask.shape}) pbar.update(1) # if (i==466): # print(X["feature"][0,10]) # print(mask[0,10]) # nx.draw(X["graph"][10].to_networkx(), with_labels=True) # plt.show()
def main(args):
    """Train GeniePath (or GeniePathLazy) on the PPI dataset.

    Trains with BCE-with-logits loss for ``args.max_epoch`` epochs,
    reporting train/validation micro-F1 and loss per epoch, then
    evaluates once on the test split.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    train_dataset = PPIDataset(mode='train')
    valid_dataset = PPIDataset(mode='valid')
    test_dataset = PPIDataset(mode='test')
    train_dataloader = GraphDataLoader(train_dataset, batch_size=args.batch_size)
    valid_dataloader = GraphDataLoader(valid_dataset, batch_size=args.batch_size)
    test_dataloader = GraphDataLoader(test_dataset, batch_size=args.batch_size)

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'

    num_classes = train_dataset.num_labels

    # Extract node features (used only to size the model input here).
    graph = train_dataset[0]
    feat = graph.ndata['feat']

    # Step 2: Create model =================================================================== #
    if args.lazy:
        model = GeniePathLazy(in_dim=feat.shape[-1],
                              out_dim=num_classes,
                              hid_dim=args.hid_dim,
                              num_layers=args.num_layers,
                              num_heads=args.num_heads,
                              residual=args.residual)
    else:
        model = GeniePath(in_dim=feat.shape[-1],
                          out_dim=num_classes,
                          hid_dim=args.hid_dim,
                          num_layers=args.num_layers,
                          num_heads=args.num_heads,
                          residual=args.residual)
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    loss_fn = th.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        model.train()
        tr_loss = 0      # running sum of batch losses this epoch
        tr_f1 = 0        # running sum of per-batch micro-F1 scores
        num_blocks = 0   # number of batches seen this epoch
        for subgraph in train_dataloader:
            subgraph = subgraph.to(device)
            label = subgraph.ndata['label']
            # NOTE: rebinds the outer `feat` -- harmless, it is not
            # used again after model construction.
            feat = subgraph.ndata['feat']
            logits = model(subgraph, feat)

            # compute loss
            batch_loss = loss_fn(logits, label)
            tr_loss += batch_loss.item()
            # Threshold logits at 0 (sigmoid(0) == 0.5) for predictions.
            tr_predict = np.where(logits.data.cpu().numpy() >= 0., 1, 0)
            tr_f1 += f1_score(label.cpu(), tr_predict, average='micro')
            num_blocks += 1

            # backward
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # validation
        model.eval()
        val_f1, val_loss = evaluate(model, loss_fn, valid_dataloader, device)
        print(
            "In epoch {}, Train F1: {:.4f} | Train Loss: {:.4f}; Valid F1: {:.4f} | Valid loss: {:.4f}"
            .format(epoch, tr_f1 / num_blocks, tr_loss / num_blocks, val_f1,
                    val_loss))

    # Test after all epoch
    model.eval()
    test_f1, test_loss = evaluate(model, loss_fn, test_dataloader, device)
    print("Test F1: {:.4f} | Test loss: {:.4f}".format(test_f1, test_loss))
############################################################################### # Setup and training # ------------------ # Create a synthetic dataset of :math:`400` graphs with :math:`10` ~ # :math:`20` nodes. :math:`320` graphs constitute a training set and # :math:`80` graphs constitute a test set. import torch.optim as optim from dgl.dataloading import GraphDataLoader # Create training and test sets. trainset = MiniGCDataset(320, 10, 20) testset = MiniGCDataset(80, 10, 20) # Use DGL's GraphDataLoader. It by default handles the # graph batching operation for every mini-batch. data_loader = GraphDataLoader(trainset, batch_size=32, shuffle=True) # Create model model = Classifier(1, 256, trainset.num_classes) loss_func = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) model.train() epoch_losses = [] for epoch in range(80): epoch_loss = 0 for iter, (bg, label) in enumerate(data_loader): prediction = model(bg) loss = loss_func(prediction, label) optimizer.zero_grad() loss.backward()
val_idx = all_idx[:val_num] test_idx = all_idx[val_num : val_num + test_num] train_idx = all_idx[val_num + test_num : val_num + test_num + args.train_num] train_data = Subset(dataset, train_idx) val_data = Subset(dataset, val_idx) test_data = Subset(dataset, test_idx) unsup_idx = all_idx[val_num + test_num:] unsup_data = Subset(dataset, unsup_idx) # generate supervised training dataloader and unsupervised training dataloader train_loader = GraphDataLoader(train_data, batch_size=args.batch_size, collate_fn=collate, drop_last=False, shuffle=True) unsup_loader = GraphDataLoader(unsup_data, batch_size=args.batch_size, collate_fn=collate, drop_last=False, shuffle=True) # generate validation & testing dataloader val_loader = GraphDataLoader(val_data, batch_size=args.val_batch_size, collate_fn=collate, drop_last=False,
def main():
    """Train and (optionally) evaluate a GCN graph classifier on ENZYMES.

    Runs ``--runs`` independent runs of ``--epochs`` epochs each; with
    ``--eval`` set, validation/test accuracy is logged per epoch and
    summarised per run and overall.
    """
    parser = argparse.ArgumentParser(description='ENZYMES')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=4)
    parser.add_argument('--hidden_size', type=int, default=128)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--eval', action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = LegacyTUDataset('ENZYMES')

    # 80/10/10 split over a fixed shuffle (seed 42) for reproducibility.
    num_samples = len(dataset)
    indices = np.arange(num_samples)
    np.random.seed(42)
    np.random.shuffle(indices)
    train_set = dgl.data.utils.Subset(dataset,
                                      indices[:int(num_samples * 0.8)])
    val_set = dgl.data.utils.Subset(
        dataset, indices[int(num_samples * 0.8):int(num_samples * 0.9)])
    test_set = dgl.data.utils.Subset(
        dataset, indices[int(num_samples * 0.9):int(num_samples)])

    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
    # NOTE(review): shuffle=True on the eval loaders is unnecessary
    # (order does not affect accuracy) but harmless.
    val_loader = GraphDataLoader(val_set, batch_size=args.eval_batch_size,
                                 shuffle=True, num_workers=0)
    test_loader = GraphDataLoader(test_set, batch_size=args.eval_batch_size,
                                  shuffle=True, num_workers=0)

    # 18 here is the hard-coded input feature dimension -- presumably the
    # node feature size of ENZYMES in LegacyTUDataset; TODO confirm and
    # consider reading it from the dataset instead.
    model = GCN(18, args.hidden_size,
                num_classes=int(dataset.num_labels),
                num_layers=args.num_layers,
                dropout=args.dropout).to(device)

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, args.epochs + 1):
            t0 = time.time()
            loss = train(model, device, train_loader, optimizer)
            # Skip the first two epochs when averaging epoch time (warm-up).
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue
            val_acc = test(model, device, val_loader)
            test_acc = test(model, device, test_loader)
            logger.add_result(run, (0.0, val_acc, test_acc))
            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Valid: {val_acc * 100:.4f}% '
                      f'Test: {test_acc * 100:.4f}%')
        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def main():
    """Train and (optionally) evaluate a GCN on the ogbg-molhiv dataset.

    Uses the official OGB split and ``Evaluator``; runs ``--runs``
    independent runs of ``--epochs`` epochs and, with ``--eval`` set,
    logs validation/test ROC-AUC per epoch and summarises the results.
    """
    parser = argparse.ArgumentParser(description='OGBN-MolHiv')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=5)
    parser.add_argument('--emb_dim', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--eval', action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument('--eval_batch_size', type=int, default=2048)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = DglGraphPropPredDataset(name='ogbg-molhiv')
    # Use the dataset's official scaffold split.
    split_idx = dataset.get_idx_split()
    evaluator = Evaluator(name='ogbg-molhiv')

    train_loader = GraphDataLoader(dataset[split_idx["train"]],
                                   batch_size=args.batch_size, shuffle=True,
                                   num_workers=args.num_workers)
    # NOTE(review): shuffle=True on the eval loaders is unnecessary
    # (order does not affect the metric) but harmless.
    val_loader = GraphDataLoader(dataset[split_idx["valid"]],
                                 batch_size=args.eval_batch_size,
                                 shuffle=True, num_workers=0)
    test_loader = GraphDataLoader(dataset[split_idx["test"]],
                                  batch_size=args.eval_batch_size,
                                  shuffle=True, num_workers=0)

    model = GCN(args.emb_dim,
                num_classes=dataset.num_tasks,
                num_layers=args.num_layers,
                dropout=args.dropout).to(device)

    logger = Logger(args.runs, args)
    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, args.epochs + 1):
            t0 = time.time()
            loss = train(model, device, train_loader, optimizer)
            # Skip the first two epochs when averaging epoch time (warm-up).
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue
            # The OGB evaluator returns a dict keyed by the metric name.
            val_rocauc = test(model, device, val_loader,
                              evaluator)[dataset.eval_metric]
            test_rocauc = test(model, device, test_loader,
                               evaluator)[dataset.eval_metric]
            logger.add_result(run, (0.0, val_rocauc, test_rocauc))
            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Valid: {val_rocauc:.4f} '
                      f'Test: {test_rocauc:.4f}')
        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def main(args):
    """Train an R-GCN link-prediction model on FB15k-237.

    Each dataloader item is a freshly sampled training subgraph (one
    "epoch").  Every 500 epochs the model is validated on CPU over the
    full graph by MRR, and the best checkpoint is saved; after training,
    the best checkpoint is reloaded and tested.
    """
    data = FB15k237Dataset(reverse=False)
    graph = data[0]
    num_nodes = graph.num_nodes()
    num_rels = data.num_rels
    train_g, test_g = preprocess(graph, num_rels)
    test_nids = th.arange(0, num_nodes)
    test_mask = graph.edata['test_mask']
    # batch_size=1 with an identity collate: each "batch" is one sampled
    # subgraph tuple produced by the iterator.
    subg_iter = SubgraphIterator(train_g, num_rels, args.edge_sampler)
    dataloader = GraphDataLoader(subg_iter, batch_size=1,
                                 collate_fn=lambda x: x[0])

    # Prepare data for metric computation
    src, dst = graph.edges()
    triplets = th.stack([src, graph.edata['etype'], dst], dim=1)

    model = LinkPredict(num_nodes, num_rels)
    optimizer = th.optim.Adam(model.parameters(), lr=1e-2)

    if args.gpu >= 0 and th.cuda.is_available():
        device = th.device(args.gpu)
    else:
        device = th.device('cpu')
    model = model.to(device)

    best_mrr = 0
    model_state_file = 'model_state.pth'
    for epoch, batch_data in enumerate(dataloader):
        model.train()
        g, train_nids, edges, labels = batch_data
        g = g.to(device)
        train_nids = train_nids.to(device)
        edges = edges.to(device)
        labels = labels.to(device)

        embed = model(g, train_nids)
        loss = model.get_loss(embed, edges, labels)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(),
                                 max_norm=1.0)  # clip gradients
        optimizer.step()

        print("Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f}".format(
            epoch, loss.item(), best_mrr))

        if (epoch + 1) % 500 == 0:
            # perform validation on CPU because full graph is too large
            model = model.cpu()
            model.eval()
            print("start eval")
            embed = model(test_g, test_nids)
            mrr = calc_mrr(embed, model.w_relation, test_mask, triplets,
                           batch_size=500, eval_p=args.eval_protocol)
            # save best model
            if best_mrr < mrr:
                best_mrr = mrr
                th.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, model_state_file)
            # Move the model back to the training device afterwards.
            model = model.to(device)

    print("Start testing:")
    # use best model checkpoint
    checkpoint = th.load(model_state_file)
    model = model.cpu()  # test on CPU
    model.eval()
    model.load_state_dict(checkpoint['state_dict'])
    print("Using best epoch: {}".format(checkpoint['epoch']))
    embed = model(test_g, test_nids)
    calc_mrr(embed, model.w_relation, test_mask, triplets,
             batch_size=500, eval_p=args.eval_protocol)