def main():
    # Assumes module-level imports: os, pickle, torch, plus the project-local
    # config, logger, GraphRec, train and rank_test.
    args = config.config()
    logger.info(args.embed_dim)

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    embed_dim = args.embed_dim
    dir_data = 'data/' + args.dataset + '_dataset'

    # rating data plus per-user/per-item histories and the social graph
    path_data = dir_data + ".pkl"
    with open(path_data, 'rb') as data_file:
        (history_u, history_i, history_ur, history_ir,
         train_u, train_i, train_r, valid_u, valid_i, valid_r,
         test_u, test_i, test_r, social_neighbor, ratings) = pickle.load(data_file)

    # candidate lists for the ranking evaluation (HITS@10)
    path_data_rank = dir_data + "_rank.pkl"
    with open(path_data_rank, 'rb') as rank_file:
        valid_rank_data, test_rank_data = pickle.load(rank_file)

    trainset = torch.utils.data.TensorDataset(
        torch.LongTensor(train_u), torch.LongTensor(train_i), torch.FloatTensor(train_r))
    validset = torch.utils.data.TensorDataset(
        torch.LongTensor(valid_u), torch.LongTensor(valid_i), torch.FloatTensor(valid_r))
    testset = torch.utils.data.TensorDataset(
        torch.LongTensor(test_u), torch.LongTensor(test_i), torch.FloatTensor(test_r))

    # train_size = int(0.8 * len(trainset))
    # val_size = len(trainset) - train_size
    # trainset, valset = torch.utils.data.random_split(trainset, [train_size, val_size])

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(validset, batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size, shuffle=True)

    num_users = len(history_u)
    num_items = len(history_i)
    num_ratings = len(ratings)

    # model
    graphrec = GraphRec(num_users, num_items, num_ratings, history_u, history_i, history_ur,
                        history_ir, embed_dim, social_neighbor, cuda=device).to(device)
    optimizer = torch.optim.RMSprop(graphrec.parameters(), lr=args.lr, alpha=0.9)

    best_hits = 0.0
    endure_count = 0
    best_test_hits = 0.0

    for epoch in range(1, args.epochs + 1):
        # Add a validation set to tune the hyper-parameters for your dataset.
        val_hits = train(graphrec, device, train_loader, optimizer, epoch, valid_rank_data)

        # early stopping on validation HITS@10
        if best_hits < val_hits:
            best_hits = val_hits
            endure_count = 0
        else:
            endure_count += 1
        logger.info("val HITS@10: %.4f" % val_hits)

        test_hits = rank_test(graphrec, device, test_rank_data)
        if test_hits > best_test_hits:
            best_test_hits = test_hits
        logger.info("best test HITS@10: %.4f" % best_test_hits)

        if endure_count > 5:
            logger.info("early stopping...")
            break

    test_hits = rank_test(graphrec, device, test_rank_data)
    logger.info("test HITS@10: %.4f" % test_hits)
    logger.info("best test HITS@10: %.4f" % best_test_hits)
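# ---------------------------------------------------------------------------
# `rank_test` above is project-local and not shown in this file. The sketch
# below illustrates the usual shape of a HITS@10 evaluation, ASSUMING
# rank_data is an iterable of (user, positive_item, candidate_items) tuples
# and that the model scores (user, item) pairs as model(users, items); both
# the tuple layout and the scoring call are assumptions, not this repo's API.
def rank_test_sketch(model, device, rank_data, k=10):
    model.eval()
    hits = 0
    with torch.no_grad():
        for user, pos_item, candidates in rank_data:
            users = torch.LongTensor([user] * len(candidates)).to(device)
            items = torch.LongTensor(candidates).to(device)
            scores = model(users, items).view(-1)                # one score per candidate
            top_k = torch.topk(scores, min(k, len(candidates))).indices
            if pos_item in [candidates[i] for i in top_k.tolist()]:
                hits += 1                                        # positive item ranked in top k
    return hits / len(rank_data)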
def main():
    print('Loading data...')
    # train/valid/test splits, pickled sequentially into one file
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    # per-user and per-item neighbor lists plus the entity counts
    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    # +1 leaves room for a padding index
    model = GraphRec(user_count + 1, item_count + 1, rate_count + 1, args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    # initialize to +inf; the original epoch-0 special case never saved the
    # first epoch's checkpoint even though it set best_mae
    best_mae = float('inf')
    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # always save the latest checkpoint; keep a copy whenever MAE improves
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
        if mae < best_mae:
            best_mae = mae
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')

        print('Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}'.format(
            epoch, mae, rmse, best_mae))
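# ---------------------------------------------------------------------------
# `validate` is defined elsewhere in the repo; this is a minimal sketch of an
# MAE/RMSE pass consistent with how it is called above. It ASSUMES every batch
# element is a tensor and that the rating labels come last in the tuple built
# by collate_fn -- both are assumptions about code not shown here.
def validate_sketch(loader, model):
    model.eval()
    abs_err, sq_err, n = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in loader:
            *inputs, labels = [x.to(device) for x in batch]
            preds = model(*inputs).view(-1)
            abs_err += (preds - labels).abs().sum().item()
            sq_err += ((preds - labels) ** 2).sum().item()
            n += labels.numel()
    return abs_err / n, (sq_err / n) ** 0.5  # MAE, RMSE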
def main():
    print('Loading data...')
    # splits filtered to users/items with at least 5 interactions
    with open(args.dataset_path + 'dataset_filter5.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list_filter5.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    model = GraphRec(user_count + 1, item_count + 1, rate_count + 1, args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('%s/random_best_checkpoint.pth.tar' % fn)
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=1e-4)
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    valid_loss_list, test_loss_list = [], []
    ave_mae, ave_rmse = [], []
    best_mae = float('inf')
    for epoch in tqdm(range(args.epoch)):
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=100)

        mae, rmse = validate(valid_loader, model)
        valid_loss_list.append([mae, rmse])
        test_mae, test_rmse = validate(test_loader, model)
        test_loss_list.append([test_mae, test_rmse])

        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if mae < best_mae:
            best_mae = mae
            torch.save(ckpt_dict, '%s/random_best_checkpoint.pth.tar' % fn)

        print('Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}, '
              'test_MAE: {:.4f}, test_RMSE: {:.4f}'.format(
                  epoch, mae, rmse, best_mae, test_mae, test_rmse))

        ave_mae.append(test_mae)
        ave_rmse.append(test_rmse)

    with open('%s/random_valid_loss_list.txt' % fn, 'w') as f:
        f.write(json.dumps(valid_loss_list))
    with open('%s/random_test_loss_list.txt' % fn, 'w') as f:
        f.write(json.dumps(test_loss_list))

    # average test metrics from epoch 7 onward, skipping the warm-up epochs
    # (the original label said 'ave_mse' but the value is MAE)
    print('ave_mae: {}, ave_rmse: {}'.format(
        sum(ave_mae[6:]) / len(ave_mae[6:]),
        sum(ave_rmse[6:]) / len(ave_rmse[6:])))
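# ---------------------------------------------------------------------------
# `trainForEpoch` is shared by these scripts but defined elsewhere. A minimal
# sketch under the same batch-layout assumption as validate_sketch above
# (labels last, model(*inputs) returns predictions); log_aggr sets how often
# the running loss is printed.
def train_for_epoch_sketch(loader, model, optimizer, epoch, num_epochs, criterion, log_aggr=100):
    model.train()
    running_loss = 0.0
    for step, batch in enumerate(loader):
        *inputs, labels = [x.to(device) for x in batch]
        preds = model(*inputs).view(-1)
        loss = criterion(preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (step + 1) % log_aggr == 0:
            print('Epoch %d/%d step %d: avg loss %.4f'
                  % (epoch + 1, num_epochs, step + 1, running_loss / (step + 1)))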
def main():
    writer = SummaryWriter(args.model)

    print('Loading data...')
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    # pick the model variant by name; GraphRec is the default
    model_classes = {
        "GraphRecSN": GraphRecSN,
        "GraphRecNoBatchNorm": GraphRecNoBatchNorm,
        "GraphRecAlpha": GraphRecAlpha,
        "GraphRecGamma": GraphRecGamma,
        "GraphRecBeta": GraphRecBeta,
        "GraphRecAlphaBeta": GraphRecAlphaBeta,
        "GraphRecOpinion": GraphRecOpinion,
    }
    model_cls = model_classes.get(args.model, GraphRec)
    print("loaded " + model_cls.__name__)
    model = model_cls(user_count + 1, item_count + 1, rate_count + 1, args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    # model, optimizer = amp.initialize(model, optimizer, opt_level='O2')
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    best_rmse = float('inf')
    best_mae = float('inf')
    endure_count = 0
    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, writer, log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # always save the latest checkpoint; keep a copy whenever RMSE improves
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
        if rmse < best_rmse:
            best_rmse = rmse
            best_mae = mae
            endure_count = 0
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')
        else:
            endure_count += 1

        # the original label here said 'Best MAE' but printed best_rmse
        print('Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best RMSE: {:.4f}'.format(
            epoch, mae, rmse, best_rmse))

        # early stopping after 6 epochs without improvement
        if endure_count > 5:
            break

    writer.close()
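# ---------------------------------------------------------------------------
# All of these scripts hand `collate_fn` to their DataLoaders to pad the
# variable-length neighbor lists into dense tensors. The sketch below shows
# the padding idea only; it ASSUMES each sample is (uid, iid, label, u_items)
# with u_items a list of (item_id, rating) pairs, whereas the real collate_fn
# also handles the social-neighbor lists.
def collate_fn_sketch(batch):
    uids = torch.LongTensor([b[0] for b in batch])
    iids = torch.LongTensor([b[1] for b in batch])
    labels = torch.FloatTensor([b[2] for b in batch])
    max_len = max(len(b[3]) for b in batch)
    # id 0 doubles as the padding index, which is why the models are built
    # with user_count + 1 / item_count + 1 embedding rows
    u_items = torch.zeros(len(batch), max_len, 2, dtype=torch.long)
    for row, b in enumerate(batch):
        if b[3]:
            u_items[row, :len(b[3])] = torch.LongTensor(b[3])
    return uids, iids, labels, u_items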
def main():
    writer = SummaryWriter(args.model)

    print('Loading data...')
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list, u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    # pick the model variant by name; GraphRec is the default
    model_classes = {
        "GraphRecSN": GraphRecSN,
        "GraphRecAlpha": GraphRecAlpha,
        "GraphRecGamma": GraphRecGamma,
        "GraphRecBeta": GraphRecBeta,
        "GraphRecAlphaBeta": GraphRecAlphaBeta,
        "GraphRecOpinion": GraphRecOpinion,
    }
    model_cls = model_classes.get(args.model, GraphRec)
    print("loaded " + model_cls.__name__)
    model = model_cls(user_count + 1, item_count + 1, rate_count + 1, args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    # model, optimizer = amp.initialize(model, optimizer, opt_level='O2')
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    best_rmse = float('inf')
    best_mae = float('inf')
    endure_count = 0

    # every ReLU module in the network, keyed by a readable name, so that
    # forward hooks can capture their outputs during training
    relu_layers = {
        "gv_relu": model.user_model.g_v.mlp[1],
        "user_items_att_relu": model.user_model.user_items_att.mlp[1],    # user-item attention
        "aggre_items_relu": model.user_model.aggre_items.aggre[1],        # item aggregation
        "user_users_att_relu": model.user_model.user_users_att.mlp[1],
        "aggre_neigbors_relu": model.user_model.aggre_neigbors.aggre[1],  # neighbor aggregation
        "combine_mlp_relu_1_relu": model.user_model.combine_mlp[1],       # combine ReLU 1
        "combine_mlp_relu_3_relu": model.user_model.combine_mlp[3],       # combine ReLU 2
        "combine_mlp_relu_5_relu": model.user_model.combine_mlp[5],       # combine ReLU 3
        "gu_relu": model.item_model.g_u.mlp[1],
        "item_users_att_relu": model.item_model.item_users_att.mlp[1],
        "aggre_users_relu": model.item_model.aggre_users.aggre[1],
        "rate_pred_1": model.rate_pred[1],
        "rate_pred_3": model.rate_pred[3],
    }

    activation = {}

    def get_activation(name):
        # store the latest output of the hooked module under `name`
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    for k, v in relu_layers.items():
        v.register_forward_hook(get_activation(k))

    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion,
                      writer, relu_layers, activation, log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # always save the latest checkpoint; keep a copy whenever RMSE improves
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
        if rmse < best_rmse:
            best_rmse = rmse
            best_mae = mae
            endure_count = 0
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')
        else:
            endure_count += 1

        print('Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best RMSE: {:.4f}'.format(
            epoch, mae, rmse, best_rmse))

        # early stopping after 6 epochs without improvement
        if endure_count > 5:
            break

    writer.close()
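# ---------------------------------------------------------------------------
# The forward hooks registered above overwrite `activation[name]` with each
# module's latest output. A plausible way for trainForEpoch to consume them is
# sketched below -- e.g. logging the fraction of zeroed ("dead") ReLU units to
# TensorBoard. This usage is an assumption; the repo's trainForEpoch may log
# different statistics.
def log_relu_stats(writer, activation, global_step):
    for name, out in activation.items():
        dead_frac = (out == 0).float().mean().item()  # share of inactive units
        writer.add_scalar('dead_relu/' + name, dead_frac, global_step)
        writer.add_histogram('activations/' + name, out, global_step)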