Example #1
0
def main():
    """Train GraphRec for ranking and report HITS@10 with early stopping.

    Loads the preprocessed rating/history and rank-evaluation pickles,
    builds train/valid/test loaders, trains with RMSprop, and stops early
    once validation HITS@10 has not improved for more than 5 epochs.
    """
    args = config.config()
    logger.info(args.embed_dim)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Single expression instead of the original flag-then-override dance.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    embed_dim = args.embed_dim

    dir_data = 'data/' + args.dataset + '_dataset'
    # NOTE(fix): the original opened both pickles without ever closing the
    # handles; context managers guarantee release even if loading fails.
    with open(dir_data + ".pkl", 'rb') as data_file:
        (history_u, history_i, history_ur, history_ir,
         train_u, train_i, train_r,
         valid_u, valid_i, valid_r,
         test_u, test_i, test_r,
         social_neighbor, ratings) = pickle.load(data_file)

    with open(dir_data + "_rank.pkl", 'rb') as rank_file:
        valid_rank_data, test_rank_data = pickle.load(rank_file)

    trainset = torch.utils.data.TensorDataset(torch.LongTensor(train_u),
                                              torch.LongTensor(train_i),
                                              torch.FloatTensor(train_r))
    validset = torch.utils.data.TensorDataset(torch.LongTensor(valid_u),
                                              torch.LongTensor(valid_i),
                                              torch.FloatTensor(valid_r))
    testset = torch.utils.data.TensorDataset(torch.LongTensor(test_u),
                                             torch.LongTensor(test_i),
                                             torch.FloatTensor(test_r))
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(validset,
                                             batch_size=args.batch_size,
                                             shuffle=True)
    # NOTE(review): shuffle=True on the test loader is unconventional but
    # harmless here — evaluation below uses test_rank_data, not this loader.
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=args.test_batch_size,
                                              shuffle=True)
    # len() is the idiomatic spelling of __len__().
    num_users = len(history_u)
    num_items = len(history_i)
    num_ratings = len(ratings)

    # model
    graphrec = GraphRec(num_users, num_items, num_ratings, history_u,
                        history_i, history_ur, history_ir, embed_dim,
                        social_neighbor, cuda=device).to(device)
    optimizer = torch.optim.RMSprop(graphrec.parameters(),
                                    lr=args.lr,
                                    alpha=0.9)

    best_hits = 0.0      # best validation HITS@10 seen so far
    endure_count = 0     # epochs since the last validation improvement
    best_test_hits = 0   # best test HITS@10 observed across epochs

    for epoch in range(1, args.epochs + 1):
        val_hits = train(graphrec, device, train_loader, optimizer, epoch,
                         valid_rank_data)

        # Early-stopping bookkeeping on validation HITS@10.
        if best_hits < val_hits:
            best_hits = val_hits
            endure_count = 0
        else:
            endure_count += 1
        logger.info("val HITS@10:%.4f " % (val_hits))

        test_hits = rank_test(graphrec, device, test_rank_data)
        if test_hits > best_test_hits:
            best_test_hits = test_hits
        logger.info("best test HITS@10:%.4f " % (best_test_hits))

        if endure_count > 5:
            logger.info("early stopping...")
            break

    # Final evaluation of the model as it stands after training.
    test_hits = rank_test(graphrec, device, test_rank_data)
    logger.info("test HITS@10:%.4f " % (test_hits))
    logger.info("best test HITS@10:%.4f " % (best_test_hits))
Example #2
0
def main():
    """Train GraphRec for rating prediction, tracking the best validation MAE.

    With args.test set, loads best_checkpoint.pth.tar and evaluates on the
    test split instead of training.
    """
    print('Loading data...')
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list,
                          u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              collate_fn=collate_fn)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=collate_fn)

    # +1 presumably because ids are 1-based embedding indices — TODO confirm
    # against the preprocessing script.
    model = GraphRec(user_count + 1, item_count + 1, rate_count + 1,
                     args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), args.lr)
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    # NOTE(fix): start at +inf so the first epoch both records best_mae AND
    # writes best_checkpoint.pth.tar. The original only assigned best_mae at
    # epoch 0 without saving, so --test could fail after a one-epoch run.
    best_mae = float('inf')

    for epoch in tqdm(range(args.epoch)):
        # Keep the original schedule: LR is decayed before each epoch.
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader,
                      model,
                      optimizer,
                      epoch,
                      args.epoch,
                      criterion,
                      log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # Store the latest state every epoch; the best state only on
        # validation-MAE improvement.
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')

        if mae < best_mae:
            best_mae = mae
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')

        print(
            'Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}'.
            format(epoch, mae, rmse, best_mae))
Example #3
0
def main():
    """Train GraphRec on the filter5 dataset, logging per-epoch losses.

    Writes running valid/test loss lists as JSON every epoch and reports
    test MAE/RMSE averaged over the epochs after a 6-epoch warm-up.
    With args.test set, loads the best checkpoint and evaluates instead.
    """
    print('Loading data...')
    with open(args.dataset_path + 'dataset_filter5.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list_filter5.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list,
                          u_users_items_list, i_users_list)

    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              collate_fn=collate_fn)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=collate_fn)
    model = GraphRec(user_count + 1, item_count + 1, rate_count + 1,
                     args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('%s/random_best_checkpoint.pth.tar' % fn)
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              weight_decay=1e-4)
    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    valid_loss_list, test_loss_list = [], []
    ave_mae = []
    ave_rmse = []

    # NOTE(fix): start at +inf so the first epoch also saves the best
    # checkpoint; the original never saved at epoch 0.
    best_mae = float('inf')

    for epoch in tqdm(range(args.epoch)):

        # Keep the original schedule: LR is decayed before each epoch.
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader,
                      model,
                      optimizer,
                      epoch,
                      args.epoch,
                      criterion,
                      log_aggr=100)

        mae, rmse = validate(valid_loader, model)
        valid_loss_list.append([mae, rmse])

        test_mae, test_rmse = validate(test_loader, model)
        test_loss_list.append([test_mae, test_rmse])

        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        if mae < best_mae:
            best_mae = mae
            torch.save(ckpt_dict, '%s/random_best_checkpoint.pth.tar' % fn)

        print(
            'Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}, test_MAE: {:.4f}, test_RMSE: {:.4f}'
            .format(epoch, mae, rmse, best_mae, test_mae, test_rmse))

        ave_mae.append(test_mae)
        ave_rmse.append(test_rmse)

        # Rewrite the loss logs every epoch so a crash loses nothing.
        with open('%s/random_valid_loss_list.txt' % fn, 'w') as f:
            f.write(json.dumps(valid_loss_list))

        with open('%s/random_test_loss_list.txt' % fn, 'w') as f:
            f.write(json.dumps(test_loss_list))

    # Average test metrics over post-warm-up epochs (skip the first 6).
    # NOTE(fix): the original divided by len(ave_mae[6:]) unguarded and
    # raised ZeroDivisionError whenever args.epoch <= 6, and mislabeled
    # the MAE average as 'ave_mse'.
    tail_mae = ave_mae[6:]
    tail_rmse = ave_rmse[6:]
    if tail_mae:
        print('ave_mae:{},ave_rmse:{}'.format(
            sum(tail_mae) / len(tail_mae),
            sum(tail_rmse) / len(tail_rmse)))
    else:
        print('fewer than 7 epochs ran; no post-warm-up average to report')
Example #4
0
def main():
    """Train the GraphRec variant named by args.model.

    Logs to TensorBoard, checkpoints the latest and best (lowest validation
    RMSE) states, and early-stops after 5 epochs without RMSE improvement.
    With args.test set, loads best_checkpoint.pth.tar and evaluates instead.
    """
    writer = SummaryWriter(args.model)
    print('Loading data...')
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list,
                          u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              collate_fn=collate_fn)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=collate_fn)

    # Dispatch table instead of the original if/elif chain; any unknown
    # model name falls back to plain GraphRec, exactly as before.
    model_classes = {
        "GraphRecSN": GraphRecSN,
        "GraphRecNoBatchNorm": GraphRecNoBatchNorm,
        "GraphRecAlpha": GraphRecAlpha,
        "GraphRecGamma": GraphRecGamma,
        "GraphRecBeta": GraphRecBeta,
        "GraphRecAlphaBeta": GraphRecAlphaBeta,
        "GraphRecOpinion": GraphRecOpinion,
    }
    model_cls = model_classes.get(args.model, GraphRec)
    print("loaded " + model_cls.__name__)
    model = model_cls(user_count + 1, item_count + 1, rate_count + 1,
                      args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), args.lr)

    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    best_rmse = 9999.0   # best validation RMSE so far (selection metric)
    best_mae = 9999.0    # MAE at the best-RMSE epoch
    endure_count = 0     # epochs since the last RMSE improvement

    for epoch in tqdm(range(args.epoch)):
        # Keep the original schedule: LR is decayed before each epoch.
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader,
                      model,
                      optimizer,
                      epoch,
                      args.epoch,
                      criterion,
                      writer,
                      log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # Store the latest state every epoch; the best state only on
        # validation-RMSE improvement.
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')

        if best_rmse > rmse:
            best_rmse = rmse
            best_mae = mae
            endure_count = 0
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')
        else:
            endure_count += 1

        print(
            'Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}'.
            format(epoch, mae, rmse, best_rmse))

        # Early stopping with patience 5.
        if endure_count > 5:
            break
    writer.close()
Example #5
0
def main():
    """Train a GraphRec variant while capturing ReLU activations via hooks.

    Same training loop as the plain variant-selection script, but registers
    forward hooks on selected ReLU layers and passes the captured
    activations to trainForEpoch for TensorBoard logging. Early-stops after
    5 epochs without validation-RMSE improvement.
    """
    writer = SummaryWriter(args.model)
    print('Loading data...')
    with open(args.dataset_path + 'dataset.pkl', 'rb') as f:
        train_set = pickle.load(f)
        valid_set = pickle.load(f)
        test_set = pickle.load(f)

    with open(args.dataset_path + 'list.pkl', 'rb') as f:
        u_items_list = pickle.load(f)
        u_users_list = pickle.load(f)
        u_users_items_list = pickle.load(f)
        i_users_list = pickle.load(f)
        (user_count, item_count, rate_count) = pickle.load(f)

    train_data = GRDataset(train_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    valid_data = GRDataset(valid_set, u_items_list, u_users_list,
                           u_users_items_list, i_users_list)
    test_data = GRDataset(test_set, u_items_list, u_users_list,
                          u_users_items_list, i_users_list)
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              collate_fn=collate_fn)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=collate_fn)

    # Dispatch table instead of the original if/elif chain; any unknown
    # model name falls back to plain GraphRec, exactly as before.
    model_classes = {
        "GraphRecSN": GraphRecSN,
        "GraphRecAlpha": GraphRecAlpha,
        "GraphRecGamma": GraphRecGamma,
        "GraphRecBeta": GraphRecBeta,
        "GraphRecAlphaBeta": GraphRecAlphaBeta,
        "GraphRecOpinion": GraphRecOpinion,
    }
    model_cls = model_classes.get(args.model, GraphRec)
    print("loaded " + model_cls.__name__)
    model = model_cls(user_count + 1, item_count + 1, rate_count + 1,
                      args.embed_dim).to(device)

    if args.test:
        print('Load checkpoint and testing...')
        ckpt = torch.load('best_checkpoint.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        mae, rmse = validate(test_loader, model)
        print("Test: MAE: {:.4f}, RMSE: {:.4f}".format(mae, rmse))
        return

    optimizer = optim.RMSprop(model.parameters(), args.lr)

    criterion = nn.MSELoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    best_rmse = 9999.0   # best validation RMSE so far (selection metric)
    best_mae = 9999.0    # MAE at the best-RMSE epoch
    endure_count = 0     # epochs since the last RMSE improvement

    # ReLU layers to monitor, keyed by a human-readable tag.
    # NOTE(review): these index paths assume every selected variant exposes
    # user_model/item_model submodules shaped like GraphRec — confirm for
    # the non-default variants.
    relu_layers = {
        "gv_relu": model.user_model.g_v.mlp[1],  # gv_relu
        "user_items_att_relu":
        model.user_model.user_items_att.mlp[1],  # user item attention relu
        "aggre_items_relu":
        model.user_model.aggre_items.aggre[1],  # aggre_items relu
        "user_users_att_relu": model.user_model.user_users_att.mlp[1],
        "aggre_neigbors_relu":
        model.user_model.aggre_neigbors.aggre[1],  # aggre_neigbors relu
        "combine_mlp_relu_1_relu":
        model.user_model.combine_mlp[1],  # combine relu 1
        "combine_mlp_relu_3_relu":
        model.user_model.combine_mlp[3],  # combine relu 2
        "combine_mlp_relu_5_relu":
        model.user_model.combine_mlp[5],  # combine relu 3
        "gu_relu": model.item_model.g_u.mlp[1],
        # NOTE(fix): original key carried accidental trailing spaces,
        # which leaked into the logged tag name.
        "aggre_users_relu": model.item_model.aggre_users.aggre[1],
        "rate_pred_1": model.rate_pred[1],
        "rate_pred_3": model.rate_pred[3],
    }

    # Latest forward-pass output per tag, filled in by the hooks below.
    activation = {}

    def get_activation(name):
        # Factory so each hook closes over its own tag name.
        def hook(model, input, output):
            activation[name] = output.detach()

        return hook

    for tag, layer in relu_layers.items():
        layer.register_forward_hook(get_activation(tag))

    for epoch in tqdm(range(args.epoch)):
        # Keep the original schedule: LR is decayed before each epoch.
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader,
                      model,
                      optimizer,
                      epoch,
                      args.epoch,
                      criterion,
                      writer,
                      relu_layers,
                      activation,
                      log_aggr=100)

        mae, rmse = validate(valid_loader, model)

        # Store the latest state every epoch; the best state only on
        # validation-RMSE improvement.
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')

        if best_rmse > rmse:
            best_rmse = rmse
            best_mae = mae
            endure_count = 0
            torch.save(ckpt_dict, 'best_checkpoint.pth.tar')
        else:
            endure_count += 1

        print(
            'Epoch {} validation: MAE: {:.4f}, RMSE: {:.4f}, Best MAE: {:.4f}'.
            format(epoch, mae, rmse, best_rmse))

        # Early stopping with patience 5.
        if endure_count > 5:
            break
    writer.close()