Example No. 1
def train_gowalla(args):
    p_str = "running " + args.dataset
    print(p_str)

    dataset_base_path = '../../data/Gowalla/gowalla_x0'

    ##gowalla
    user_num = 29858
    item_num = 40981
    factor_num = args.embed_size
    batch_size = 2048 * 512
    top_k = 20
    num_negative_test_val = -1  ## -1: use all items as negative candidates for test/val

    run_id = args.run_id
    print(run_id)
    dataset = 'gowalla'
    path_save_base = './log/' + dataset + '/newloss' + run_id
    if os.path.exists(path_save_base):
        print('results save path already exists')
    else:
        os.makedirs(path_save_base)
    result_file = open(path_save_base + '/results.txt',
                       'w+')  #('./log/results_gcmc.txt','w+')
    copyfile('../../external/LR_gccf/train_gowalla.py',
             path_save_base + '/train_gowalla' + run_id + '.py')

    path_save_model_base = './newlossModel/' + dataset + '/s' + run_id
    if os.path.exists(path_save_model_base):
        print('model save path already exists')
    else:
        os.makedirs(path_save_model_base)

    training_user_set, training_item_set, training_set_count = np.load(
        dataset_base_path + '/datanpy/training_set.npy', allow_pickle=True)
    testing_user_set, testing_item_set, testing_set_count = np.load(
        dataset_base_path + '/datanpy/testing_set.npy', allow_pickle=True)
    val_user_set, val_item_set, val_set_count = np.load(
        dataset_base_path + '/datanpy/val_set.npy', allow_pickle=True)
    user_rating_set_all = np.load(dataset_base_path +
                                  '/datanpy/user_rating_set_all.npy',
                                  allow_pickle=True).item()

    u_d = readD(training_user_set, user_num)
    i_d = readD(training_item_set, item_num)
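    # readD presumably counts each node's training interactions (its degree);
    # these degrees are used to normalize the graph propagation below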
    d_i_train = u_d
    d_j_train = i_d

    sparse_u_i = readTrainSparseMatrix(training_user_set, True, u_d, i_d)
    sparse_i_u = readTrainSparseMatrix(training_item_set, False, u_d, i_d)
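    # user->item and item->user halves of the bipartite interaction matrix,
    # stored sparse and presumably degree-normalized via u_d / i_d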

    train_dataset = data_utils.BPRData(
        train_dict=training_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=training_set_count,
        all_rating=user_rating_set_all)
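    # num_ng=5: five negative items are sampled per observed (user, item) pair
    # whenever ng_sample() is called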
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)

    testing_dataset_loss = data_utils.BPRData(
        train_dict=testing_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=testing_set_count,
        all_rating=user_rating_set_all)
    testing_loader_loss = DataLoader(testing_dataset_loss,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=0)

    val_dataset_loss = data_utils.BPRData(
        train_dict=val_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=val_set_count,
        all_rating=user_rating_set_all)
    val_loader_loss = DataLoader(val_dataset_loss,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=0)

    model = BPR(user_num, item_num, factor_num, sparse_u_i, sparse_i_u,
                d_i_train, d_j_train)
    model = model.to('cuda')
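    # BPR here is the graph-based model (LR-GCCF, per the copied script path above);
    # the sparse adjacency halves and degree lists presumably drive its propagation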

    # lr=0.005
    optimizer_bpr = torch.optim.Adam(model.parameters(),
                                     lr=args.lr)  #, betas=(0.5, 0.99))

    ########################### TRAINING #####################################

    # testing_loader_loss.dataset.ng_sample()

    print('-------- training started --------')
    count, best_hr = 0, 0
    # epoch=350
    for epoch in range(args.epoch):
        model.train()
        start_time = time.time()
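        # draw fresh negative items (item_j) for every training pair this epoch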
        train_loader.dataset.ng_sample()
        # pdb.set_trace()
        print('negative sampling for training data finished')
        # elapsed_time = time.time() - start_time
        # print(' time:'+str(round(elapsed_time,1)))
        # start_time = time.time()

        train_loss_sum = []
        train_loss_sum2 = []
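        # each batch yields BPR triples: user, positive item_i, sampled negative item_j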
        for user, item_i, item_j in train_loader:
            user = user.cuda()
            item_i = item_i.cuda()
            item_j = item_j.cuda()

            model.zero_grad()
            prediction_i, prediction_j, loss, loss2 = model(
                user, item_i, item_j)
            loss.backward()
            optimizer_bpr.step()
            count += 1
            train_loss_sum.append(loss.item())
            train_loss_sum2.append(loss2.item())
            # print(count)

        elapsed_time = time.time() - start_time
        # the last batch may be smaller than batch_size, so drop it to keep the
        # per-batch losses comparable before taking the mean
        train_loss = round(np.mean(train_loss_sum[:-1]), 4)
        train_loss2 = round(np.mean(train_loss_sum2[:-1]), 4)
        str_print_train = ('epoch:' + str(epoch) + ' time:' +
                           str(round(elapsed_time, 1)) + '\t train loss:' +
                           str(train_loss) + ' (bpr part: ' + str(train_loss2) + ')')
        print('--train--', elapsed_time)

        PATH_model = path_save_model_base + '/epoch' + str(epoch) + '.pt'
        torch.save(model.state_dict(), PATH_model)

        model.eval()
        # ######test and val###########
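        # resample negatives for the validation set so its BPR loss is computed on
        # fresh (user, positive, negative) triples each epoch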
        val_loader_loss.dataset.ng_sample()
        val_loss = evaluate.metrics_loss(model, val_loader_loss, batch_size)
        # str_print_train+=' val loss:'+str(val_loss)

        testing_loader_loss.dataset.ng_sample()
        test_loss = evaluate.metrics_loss(model, testing_loader_loss,
                                          batch_size)
        print(str_print_train + ' val loss:' + str(val_loss) + ' test loss:' +
              str(test_loss))
        result_file.write(str_print_train + ' val loss:' + str(val_loss) +
                          ' test loss:' + str(test_loss))
        result_file.write('\n')
        result_file.flush()
Example No. 2
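        # fragment of the model's forward(): prediction scores are dot products
        # between the user embedding and each item embedding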
        prediction_i = (user * item_i).sum(dim=-1)
        prediction_j = (user * item_j).sum(dim=-1)
        # BPR pairwise objective: push the positive score above the negative one
        l2_regularization = 0.01 * (user**2 + item_i**2 + item_j**2).sum(dim=-1)

        loss2 = -((prediction_i - prediction_j).sigmoid().log().mean())
        loss = loss2 + l2_regularization.mean()  # BPR term plus L2 penalty
        return prediction_i, prediction_j, loss, loss2

train_dataset = data_utils.BPRData(
        train_dict=training_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=training_set_count,
        all_rating=user_rating_set_all)
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=2)

testing_dataset_loss = data_utils.BPRData(
        train_dict=testing_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=testing_set_count,
        all_rating=user_rating_set_all)
testing_loader_loss = DataLoader(testing_dataset_loss,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=0)

val_dataset_loss = data_utils.BPRData(
        train_dict=val_user_set, num_item=item_num, num_ng=5,
        is_training=True, data_set_count=val_set_count,
        all_rating=user_rating_set_all)
val_loader_loss = DataLoader(val_dataset_loss,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=0)
Example No. 3
parser.add_argument("--test_num_ng",
                    type=int,
                    default=99,
                    help="sample part of negative items for testing")
parser.add_argument("--out", default=True, help="save model or not")
parser.add_argument("--gpu", type=str, default="0", help="gpu card ID")
args = parser.parse_args()

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
cudnn.benchmark = True

############################## PREPARE DATASET ##########################
train_data, test_data, user_num, item_num, train_mat = data_utils.load_all()
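# load_all() presumably returns the raw interaction lists plus train_mat, a sparse
# user-item matrix used to avoid sampling observed pairs as negatives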

# construct the train and test datasets
train_dataset = data_utils.BPRData(train_data, item_num, train_mat,
                                   args.num_ng, True)
test_dataset = data_utils.BPRData(test_data, item_num, train_mat, 0, False)
train_loader = data.DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=16)
test_loader = data.DataLoader(test_dataset,
                              batch_size=args.test_num_ng + 1,
                              shuffle=False,
                              num_workers=0)
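# batch_size = test_num_ng + 1 groups each positive test item with its sampled
# negatives, so one batch is exactly one user's candidate list for ranking metrics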

########################### CREATE MODEL #################################
model = model.BPR(user_num, item_num, args.factor_num)
model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.lamda)
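# here L2 regularization comes from SGD's weight_decay (args.lamda) instead of an
# explicit penalty term added to the loss as in Example No. 1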
Example No. 4
parser.add_argument("--test_num_ng",
                    type=int,
                    default=50,  # the new dataset
                    help="sample part of negative items for testing")
parser.add_argument("--out", default=True, help="save model or not")
parser.add_argument("--gpu", type=str, default="0", help="gpu card ID")
args = parser.parse_args()

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
cudnn.benchmark = True

############################## PREPARE DATASET ##########################
train_data, test_data, train_mat, user_num, item_num = data_utils.load_all()

# construct the train and test datasets
train_dataset = data_utils.BPRData(train_data,
                                   user_num,
                                   item_num,
                                   train_mat,
                                   num_ng=args.num_ng,
                                   is_training=True)
test_dataset = data_utils.BPRData(test_data,
                                  user_num,
                                  item_num,
                                  num_ng=0,
                                  is_training=False)
train_loader = data.DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=20)
# test_loader = data.DataLoader(test_dataset,
#                               batch_size=args.test_num_ng + 1,
#                               shuffle=False, num_workers=0)
test_loader = data.DataLoader(test_dataset, batch_size=1, shuffle=False)
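# batch_size=1 scores one (user, item) pair per step; the commented-out loader
# above instead batched each positive together with its sampled negatives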