import os

import numpy as np
import pandas as pd
import tensorflow as tf

# `DNN` and the `FLAGS` object are defined elsewhere in this repo; the module
# path below is illustrative (see the flag sketch after this example).
from model import DNN


def main(_):
    # Exactly one mode flag (--train, --predict, --eval) must be set.
    assert sum([FLAGS.train, FLAGS.predict, FLAGS.eval]) == 1

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    # Optionally cap GPU memory usage and enable on-demand growth:
    # config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    # config.gpu_options.allow_growth = True
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'  # pin this process to GPU 1
    # with tf.Session(config=config) as sess:
    with tf.Session() as sess:
        dnn = DNN(sess, FLAGS)
        if FLAGS.train:
            dnn.fit()
        elif FLAGS.predict:
            dnn.load_network()
            samples = np.array(
                pd.read_csv('dataset/gen_samples.csv', header=None))
            gen_y = samples[:, -1]  # last column holds the labels
            predict = dnn.predict(np.delete(samples, -1, 1))
            # assert gen_y.shape[0] == predict.shape[0]
            # print('Accuracy: {}%'.format(
            #     (predict == gen_y).sum() / float(predict.shape[0]) * 100))
            # Keep only the misclassified samples and write them back out.
            samples = samples[gen_y != predict]
            pd.DataFrame(samples).to_csv('dataset/gen_samples.csv',
                                         index=False,
                                         header=None)
        elif FLAGS.eval:
            dnn.load_network()
            dnn.eval()
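
main() reads TF1-style command-line flags that are defined elsewhere in the
module. A minimal sketch of definitions consistent with the code above
(defaults and help strings are illustrative, not the repo's actual values):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_boolean('train', False, 'Train the network.')
flags.DEFINE_boolean('predict', False,
                     'Filter dataset/gen_samples.csv to misclassified rows.')
flags.DEFINE_boolean('eval', False, 'Evaluate a saved network.')
flags.DEFINE_string('checkpoint_dir', 'checkpoint', 'Checkpoint directory.')
flags.DEFINE_string('log_dir', 'logs', 'Log directory.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses flags, then calls main()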
Example 2
    # Method excerpt; the enclosing module imports torch, torch.nn as nn,
    # numpy as np, and the repo-local data_loader, Data, DNN, and utils.
    def parse_individual(self, indi):
        """Train the network encoded by `indi` for one pass and return (HR, NDCG)."""
        torch_device = torch.device('cuda')

        # Load the interaction data: train/test splits plus the user-item matrix.
        train_data, test_data, user_num, item_num, train_mat = data_loader.load_dataset()
        # Training pairs draw 4 sampled negative items per positive interaction.
        train_dataset = Data(train_data,
                             item_num,
                             train_mat,
                             num_ng=4,
                             is_training=True)
        # Test lists are pre-built (1 positive + 99 negatives per user, 100 items).
        test_dataset = Data(test_data,
                            item_num,
                            train_mat,
                            num_ng=0,
                            is_training=False)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=256,
                                                   shuffle=True,
                                                   num_workers=4)

        dnn = DNN(int(user_num), int(item_num), factor_num=8, indi=indi)
        dnn.cuda()
        print(dnn)
        # complexity = get_total_params(dnn.cuda(), (220, 30, 30))  # TODO: change the input size

        # Loss: binary cross-entropy on raw logits.
        criterion = nn.BCEWithLogitsLoss()
        criterion = criterion.to(torch_device)

        # Optimizer: Adam.
        learning_rate = 0.002
        optimizer = torch.optim.Adam(dnn.parameters(), lr=learning_rate)
        loss_history = []
        num_batches = len(train_loader)

        # Train for a single pass over the training set.
        dnn.train()
        train_loader.dataset.ng_sample()  # draw fresh negative samples for this pass
        for i, data in enumerate(train_loader, 0):
            # Move the batch (user ids, item ids, 0/1 labels) to the GPU.
            user = data[0].cuda()
            item = data[1].cuda()
            label = data[2].float().cuda()

            # Forward pass.
            optimizer.zero_grad()
            output = dnn(user, item)
            loss = criterion(output, label)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            # Record the loss and periodically log progress.
            loss_history.append(loss.item())
            if (i + 1) % 5000 == 0:
                print('Step [{}/{}], Loss: {:.4f}'.format(
                    i + 1, num_batches, loss.item()))

        # Evaluate: each test batch is one user's candidate list of
        # 1 positive + 99 negative items, so batch_size=100 covers one user.
        dnn.eval()
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=99 + 1,
                                                  shuffle=False,
                                                  num_workers=2)
        # Optional: per-batch test loss (unused; HR/NDCG are reported instead).
        # test_loss_dict = []
        # for i, data in enumerate(test_loader, 0):
        #     user = data[0].cuda()
        #     item = data[1].cuda()
        #     label = data[2].float().cuda()
        #     output = dnn(user, item)
        #     loss = criterion(output, label)
        #     test_loss_dict.append(loss.item())
        # mean_test_loss = np.mean(test_loss_dict)
        # std_test_loss = np.std(test_loss_dict)

        # HR@10 and NDCG@10 over each user's 100-item ranked list.
        HR = utils.metricsHR(dnn, test_loader, top_k=10)
        NDCG = utils.metricsNDCG(dnn, test_loader, top_k=10)

        print("HR: {}, NDCG: {}".format(HR, NDCG))
        return HR, NDCG
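
utils.metricsHR and utils.metricsNDCG are project helpers that are not shown
here. A minimal sketch of the standard NCF-style evaluation they are assumed
to implement, under the assumption (following the reference NCF code) that
the first item in each 100-item batch is the ground-truth positive:

import numpy as np
import torch

def hr_ndcg_at_k(model, test_loader, top_k=10):
    # Each batch: one user's 1 positive + 99 negative items.
    hits, ndcgs = [], []
    with torch.no_grad():
        for user, item, _ in test_loader:
            user, item = user.cuda(), item.cuda()
            scores = model(user, item)
            _, idx = torch.topk(scores, top_k)            # top-k positions
            recommends = torch.take(item, idx).cpu().numpy().tolist()
            gt_item = item[0].item()                      # assumed positive
            hits.append(1 if gt_item in recommends else 0)
            # One relevant item per list, so NDCG reduces to 1/log2(rank + 2).
            ndcgs.append(1 / np.log2(recommends.index(gt_item) + 2)
                         if gt_item in recommends else 0)
    return np.mean(hits), np.mean(ndcgs)

This mirrors the hit-ratio/NDCG evaluation used by the original NCF reference
implementation; the repo's actual utils functions may differ in details such
as device handling or return types.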