Пример #1
0
def train(args, data_info, show_loss):
    """Train RippleNet in a TensorFlow session, evaluating after every epoch.

    Args:
        args: Settings object. ``n_epoch`` and ``batch_size`` are read here;
            the object is also forwarded to ``RippleNet``, ``get_feed_dict``,
            ``evaluation`` and ``case_rec_evaluation``.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation`` and
            ``ripple_set``.
        show_loss: If truthy, print the epoch progress percentage and the
            loss after every training batch.

    Note:
        ``train_data`` is shuffled in place at the start of each epoch, so
        the caller's array is reordered.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)
    n_train = train_data.shape[0]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epoch):
            # training
            np.random.shuffle(train_data)
            start = 0
            while start < n_train:
                end = start + args.batch_size
                _, loss = model.train(
                    sess, get_feed_dict(args, model, train_data, ripple_set, start, end))
                start = end
                if show_loss:
                    # The last batch may be partial, so `start` can overshoot
                    # the data size; clamp to keep the progress at <= 100%.
                    print('%.1f%% %.4f' % (min(start, n_train) / n_train * 100, loss))

            # evaluation on all three splits with the current weights
            train_auc, train_acc = evaluation(sess, args, model, train_data, ripple_set, args.batch_size)
            eval_auc, eval_acc = evaluation(sess, args, model, eval_data, ripple_set, args.batch_size)
            test_auc, test_acc = evaluation(sess, args, model, test_data, ripple_set, args.batch_size)

            print('epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                  % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc, test_acc))

        # Added on top of the base training loop: one extra evaluation pass
        # on the test split, run once after the final epoch while the
        # session is still open.
        case_rec_evaluation(sess, args, model, test_data, ripple_set, args.batch_size)
Пример #2
0
def train(args, data_info, show_loss, config):
    """Train RippleNet, logging progress and checkpointing after every epoch.

    Args:
        args: Settings object. ``n_epoch`` and ``batch_size`` are read here;
            the object is also logged via ``str(args)`` and forwarded to the
            model and helper functions.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation`` and
            ``ripple_set``.
        show_loss: If truthy, print and log the progress percentage and the
            loss for every training batch.
        config: Passed as ``config=`` to ``tf.Session``.

    Note:
        ``train_data`` is shuffled in place every epoch. Log output and
        checkpoints are written under ``SESSION_LOG_PATH``.
    """
    train_data, eval_data, test_data = data_info[0], data_info[1], data_info[2]
    n_entity, n_relation, ripple_set = data_info[3], data_info[4], data_info[5]

    logger = Logger()
    logger.create_session_folder(SESSION_LOG_PATH)
    logger.set_default_filename(SESSION_LOG_PATH + "log.txt")
    # Record the training and model hyper-parameters at the top of the log.
    logger.log(str(args))

    model = RippleNet(args, n_entity, n_relation)

    epoch_template = (
        'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
    )

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None)

        for step in range(args.n_epoch):
            np.random.shuffle(train_data)
            n_samples = train_data.shape[0]

            # training: one optimisation step per batch
            for batch_start in tqdm(range(0, n_samples, args.batch_size)):
                batch_end = batch_start + args.batch_size
                feed = _get_feed_dict(args, model, train_data, ripple_set,
                                      batch_start, batch_end)
                _, loss = model.train(sess, feed)

                if show_loss:
                    # Progress is measured at the start of the batch.
                    progress_line = '%.1f%% %.4f' % (
                        batch_start / n_samples * 100, loss)
                    print(progress_line)
                    logger.log(progress_line)

            # evaluation: collect (auc, acc) for train, eval and test in order
            epoch_values = [step]
            for split in (train_data, eval_data, test_data):
                auc, acc = _evaluation(sess, args, model, split, ripple_set)
                epoch_values += [auc, acc]

            # Save the variables to disk, one checkpoint per epoch.
            saver.save(sess, SESSION_LOG_PATH + "models/epoch_{}".format(step))

            epoch_line = epoch_template % tuple(epoch_values)
            print(epoch_line)
            logger.log(epoch_line)
Пример #3
0
def train(args, data_info, show_loss, config):
    """Train RippleNet, logging metrics and saving a checkpoint every epoch.

    Args:
        args: Settings object. ``n_epoch`` and ``batch_size`` are read here;
            the object is also logged via ``str(args)`` and forwarded to the
            model and helper functions.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation`` and
            ``ripple_set``.
        show_loss: If truthy, print and log the progress percentage and the
            loss for every training batch.
        config: Passed as ``config=`` to ``tf.Session``.

    Note:
        Relies on the module-level names ``logger`` and ``session_log_path``.
        ``train_data`` is shuffled in place every epoch.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    logger.log(str(args))

    model = RippleNet(args, n_entity, n_relation)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=None)

        for step in range(args.n_epoch):

            # training
            np.random.shuffle(train_data)
            n_train = train_data.shape[0]
            for start in tqdm(range(0, n_train, args.batch_size)):

                _, loss = model.train(
                    sess,
                    get_feed_dict(args, model, train_data, ripple_set, start,
                                  start + args.batch_size))

                if show_loss:
                    # Format once; the same text goes to stdout and the log.
                    progress_msg = '%.1f%% %.4f' % (start / n_train * 100,
                                                    loss)
                    print(progress_msg)
                    logger.log(progress_msg)

            # evaluation
            train_auc, train_acc = evaluation(sess, args, model, train_data,
                                              ripple_set, args.batch_size)
            eval_auc, eval_acc = evaluation(sess, args, model, eval_data,
                                            ripple_set, args.batch_size)
            test_auc, test_acc = evaluation(sess, args, model, test_data,
                                            ripple_set, args.batch_size)

            # Save the variables to disk.
            saver.save(sess, session_log_path + "models/epoch_{}".format(step))

            epoch_msg = (
                'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
                % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc,
                   test_acc))
            print(epoch_msg)
            logger.log(epoch_msg)
Пример #4
0
def train(args, data_info, show_loss):
    """Train a PyTorch RippleNet with Adam and evaluate after every epoch.

    Args:
        args: Settings object. ``use_cuda``, ``lr``, ``l2_weight``,
            ``n_epoch`` and ``batch_size`` are read here; the object is also
            forwarded to the model and helper functions.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation``,
            ``ripple_set``, ``adj_entity`` and ``adj_relation``. Per the
            original author's note it is produced by the data-loading step
            called from ``main``.
        show_loss: If truthy, print the epoch progress percentage and the
            loss after every training batch.

    Note:
        ``train_data`` is shuffled in place at the start of each epoch.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]
    adj_entity = data_info[6]
    adj_relation = data_info[7]

    # The model built here is the one optimised and evaluated below.
    model = RippleNet(args, n_entity, n_relation, adj_entity,
                      adj_relation)

    if args.use_cuda:
        model.cuda()
    # Only parameters that require gradients are handed to the optimiser.
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 args.lr,
                                 weight_decay=args.l2_weight)

    n_train = train_data.shape[0]

    for step in range(args.n_epoch):
        # training
        np.random.shuffle(train_data)
        start = 0
        while start < n_train:
            return_dict = model(*get_feed_dict(args, model, train_data,
                                               ripple_set, start, start +
                                               args.batch_size))
            loss = return_dict["loss"]

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            start += args.batch_size
            if show_loss:
                # The last batch may be partial, so `start` can overshoot
                # the data size; clamp to keep the progress at <= 100%.
                print('%.1f%% %.4f' %
                      (min(start, n_train) / n_train * 100, loss.item()))

        # evaluation
        train_auc, train_acc = evaluation(args, model, train_data, ripple_set,
                                          args.batch_size)
        eval_auc, eval_acc = evaluation(args, model, eval_data, ripple_set,
                                        args.batch_size)
        test_auc, test_acc = evaluation(args, model, test_data, ripple_set,
                                        args.batch_size)

        print(
            'epoch %d    train auc: %.4f  acc: %.4f    eval auc: %.4f  acc: %.4f    test auc: %.4f  acc: %.4f'
            % (step, train_auc, train_acc, eval_auc, eval_acc, test_auc,
               test_acc))
Пример #5
0
def train(args, data, show_loss):
    """Train RippleNet, then plot train/test accuracy per epoch to a PNG.

    Args:
        args: Settings object. ``n_epoch`` and ``batch_size`` are read here;
            the object is also forwarded to the model and helper functions.
        data: Indexable container holding, in order: ``train_set``,
            ``test_set``, ``num_entity``, ``num_rel``, ``ripple_set`` and
            ``sample_data``.
        show_loss: If truthy, print the epoch progress percentage and the
            loss after every training batch.

    Note:
        ``train_set`` is shuffled in place every epoch. The accuracy curves
        are written to ``test2png.png`` in the current working directory.
    """
    train_set = data[0]
    test_set = data[1]
    num_entity = data[2]
    num_rel = data[3]
    ripple_set = data[4]
    sample_data = data[5]

    model = RippleNet(args, num_entity, num_rel)
    n_train = train_set.shape[0]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_accs = []
        test_accs = []
        for s in range(args.n_epoch):
            np.random.shuffle(train_set)
            st = 0
            while st < n_train:
                _, loss = model.train(
                    sess,
                    get_feed_dict(args, model, train_set, ripple_set, st,
                                  st + args.batch_size))
                st += args.batch_size
                if show_loss:
                    # The last batch may be partial, so `st` can overshoot
                    # the data size; clamp to keep the progress at <= 100%.
                    print('%.1f%% %.4f' %
                          (min(st, n_train) / n_train * 100, loss))

            # evaluation: the trailing flag is False for the regular splits
            # and True for the sample split.
            train_auc, train_acc = evaluation(sess, args, model, train_set,
                                              ripple_set, args.batch_size,
                                              False)
            test_auc, test_acc = evaluation(sess, args, model, test_set,
                                            ripple_set, args.batch_size,
                                            False)
            # The sample split is evaluated as one batch; its metrics are not
            # used here. NOTE(review): presumably called for a side effect
            # inside `evaluation` when the flag is True -- confirm.
            _sample_auc, _sample_acc = evaluation(sess, args, model,
                                                  sample_data, ripple_set,
                                                  sample_data.shape[0], True)
            train_accs.append(train_acc)
            test_accs.append(test_acc)
            print(
                'epoch %d    train auc: %.4f  acc: %.4f  test auc: %.4f  acc: %.4f'
                % (s, train_auc, train_acc, test_auc, test_acc))

        # Accuracy curves: train in red, test in green, y-axis fixed to [0, 1].
        x_axis = list(range(len(train_accs)))
        axes = plt1.gca()
        axes.set_ylim([0, 1])
        plt1.plot(x_axis, train_accs, 'r')
        plt1.plot(x_axis, test_accs, 'g')
        plt1.savefig('test2png.png', dpi=100)
0
def train(args, data_info, show_loss):
    """Train RippleNet with fresh negative samples drawn every epoch.

    Args:
        args: Settings object. ``gpu``, ``n_epoch`` and ``batch_size`` are
            read here; the object is also forwarded to the model and helper
            functions.
        data_info: Indexable container. Index 0 holds the training
            interactions that are passed to ``get_u_is`` and
            ``generate_neg_sampling``, index 2 the test data, indices 3-5
            ``n_entity``, ``n_relation`` and ``ripple_set``, and index 6
            ``uidx_negs``, which is passed to ``test``. Index 1 is not used.
        show_loss: Accepted for interface compatibility; not used.
    """
    train_data_just_ui = data_info[0]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]
    uidx_negs = data_info[6]

    # Per-user / per-item lookups consumed by the negative sampler.
    _, iidxs, u_is = get_u_is(train_data_just_ui)

    model = RippleNet(args, n_entity, n_relation)
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for step in range(args.n_epoch):
            sampling_started = time.time()
            train_data = generate_neg_sampling(u_is, iidxs, train_data_just_ui)
            print("negative sampling done. %f s" %
                  (time.time() - sampling_started))

            np.random.shuffle(train_data)
            # The evaluation split is the last 4096 rows of the freshly
            # shuffled training data, i.e. a subset of what is trained on.
            eval_data = train_data[-4096:, :]

            n_rows = train_data.shape[0]
            start = 0
            while start < n_rows:
                stop = start + args.batch_size
                feed_dict = get_feed_dict(args, model, train_data, ripple_set,
                                          start, stop)
                # The epoch index is fed in as the model's global step.
                feed_dict[model.global_step] = step
                _, loss = model.train(sess, feed_dict)
                start = stop

            # evaluation
            eval_acc = evaluation(sess, args, model, eval_data, ripple_set,
                                  args.batch_size)
            test(sess, args, model, test_data, uidx_negs, ripple_set,
                 args.batch_size)
            # NOTE(review): no separate train accuracy is computed, so the
            # "train acc" column below repeats eval_acc.
            print('epoch %d    train acc: %.4f    eval acc: %.4f ' %
                  (step, eval_acc, eval_acc))
Пример #7
0
def train(args, data_info, show_loss):
    """Train a PyTorch RippleNet with Adam; report test metrics per epoch.

    Args:
        args: Settings object. ``use_cuda``, ``lr``, ``n_epoch`` and
            ``batch_size`` are read here; the object is also forwarded to
            the model and helper functions.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation`` and
            ``ripple_set``.
        show_loss: If truthy, print the epoch progress percentage and the
            loss after every training batch.

    Note:
        ``train_data`` is shuffled in place at the start of each epoch.
        Only the test split is evaluated; ``eval_data`` is read but unused.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)
    if args.use_cuda:
        model.cuda()
    # Only parameters that require gradients are handed to the optimiser.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        args.lr,
    )

    n_train = train_data.shape[0]

    for step in range(args.n_epoch):
        # training
        np.random.shuffle(train_data)
        start = 0
        while start < n_train:
            return_dict = model(*get_feed_dict(args, model, train_data,
                                               ripple_set, start, start +
                                               args.batch_size))
            loss = return_dict["loss"]

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            start += args.batch_size
            if show_loss:
                # The last batch may be partial, so `start` can overshoot
                # the data size; clamp to keep the progress at <= 100%.
                print('%.1f%% %.4f' %
                      (min(start, n_train) / n_train * 100, loss.item()))

        # evaluation (train/eval evaluation is disabled in this variant)
        test_precision, test_recall, test_ndcg = evaluation(
            args, model, test_data, ripple_set, args.batch_size)

        print('epoch %d    test precision: %.4f recall: %.4f ndcg: %.4f' %
              (step, test_precision, test_recall, test_ndcg))
Пример #8
0
def train(args, data_info, show_loss):
    """Train RippleNet, then score the test split and persist the results.

    Args:
        args: Settings object. ``n_epoch``, ``batch_size`` and ``dataset``
            are read here; the object is also forwarded to the model and
            helper functions.
        data_info: Indexable container holding, in order: ``train_data``,
            ``eval_data``, ``test_data``, ``n_entity``, ``n_relation`` and
            ``ripple_set``.
        show_loss: If truthy, print the epoch progress percentage and the
            loss after every batch, overwriting the same console line.

    Returns:
        The ``RippleNet`` instance. The TensorFlow session it was trained in
        is closed by the time this function returns.

    Note:
        ``train_data`` is shuffled in place at the start of each epoch.
        Only the training split is evaluated per epoch; ``eval_data`` is
        read but unused.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_entity = data_info[3]
    n_relation = data_info[4]
    ripple_set = data_info[5]

    model = RippleNet(args, n_entity, n_relation)
    n_train = train_data.shape[0]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(args.n_epoch):
            # training
            np.random.shuffle(train_data)
            start = 0
            while start < n_train:
                _, loss = model.train(
                    sess,
                    get_feed_dict(args, model, train_data, ripple_set, start,
                                  start + args.batch_size))
                start += args.batch_size
                if show_loss:
                    # The last batch may be partial, so `start` can overshoot
                    # the data size; clamp to keep the progress at <= 100%.
                    print('%.1f%% %.4f' %
                          (min(start, n_train) / n_train * 100, loss),
                          end='\r')

            # evaluation (eval/test evaluation is disabled in this variant)
            train_auc, train_acc = evaluation(sess, args, model, train_data,
                                              ripple_set, args.batch_size)
            print('epoch %d    train auc: %.4f  acc: %.4f' %
                  (step, train_auc, train_acc))

        # test: score the test split once after the final epoch and hand the
        # results to `persistence`.
        user_list, item_list, score_list = test(sess, args, model, test_data,
                                                ripple_set, args.batch_size)
        persistence(user_list, item_list, score_list, args.dataset)

    return model
Пример #9
0
def train(args, data_info, logger):
    """Train RippleNet from dataset objects, with early stopping.

    Args:
        args: Settings object. ``show_save_dataset_info``, ``topk_eval``,
            ``n_hop``, ``batch_size``, ``gpu_fract``, ``load_emb``,
            ``n_epoch`` and ``k_list`` are read here; the object is also
            forwarded to ``topk_settings``, ``Early_stop_info`` and
            ``RippleNet``.
        data_info: Indexable container. Indices 0-2 hold the train, eval and
            test data, index 3 ``n_item``, indices 5-6 ``n_entity`` and
            ``n_relation``, and index 7 ``ripple_set``. Index 4 (named
            ``n_user`` by the original code) is not needed here.
        logger: Object whose ``update_score(epoch, scores)`` is called once
            per epoch.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_item = data_info[3]
    n_entity = data_info[5]
    n_relation = data_info[6]
    ripple_set = data_info[7]

    if args.show_save_dataset_info:
        print(
            f'train({len(train_data)}), eval({len(eval_data)}), test({len(test_data)})'
        )

    if args.topk_eval:
        _, eval_record, test_record, topk_data = topk_settings(
            args, train_data, eval_data, test_data, n_item)

    def build_dataset(samples):
        # Every split is wrapped with the same ripple set and batch settings.
        return get_dataset(samples,
                           ripple_set,
                           n_hop=args.n_hop,
                           batch_size=args.batch_size)

    # create dataset
    train_dataset = build_dataset(train_data)
    eval_dataset = build_dataset(eval_data)
    test_dataset = build_dataset(test_data)
    if args.topk_eval:
        topk_dataset = build_dataset(topk_data)

    # init early stop controller
    early_stop = Early_stop_info(args)

    # Limit this process to a fraction of the GPU memory.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_fract

    with tf.Session(config=config) as sess:
        model = RippleNet(args, n_entity, n_relation, train_dataset)
        sess.run(tf.global_variables_initializer())

        # load emb from previous stage
        if args.load_emb == True:
            print('load pretrained emb ...')
            model.initialize_pretrained_embeddings(sess)

        for epoch in range(args.n_epoch):
            # NOTE(review): the return value is discarded. If get_dataset
            # yields a tf.data.Dataset, shuffle() returns a new dataset and
            # this call has no effect -- confirm.
            train_dataset.shuffle(buffer_size=1024)
            model.iter_init(sess, train_dataset)

            # start to train: step until the input iterator is exhausted
            t_start = time()
            epoch_done = False
            while not epoch_done:
                try:
                    model.train(sess)
                except tf.errors.OutOfRangeError:
                    epoch_done = True
            t_flag = time()

            # evaluation
            scores = {
                'train': evaluation(sess, model, train_dataset),
                'eval': evaluation(sess, model, eval_dataset),
                'test': evaluation(sess, model, test_dataset),
            }

            if args.topk_eval:
                # topk evaluation: copy the ranking metrics into the scores
                topk_scores = topk_evaluation(sess, model, topk_dataset,
                                              eval_record, test_record,
                                              args.k_list)
                for split in ('eval', 'test'):
                    for metric in ('p', 'r', 'ndcg'):
                        scores[split][metric] = topk_scores[split][metric]
                early_stop_score = scores['eval']['r'][-1]
            # NOTE(review): the `else:` that used to guard the next line is
            # commented out in the original, so the recall-based score above
            # is always overwritten and early stopping always uses eval AUC.
            early_stop_score = scores['eval']['auc']

            logger.update_score(epoch, scores)

            print('training time: %.1fs' % (t_flag - t_start), end='')
            print(', total: %.1fs.' % (time() - t_start))

            # Save embeddings whenever this epoch matches or beats the best
            # score recorded so far (checked before the controller updates).
            if early_stop_score >= early_stop.best_score:
                print('save embs ...', end='\r')
                model.save_pretrained_emb(sess)

            if early_stop.update_score(epoch, early_stop_score) == True:
                break

    tf.reset_default_graph()
0
def train(args, data_info, logger):
    """Train RippleNet with feed-dict batches, with early stopping.

    Args:
        args: Settings object. ``show_save_dataset_info``, ``topk_eval``,
            ``gpu_fract`` and ``batch_size`` are read here; the object is
            also forwarded to ``topk_settings``, ``Early_stop_info``,
            ``RippleNet`` and the evaluation helpers.
        data_info: Indexable container. Indices 0-2 hold the train, eval and
            test data, index 3 ``n_item``, indices 5-6 ``n_entity`` and
            ``n_relation``, index 7 ``ripple_set``, and the last element
            ``item_set_most_pop``. Index 4 (named ``n_user`` by the original
            code) is not needed here.
        logger: Object whose ``update_score(epoch, scores)`` is called once
            per epoch.

    Note:
        ``train_data`` is shuffled in place at the start of each epoch.
    """
    train_data = data_info[0]
    eval_data = data_info[1]
    test_data = data_info[2]
    n_item = data_info[3]
    n_entity = data_info[5]
    n_relation = data_info[6]
    ripple_set = data_info[7]
    item_set_most_pop = data_info[-1]

    if args.show_save_dataset_info:
        print(f'train({len(train_data)}), eval({len(eval_data)}), test({len(test_data)})')

    if args.topk_eval:
        (user_list, train_record, eval_record, test_record, item_set,
         k_list) = topk_settings(args, train_data, eval_data, test_data, n_item)

    # init early stop controller
    early_stop = Early_stop_info(args)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_fract

    with tf.Session(config=config) as sess:
        model = RippleNet(args, n_entity, n_relation)
        sess.run(tf.global_variables_initializer())

        # NOTE(review): the epoch count is hard-coded to 80 instead of being
        # read from args -- confirm this is intended.
        for epoch in range(80):
            np.random.shuffle(train_data)

            t_start = time()
            n_rows = train_data.shape[0]
            start = 0
            while start < n_rows:
                stop = start + args.batch_size
                feed = get_feed_dict(args, model, train_data, ripple_set, start, stop)
                _, loss = model.train(sess, feed)
                start = stop
            t_flag = time()

            # evaluation
            scores = {
                'train': evaluation(sess, args, model, train_data, ripple_set, args.batch_size),
                'eval': evaluation(sess, args, model, eval_data, ripple_set, args.batch_size),
                'test': evaluation(sess, args, model, test_data, ripple_set, args.batch_size),
            }

            if args.topk_eval:
                # topk evaluation: run once per mode and store the metrics,
                # rounded to 6 decimals, under that mode's scores.
                for mode in ('eval', 'test'):
                    precision, recall, ndcg, MAP, hit_ratio = topk_eval(
                        sess, args, ripple_set, model, user_list, train_record,
                        eval_record, test_record, item_set_most_pop, k_list,
                        args.batch_size, mode=mode)
                    rounded = {
                        'p': [round(value, 6) for value in precision],
                        'r': [round(value, 6) for value in recall],
                        'ndcg': [round(value, 6) for value in ndcg],
                    }
                    for metric, values in rounded.items():
                        scores[mode][metric] = values
                early_stop_score = scores['eval']['r'][2]
            # NOTE(review): the `else:` that used to guard the next line is
            # commented out in the original, so the recall-based score above
            # is always overwritten and early stopping always uses eval AUC.
            early_stop_score = scores['eval']['auc']

            logger.update_score(epoch, scores)

            print('training time: %.1fs' % (t_flag - t_start), end='')
            print(', total: %.1fs.' % (time() - t_start))

            if early_stop.update_score(epoch, early_stop_score) == True:
                break

    tf.reset_default_graph()