Example #1
def job(model_type, f, train_set, test_set, output_fold, batch_size, pc, ps):
    cm = SDBN(pc, ps)

    for r in range(1, 2):
        # np.random.seed(r)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
        print("PDGD fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, cmrr_scores, final_weights = run(
            train_set, test_set, ranker, NUM_INTERACTION, cm, batch_size)
        os.makedirs(os.path.dirname("{}/fold{}/".format(output_fold, f)),
                    exist_ok=True)  # create directory if not exist
        with open(
                "{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, f,
                                                     model_type, r),
                "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, f,
                                                      model_type, r),
                "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "{}/fold{}/{}_run{}_cmrr.txt".format(output_fold, f,
                                                     model_type, r),
                "wb") as fp:
            pickle.dump(cmrr_scores, fp)
        with open(
                "{}/fold{}/{}_run{}_weights.txt".format(
                    output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(final_weights, fp)
        print("PDGD fold{} {} run{} finished!".format(f, model_type, r))
Example #2
def job(model_type, f, train_set, test_set, tau, sigma, gamma, num_rankers, learning_rate_decay, output_fold):
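    # pc/ps: per-relevance-grade click and stop probabilities that parameterise
    # the SDBN click simulator for each simulated user model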
    if model_type == "perfect":
        pc = [0.0, 0.2, 0.4, 0.8, 1.0]
        ps = [0.0, 0.0, 0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.3, 0.5, 0.7, 0.95]
        ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.6, 0.7, 0.8, 0.9]
        ps = [0.1, 0.2, 0.3, 0.4, 0.5]

    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = ESLinearRanker(FEATURE_SIZE, Learning_rate, sigma, tau, gamma, learning_rate_decay=learning_rate_decay)
        print("ES fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set, ranker, NUM_INTERACTION, cm, num_rankers)
        with open(
                "{}/fold{}/{}_sigma{}_run{}_ndcg.txt".format(output_fold, f, model_type, sigma, r),
                "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "{}/fold{}/{}_sigma{}_run{}_cndcg.txt".format(output_fold, f, model_type, sigma, r),
                "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "{}/fold{}/{}_sigma{}_run{}_final_weight.txt".format(output_fold, f, model_type, sigma, r),
                "wb") as fp:
            pickle.dump(final_weight, fp)
        print("ES sigma{} fold{} {} run{} finished!".format(output_fold, sigma, f, model_type, r))
Example #3
def job(model_type, f, train_set, test_set, tau, r):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]

    cm = SDBN(train_set, pc, ps)
    # np.random.seed(r)
    ranker1 = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
    ranker2 = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
    print("PDGD tau{} fold{} {} run{} start!".format(tau, f, model_type, r))
    final_weight1, final_weight2 = run(train_set, test_set, ranker1, ranker2,
                                       NUM_INTERACTION, cm)
    with open(
            "./results/multiple_ranker/mq2007/PDGD/fold{}/{}_tau{}_run{}_ranker1_weights.txt"
            .format(f, model_type, tau, r), "wb") as fp:
        pickle.dump(final_weight1, fp)
    with open(
            "./results/multiple_ranker/mq2007/PDGD/fold{}/{}_tau{}_run{}_ranker2_weights.txt"
            .format(f, model_type, tau, r), "wb") as fp:
        pickle.dump(final_weight2, fp)
    print("PDGD tau{} fold{} {} run{} finished!".format(tau, f, model_type, r))
Example #4
def job(model_type, f, train_set, test_set, tau):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]

    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate, tau)
        print("PDGD tau{} fold{} {} run{} start!".format(
            tau, f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set,
                                                      ranker, NUM_INTERACTION,
                                                      cm)
        with open(
                "../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_ndcg.txt"
                .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_cndcg.txt"
                .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "../results/exploration/mq2007/PDGD/fold{}/{}_tau{}_run{}_final_weight.txt"
                .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("PDGD tau{} fold{} {} run{} finished!".format(
            tau, f, model_type, r))
Example #5
def job(model_type, Learning_rate, NUM_INTERACTION, f, train_set, intent_paths,
        output_fold, num_groups, group_sequence):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]

    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]

    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]

    elif model_type == "noisy":
        pc = [0.4, 0.6]
        ps = [0.0, 0.0]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set,
                                      intent_paths,
                                      num_groups=num_groups)
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)

        print("PDGD intent change {} fold{} run{} start!".format(
            model_type, f, r))
        ndcg_scores, cndcg_scores = run(datasets, ranker, NUM_INTERACTION, cm,
                                        group_sequence)
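        # ndcg_scores[0] tracks nDCG w.r.t. the user's current intent;
        # ndcg_scores[1:] track nDCG w.r.t. each fixed intent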

        # create directory if not exist
        os.makedirs(os.path.dirname("{}/current_intent/fold{}/".format(
            output_fold, f)),
                    exist_ok=True)
        with open(
                "{}/current_intent/fold{}/{}_run{}_cndcg.txt".format(
                    output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)

        with open(
                "{}/current_intent/fold{}/{}_run{}_ndcg.txt".format(
                    output_fold, f, model_type, r), "wb") as fp:
            pickle.dump(ndcg_scores[0], fp)

        for i in range(len(ndcg_scores) - 1):  # per-intent nDCG lists start at index 1
            os.makedirs(os.path.dirname("{}/intent{}/fold{}/".format(
                output_fold, i + 1, f)),
                        exist_ok=True)  # create directory if it does not exist

            with open(
                    "{}/intent{}/fold{}/{}_run{}_ndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores[i + 1], fp)

        print("PDGD intent change {} run{} finish!".format(model_type, r))
        print()
Example #6
def job(model_type, f, train_set, test_set, tau, step_size, gamma, num_rankers,
        learning_rate_decay, output_fold):
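    # num_rankers: number of candidate rankers COLTR samples each interaction
    # and evaluates counterfactually against the logged clicks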
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]

    # if model_type == "perfect":
    #     pc = [0.0, 0.2, 0.4, 0.8, 1.0]
    #     ps = [0.0, 0.0, 0.0, 0.0, 0.0]
    # elif model_type == "navigational":
    #     pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    #     ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    # elif model_type == "informational":
    #     pc = [0.4, 0.6, 0.7, 0.8, 0.9]
    #     ps = [0.1, 0.2, 0.3, 0.4, 0.5]

    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = COLTRLinearRanker(FEATURE_SIZE,
                                   Learning_rate,
                                   step_size,
                                   tau,
                                   gamma,
                                   learning_rate_decay=learning_rate_decay)

        print("COTLR {} tau{} fold{} {} run{} start!".format(
            output_fold, tau, f, model_type, r))
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set,
                                                      ranker, NUM_INTERACTION,
                                                      cm, num_rankers)
        with open(
                "{}/fold{}/{}_tau{}_run{}_ndcg.txt".format(
                    output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "{}/fold{}/{}_tau{}_run{}_cndcg.txt".format(
                    output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "{}/fold{}/{}_tau{}_run{}_final_weight.txt".format(
                    output_fold, f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("COTLR {} tau{} fold{} {} run{} finished!".format(
            output_fold, tau, f, model_type, r))

        utility.send_progress("@arvin {}".format(model_type), r, 25,
                              "final ndcg {}".format(ndcg_scores[-1]))
Example #7
def job(model_type, Learning_rate, NUM_INTERACTION, f, train_set, intent_paths,
        output_fold, num_groups):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]

    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]

    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]

    elif model_type == "noisy":
        pc = [0.4, 0.6]
        ps = [0.0, 0.0]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set,
                                      intent_paths,
                                      num_groups=num_groups)

        # for i in range(len(datasets)):
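        # train one independent ranker per intent group (group 1 first, then group 0)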
        for i in [1, 0]:
            ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
            print("PDGD intent fixed {} intent {} run{} start!".format(
                model_type, i, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker,
                                            NUM_INTERACTION, cm)

            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(
                output_fold, i + 1, f)),
                        exist_ok=True)
            with open(
                    "{}/group{}/fold{}/{}_run{}_ndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open(
                    "{}/group{}/fold{}/{}_run{}_cndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)

            print("PDGD intent fixed {} intent {} run{} finished!".format(
                model_type, i, r))
            print()
Example #8
def job(model_type, f, train_set, intent_paths, tau, step_size, gamma,
        num_rankers, learning_rate_decay, output_fold):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]

    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]

    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths)

        for i in range(len(datasets)):
            ranker = COLTRLinearRanker(FEATURE_SIZE,
                                       Learning_rate,
                                       step_size,
                                       tau,
                                       gamma,
                                       learning_rate_decay=learning_rate_decay)

            print("COLTR fixed intent {} fold{} run{} start!".format(
                model_type, f, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker,
                                            NUM_INTERACTION, cm, num_rankers)

            # create directory if not exist
            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(
                output_fold, i + 1, f)),
                        exist_ok=True)
            with open(
                    "{}/group{}/fold{}/{}_run{}_ndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open(
                    "{}/group{}/fold{}/{}_run{}_cndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)

            print("COLTR fixed intent {} run{} finish!".format(model_type, r))
            print()
Example #9
def job(model_type, f, train_set, intent_paths, delta, alpha, FEATURE_SIZE,
        num_rankers, output_fold):
    if model_type == "perfect":
        pc = [0.0, 1.0]
        ps = [0.0, 0.0]

    elif model_type == "navigational":
        pc = [0.05, 0.95]
        ps = [0.2, 0.9]

    elif model_type == "informational":
        pc = [0.3, 0.7]
        ps = [0.1, 0.5]
    # cm = PBM(pc, 1)
    cm = SDBN(pc, ps)

    for r in range(1, 26):
        random.seed(r)
        np.random.seed(r)
        datasets = get_groups_dataset(train_set, intent_paths)
        # create directory if not exist

        for i in range(len(datasets)):
            ranker = ProbabilisticRanker(delta, alpha, FEATURE_SIZE)

            print("PDGD intent fixed {} intent {} run{} start!".format(
                model_type, i, r))
            ndcg_scores, cndcg_scores = run(datasets[i], ranker,
                                            NUM_INTERACTION, cm, num_rankers)

            os.makedirs(os.path.dirname("{}/group{}/fold{}/".format(
                output_fold, i + 1, f)),
                        exist_ok=True)
            with open(
                    "{}/group{}/fold{}/{}_run{}_ndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(ndcg_scores, fp)
            with open(
                    "{}/group{}/fold{}/{}_run{}_cndcg.txt".format(
                        output_fold, i + 1, f, model_type, r), "wb") as fp:
                pickle.dump(cndcg_scores, fp)

            print("PDGD intent fixed {} intent {} run{} finished!".format(
                model_type, i, r))
            print()
Example #10
def job(model_type, f, train_set, test_set, output_fold):
    if model_type == "perfect":
        # pc = [0.0, 0.2, 0.4, 0.8, 1.0]
        # pc = [0.0, 0.5, 1.0]
        pc = [0.0, 1.0]
        # ps = [0.0, 0.0, 0.0, 0.0, 0.0]
        # ps = [0.0, 0.0, 0.0]
        ps = [0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.3, 0.5, 0.7, 0.95]
        # pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.3, 0.5, 0.7, 0.9]
        # ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        # pc = [0.4, 0.6, 0.7, 0.8, 0.9]
        # pc = [0.4, 0.7, 0.9]
        pc = [0.1, 0.9]
        # ps = [0.1, 0.2, 0.3, 0.4, 0.5]
        # ps = [0.1, 0.3, 0.5]
        ps = [0.1, 0.5]

    cm = SDBN(pc, ps)

    for r in range(1, 16):
        # np.random.seed(r)
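        # 136-dimensional feature vectors (the size used by e.g. MSLR-WEB10K/30K)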
        FEATURE_SIZE = 136
        ranker = PDGDLinearRanker(FEATURE_SIZE, Learning_rate)
        print("PDGD fold{} {} run{} start!".format(f, model_type, r))
        ndcg_scores, cndcg_scores, final_weights = run(train_set, test_set, ranker, NUM_INTERACTION, cm)
        os.makedirs(os.path.dirname("{}/fold{}/".format(output_fold, f)),
                    exist_ok=True)  # create directory if not exist
        with open(
                "{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, f, model_type, r),
                "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, f, model_type, r),
                "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "{}/fold{}/{}_run{}_weights.txt".format(output_fold, f, model_type, r),
                "wb") as fp:
            pickle.dump(final_weights, fp)
        print("PDGD fold{} {} run{} finished!".format(f, model_type, r))
Example #11
def job(model_type, f, train_set, test_set, tau, step_size, gamma, num_rankers,
        learning_rate_decay):
    if model_type == "perfect":
        pc = [0.0, 0.5, 1.0]
        ps = [0.0, 0.0, 0.0]
    elif model_type == "navigational":
        pc = [0.05, 0.5, 0.95]
        ps = [0.2, 0.5, 0.9]
    elif model_type == "informational":
        pc = [0.4, 0.7, 0.9]
        ps = [0.1, 0.3, 0.5]

    cm = SDBN(pc, ps)

    for r in range(1, 26):
        # np.random.seed(r)
        ranker = CMAESLinearRanker(FEATURE_SIZE,
                                   Learning_rate,
                                   step_size,
                                   tau,
                                   gamma,
                                   learning_rate_decay=learning_rate_decay)
        print("COTLR start!")
        ndcg_scores, cndcg_scores, final_weight = run(train_set, test_set,
                                                      ranker, NUM_INTERACTION,
                                                      cm, num_rankers)
        with open(
                "../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_ndcg.txt".
                format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(ndcg_scores, fp)
        with open(
                "../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_cndcg.txt".
                format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(cndcg_scores, fp)
        with open(
                "../results/COLTR/mq2007/fold{}/{}_tau{}_run{}_final_weight.txt"
                .format(f, model_type, tau, r), "wb") as fp:
            pickle.dump(final_weight, fp)
        print("COTLR tau{} fold{} {} run{} finished!".format(
            tau, f, model_type, r))
Example #12
from clickModel.RCTR import RCTR
from clickModel.Mixed import Mixed
# assumed import paths for the click models used below
from clickModel.DCTR import DCTR
from clickModel.SDBN import SDBN
from clickModel.SDBN_reverse import SDBN_reverse
from clickModel.UBM import UBM
from utils import read_file as rf
from utils import utility
from dataset import LetorDataset
# import matplotlib.pyplot as plt
import numpy as np
import multiprocessing as mp

train_path = "../datasets/ltrc_yahoo/set1.train.txt"
print("loading training set.......")
train_set = LetorDataset(train_path, 700)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
mixed_models = [DCTR(pc), SDBN(pc, ps), UBM(pc)]
datasets_simulator = [
    ('SDBN', SDBN(pc, ps)),
    # ('SDCM', SDCM(pc)),
    # ('CM', CM(pc)),
    ('DCTR', DCTR(pc)),
    ('UBM', UBM(pc)),
    ('SDBN_reverse', SDBN_reverse(pc, ps))
]
click_model = RCTR()

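# train the RCTR click model on each simulator's click logs (train_set1 .. train_set15)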
for dataset, simulator in datasets_simulator:
    for id in range(1, 16):
        click_log_path = "../click_logs/{}/train_set{}.txt".format(dataset, id)
        click_log = rf.read_click_log(click_log_path)
        click_model.train(click_log)
Example #13
    # plt.show()


# %%
if __name__ == "__main__":
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.LetorDataset.txt"
    print("loading training set.......")
    with open(train_path, "rb") as fp:
        train_set = pickle.load(fp)
    # %%
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]

    datasets_simulator = [
        ('SDBN', SDBN(pc, ps)),
        # ('SDCM', SDCM(pc)),
        # ('CM', CM(pc)),
        ('DCTR', DCTR(pc)),
        ('UBM', UBM(pc)),
        # ('SDBN_reverse', SDBN_reverse(pc, ps))
    ]

    progress = 0
    for dataset, simulator in datasets_simulator:
        for id in range(2, 16):
            click_log_path = "../click_logs/{}/train_set{}.txt".format(
                dataset, id)
            test_click_log_path = "../click_logs/{}/seen_set{}.txt".format(
                dataset, id)
            query_frequency_path = "../click_logs/{}/query_frequency{}.txt".format(
Example #14
    print("loading testing set.......")
    test_set = LetorDataset(test_path, 700)
    # %%
    # pc = [0.4, 0.6, 0.7, 0.8, 0.9]
    # ps = [0.1, 0.2, 0.3, 0.4, 0.5]
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
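    # generate click logs with four different click models in parallel, one process per model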
    for id in range(1, 16):
        p1 = mp.Process(target=generate_dataset,
                        args=(train_set, test_set, DCTR(pc),
                              "../feature_click_datasets/DCTR/", id))
        p2 = mp.Process(target=generate_dataset,
                        args=(train_set, test_set, CM(pc),
                              "../feature_click_datasets/CM/", id))
        p3 = mp.Process(target=generate_dataset,
                        args=(train_set, test_set, SDBN(pc, ps),
                              "../feature_click_datasets/SDBN/", id))
        p4 = mp.Process(target=generate_dataset,
                        args=(train_set, test_set, SDCM(pc),
                              "../feature_click_datasets/SDCM/", id))

        p1.start()
        p2.start()
        p3.start()
        p4.start()
        p1.join()
        p2.join()
        p3.join()
        p4.join()
        print(" ")
Example #15
import pickle
import bz2
from dataset import LetorDataset
from clickModel.SDBN import SDBN
# assumed import paths for NCM and the click-log reader used below
from clickModel.NCM import NCM
from utils import read_file as rf
import sys
import tensorflow as tf

train_path = "../datasets/ltrc_yahoo/set1.train.txt"
print("loading training set.......")
dataset = LetorDataset(train_path, 700)

model = NCM(64, 1024, 10240)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
simulator = SDBN(pc, ps)

click_log_path = "../feature_click_datasets/{}/train_set_test.txt".format(
    "SDBN", "1")
# click_log_path = "../click_logs/{}/train_set{}_small.txt".format("SDBN", "1")

click_log = rf.read_click_log(click_log_path)

model.initial_representation(click_log)
# model.save_training_set_numpy(click_log, "test", "SDBN")

# model.save_training_set(click_log, "../click_logs/{}/train_set{}_small_NCM.tfrecord".format("SDBN", "1"), "SDBN")
model.save_training_tfrecord(click_log, "NCM_test.tfrecord", "SDBN")
# model.save_training_set_numpy(click_log, "../click_logs/{}/train_set{}_NCM".format("SDBN", "1"), "SDBN")
# model.save_training_set_numpy(click_log, "test", "SDBN")
Example #16
    # plt.show()


# %%
if __name__ == "__main__":
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.train.txt"
    test_path = "../datasets/ltrc_yahoo/set1.test.txt"
    print("loading training set.......")
    train_set = LetorDataset(train_path, 700)
    # %%
    # print("loading testing set.......")
    # test_set = LetorDataset(test_path, 700)
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    datasets_simulator = [('SDBN', SDBN(pc, ps)), ('SDCM', SDCM(pc)),
                          ('CM', CM(pc)), ('DCTR', DCTR(pc))]
    # datasets = ['CM']
    for dataset, simulator in datasets_simulator:
        for id in range(1, 2):
            click_log_path = "../feature_click_datasets/{}/train_set{}.txt".format(
                dataset, id)
            test_click_log_path = "../feature_click_datasets/{}/seen_set{}.txt".format(
                dataset, id)
            query_frequency_path = "../feature_click_datasets/{}/query_frequency{}.txt".format(
                dataset, id)
            click_log = rf.read_click_log(click_log_path)
            test_click_log = rf.read_click_log(test_click_log_path)
            query_frequency = rf.read_query_frequency(query_frequency_path)

            click_models = [SDBN(), SDCM(), CM(), DCTR()]
Example #17
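    # report perplexity and MSE per query-frequency bucket for the trained click model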
    for freq in frequencies:
        perplexities = click_model.get_perplexity(np.array(test_logs[freq]))
        MSEs = click_model.get_MSE(np.array(test_logs[freq]), dataset, simulator)

        perplexity_line = "Frequency " + freq + " perplexities:"
        MSEs_line = "Frequency " + freq + " MSE:"
        for perp in perplexities:
            perplexity_line += " " + str(perp)
        for MSE in MSEs:
            MSEs_line += " " + str(MSE)
        f.write(perplexity_line + "\n")
        f.write(MSEs_line + "\n")

    f.close()

if __name__ == "__main__":
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    Mixed_models = [DCTR(pc), SDBN(pc, ps), UBM(pc)]
    # simulators = [SDBN(pc, ps), Mixed(Mixed_models), DCTR(pc), UBM(pc)]
    simulators = [SDBN(pc, ps), DCTR(pc), UBM(pc)]

    dataset_path = "../datasets/ltrc_yahoo/set1.train.txt"
    print("loading training set.......")
    dataset = LetorDataset(dataset_path, 700)

    for r in range(1, 2):
        for simulator in simulators:
            run(simulator, dataset, r)
Example #18
generator = "Mixed"

click_log_path = "../feature_click_datasets/{}/train_set1.txt".format(generator)
test_click_log_path =  "../feature_click_datasets/{}/seen_set1.txt".format(generator)
click_log = rf.read_click_log(click_log_path)
test_click_log = rf.read_click_log(test_click_log_path)


# #
dataset = tf.data.TFRecordDataset(filenames='../feature_click_datasets/{}/train_set1.tfrecord'.format(generator))
# # # test_dataset = tf.data.TFRecordDataset(filenames='../feature_click_datasets/SDBN/seen_set1.tfrecord')
# # #%%
pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
Mixed_models = [DCTR(pc), CM(pc), SDBN(pc, ps), SDCM(pc), UBM(pc)]
simulator = Mixed(Mixed_models)
print(click_log.shape)
print(test_click_log.shape)
#
click_model = LSTMv2(700, 1024, train_set, batch_size=128, epoch=5)
print(click_model.get_MSE(test_click_log[np.random.choice(test_click_log.shape[0], 1000)], train_set, simulator))
click_model.train(dataset)

print(click_model.get_MSE(test_click_log[np.random.choice(test_click_log.shape[0], 1000)], train_set, simulator))

click_model.model.save("../click_model_results/LSTM_models/{}_train_set1.h5".format(generator))



# test model
Example #19
def run(train_set, test_set, ranker1, ranker2, num_interation, click_model):
    click_predictor = SDBN()
    ndcg_scores1 = []
    cndcg_scores1 = []
    ndcg_scores2 = []
    cndcg_scores2 = []
    query_set = train_set.get_all_querys()
    np.random.shuffle(query_set)
    index = np.random.randint(query_set.shape[0], size=num_interation)

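    # sample queries non-uniformly: a softmax (temperature 0.2) over random Gaussian
    # scores makes a few queries dominate the interaction stream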
    pdf = np.random.normal(size=query_set.shape[0])
    e_x = np.exp((pdf - np.max(pdf)) / 0.2)
    probs = e_x / e_x.sum(axis=0)

    querys = np.random.choice(query_set,
                              replace=True,
                              p=probs,
                              size=num_interation)

    num_interaction = 0
    correct = 0
    wrong = 0
    test1 = 0
    test2 = 0
    for qid in querys:
        num_interaction += 1
        # qid = query_set[i]

        result_list1, scores1 = ranker1.get_query_result_list(train_set, qid)
        result_list2, scores2 = ranker2.get_query_result_list(train_set, qid)

        clicked_doc1, click_label1, _ = click_model.simulate(
            qid, result_list1, train_set)
        clicked_doc2, click_label2, _ = click_model.simulate(
            qid, result_list2, train_set)
        #
        last_exam = None
        # if len(clicked_doc2) > 0:
        #     last_exam = np.where(click_label2 == 1)[0][-1] + 1
        #
        #     click_predictor.online_training(qid, result_list2, click_label2)
        #     reduce, reduced_index = click_predictor.click_noise_reduce(qid, result_list2, click_label2, 0.5, 20)
        #
        #     if reduce:
        #         for rank in reduced_index:
        #             # print(train_set.get_relevance_label_by_query_and_docid(qid, result_list2[rank]))
        #             if train_set.get_relevance_label_by_query_and_docid(qid, result_list2[rank]) == 0:
        #                 correct += 1
        #             else:
        #                 wrong += 1
        #         # print(correct, wrong)
        # oracle denoising for ranker2: drop clicks on documents whose true
        # relevance label is 0 before the ranker updates on them
        for j in np.where(click_label2 == 1)[0]:
            rel = train_set.get_relevance_label_by_query_and_docid(
                qid, result_list2[j])
            if rel == 0:
                click_label2[j] = 0

        ranker1.update_to_clicks(click_label1, result_list1, scores1,
                                 train_set.get_all_features_by_query(qid))
        ranker2.update_to_clicks(click_label2, result_list2, scores2,
                                 train_set.get_all_features_by_query(qid),
                                 last_exam)

        all_result1 = ranker1.get_all_query_result_list(test_set)
        ndcg1 = evl_tool.average_ndcg_at_k(test_set, all_result1, 10)
        cndcg1 = evl_tool.query_ndcg_at_k(train_set, result_list1, qid, 10)

        all_result2 = ranker2.get_all_query_result_list(test_set)
        ndcg2 = evl_tool.average_ndcg_at_k(test_set, all_result2, 10)
        cndcg2 = evl_tool.query_ndcg_at_k(train_set, result_list2, qid, 10)

        ndcg_scores1.append(ndcg1)
        cndcg_scores1.append(cndcg1)
        ndcg_scores2.append(ndcg2)
        cndcg_scores2.append(cndcg2)
        final_weight1 = ranker1.get_current_weights()
        final_weight2 = ranker2.get_current_weights()

        test1 += ndcg1
        test2 += ndcg2
    print(test1, test2)
    print(np.mean(ndcg_scores1), np.mean(ndcg_scores2))

    return ndcg_scores1, cndcg_scores1, final_weight1, ndcg_scores2, cndcg_scores2, final_weight2
Example #20
from clickModel.NCM_TF import NCM
from utils import read_file as rf
import numpy as np
import pickle
from dataset import LetorDataset
from clickModel.SDBN import SDBN

model = NCM(774, 100, 10240+1024+1, 2)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
simulator = SDBN(pc, ps)

click_log_path = "../feature_click_datasets/{}/train_set{}.txt".format("SDBN", "_test")

click_log = rf.read_click_log(click_log_path)

model.initial_representation(click_log)

# session = np.array(['1112', '16', '3', '45', '37', '31', '22', '5', '34', '17', '21', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0' ])
#
model.save_training_set(click_log, "")

# with open("X.txt", "rb") as fp:
#     X = pickle.load(fp)
#
# with open("Y.txt", "rb") as fp:
#     Y = pickle.load(fp)
#
# train_path = "../datasets/ltrc_yahoo/set1.train.txt"
#