def sample_generation(args):
    """Run one PageRank variant and dump a sample score vector to a text file.

    Prints the NumPy and SciPy versions, loads the input data through
    ``preprocessing``, builds the ranker selected by ``args.pagerank``, runs
    ``converge()`` on it and writes the result with ``np.savetxt``:

    * ``"gpr"``   -> ``GPR.txt`` (``pr.ranked_vector`` as is)
    * ``"ptspr"`` -> ``PTSPR-U2Q1.txt`` (``pr.ranked_matrix`` mixed with the
      user topic probabilities stored under key ``"2-1"``)
    * ``"qtspr"`` -> ``QTSPR-U2Q1.txt`` (``pr.ranked_matrix`` mixed with the
      query topic probabilities stored under key ``"2-1"``)

    Args:
        args: Parsed options object. Reads ``transition_matrix``,
            ``document_topic``, ``user_topic_interest``,
            ``query_topic_relation``, ``search_relevance``, ``pagerank``,
            ``dampening_factor`` and, for the topic-sensitive variants,
            ``topic_factor``.

    Raises:
        ValueError: If ``args.pagerank`` is not ``"gpr"``, ``"ptspr"`` or
            ``"qtspr"``.
    """
    # Preprocessing Step
    print("Numpy Version Check")
    print(np.__version__)
    print("Scipy Version Check")
    print(scipy.__version__)
    data_dicts = preprocessing(transition_matrix_path=args.transition_matrix,
                               doc_topics_path=args.document_topic,
                               user_topic_path=args.user_topic_interest,
                               query_topic_path=args.query_topic_relation,
                               search_relevance_path=args.search_relevance)

    # GPR, PTSPR, QTSPR construction
    if args.pagerank == "gpr":
        pr = PageRank(trans_matrix=data_dicts['transition_matrix'],
                      dampening_factor=args.dampening_factor)
    elif args.pagerank in ("ptspr", "qtspr"):
        pr = TopicSensitivePageRank(
            trans_matrix=data_dicts['transition_matrix'],
            topic_matrix=data_dicts['doc_topic_matrix'],
            dampening_factor=args.dampening_factor,
            topic_factor=args.topic_factor)
    else:
        # Without this branch `pr` would stay unbound and the failure would
        # surface later as an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unknown pagerank option: {!r} "
            "(expected 'gpr', 'ptspr' or 'qtspr')".format(args.pagerank))

    pr.converge()

    if args.pagerank == "gpr":
        np.savetxt("GPR.txt", pr.ranked_vector, delimiter=" ")
    else:
        # The "2-1" key and the "U2Q1" file suffix presumably both refer to
        # user 2 / query 1 -- TODO confirm against the preprocessing output.
        if args.pagerank == "ptspr":
            topic_prob = data_dicts['user_topic_probs']["2-1"]
            out_path = "PTSPR-U2Q1.txt"
        else:
            topic_prob = data_dicts['query_topic_probs']["2-1"]
            out_path = "QTSPR-U2Q1.txt"

        # reshape(-1, 1) turns the probabilities into a column vector without
        # hard-coding the number of topics. The view/squeeze pair then yields
        # a plain ndarray with singleton axes removed (ranked_matrix is
        # presumably an np.matrix, making `*` a matrix product -- confirm).
        vector = (pr.ranked_matrix * topic_prob.reshape(-1, 1)).view(
            np.ndarray).squeeze()
        np.savetxt(out_path, vector, delimiter=" ")
    print("===================== END =====================")
# Example #2
# 0
def main(args):
    """Rank every query's candidate documents and write the run to a file.

    Loads the input data through ``preprocessing``, builds the ranker
    selected by ``args.pagerank``, times its ``converge()`` call, then calls
    ``pr.ranking`` once per query in ``data_dicts['search_relevance_score']``.
    Each ranked document becomes one line of six space-separated fields::

        <query_ID> Q0 <document index + 1> <rank> <score> <args.cfg>

    (this resembles the TREC run-file layout -- presumably meant for
    trec_eval; confirm). The lines are written to
    ``<args.pagerank>_<args.cfg>.txt``.

    Args:
        args: Parsed options object. Reads ``transition_matrix``,
            ``document_topic``, ``user_topic_interest``,
            ``query_topic_relation``, ``search_relevance``, ``pagerank``,
            ``dampening_factor``, ``criterion``, ``cfg`` and, for the
            topic-sensitive variants, ``topic_factor``.

    Raises:
        ValueError: If ``args.pagerank`` is not ``"gpr"``, ``"ptspr"`` or
            ``"qtspr"``.
    """
    # Preprocessing Step
    data_dicts = preprocessing(transition_matrix_path=args.transition_matrix,
                               doc_topics_path=args.document_topic,
                               user_topic_path=args.user_topic_interest,
                               query_topic_path=args.query_topic_relation,
                               search_relevance_path=args.search_relevance)

    # GPR, PTSPR, QTSPR construction
    if args.pagerank == "gpr":
        pr = PageRank(trans_matrix=data_dicts['transition_matrix'],
                      dampening_factor=args.dampening_factor)
    elif args.pagerank in ("ptspr", "qtspr"):
        pr = TopicSensitivePageRank(
            trans_matrix=data_dicts['transition_matrix'],
            topic_matrix=data_dicts['doc_topic_matrix'],
            dampening_factor=args.dampening_factor,
            topic_factor=args.topic_factor)
    else:
        # Without this branch `pr` would stay unbound and the failure would
        # surface later as an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unknown pagerank option: {!r} "
            "(expected 'gpr', 'ptspr' or 'qtspr')".format(args.pagerank))

    # perf_counter is monotonic, so the reported durations cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    pr_start = time.perf_counter()
    pr.converge()
    print("Power iteration - {} required time: {:.3f}seconds".format(
        args.pagerank, time.perf_counter() - pr_start))

    # Pick the per-query topic distribution table once. GPR uses none, so it
    # never touches (or requires) either topic table.
    if args.pagerank == "ptspr":
        topic_probs = data_dicts['user_topic_probs']
    elif args.pagerank == "qtspr":
        topic_probs = data_dicts['query_topic_probs']
    else:
        topic_probs = None

    pr_result = []
    for query_ID, (candidate_indices, retrieval_scores) in data_dicts[
            'search_relevance_score'].items():
        if topic_probs is None:
            pr_indices, pr_scores = pr.ranking(candidate_indices,
                                               retrieval_scores,
                                               criterion=args.criterion)
        else:
            pr_indices, pr_scores = pr.ranking(candidate_indices,
                                               retrieval_scores,
                                               topic_probs[query_ID],
                                               criterion=args.criterion)

        # Emit at most one line per candidate; ranks are 1-based. The "+ 1"
        # on the document index presumably converts a 0-based matrix index
        # back to a 1-based document ID -- confirm against the input data.
        ranks = range(1, len(candidate_indices) + 1)
        for rank, doc_index, score in zip(ranks, pr_indices, pr_scores):
            pr_result.append(" ".join([
                query_ID,
                "Q0",
                str(doc_index + 1),
                str(rank),
                str(score),
                args.cfg,
            ]))

    with open(args.pagerank + "_" + args.cfg + ".txt", "w") as f:
        f.write("\n".join(pr_result))

    # Total covers power iteration plus ranking and writing; preprocessing
    # happened before pr_start and is deliberately excluded.
    print("total {} required time : {:.3f}seconds".format(
        args.pagerank, time.perf_counter() - pr_start))
    print("===================== END =====================")