# Exemplo n.º 1 (scraped-sample separator)
def get_top(data, topn, threshold):
    """Extract the ``topn`` key sentences from raw text.

    :param data: raw text to summarize
    :param topn: number of key sentences to return
    :param threshold: similarity cutoff used when building the sentence graph
    :returns: list of at most ``topn`` key sentences
    """
    sentences, tokens = ms.tokenize(data)
    vocabulary = list(set(ms.get_words(data)))
    idf_scores = similarity.idf(tokens, vocabulary, len(sentences))
    # NOTE(review): the similarity matrix is computed but unused here;
    # the call is kept in case it has side effects — verify.
    similarity.get_similarity_matrix(tokens, idf_scores)
    graph = build_graph(sentences, threshold, idf_scores)
    return get_keysentences(graph)[:topn]
def prediction(input_data, target="item", method="weighted_sum"):
    '''
    Predict value not rated

    :param input_data: rating data to be predicted
    :param target: "item" for item-based prediction, anything else for
        user-based (controls which axis the aggregation runs over)
    :param method: prediction method(weighted_sum, linear_regression, ...)

    :returns: predicted rating value (None for unimplemented methods)
    '''
    sim_matrix = similarity.get_similarity_matrix(input_data, target=target)

    # Item-based works along axis 0, user-based along axis 1.
    chosen_axis = 0 if target == "item" else 1

    if method == "weighted_sum":
        return weighted_sum(input_data, sim_matrix, axis=chosen_axis)
    if method == "linear_regression":
        # Not implemented yet; falls through and returns None.
        pass
#%%
# Demo fixture: 8 users x 6 restaurants rating matrix.
# NOTE(review): a rating of 0 presumably marks an unrated entry to be
# predicted — confirm against get_similarity_matrix/weighted_sum.
user_list = [
    "Chang", "Chan", "jmpark", "Ruby", "suji kang", "Cold New User",
    "Hot New User", "Chang's soul mate"
]
restaurant_list = ["아비꼬", "롱타임노씨", "피맥하우스", "오신 매운갈비찜", "오빠닭", "바나나 피자"]

# Rows follow user_list order; columns follow restaurant_list order.
test_data = np.array([[2.5, 3.5, 3.0, 3.5, 2.5, 3.0],
                      [3.0, 3.5, 1.5, 5.0, 3.5, 3.0],
                      [2.5, 3.5, 0, 3.5, 0, 4.0], [0, 3.5, 3.0, 4.0, 2.5, 4.5],
                      [1.0, 0, 3.0, 2.0, 0, 1.5], [0, 0, 0, 0, 0, 0],
                      [1.0, 0, 2.5, 3.5, 3.5, 4.5],
                      [2.5, 3.5, 3.0, 3.5, 2.5, 3.0]])

#%%
# Item-based similarity between restaurant rating columns.
similarity_matrix = get_similarity_matrix(test_data, target="item")
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print("@@@@@Item-based Similarity Matrix@@@@@@@@@@@@@@@@@@@@@@")
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print(similarity_matrix)
print("\n\n")

# Fill in unrated entries using item-based weighted-sum prediction.
result = prediction(test_data, target="item")
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print("@@@@@Item-based Predicted Rating Matrix@@@@@@@@@@@@@@@@")
print(result)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print("\n\n")

# Same demo, user-based: similarity between user rating rows.
similarity_matrix = get_similarity_matrix(test_data, target="user")
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
# Exemplo n.º 4 (scraped-sample separator; the snippet below is truncated at the top)
                          key=calculated_page_rank.get,
                          reverse=False)
    return keysentences


if __name__ == "__main__":
    # CLI: data file name, similarity threshold, and how many sentences
    # to report.
    parser = argparse.ArgumentParser(
        description="Pass the data fname and threshold value")
    parser.add_argument("fname", help="Provide the data file name")
    # BUG FIX: argparse ignores `default` on positional arguments unless
    # nargs="?" is given, so the original defaults (0.15 / 10) could never
    # take effect and both arguments were effectively mandatory.
    parser.add_argument("threshold",
                        help="Provide the threshold value",
                        nargs="?",
                        default=0.15,
                        type=float)
    parser.add_argument("N",
                        help="Top N sentences to be picked",
                        nargs="?",
                        default=10,
                        type=int)
    args = parser.parse_args()
    with open(args.fname, "r") as f:
        data = f.read()
    # Tokenize into sentences and words, then build the idf-weighted
    # similarity matrix between sentences.
    sent_tokens, word_tokens = ms.tokenize(data)
    words = list(set(ms.get_words(data)))
    N = len(sent_tokens)
    idf = similarity.idf(word_tokens, words, N)
    matrix = similarity.get_similarity_matrix(word_tokens, idf)
    print("Printing similarity matrix:\n", matrix)
    # Rank sentences via the similarity graph and print the top args.N.
    gr = build_graph(sent_tokens, args.threshold, idf)
    keysentences = get_keysentences(gr)
    print("Printing Top 10 Key sentences:\n", keysentences[:args.N])
# Exemplo n.º 5 (scraped-sample separator; the snippet below is truncated at the top)
        it += 1
        B.append(r)
'''

#key sentences are obtained
def get_keysentences(graph):
    """Rank sentences in *graph* by PageRank, most important first.

    :param graph: networkx graph whose edges carry a 'weight' attribute
        (the idf-modified cosine similarity between sentences)
    :returns: sentences sorted in descending order of PageRank score
    """
    # weight is the similarity value obtained from the idf_modified_cosine
    calculated_page_rank = nx.pagerank(graph, weight='weight')
    # BUG FIX: callers slice keysentences[:N] expecting the *top* sentences
    # ("Printing Top 10 Key sentences"), but the original sorted with
    # reverse=False (ascending importance), returning the LEAST important
    # ones. Sort descending so the most important sentences come first.
    keysentences = sorted(calculated_page_rank,
                          key=calculated_page_rank.get,
                          reverse=True)
    return keysentences

if __name__ == "__main__":
    # CLI: data file name, similarity threshold, and how many sentences
    # to report.
    parser = argparse.ArgumentParser(description="Pass the data fname and threshold value")
    parser.add_argument("fname", help="Provide the data file name")
    # BUG FIX: argparse ignores `default` on positional arguments unless
    # nargs="?" is given, so the original defaults (0.15 / 10) could never
    # take effect and both arguments were effectively mandatory.
    parser.add_argument("threshold", help="Provide the threshold value", nargs="?", default=0.15, type=float)
    parser.add_argument("N", help="Top N sentences to be picked", nargs="?", default=10, type=int)
    args = parser.parse_args()
    with open(args.fname, "r") as f:
        data = f.read()
    # Tokenize into sentences and words, then build the idf-weighted
    # similarity matrix between sentences.
    sent_tokens, word_tokens = ms.tokenize(data)
    words = list(set(ms.get_words(data)))
    N = len(sent_tokens)
    idf = similarity.idf(word_tokens, words, N)
    matrix = similarity.get_similarity_matrix(word_tokens, idf)
    print ("Printing similarity matrix:\n", matrix)
    # Rank sentences via the similarity graph and print the top args.N.
    gr = build_graph(sent_tokens, args.threshold, idf)
    keysentences = get_keysentences(gr)
    print ("Printing Top 10 Key sentences:\n",keysentences[:args.N])