Example #1
import numpy as np
# `sem` is the project's semantics helper module (canonize_words, semantic_association,
# bag_to_matrix, semantic_similarity_fast, ...); it is imported elsewhere in the project.


def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:
    # Returns [(poem_idx, similarity)] for the topn poems closest to the query.
    query_bag = sem.canonize_words(query.split())
    if use_associations:
        # Expand the query with word2vec associations and compare it against each poem's
        # matrix stacked with that poem's precomputed association matrix.
        query_bag += sem.semantic_association(query_bag, w2v_model, topn=5)
        query_mx = sem.bag_to_matrix(query_bag, w2v_model)
        similars = [(i, sem.semantic_similarity_fast(query_mx, np.vstack((mx, poem_model['a_matrices'][i]))))
                    for i, mx in enumerate(poem_model['matrices']) if len(mx) > 0]
    else:
        query_mx = sem.bag_to_matrix(query_bag, w2v_model)
        similars = [(i, sem.semantic_similarity_fast(query_mx, mx))
                    for i, mx in enumerate(poem_model['matrices'])]
    similars.sort(key=lambda x: x[1], reverse=True)
    return similars[:topn]
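
A minimal call sketch, under stated assumptions: `poem_model` already carries the precomputed word-vector matrices under 'matrices' (and 'a_matrices' for the association branch), the word2vec model is loaded through the same `sem` helpers used above, and the query text is a placeholder.

w2v_model = sem.load_w2v_model(sem.WORD2VEC_MODEL_FILE)  # load once, reuse for every query
query = "autumn rain"  # any free-text query in the corpus language
for idx, sim in similar_poems_idx(query, poem_model, w2v_model, topn=3):
    print(f"poem {idx}: similarity {sim:.3f}")
    print(poem_model['poems'][idx])
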
Example #2
import heapq

import numpy as np


def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:  # [(poem_idx, sim)]
    query_bag = sem.canonize_words(query.split())
    if use_associations:
        # Expand the query with word2vec associations and include each poem's
        # precomputed association matrix in the comparison.
        query_bag += sem.semantic_association(query_bag, w2v_model, topn=5)
        query_mx = sem.bag_to_matrix(query_bag, w2v_model)
        if len(query_mx) == 0:  # none of the query words could be vectorized
            return []
        similars = [(i, sem.semantic_similarity_fast(query_mx, np.vstack((mx, poem_model['a_matrices'][i]))))
                    for i, mx in enumerate(poem_model['matrices']) if len(mx) > 0]
    else:
        query_mx = sem.bag_to_matrix(query_bag, w2v_model)
        if len(query_mx) == 0:
            return []
        similars = [(i, sem.semantic_similarity_fast_log(query_mx, mx))
                    for i, mx in enumerate(poem_model['matrices'])]
    # heapq.nlargest picks the topn pairs directly instead of sorting the whole list.
    return heapq.nlargest(topn, similars, key=lambda x: x[1])
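
Compared with Example #1, this variant returns early when no query word maps to a vector and selects the top results with heapq.nlargest, which the standard library documents as equivalent to sorting in reverse and slicing, without sorting every candidate. A tiny self-contained illustration of that equivalence on dummy data (no project dependencies):

import heapq

pairs = [(0, 0.12), (1, 0.87), (2, 0.45), (3, 0.33)]  # (poem_idx, similarity)
top2_sorted = sorted(pairs, key=lambda x: x[1], reverse=True)[:2]
top2_heap = heapq.nlargest(2, pairs, key=lambda x: x[1])
assert top2_heap == top2_sorted == [(1, 0.87), (2, 0.45)]  # same result, no full sort needed
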
Example #3
def make_poems_model(file_name: str, semantics=True) -> dict:
    print("making poems model...")
    poems = read_poems(file_name)
    print('poem count:', len(poems))
    bags, voc = make_bags(poems)  # per-poem bags of canonized words plus the shared vocabulary
    sa = []  # per-poem semantic associations
    sd = []  # per-poem semantic density
    if semantics:
        print("loading w2v_model...")
        w2v_model = sem.load_w2v_model(sem.WORD2VEC_MODEL_FILE)
        print("adding semantics to model...")
        sd = [sem.semantic_density(bag, w2v_model, unknown_coef=-0.001) for bag in bags]
        sa = [sem.semantic_association(bag, w2v_model) for bag in bags]
    rates = [0.0 for _ in range(len(poems))]  # per-poem ratings, initialized to zero
    print("model created")
    return {'poems'       : poems,
            'bags'        : bags,
            'vocabulary'  : voc,
            'density'     : sd,
            'associations': sa,
            'rates'       : rates}
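
A hedged usage sketch: the file name below is a placeholder, and `read_poems`, `make_bags`, and the `sem` helpers are assumed to be importable from the project. It builds the model with semantics enabled and inspects the per-poem fields of the returned dict.

poem_model = make_poems_model("poems.txt", semantics=True)  # placeholder file name
print(len(poem_model['poems']), "poems,", len(poem_model['vocabulary']), "vocabulary entries")
print("density of poem 0:     ", poem_model['density'][0])
print("associations of poem 0:", poem_model['associations'][0])
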