Пример #1
0
def cosine_score(simple_index, query, document):
    """Compute the cosine similarity score between a query and a document.

    The vector space is spanned by every term in
    ``simple_index.inverted_index`` together with every term of ``query``.
    The document component for a term is ``calc_tf_idf(...)``; the query
    component is ``calc_idf(...)`` times the number of occurrences of the
    term in ``query`` (0 for terms absent from the query).

    Args:
        simple_index: Index object exposing an ``inverted_index`` mapping
            keyed by term.
        query: Collection of query terms supporting iteration, ``in`` and
            ``.count()`` (presumably a list of tokens -- confirm with caller).
        document: Document identifier passed through to ``calc_tf_idf``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    terms = set(simple_index.inverted_index.keys()).union(query)
    doc_vector = [calc_tf_idf(simple_index, term, document) for term in terms]
    # Iterating the same unmodified set again yields the same order, so the
    # two vectors stay aligned component by component.
    qry_vector = [
        calc_idf(simple_index, term) * query.count(term) if term in query else 0
        for term in terms
    ]
    num = sum(a * b for a, b in zip(doc_vector, qry_vector))
    den = sqrt(sum(a * a for a in doc_vector)) * sqrt(sum(a * a for a in qry_vector))
    if den == 0:
        # A zero-norm vector shares no weighted term with anything; report
        # "no similarity" instead of raising ZeroDivisionError.
        return 0.0
    return num / den
Пример #2
0
def calc_tf_idf(simple_index, term, document):
    """Compute the TF-IDF value of ``term`` for ``document``.

    Returns the product of ``calc_tf(simple_index, term, document)`` and
    ``calc_idf(simple_index, term)``.
    """
    term_frequency = calc_tf(simple_index, term, document)
    inverse_doc_frequency = calc_idf(simple_index, term)
    return term_frequency * inverse_doc_frequency