Exemplo n.º 1
0
def search(query):
    """Return True when ``query`` appears to match something in ``corpus``.

    Matching happens in two stages:

    1. Every entry of the module-level ``corpus`` is split on single spaces
       and indexed with ``BM25Okapi``. If any document scores above 5 for the
       query tokens, the query is accepted. Each positive score is printed
       along the way.
    2. Otherwise each query token is compared against each whole ``corpus``
       entry: a token is accepted when the entry is a substring of the
       lower-cased token, or when ``similarity(token, entry)`` exceeds 0.85.
       Tokens shorter than two characters are skipped, and meeting a token
       listed in ``exceptions`` rejects the query immediately.

    ``corpus``, ``exceptions``, ``BM25Okapi`` and ``similarity`` are expected
    to be defined elsewhere in the module.

    Args:
        query: Search text; it is tokenized by splitting on single spaces.

    Returns:
        True if either stage finds a match, False otherwise.
    """
    tokenized_corpus = [doc.split(" ") for doc in corpus]
    bm25 = BM25Okapi(tokenized_corpus)

    tokens = query.split(" ")
    # NOTE(review): every token ends up in the list twice -- once as typed and
    # once with newlines removed. This is kept as-is because it influences the
    # BM25 scores that are compared against the cutoff below; confirm whether
    # the duplication is intentional before changing it.
    tokens += [token.replace('\n', '') for token in tokens]

    for score in bm25.get_scores(query=tokens):
        if score > 0:
            print(score)
        if score > 5:
            return True

    # Fallback: substring / fuzzy comparison of tokens against corpus entries.
    for item in tokens:
        # These checks do not depend on the corpus entry, so they run once
        # per token rather than once per (token, entry) pair.
        if item in exceptions:
            return False
        if len(item) < 2:
            continue
        lowered = item.lower()
        for word in corpus:
            if word in lowered or similarity(item, word) > 0.85:
                return True

    return False
Exemplo n.º 2
0
def suggest_identifier(id, names):
    """Suggest the entry of ``names`` that most closely resembles ``id``.

    The candidate with the highest ``jaro_winkler(id, candidate)`` score is
    selected; when several candidates tie, the one that appears first in
    ``names`` wins. The candidate is only returned if its Jaro-Winkler score
    is above 0.0 and ``similarity(id, candidate)`` is above 0.5.

    ``jaro_winkler`` and ``similarity`` are expected to be defined elsewhere
    in the module.

    Args:
        id: The identifier to find a suggestion for. The name shadows the
            ``id`` builtin but is kept so keyword callers keep working.
        names: Iterable of candidate names.

    Returns:
        The best-matching name, or None when ``names`` is empty or the best
        candidate is not similar enough.
    """
    candidates = list(names)
    if not candidates:
        return None

    # Only the top-scoring candidate is needed, so pick it with max() instead
    # of sorting the whole list; max() keeps the first of any tied entries.
    best = max(candidates, key=lambda other: jaro_winkler(id, other))
    if jaro_winkler(id, best) > 0.0 and similarity(id, best) > 0.5:
        return best
    return None
Exemplo n.º 3
0
def suggest_identifier(id, names):
    """Suggest the entry of ``names`` that most closely resembles ``id``.

    The candidate with the highest ``jaro_winkler(id, candidate)`` score is
    selected; when several candidates tie, the one that appears first in
    ``names`` wins. The candidate is only returned if its Jaro-Winkler score
    is above 0.0 and ``similarity(id, candidate)`` is above 0.5.

    ``jaro_winkler`` and ``similarity`` are expected to be defined elsewhere
    in the module.

    Args:
        id: The identifier to find a suggestion for. The name shadows the
            ``id`` builtin but is kept so keyword callers keep working.
        names: Iterable of candidate names.

    Returns:
        The best-matching name, or None when ``names`` is empty or the best
        candidate is not similar enough.
    """
    candidates = list(names)
    if not candidates:
        return None

    # Only the top-scoring candidate is needed, so pick it with max() instead
    # of sorting the whole list; max() keeps the first of any tied entries.
    best = max(candidates, key=lambda other: jaro_winkler(id, other))
    if jaro_winkler(id, best) > 0.0 and similarity(id, best) > 0.5:
        return best
    return None