Exemplo n.º 1
0
def _extend_associations_with(associations: List[Association],
                              words: List[str],
                              model: KeyedVectors) -> List[Association]:
    """Build extended copies of associations, one per sufficiently similar word.

    For every association and every word in ``words`` that the association
    does not already contain (per ``has_associated_word``), the similarity
    between the POS-tagged association word and the POS-tagged candidate word
    is taken from ``model``. If it is at least
    ``associations_config.MIN_SIMILARITY_SCORE``, a copy of the association
    with ``(word, similarity)`` added as an associated word is appended to the
    result.

    Args:
        associations: Associations to try to extend.
        words: Candidate words to add to each association.
        model: Embedding model used for POS-tag selection and similarity.

    Returns:
        A list containing only the extended copies, in association-then-word
        order. The input associations themselves are not part of the result,
        and one association may yield several copies (one per accepted word).

    Note:
        Errors raised by ``model.similarity`` (presumably for keys missing
        from the model -- confirm against the model in use) propagate.
    """
    valid_pos_tags_for_associations = associations_config.VALID_POS_TAGS_FOR_ASSOCIATIONS
    valid_pos_tags_for_associated_words = associations_config.VALID_POS_TAGS_FOR_ASSOCIATED_WORDS

    # Tagged form of each candidate word, filled lazily on first use. The
    # inputs to the tagging helpers (word, tag set, model) are fixed for the
    # whole call, so -- assuming the helpers are deterministic -- the result
    # can be reused across associations instead of being recomputed for each.
    tagged_words = {}

    result = []
    for association in associations:
        association_word_with_tag = _add_pos_tag(
            association.association_word,
            _get_most_likely_pos_tag(association.association_word,
                                     valid_pos_tags_for_associations, model))
        for word in words:
            if association.has_associated_word(word):
                continue

            if word not in tagged_words:
                tagged_words[word] = _add_pos_tag(
                    word,
                    _get_most_likely_pos_tag(word,
                                             valid_pos_tags_for_associated_words,
                                             model))

            similarity = model.similarity(association_word_with_tag,
                                          tagged_words[word])
            if similarity < associations_config.MIN_SIMILARITY_SCORE:
                continue

            # Extend a copy so that each accepted word produces its own
            # candidate association. The score is stored exactly as the model
            # returned it.
            new_association = association.copy()
            new_association.add_associated_word((word, similarity))
            result.append(new_association)

    return result
Exemplo n.º 2
0
def _add_rival_words_and_filter_associations(
        associations: List[Association],
        rival_words_with_coefficients: List[Tuple[str, float]],
        model: KeyedVectors) -> List[Association]:
    """Record rival-word scores on associations and drop the risky ones.

    For each association, every rival word is scored by its model similarity
    to the POS-tagged association word. If the score multiplied by the rival
    word's coefficient exceeds the association's lowest associated-word
    score, the association is considered too dangerous and is left out of
    the result. Otherwise the rival word and its unscaled score (converted to
    ``float``) are recorded on the association via ``add_rival_word``.

    Args:
        associations: Associations to check; they are mutated in place.
        rival_words_with_coefficients: ``(rival_word, coefficient)`` pairs.
        model: Embedding model used for POS-tag selection and similarity.

    Returns:
        The associations (same objects, in input order) for which no scaled
        rival score exceeded the minimum associated-word score.

    Raises:
        ValueError: From ``min()`` if an association's
            ``associated_word_scores`` is empty.

    Note:
        Rival words are added as they are checked, so a rejected association
        keeps the rival words that were recorded before the offending one.
    """
    result: List[Association] = []
    valid_pos_tags_for_associations = associations_config.VALID_POS_TAGS_FOR_ASSOCIATIONS
    valid_pos_tags_for_associated_words = associations_config.VALID_POS_TAGS_FOR_ASSOCIATED_WORDS

    # Tagged form of each rival word, filled lazily on first use. The inputs
    # to the tagging helpers (word, tag set, model) are fixed for the whole
    # call, so -- assuming the helpers are deterministic -- the result can be
    # reused across associations instead of being recomputed for each one.
    tagged_rival_words = {}

    for association in associations:
        min_associated_word_score = min(association.associated_word_scores)
        association_word_with_tag = _add_pos_tag(
            association.association_word,
            _get_most_likely_pos_tag(association.association_word,
                                     valid_pos_tags_for_associations, model))

        association_is_too_dangerous = False
        for rival_word, rival_word_coefficient in rival_words_with_coefficients:
            if rival_word not in tagged_rival_words:
                tagged_rival_words[rival_word] = _add_pos_tag(
                    rival_word,
                    _get_most_likely_pos_tag(rival_word,
                                             valid_pos_tags_for_associated_words,
                                             model))

            rival_word_score = model.similarity(association_word_with_tag,
                                                tagged_rival_words[rival_word])
            # The coefficient only affects the rejection test; the score
            # recorded on the association below is the unscaled one.
            scaled_rival_word_score = rival_word_coefficient * rival_word_score
            if scaled_rival_word_score > min_associated_word_score:
                association_is_too_dangerous = True
                break

            association.add_rival_word((rival_word, float(rival_word_score)))

        if not association_is_too_dangerous:
            result.append(association)

    return result