Python to_words示例

编程语言: Python

命名空间/包名称: tseval.text

方法/功能: to_words

hotexamples.com的示例: 10

Python to_words - 已找到10个示例。这些是从开源项目中提取的最受好评的tseval.text.to_words现实Python示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

def get_reordered_words(c, s):
    """Return the words that were reordered between ``c`` and ``s``.

    A reordered word is a word that is contained in both the source and the
    simplification but is not part of their longest common subsequence.
    Both sentences are lowercased before any comparison is made.
    """
    source, simplification = c.lower(), s.lower()
    common_subsequence = get_lcs(to_words(source), to_words(simplification))
    kept_counts = Counter(get_kept_words(source, simplification))
    # Kept words that are not accounted for by the LCS must have moved.
    return flatten_counter(kept_counts - Counter(common_subsequence))

示例#2

显示文件

def get_wordrank_score(sentence):
    """Return the third quartile of the log ranks of the sentence's words.

    Punctuation tokens and stopwords are removed first, and only words present
    in the word-to-rank table are scored. When no such word remains, the
    fallback is ``log(1 + size of the word-to-rank table)``.
    """
    cleaned = remove_stopwords(remove_punctuation_tokens(sentence))
    known_words = [token for token in to_words(cleaned) if token in get_word2rank()]
    if not known_words:
        # TODO: This is completely arbitrary
        return np.log(1 + len(get_word2rank()))
    log_ranks = [get_log_rank(token) for token in known_words]
    return np.quantile(log_ranks, 0.75)

示例#3

显示文件

文件： embeddings.py 项目： liguiming77/text-simplification-evaluation

def to_embeddings(sentence):
    """Look up the embedding rows for each word of ``sentence``.

    The embedding table and the word-to-index mapping are loaded on first use
    and stored in the module globals ``EMBEDDINGS`` and ``WORD2INDEX``. Words
    missing from the mapping fall back to the index of ``'<unk>'``.
    """
    global EMBEDDINGS, WORD2INDEX
    if 'EMBEDDINGS' not in globals():
        # Lazy one-time load; later calls reuse the module-level globals.
        print('Loading FastText embeddings...')
        EMBEDDINGS, WORD2INDEX = load_fasttext_embeddings(vocab_size=100000)
        print('Done.')
    # Fasttext embeddings are lowercase
    tokens = to_words(sentence.lower())
    indexes = []
    for token in tokens:
        indexes.append(WORD2INDEX.get(token, WORD2INDEX['<unk>']))
    return EMBEDDINGS[indexes]

示例#4

显示文件

def get_frequency_table_ranks(sentence):
    """Return ``log(1 + rank)`` for every word of ``sentence`` as an array."""
    ranks = np.array([get_rank(token) for token in to_words(sentence)])
    return np.log(1 + ranks)

示例#5

显示文件

def get_concreteness_scores(sentence):
    """Return ``log(1 + concreteness)`` for every word of ``sentence``."""
    concreteness = np.array([get_concreteness(token) for token in to_words(sentence)])
    return np.log(1 + concreteness)

示例#6

显示文件

def only_deleted_words(c, s):
    """Tell whether ``s`` differs from ``c`` only by word deletions.

    Only counting deleted words does not work because sometimes there is
    reordering, so the check is instead that the longest common subsequence
    of the two word lists equals the words of ``s``. Exact matches are
    excluded.
    """
    if is_exact_match(c, s):
        return False
    common_subsequence = get_lcs(to_words(c), to_words(s))
    return common_subsequence == to_words(s)

示例#7

显示文件

def get_kept_words(c, s):
    """Return the words shared by ``c`` and ``s``, respecting multiplicity.

    The word counts of both sentences are intersected (minimum count per
    word) and the result is passed through ``flatten_counter``.
    """
    source_counts = Counter(to_words(c))
    simplification_counts = Counter(to_words(s))
    return flatten_counter(source_counts & simplification_counts)

示例#8

显示文件

def get_deleted_words(c, s):
    """Return the words of ``c`` that are missing from ``s``.

    The word counts of ``s`` are subtracted from those of ``c`` (only positive
    counts are kept) and the result is passed through ``flatten_counter``.
    """
    source_counts = Counter(to_words(c))
    simplification_counts = Counter(to_words(s))
    return flatten_counter(source_counts - simplification_counts)

示例#9

显示文件

def get_deletions_proportion(complex_sentence, simple_sentence):
    """Return the number of deleted words relative to the longer sentence.

    Deletions are the words of ``complex_sentence`` (with multiplicity) that
    are not matched in ``simple_sentence``. The denominator is the larger of
    the two ``count_words`` values.

    NOTE(review): raises ZeroDivisionError if both word counts are 0.
    """
    complex_counts = Counter(to_words(complex_sentence))
    simple_counts = Counter(to_words(simple_sentence))
    deleted_counts = complex_counts - simple_counts
    n_deletions = sum(deleted_counts.values())
    longest_length = max(count_words(complex_sentence), count_words(simple_sentence))
    return n_deletions / longest_length

示例#10

显示文件

def word_intersection(complex_sentence, simple_sentence):
    """Return the share of distinct words common to both sentences.

    The number of distinct words found in both sentences is divided by the
    size of the larger of the two vocabularies.

    NOTE(review): raises ZeroDivisionError if neither sentence has any word.
    """
    complex_vocab = set(to_words(complex_sentence))
    simple_vocab = set(to_words(simple_sentence))
    shared_vocab = complex_vocab & simple_vocab
    largest_vocab_size = max(len(complex_vocab), len(simple_vocab))
    return len(shared_vocab) / largest_vocab_size