示例#1
0
def check_embedding_quality(conversations):
    """Run an embedding coverage check over the vocabulary of *conversations*.

    A ``Vectorize`` instance is built from ``conversations`` and the
    module-level ``MAX_VOCAB_SIZE``; its ``word2idx`` mapping is used to
    construct an ``Embedding``. The vectorizer's ``word_counts`` and the
    embedding's own vocabulary are then handed to
    ``Embedding.check_coverage``.

    Args:
        conversations: Corpus passed straight through to ``Vectorize``.
            NOTE(review): expected structure is defined by ``Vectorize``
            and is not visible here -- confirm against that class.

    Returns:
        Whatever ``Embedding.check_coverage`` returns. Presumably the
        out-of-vocabulary words (going by the local name and log message);
        verify against the ``embedding`` module.
    """
    # Project imports stay function-local, in the original order, so module
    # import time and import side effects are unchanged.
    from vectorize import Vectorize
    corpus_vectorizer = Vectorize(conversations, MAX_VOCAB_SIZE)
    token_to_index = corpus_vectorizer.word2idx

    from embedding import Embedding
    embedding = Embedding(token_to_index)

    # Gather both vocabularies first, then compare them.
    corpus_vocab = corpus_vectorizer.word_counts
    pretrained_vocab = embedding.get_embedding_vocab()
    missing_words = embedding.check_coverage(corpus_vocab, pretrained_vocab)

    print('Collected oov words.')
    return missing_words