示例#1
0
 def __iter__(self):
     """Lazily yield one ``split_text`` result per document in ``self.file``.

     A start banner is printed when iteration begins. Each item produced by
     ``read_txt_file(self.file)`` is passed through ``split_text`` and the
     result is yielded (presumably a list of words -- confirm against
     ``split_text``). Once the source is exhausted, the number of documents
     read is printed; a consumer that stops early never triggers that
     final message.
     """
     print(">>>>> 正在读取embed语料")
     # Pre-set so the closing message reports 0 when the source is empty.
     total_docs = 0
     for total_docs, raw_doc in enumerate(read_txt_file(self.file), start=1):
         yield split_text(raw_doc)
     print("<<<<< 已读取{}文档".format(total_docs))
示例#2
0
def get_embed_from_embedfile(file):
    """Load the word2vec training corpus from an embed file.

    Each item produced by ``read_txt_file(file)`` is passed through
    ``split_text`` and the results are collected, in order, into a list
    held fully in memory. A banner is printed before reading starts and
    the number of documents read is printed afterwards.

    :param file: embed file, passed unchanged to ``read_txt_file``
    :return: list containing one ``split_text`` result per document read
    """
    print(">>>>> 正在读取embed语料")
    corpus = [split_text(raw_doc) for raw_doc in read_txt_file(file)]
    # One entry is stored per document, so the list length is the count.
    print("<<<<< 已读取{}文档".format(len(corpus)))
    return corpus