Пример #1
0
 def __init__(self, dict, model_source, tag_source):
     """Load the tag list and a saved Word2Vec model.

     Args:
         dict: Forwarded unchanged to ``utils.jieba_add_dict``
             (presumably a jieba user-dictionary path -- confirm
             against ``utils``).
         model_source: Location handed to
             ``gensim.models.Word2Vec.load``.
         tag_source: Path of a text file that is read line by line
             into ``self.tags_repo``.
     """
     utils.jieba_add_dict(dict)
     self.tag_repo = []
     self.tags_repo = []
     # Use a context manager so the tag file handle is closed
     # deterministically instead of being left to the garbage collector.
     with open(tag_source) as tag_file:
         # Lines are stored exactly as read (trailing newline included).
         self.tags_repo.extend(tag_file)
     self.model = gensim.models.Word2Vec.load(model_source)
Пример #2
0
 def __init__(self, dict, model_source, tag_source):
     """Initialise the instance from a tag file and a stored model.

     Args:
         dict: Passed straight to ``utils.jieba_add_dict``
             (likely a custom jieba dictionary -- TODO confirm).
         model_source: Argument for ``gensim.models.Word2Vec.load``;
             the loaded model is kept on ``self.model``.
         tag_source: Path of the text file whose lines populate
             ``self.tags_repo``.
     """
     utils.jieba_add_dict(dict)
     self.tag_repo = []
     self.tags_repo = []
     # The original iterated over a bare open() call and never closed
     # the file; the with-block releases the handle as soon as the
     # lines have been collected.
     with open(tag_source) as handle:
         for line in handle:
             # Each line is kept verbatim, newline included.
             self.tags_repo.append(line)
     self.model = gensim.models.Word2Vec.load(model_source)
Пример #3
0
def train_word_vector(source, dict, wordvec):
    """Train a Word2Vec model on the ``comment`` column of a TSV file.

    Args:
        source: Path of a tab-separated file containing a ``comment``
            column.
        dict: Forwarded unchanged to ``utils.jieba_add_dict``
            (presumably a jieba user-dictionary path -- confirm).
        wordvec: Destination passed to ``model.save``.

    Returns:
        The trained ``gensim.models.Word2Vec`` model (also saved to
        ``wordvec``).
    """
    # Local import: DataFrame.from_csv was deprecated in pandas 0.21 and
    # removed in 1.0. read_csv with index_col=0 / parse_dates=True is the
    # documented equivalent of from_csv's defaults.
    from pandas import read_csv

    utils.jieba_add_dict(dict)
    comments_df = read_csv(source, sep='\t', index_col=0, parse_dates=True)
    # One token list per comment: strip punctuation, then segment with jieba.
    document = [
        list(jieba.cut(utils.remove_punctuation(line)))
        for line in comments_df['comment'].values
    ]
    model = gensim.models.Word2Vec(document)
    # Parenthesised single-argument form prints identically on Python 2
    # and Python 3.
    print('saving word vector model')
    model.save(wordvec)
    return model
Пример #4
0
def train_word_vector(source, dict, wordvec):
    """Build, save and return a Word2Vec model from a comments file.

    The file at ``source`` is read as tab-separated data; every value in
    its ``comment`` column is passed through
    ``utils.remove_punctuation`` and ``jieba.cut`` to produce one token
    list per comment, and the token lists are used to train the model.

    Args:
        source: Path of the tab-separated input file.
        dict: Given to ``utils.jieba_add_dict`` before segmentation
            (likely a custom jieba dictionary -- TODO confirm).
        wordvec: Target passed to ``model.save``.

    Returns:
        The trained ``gensim.models.Word2Vec`` instance.
    """
    # Imported here so the block is self-contained. read_csv replaces
    # DataFrame.from_csv, which no longer exists in pandas >= 1.0; the
    # keyword arguments reproduce from_csv's defaults.
    from pandas import read_csv

    utils.jieba_add_dict(dict)
    comments_df = read_csv(source, sep='\t', index_col=0, parse_dates=True)
    document = []
    for raw_comment in comments_df['comment'].values:
        cleaned = utils.remove_punctuation(raw_comment)
        # jieba.cut is consumed into a list so each document is a
        # concrete token sequence.
        document.append(list(jieba.cut(cleaned)))
    model = gensim.models.Word2Vec(document)
    # Works as a statement on Python 2 and as a function call on Python 3.
    print('saving word vector model')
    model.save(wordvec)
    return model