Exemplo n.º 1
0
    def train(self, trainingfile):
        """Train a fastText model on ``trainingfile``.

        Logs the configured learning rate, epoch count and n-gram size, then
        runs exactly one trainer on a fresh ``FastText`` instance:

        * ``supervised`` when ``self.supervised`` is truthy,
        * ``cbow`` when ``self.config.method == "cbow"``,
        * ``skipgram`` otherwise.

        Args:
            trainingfile: Passed unchanged as the ``input`` argument of the
                selected trainer.

        Returns:
            None. The ``FastText`` object is a local variable; it is neither
            returned nor stored on ``self``.
        """

        logger.info(
            f'Training started with : learningRate:{self.config.learningRate!s}, epoch:{self.config.epoch!s}, ngrams :{self.config.ngrams!s}'
        )
        model = FastText()
        if self.supervised:
            # The epoch count is read from ``config.epoch`` -- the same
            # attribute the log line above and the cbow/skipgram branches
            # use. This branch previously read ``config.epochs``, which did
            # not match the rest of the method.
            model.supervised(input=trainingfile,
                             output=self.filepath,
                             epoch=self.config.epoch,
                             lr=self.config.learningRate,
                             wordNgrams=self.config.ngrams,
                             verbose=2,
                             minCount=1)
        elif self.config.method == "cbow":
            # NOTE(review): output is the literal 'model' here, whereas the
            # supervised branch writes to self.filepath -- confirm intended.
            model.cbow(input=trainingfile,
                       output='model',
                       epoch=self.config.epoch,
                       lr=self.config.learningRate)
        else:
            # Any method other than "cbow" falls back to skip-gram.
            # NOTE(review): same literal 'model' output as the cbow branch.
            model.skipgram(input=trainingfile,
                           output='model',
                           epoch=self.config.epoch,
                           lr=self.config.learningRate)
Exemplo n.º 2
0
def train_pyfasttext_model():
    """Train a skip-gram and a CBOW model on one corpus and print their words.

    Both models read the same hard-coded corpus file and write to their own
    hard-coded output prefix under ``../data``. After each training run the
    model's ``words`` attribute is printed; for the CBOW model its type is
    printed as well. Nothing is returned.
    """
    corpus_path = "../data/880w_news_title_content_seg_sort_uniq_head_2.txt"

    # --- Skip-gram ---
    # CLI equivalent:
    # `./fasttext skipgram -input ../data/880w_news_title_content_seg_sort_uniq_head_2.txt -output lxw_model_sg_pyfasttext`
    skipgram_model = FastText()
    skipgram_model.skipgram(input=corpus_path,
                            output="../data/lxw_model_sg_pyfasttext")
    # Per the original author's note, this automatically generates
    # ../data/lxw_model_sg_pyfasttext.bin and ../data/lxw_model_sg_pyfasttext.vec
    print(skipgram_model.words)  # list of words in dictionary

    # --- CBOW ---
    # CLI equivalent:
    # `./fasttext cbow -input ../data/880w_news_title_content_seg_sort_uniq_head_2.txt -output lxw_model_cbow_pyfasttext`
    cbow_trainer = FastText()
    cbow_trainer.cbow(input=corpus_path,
                      output="../data/lxw_model_cbow_pyfasttext")
    # Per the original author's note, this automatically generates
    # ../data/lxw_model_cbow_pyfasttext.bin and ../data/lxw_model_cbow_pyfasttext.vec
    cbow_vocab = cbow_trainer.words
    print(cbow_vocab)  # list of words in dictionary
    print(type(cbow_vocab))  # <class 'list'>
Exemplo n.º 3
0
# Disabled: dump every word of the skip-gram model with its vector.
# for word in skip_gram_model.words:
#    print(word, skip_gram_model[word])

# Print the 2 nearest neighbours of '贷款' ("loan") from the skip-gram model.
print(skip_gram_model.nearest_neighbors('贷款', k=2))

# test data is stored inside a file, use this:
# skip_gram_model.predict_proba_file('./test.txt', k=2)

# Blank separator between the skip-gram output above and the CBOW output below.
print("\n")

#############################
# Train using the CBOW model #
#############################
cbow_model = FastText()
# Train on ./train.txt with 100 epochs and learning rate 0.7; 'cbow_model' is
# passed as the output argument.
cbow_model.cbow(input='./train.txt', output='cbow_model', epoch=100, lr=0.7)
# Print the result of indexing the model with '贷款' ("loan").
print(cbow_model['贷款'])
# print(cbow_model.get_numpy_vector('贷款'))
# print(cbow_model.get_numpy_vector('贷款', normalized=True))

# Vector arithmetic over three words:
# '人民币' ("RMB") + '贷款' ("loan") - '外币' ("foreign currency").
var1 = cbow_model.get_numpy_vector('人民币')
var2 = cbow_model.get_numpy_vector('贷款')
var3 = cbow_model.get_numpy_vector('外币')
# NOTE(review): the return value of this call is discarded (not printed or
# assigned) -- presumably it was meant to be wrapped in print(); confirm.
cbow_model.words_for_vector(var1 + var2 - var3, k=1)

# Disabled: dump every word of the CBOW model with its vector.
# for word in cbow_model.words:
#    print(word, cbow_model[word])

# Print the 2 nearest neighbours of '贷款' ("loan") from the CBOW model.
print(cbow_model.nearest_neighbors('贷款', k=2))

# test data is stored inside a file, use this: