def trainUnSpv(path: str, n=1, model='skipgram'):
    """Train an unsupervised fastText model on a text corpus.

    Args:
        path: Path to the plain-text training corpus.
        n: Maximum length of word n-grams (``wordNgrams``). Defaults to 1.
        model: Training architecture, ``'skipgram'`` or ``'cbow'``.

    Returns:
        The trained fastText model object.
    """
    # epoch/dim are fixed here (100/100); expose them as parameters
    # if callers ever need to tune them.
    return FT.train_unsupervised(path,
                                 epoch=100,
                                 dim=100,
                                 wordNgrams=n,
                                 model=model)
Exemplo n.º 2
0
def make_model(data_path, data_file_name, save_path, save_file_name, **kwargs):
    """Train an unsupervised fastText model and save its word vectors.

    Args:
        data_path: Directory containing the training corpus.
        data_file_name: File name of the training corpus.
        save_path: Directory to write the vectors into.
        save_file_name: Base file name for the saved vectors.
        **kwargs: Extra keyword arguments forwarded to
            ``FastText.train_unsupervised`` (e.g. ``dim``, ``epoch``).

    Returns:
        The expected path of the saved ``.vec`` file. Note this is
        returned even if training failed (failures are only printed),
        so callers should verify the file exists.

    Raises:
        FileNotFoundError: If the training corpus does not exist.
    """
    total_data_path = os.path.join(data_path, data_file_name)
    total_save_path = os.path.join(save_path, save_file_name)

    # Fail fast with a real exception: ``assert`` is stripped under -O.
    if not os.path.isfile(total_data_path):
        raise FileNotFoundError(total_data_path)

    start_time = strftime("%y%m%d-%H%M%S")
    print('모델생산 시작시간 : ', start_time)  # model-build start time

    try:
        # The original tested ``kwargs.keys() == None``, which is always
        # False (a keys view never equals None), so the no-kwargs branch
        # was dead code; passing ``**{}`` is a no-op anyway.
        model = FastText.train_unsupervised(total_data_path, **kwargs)
        model.save_vectors(total_save_path)
    except Exception as e:
        # Best-effort: report the failure but still run the finally block
        # and return the target path, matching the original behavior.
        print(e)
    finally:
        end_time = strftime("%y%m%d-%H%M%S")
        print('모델생산 종료시간 : ', end_time)  # model-build end time
    return total_save_path + '.vec'
Exemplo n.º 3
0
    def train_words_model(self,
                          corpus_filename,
                          model_filename,
                          model='skipgram',
                          min_count=5):
        """Train an unsupervised fasttext word model and save it to disk.

        Side effects: sets ``self.model`` and ``self.words_list``, and
        writes the trained model to ``self.data_dir + model_filename``.

        Args:
            corpus_filename: Corpus file name, relative to ``self.data_dir``.
            model_filename: Output model file name, relative to ``self.data_dir``.
            model: fasttext architecture, ``'skipgram'`` or ``'cbow'``.
                (The previous default ``'skpigram'`` was a typo that
                fasttext rejects at training time.)
            min_count: Minimum word frequency to keep (``minCount``).
        """
        corpus_filename = self.data_dir + corpus_filename
        model_filename = self.data_dir + model_filename
        print('Training for [%s] Model=%s Dim=%d MinCount=%d...' %
              (corpus_filename, model, self.word_dim, min_count))

        self.model = fasttext.train_unsupervised(input=corpus_filename,
                                                 model=model,
                                                 dim=self.word_dim,
                                                 minCount=min_count)
        self.model.save_model(model_filename)
        self.words_list = list(self.model.get_words())

        print('Finished. Dictionary size:%s' %
              '{:,}'.format(len(self.model.get_words())))
Exemplo n.º 4
0
##### Install Fasttext #######
# $ git clone https://github.com/facebookresearch/fastText.git
# $ cd fastText
# $ pip install .

import fastText
import numpy as np
import pandas as pd

# Train an unsupervised fastText model on the job-title corpus.
# Hyperparameters: 128-dim vectors, 20 epochs, negative-sampling loss,
# word trigrams, character n-grams of length 2-5, context window 3.
from fastText import FastText
model = FastText.train_unsupervised(input="data/BGT_Titles_2.txt",
                                    dim=128,
                                    epoch=20,
                                    minCount=2,
                                    wordNgrams=3,
                                    loss="ns",
                                    minn=2,
                                    maxn=5,
                                    thread=1,
                                    ws=3)

# Save the trained model to disk.
model.save_model('model/ns2520ns3005_new.bin')

# NOTE(review): ``pickle`` is used below but never imported in this
# snippet — add ``import pickle`` before running.
with open("data/list_titles_BGT.txt", "rb") as fp:  # Unpickling
    list_titles = pickle.load(fp)

# Generate embeddings: one 128-dim row per title.
p_name_fasttext_128 = np.zeros((len(list_titles), 128))
with open('data/BGT_Titles_2.txt', 'r') as f:
    for i, name in enumerate(f):
# NOTE(review): the loop body is missing here — this snippet is
# truncated; presumably each line's sentence vector fills row i.
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
from fastText import FastText

# Corpus to learn word vectors from, and where to persist the model.
TRAIN_CORPUS = 'raw_data/train_data.txt'
MODEL_PATH = 'wv/model.bin'

# Train unsupervised fastText embeddings with default hyperparameters
# and write the resulting model to disk.
model = FastText.train_unsupervised(TRAIN_CORPUS)
model.save_model(MODEL_PATH)