Exemplo n.º 1
0
 def __init__(self, model_name=None, storage=None, *args, **kwargs):
     """Initialise the embedding-model base first, then the Word2Vec base.

     Args:
         model_name: forwarded as ``model_name`` to
             ``BaseEmbeddingModel.__init__``.
         storage: forwarded as ``storage`` to
             ``BaseEmbeddingModel.__init__`` (described by the previous
             documentation as a ``skills_ml.Store`` object).
         *args: positional arguments passed unchanged to
             ``Word2Vec.__init__``.
         **kwargs: keyword arguments passed unchanged to
             ``Word2Vec.__init__``.

     Attributes:
         model_type: lower-cased ``__name__`` of the ``Word2Vec`` class.
     """
     BaseEmbeddingModel.__init__(
         self,
         model_name=model_name,
         storage=storage,
     )
     Word2Vec.__init__(self, *args, **kwargs)

     # Derive the type tag from the class itself rather than hard-coding it.
     word2vec_class_name = Word2Vec.__name__
     self.model_type = word2vec_class_name.lower()
Exemplo n.º 2
0
 def __init__(self, *args, **kwargs):
     """Initialise the storage base first, then the Word2Vec base.

     The optional ``storage`` keyword is removed from ``kwargs`` and handed
     to ``ModelStorage.__init__`` (``None`` when the caller did not supply
     it; the previous documentation describes it as a ``skills_ml.Store``
     object). All remaining positional and keyword arguments are passed
     unchanged to ``Word2Vec.__init__``.

     Attributes:
         model_name: starts out as an empty string.
         _metadata: starts out as ``None``.
     """
     # Pop before delegating so Word2Vec never sees the ``storage`` keyword.
     storage_backend = kwargs.pop('storage', None)
     ModelStorage.__init__(self, storage=storage_backend)
     Word2Vec.__init__(self, *args, **kwargs)

     self.model_name = ""
     self._metadata = None
Exemplo n.º 3
0
    def __init__(self, keywords, sentences, corpus_file, corpus_worker,
                 corpus_chunksize, case_sensitive, size, alpha, window,
                 min_count, max_vocab_size, sample, seed, workers, min_alpha,
                 sg, hs, negative, ns_exponent, cbow_mean, iter, null_word,
                 trim_rule, sorted_vocab, batch_words, compute_loss,
                 max_final_vocab):
        """Initialise all three bases and build the per-keyword bookkeeping.

        Steps, in order:
            1. ``Sec2Vec.__init__`` receives ``sentences`` and
               ``corpus_file``.
            2. ``KeywordCorpusFactory.__init__`` receives ``keywords``,
               ``case_sensitive`` and ``corpus_worker``.
            3. ``self.create`` is called with a ``SentenceIterator`` over
               ``self.sentences`` and ``corpus_chunksize``; the result is
               stored as ``self.kc``.
            4. ``self.kv`` (one fresh empty list per key of ``self.kc``) and
               ``self.keyword_count`` (zero per key) are created, and
               ``corpus_chunksize`` is stored on the instance.
            5. ``Word2Vec.__init__`` is called with ``corpus_file`` and the
               remaining hyper-parameters, plus an ``EpochLogger`` built
               from ``compute_loss`` as its only callback.

        Note that ``iter`` shadows the builtin of the same name inside this
        method; the name is part of the public signature and is kept as is.
        """
        Sec2Vec.__init__(self, sentences, corpus_file)
        KeywordCorpusFactory.__init__(self, keywords, case_sensitive,
                                      corpus_worker)

        # 20181130 Hannah Chen / LIN, Y.D.: iterate over the sentences held
        # on the instance (presumably set by Sec2Vec.__init__ -- confirm)
        # rather than over the raw ``sentences`` argument.
        sentence_stream = SentenceIterator(self.sentences)
        self.kc = self.create(sentence_stream, corpus_chunksize)

        # Each keyword needs its own list object, hence a comprehension.
        self.kv = {keyword: [] for keyword in self.kc.keys()}
        self.keyword_count = {keyword: 0 for keyword in self.kc.keys()}
        self.corpus_chunksize = corpus_chunksize

        # 20181126 Hannah Chen: per-epoch logger, handed over as a callback.
        loss_reporter = EpochLogger(compute_loss)

        word2vec_options = {
            "corpus_file": corpus_file,
            "size": size,
            "alpha": alpha,
            "window": window,
            "min_count": min_count,
            "max_vocab_size": max_vocab_size,
            "sample": sample,
            "seed": seed,
            "workers": workers,
            "min_alpha": min_alpha,
            "sg": sg,
            "hs": hs,
            "negative": negative,
            "ns_exponent": ns_exponent,
            "cbow_mean": cbow_mean,
            "iter": iter,
            "null_word": null_word,
            "trim_rule": trim_rule,
            "sorted_vocab": sorted_vocab,
            "batch_words": batch_words,
            "compute_loss": compute_loss,
            "max_final_vocab": max_final_vocab,
        }
        Word2Vec.__init__(self, callbacks=[loss_reporter], **word2vec_options)
Exemplo n.º 4
0
    def __init__(self, vocabulary_counts=None, logger_name="", **kwargs):
        """Fill in default hyper-parameters and initialise the Word2Vec base.

        Parameters:
            vocabulary_counts: optional value stored unchanged on the
                instance as ``self.vocabulary_counts`` (``None`` when not
                supplied).
            logger_name: name passed to ``logging.getLogger``.
            **kwargs: forwarded to ``Word2Vec.__init__`` after the following
                adjustments:
                min_count: defaults to 5. Per the original notes, words
                    occurring fewer than ``min_count`` times are dropped
                    from the vocabulary.
                size: defaults to 128. Per the original notes, this is the
                    number of hidden-layer units; tens to a few hundred is
                    the recommended range.
                workers: defaults to ``cpu_count()``. Per the original
                    notes, parallel training only takes effect when Cython
                    is installed; otherwise a single core is used.
                sentences: defaults to ``None``.
                window: defaults to 10.
                sg, hs: always set to 1, overriding any value the caller
                    passed.
        """
        # The returned logger is not used here; the call is kept so the
        # named logger is still created exactly as before.
        logging.getLogger(logger_name)

        # Assign directly instead of testing ``!= None``: that comparison
        # goes through ``__ne__`` and raises for array-like values whose
        # truth value is ambiguous. Must happen before Word2Vec.__init__.
        self.vocabulary_counts = vocabulary_counts

        # Caller-supplied values win; otherwise use the defaults below.
        kwargs.setdefault("min_count", 5)
        kwargs.setdefault("workers", cpu_count())
        kwargs.setdefault("size", 128)
        kwargs.setdefault("sentences", None)
        kwargs.setdefault("window", 10)

        # These two are forced regardless of what the caller passed.
        kwargs["sg"] = 1
        kwargs["hs"] = 1

        Word2Vec.__init__(self, **kwargs)
Exemplo n.º 5
0
 def __init__(self, *args, **kwargs):
     """Initialise the storage base first, then the Word2Vec base.

     The optional ``storage`` keyword is removed from ``kwargs`` and handed
     to ``ModelStorage.__init__`` (``None`` when the caller did not supply
     it). All remaining positional and keyword arguments are passed
     unchanged to ``Word2Vec.__init__``.

     Attributes:
         model_name: starts out as an empty string.
         model_type: the fixed string ``"word2vec"``.
         _metadata: starts out as ``None``.
     """
     # Pop before delegating so Word2Vec never sees the ``storage`` keyword.
     storage_backend = kwargs.pop('storage', None)
     ModelStorage.__init__(self, storage=storage_backend)
     Word2Vec.__init__(self, *args, **kwargs)

     self.model_name = ""
     self.model_type = "word2vec"
     self._metadata = None