def __init__(self, model_name=None, storage=None, *args, **kwargs):
    """Wire up persistence via BaseEmbeddingModel, then build the Word2Vec base.

    Args:
        model_name: optional identifier used when persisting the model.
        storage (:obj: `skills_ml.Store`): skills_ml Store object backing
            save/load of the model.
        *args, **kwargs: forwarded untouched to gensim's
            ``Word2Vec.__init__``.
    """
    # Both bases are initialized explicitly (no cooperative super() here).
    BaseEmbeddingModel.__init__(self, model_name=model_name, storage=storage)
    Word2Vec.__init__(self, *args, **kwargs)
    # Derive the type tag from the gensim class name -> "word2vec".
    parent_cls = Word2Vec
    self.model_type = parent_cls.__name__.lower()
def __init__(self, *args, **kwargs):
    """Build a storage-aware Word2Vec wrapper.

    ``storage`` is removed from kwargs (default None) and handed to
    ModelStorage; everything else goes to gensim's ``Word2Vec.__init__``.

    Attributes:
        storage (:obj: `skills_ml.Store`): skills_ml Store object (may be None).
        model_name: identifier used when saving; starts empty.
        _metadata: metadata cache; starts unset (None).
    """
    store = kwargs.pop('storage', None)
    ModelStorage.__init__(self, storage=store)
    Word2Vec.__init__(self, *args, **kwargs)
    # Bookkeeping attributes consumed by the surrounding persistence code.
    self.model_name = ""
    self._metadata = None
def __init__(self, keywords, sentences, corpus_file, corpus_worker,
             corpus_chunksize, case_sensitive, size, alpha, window, min_count,
             max_vocab_size, sample, seed, workers, min_alpha, sg, hs,
             negative, ns_exponent, cbow_mean, iter, null_word, trim_rule,
             sorted_vocab, batch_words, compute_loss, max_final_vocab):
    """Build keyword-specific corpora, then train a gensim Word2Vec model.

    Initializes the Sec2Vec and KeywordCorpusFactory bases, creates a
    per-keyword corpus (``self.kc``), seeds per-keyword vector/count
    bookkeeping, and finally forwards all training hyperparameters to
    ``Word2Vec.__init__`` with an EpochLogger callback attached.

    Most parameters (size, alpha, window, ..., max_final_vocab) are passed
    straight through to gensim's Word2Vec; see its documentation for their
    meaning.  ``iter`` intentionally shadows the builtin to match gensim's
    historical keyword name.
    """
    Sec2Vec.__init__(self, sentences, corpus_file)
    KeywordCorpusFactory.__init__(self, keywords, case_sensitive, corpus_worker)
    # 20181130 Hannah Chen
    # NOTE(review): relies on self.sentences being set by Sec2Vec.__init__
    # above — confirm against that base class.
    self.kc = self.create(SentenceIterator(self.sentences), corpus_chunksize)
    # 20181130 LIN, Y.D.: Save all sentences for training
    # self.kc = self.create(self.sentences, corpus_chunksize)
    # self.kc = self.create(sentences, corpus_chunksize)
    # One empty vector list and a zero counter per discovered keyword.
    self.kv = dict(((keyword, []) for keyword in self.kc.keys()))
    self.keyword_count = dict(((keyword, 0) for keyword in self.kc.keys()))
    self.corpus_chunksize = corpus_chunksize
    # 20181126 Hannah Chen, initialize epoch_logger
    # EpochLogger reports per-epoch progress/loss via gensim's callback hook.
    epoch_logger = EpochLogger(compute_loss)
    # Train from corpus_file (not the in-memory sentences); all remaining
    # hyperparameters are forwarded verbatim to gensim.
    Word2Vec.__init__(
        self, corpus_file=corpus_file, size=size, alpha=alpha, window=window,
        min_count=min_count, max_vocab_size=max_vocab_size, sample=sample,
        seed=seed, workers=workers, min_alpha=min_alpha, sg=sg, hs=hs,
        negative=negative, ns_exponent=ns_exponent, cbow_mean=cbow_mean,
        iter=iter, null_word=null_word, trim_rule=trim_rule,
        sorted_vocab=sorted_vocab, batch_words=batch_words,
        compute_loss=compute_loss, max_final_vocab=max_final_vocab,
        callbacks=[epoch_logger])
def __init__(self, vocabulary_counts=None, logger_name="", **kwargs):
    """Fill in skip-gram training defaults and initialize gensim Word2Vec.

    Parameters:
        vocabulary_counts: optional precomputed word-frequency mapping; stored
            on the instance when provided, otherwise left as None.
        logger_name: name handed to ``logging.getLogger``.
        **kwargs: remaining options forwarded to ``Word2Vec.__init__`` after
            the defaults below are applied.

    Defaults applied only when absent from kwargs:
        min_count=5          -- words seen fewer than 5 times are dropped.
        workers=cpu_count()  -- number of parallel training workers.
        size=128             -- embedding (hidden layer) dimensionality.
        sentences=None       -- no in-memory corpus by default.
        window=10            -- context window size.
    Always forced: sg=1 (skip-gram) and hs=1 (hierarchical softmax).
    """
    # NOTE(review): `logger` is never used below; the call is kept because
    # getLogger registers the named logger as a side effect.
    logger = logging.getLogger(logger_name)
    self.vocabulary_counts = None
    # setdefault is the idiomatic form of kwargs["k"] = kwargs.get("k", d).
    kwargs.setdefault("min_count", 5)
    kwargs.setdefault("workers", cpu_count())
    kwargs.setdefault("size", 128)
    kwargs.setdefault("sentences", None)
    kwargs.setdefault("window", 10)
    kwargs["sg"] = 1
    kwargs["hs"] = 1
    # Fix: identity comparison (`is not None`) instead of `!= None`, which
    # could misbehave for mappings overriding __eq__.
    if vocabulary_counts is not None:
        self.vocabulary_counts = vocabulary_counts
    Word2Vec.__init__(self, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialize storage handling, then the gensim Word2Vec base.

    ``storage`` is popped from kwargs (default None) and given to
    ModelStorage; the remaining arguments go to ``Word2Vec.__init__``.
    Also seeds bookkeeping attributes: model_name (empty string),
    model_type ("word2vec"), and _metadata (None).
    """
    store = kwargs.pop('storage', None)
    ModelStorage.__init__(self, storage=store)
    Word2Vec.__init__(self, *args, **kwargs)
    # Bookkeeping used by the surrounding persistence machinery.
    self.model_name = ""
    self.model_type = "word2vec"
    self._metadata = None