def test_with_word_embedding(self):
    w2v_embedding = WordEmbedding(TestMacros.w2v_path)
    model = self.TASK_MODEL_CLASS(embedding=w2v_embedding, sequence_length=120)
    train_x, train_y = TestMacros.load_labeling_corpus()
    # Reuse the training split as validation data; this test only verifies
    # that fitting with a pre-trained word embedding runs end to end.
    valid_x, valid_y = train_x, train_y
    model.fit(train_x,
              train_y,
              x_validate=valid_x,
              y_validate=valid_y,
              epochs=self.EPOCH_COUNT)
@classmethod
def setUpClass(cls):
    cls.EPOCH_COUNT = 1
    cls.TASK_MODEL_CLASS = BiLSTM_Model
    cls.w2v_embedding = WordEmbedding(TestMacros.w2v_path)
def build_embedding(self):
    sample_w2v_path = get_file('sample_w2v.txt',
                               "http://s3.bmio.net/coco_nlp/sample_w2v.txt",
                               cache_dir=DATA_PATH)
    embedding = WordEmbedding(sample_w2v_path)
    return embedding
tensor = embed_model.output
for layer in layer_stack:
    tensor = layer(tensor)
self.tf_model: keras.Model = keras.Model(embed_model.inputs, tensor)


if __name__ == "__main__":
    import logging
    logging.basicConfig(level='DEBUG')

    from coco_nlp.embeddings import WordEmbedding

    w2v_path = '/Users/cole.zhang/Desktop/nlp/language_models/w2v/sgns.weibo.bigram-char'
    w2v = WordEmbedding(w2v_path, w2v_kwargs={'limit': 10000})

    from coco_nlp.corpus import SMP2018ECDTCorpus

    x, y = SMP2018ECDTCorpus.load_data()

    model = BiLSTM_Model(embedding=w2v)
    model.fit(x, y)

    # Alternatively, wrap your own data iterator in a CorpusGenerator:
    # train_gen = CorpusGenerator()
    # model.fit_generator(train_gen=train_gen,
    #                     valid_gen=valid_gen,
    #                     batch_size=batch_size,
    #                     epochs=epochs)
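    # A runnable sketch of that generator-based path. It assumes a
    # Kashgari-style API: that CorpusGenerator lives in coco_nlp.generators,
    # that it can wrap in-memory data as CorpusGenerator(x_data, y_data), and
    # that fit_generator accepts the keywords shown. Verify these names
    # against the coco_nlp source before relying on them.
    from coco_nlp.generators import CorpusGenerator

    train_gen = CorpusGenerator(x, y)
    valid_gen = CorpusGenerator(x, y)  # reuse training data; enough for a smoke run
    model.fit_generator(train_gen=train_gen,
                        valid_gen=valid_gen,
                        batch_size=64,
                        epochs=1)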