示例#1
0
    def test_with_word_embedding(self):
        """Train the task model on top of a word2vec ``WordEmbedding``.

        The labeling corpus is used for both training and validation, and no
        assertion is made on the result: the test passes as long as building
        the model and calling ``fit`` do not raise.
        """
        embedding = WordEmbedding(TestMacros.w2v_path)
        task_model = self.TASK_MODEL_CLASS(sequence_length=120,
                                           embedding=embedding)

        samples, labels = TestMacros.load_labeling_corpus()

        # The validation split deliberately reuses the training data.
        task_model.fit(samples,
                       labels,
                       x_validate=samples,
                       y_validate=labels,
                       epochs=self.EPOCH_COUNT)
示例#2
0
 @classmethod
 def setUpClass(cls):
     """Set the class-level fixtures shared by every test in this class.

     The unittest runner calls this hook as ``cls.setUpClass()`` with no
     explicit argument, so it has to be a classmethod; without the decorator
     the call fails because ``cls`` is never supplied.

     Sets:
         EPOCH_COUNT: number of epochs passed to ``fit`` by the tests (1).
         TASK_MODEL_CLASS: model class under test (``BiLSTM_Model``).
         w2v_embedding: ``WordEmbedding`` built once from
             ``TestMacros.w2v_path`` and stored on the class.
     """
     cls.EPOCH_COUNT = 1
     cls.TASK_MODEL_CLASS = BiLSTM_Model
     cls.w2v_embedding = WordEmbedding(TestMacros.w2v_path)
 def build_embedding(self):
     """Return a ``WordEmbedding`` backed by the sample word2vec file.

     The file path comes from ``get_file``, which is given the file name,
     its source URL and ``DATA_PATH`` as the cache directory.
     """
     return WordEmbedding(
         get_file('sample_w2v.txt',
                  "http://s3.bmio.net/coco_nlp/sample_w2v.txt",
                  cache_dir=DATA_PATH)
     )
示例#4
0
        tensor = embed_model.output
        for layer in layer_stack:
            tensor = layer(tensor)

        self.tf_model: keras.Model = keras.Model(embed_model.inputs, tensor)


if __name__ == "__main__":
    # Manual demo: fit BiLSTM_Model on the SMP2018 ECDT corpus using a
    # word2vec embedding read from a local file.
    import logging

    # Configure logging first so the package imports below are covered too.
    logging.basicConfig(level='DEBUG')

    from coco_nlp.embeddings import WordEmbedding

    # NOTE(review): machine-specific absolute path; adjust before running.
    embedding = WordEmbedding(
        '/Users/cole.zhang/Desktop/nlp/language_models/w2v/sgns.weibo.bigram-char',
        # Presumably caps how many vectors are loaded -- TODO confirm.
        w2v_kwargs=dict(limit=10000),
    )

    from coco_nlp.corpus import SMP2018ECDTCorpus

    samples, labels = SMP2018ECDTCorpus.load_data()

    classifier = BiLSTM_Model(embedding=embedding)
    classifier.fit(samples, labels)

    # Alternatively, build on CorpusGenerator to implement your own data
    # iterator:
    # train_gen = CorpusGenerator()
    # model.fit_generator(train_gen=train_gen,
    #                     valid_gen=valid_gen,
    #                     batch_size=batch_size,
    #                     epochs=epochs)