]), # comment in this line to use character embeddings # CharacterEmbeddings(), # comment in these lines to use contextual string embeddings CharLMEmbeddings('data/model/lm-news-forward'), CharLMEmbeddings('data/model/lm-news-backward'), ] embeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=tag_dictionary, tag_type=tag_type, use_crf=True) # 6. initialize trainer trainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False) # 7. start training trainer.train(model_path, learning_rate=0.1, mini_batch_size=32, max_epochs=60, embeddings_in_memory=True) tagger.load_parameters(os.path.join(model_path, 'model.bin'), ctx=mxnet_prefer_gpu())
# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings; run everything under the preferred-GPU context
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.debug.txt'),
        # uncomment these lines to add contextual string embeddings
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]
    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger (plain softmax output: use_crf=False)
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=False)

    # 6. initialize trainer
    trainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)

    # 7. start training; checkpoints go to data/model/wsj-pos
    trainer.train('data/model/wsj-pos',
                  learning_rate=0.1,
                  mini_batch_size=32,
                  max_epochs=150,
                  embeddings_in_memory=True)