tag_to_biloes='ner') # 2. what tag do we want to predict? tag_type = 'ner' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary.idx2item) # 4. initialize embeddings with mx.Context(mxnet_prefer_gpu()): embedding_types = [ WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'), BERTEmbeddings([ 'data/embedding/bert_large_sum/conll03.train.bert', 'data/embedding/bert_large_sum/conll03.dev.bert', 'data/embedding/bert_large_sum/conll03.test.bert' ]), # comment in this line to use character embeddings # CharacterEmbeddings(), # comment in these lines to use contextual string embeddings CharLMEmbeddings('data/model/lm-news-forward'), CharLMEmbeddings('data/model/lm-news-backward'), ] embeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger tagger = SequenceTagger(hidden_size=256,
# NOTE(review): this chunk had its newlines stripped, so the inline `#` comments
# commented out everything after them — line breaks and indentation are restored
# below; every code token is unchanged.

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
# run embedding lookup / training on the GPU when one is available
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.debug.txt'),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),

        # pre-computed BERT features for the WSJ train/dev/test splits
        BERTEmbeddings(['data/embedding/bert_large_cased/wsj.train.short.bert',
                        'data/embedding/bert_large_cased/wsj.dev.bert',
                        'data/embedding/bert_large_cased/wsj.test.bert']),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True,
                            attention=True)

    # 6. initialize trainer
    trainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)
train_file='train.tsv', test_file='test.tsv', dev_file='dev.tsv') # 2. what tag do we want to predict? tag_type = 'pos' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary.idx2item) # 4. initialize embeddings with mx.Context(mxnet_prefer_gpu()): embedding_types = [ # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'), BERTEmbeddings(['data/embedding/bert_base_sum/wsj.train.bert', 'data/embedding/bert_base_sum/wsj.dev.bert', 'data/embedding/bert_base_sum/wsj.test.bert']), # CharLMEmbeddings('data/model/lm-news-forward'), # CharLMEmbeddings('data/model/lm-news-backward'), ] embeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=tag_dictionary, tag_type=tag_type, use_crf=True) # 6. initialize trainer
source_scheme='ioblu') # 2. what tag do we want to predict? tag_type = 'ner' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary.idx2item) # 4. initialize embeddings with mx.Context(mxnet_prefer_gpu()): embedding_types = [ # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'), BERTEmbeddings(['data/embedding/bert_large_sum/ontonotes-en.train.bert', 'data/embedding/bert_large_sum/ontonotes-en.dev.bert', 'data/embedding/bert_large_sum/ontonotes-en.test.bert']), # comment in this line to use character embeddings # CharacterEmbeddings(), # comment in these lines to use contextual string embeddings # CharLMEmbeddings('data/model/lm-news-forward'), # CharLMEmbeddings('data/model/lm-news-backward'), ] embeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger tagger = SequenceTagger(hidden_size=256, embeddings=embeddings,
train_file='train.short.tsv', test_file='test.short.tsv', dev_file='dev.short.tsv') # 2. what tag do we want to predict? tag_type = 'pos' # 3. make the tag dictionary from the corpus tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary.idx2item) # 4. initialize embeddings with mx.Context(mxnet_prefer_gpu()): embedding_types = [ # WordEmbeddings('data/embedding/ctb.pos.fasttext.300.txt'), BERTEmbeddings(['data/embedding/bert_base_sum/ctb.pos.train.bert', 'data/embedding/bert_base_sum/ctb.pos.dev.bert', 'data/embedding/bert_base_sum/ctb.pos.test.bert']) # CharLMEmbeddings('data/model/lm-news-forward'), # CharLMEmbeddings('data/model/lm-news-backward'), ] embeddings = StackedEmbeddings(embeddings=embedding_types) # 5. initialize sequence tagger tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=tag_dictionary, tag_type=tag_type, use_crf=True, attention=True)