Example #1
# 1. get the corpus (the head of the loader call is not shown in this snippet;
# the fetcher name, data_folder and columns below are assumed placeholders,
# only tag_to_biloes='ner' is original)
corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, columns,
                                               tag_to_biloes='ner')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
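# idx2item maps each tag index back to its tag string; printing it is a quick sanity check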
print(tag_dictionary.idx2item)

# 4. initialize embeddings
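# run on the GPU when available; mxnet_prefer_gpu() is assumed to be a helper
# from the host project that returns a GPU context, falling back to CPU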
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        BERTEmbeddings([
            'data/embedding/bert_large_sum/conll03.train.bert',
            'data/embedding/bert_large_sum/conll03.dev.bert',
            'data/embedding/bert_large_sum/conll03.test.bert'
        ]),

        # uncomment this line to use character embeddings
        # CharacterEmbeddings(),

        # contextual string embeddings (forward and backward character language models)
        CharLMEmbeddings('data/model/lm-news-forward'),
        CharLMEmbeddings('data/model/lm-news-backward'),
    ]

    # StackedEmbeddings concatenates all of the above embeddings into one vector per token
    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True)
Example #2
# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.debug.txt'),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
        BERTEmbeddings(['data/embedding/bert_large_cased/wsj.train.short.bert',
                        'data/embedding/bert_large_cased/wsj.dev.bert',
                        'data/embedding/bert_large_cased/wsj.test.bert']),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True,
                            attention=True)

    # 6. initialize trainer
    trainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)
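
The snippets all stop before the training call itself. A minimal sketch of the usual final step, assuming this MXNet port keeps flair's trainer interface; the output path, learning rate, batch size and epoch count are illustrative placeholders, not values from the source:

    # 7. start training (path and hyperparameters are assumed placeholders)
    trainer.train('data/model/pos-tagger',
                  learning_rate=0.1,
                  mini_batch_size=32,
                  max_epochs=150)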
Example #3
# 1. get the corpus (the head of the loader call is not shown in this snippet;
# the fetcher name, data_folder and columns below are assumed placeholders)
corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, columns,
                                               train_file='train.tsv',
                                               test_file='test.tsv',
                                               dev_file='dev.tsv')

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        BERTEmbeddings(['data/embedding/bert_base_sum/wsj.train.bert',
                        'data/embedding/bert_base_sum/wsj.dev.bert',
                        'data/embedding/bert_base_sum/wsj.test.bert']),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True)

    # 6. initialize trainer
    trainer = SequenceTaggerTrainer(tagger, corpus, test_mode=False)
Example #4
# 1. get the corpus (the head of the loader call is not shown in this snippet;
# the fetcher name, data_folder and columns below are assumed placeholders,
# only source_scheme='ioblu' is original)
corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, columns,
                                               source_scheme='ioblu')

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [

        # WordEmbeddings('data/embedding/glove/glove.6B.100d.txt'),
        BERTEmbeddings(['data/embedding/bert_large_sum/ontonotes-en.train.bert',
                        'data/embedding/bert_large_sum/ontonotes-en.dev.bert',
                        'data/embedding/bert_large_sum/ontonotes-en.test.bert']),

        # uncomment this line to use character embeddings
        # CharacterEmbeddings(),

        # uncomment these lines to use contextual string embeddings
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True)
Example #5
# 1. get the corpus (the head of the loader call is not shown in this snippet;
# the fetcher name, data_folder and columns below are assumed placeholders)
corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, columns,
                                               train_file='train.short.tsv',
                                               test_file='test.short.tsv',
                                               dev_file='dev.short.tsv')

# 2. what tag do we want to predict?
tag_type = 'pos'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
with mx.Context(mxnet_prefer_gpu()):
    embedding_types = [
        # WordEmbeddings('data/embedding/ctb.pos.fasttext.300.txt'),
        BERTEmbeddings(['data/embedding/bert_base_sum/ctb.pos.train.bert',
                        'data/embedding/bert_base_sum/ctb.pos.dev.bert',
                        'data/embedding/bert_base_sum/ctb.pos.test.bert']),
        # CharLMEmbeddings('data/model/lm-news-forward'),
        # CharLMEmbeddings('data/model/lm-news-backward'),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    tagger = SequenceTagger(hidden_size=256,
                            embeddings=embeddings,
                            tag_dictionary=tag_dictionary,
                            tag_type=tag_type,
                            use_crf=True,
                            attention=True)
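
Note that in all five examples BERTEmbeddings takes a list of per-split files (*.train.bert, *.dev.bert, *.test.bert), which suggests precomputed BERT feature files rather than an on-the-fly encoder; whichever splits are loaded in step 1 need matching feature files here.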