Example #1
from NERDA.datasets import download_dane_data
from NERDA.precooked import DA_ELECTRA_DA


def setup(self):
    # Fetch the DaNE dataset and the precooked Danish ELECTRA NER model
    download_dane_data()
    self.model = DA_ELECTRA_DA()
    self.model.download_network()
    self.model.load_network()
    # WikiANN has sentences longer than the default of 128 words
    self.model.max_len = 300
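
For context, a minimal standalone sketch of how a precooked model loaded this way might then be applied, assuming NERDA's predict_text interface; the Danish sentence is purely illustrative:

from NERDA.precooked import DA_ELECTRA_DA

# Assumed usage: download, load and apply the precooked Danish ELECTRA NER model
model = DA_ELECTRA_DA()
model.download_network()
model.load_network()
tokens, labels = model.predict_text('Jens Hansen har en bondegård i Jylland')
print(list(zip(tokens[0], labels[0])))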
Example #2
import time

from flair.data import Sentence, Token
from utils import print_speed_performance, f1_report

from danlp.datasets import DDT
from danlp.models import load_spacy_model, load_flair_ner_model, \
    load_bert_ner_model

from NERDA.datasets import download_dane_data
from NERDA.precooked import DA_BERT_ML, DA_ELECTRA_DA

download_dane_data()


def is_misc(ent: str):
    # True if the entity tag belongs to the MISC category (e.g. 'B-MISC', 'I-MISC')
    if len(ent) < 4:
        return False
    return ent[-4:] == 'MISC'


def remove_miscs(se: list):
    # Map all MISC tags to 'O' so models without a MISC category can be compared fairly
    return [[entity if not is_misc(entity) else 'O' for entity in entities]
            for entities in se]


# Load the DaNE data
_, _, test = DDT().load_as_simple_ner(predefined_splits=True)
sentences_tokens, sentences_entities = test

# Replace MISC with O for fair comparisons