def setup(self):
    """Fetch the DaNE data and the pre-trained NERDA Danish ELECTRA model, then load it."""
    download_dane_data()
    self.model = DA_ELECTRA_DA()
    # Must download the network weights before loading them.
    self.model.download_network()
    self.model.load_network()
    # WikiANN has sentences longer than default of 128 words
    # NOTE(review): comment says WikiANN, but this file loads DaNE — confirm which
    # dataset this benchmark actually runs on.
    self.model.max_len = 300
import time

from flair.data import Sentence, Token

from utils import print_speed_performance, f1_report

from danlp.datasets import DDT
from danlp.models import load_spacy_model, load_flair_ner_model, \
    load_bert_ner_model
from NERDA.datasets import download_dane_data
from NERDA.precooked import DA_BERT_ML, DA_ELECTRA_DA

# NERDA models need the DaNE files present before they can be set up.
download_dane_data()


def is_misc(ent: str) -> bool:
    """Return True if *ent* is a MISC entity tag (e.g. 'B-MISC', 'I-MISC').

    str.endswith already returns False for strings shorter than 'MISC',
    so no explicit length guard is needed.
    """
    return ent.endswith('MISC')


def remove_miscs(se: list) -> list:
    """Replace every MISC tag with the outside tag 'O'.

    *se* is a list of sentences, each sentence a list of entity tag strings.
    Returns a new nested list; the input is not mutated.
    """
    return [['O' if is_misc(entity) else entity for entity in entities]
            for entities in se]


# Load the DaNE test split as simple NER data (tokens + BIO entity tags)
_, _, test = DDT().load_as_simple_ner(predefined_splits=True)
sentences_tokens, sentences_entities = test

# Replace MISC with O for fair comparisons