def getSentences(text):
    """Segment *text* into sentences and return them as stripped strings.

    Builds a throwaway spaCy Spanish pipeline containing only the
    rule-based sentencizer, runs it over *text*, and collects each
    detected sentence with surrounding whitespace removed.
    """
    pipeline = Spanish()
    pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
    parsed = pipeline(text)
    sentences = []
    for sentence in parsed.sents:
        sentences.append(sentence.string.strip())
    return sentences
# Output the annotation file annotation_filepath = './evaluation/NER/' + doc[:-3] + 'ann' with open(annotation_filepath, 'w', encoding='utf-8') as annotation_file: annotation_file.write(output) annotation_file.close() if __name__ == "__main__": # load the model you trained model = SequenceTagger.load( 'resources/taggers/medium_updated/final-model.pt') ## Create spanish sentence segmenter with Spacy # the sentence segmentation by spacy is non-destructive, i.e., the empty lines are considered when getting a span of a given word/entity nlp = Spanish() sentencizer = nlp.create_pipe("sentencizer") nlp.add_pipe(sentencizer) test_dir = "./data/datasets/test-background-set-to-publish/" if not os.path.exists("./evaluation/NER/"): os.makedirs("./evaluation/NER/") for doc in os.listdir( test_dir ): #For each document in test_dir build the respective annotation file with predicted entities doc_filepath = test_dir + doc build_annotation_file(doc, doc_filepath, model)