from flair.models import SequenceTagger from flair.data import Sentence, Token import requests from simstring.feature_extractor.character_ngram import CharacterNgramFeatureExtractor from simstring.measure.jaccard import JaccardMeasure from simstring.feature_extractor.word_ngram import WordNgramFeatureExtractor from simstring.measure.cosine import CosineMeasure from simstring.database.dict import DictDatabase from simstring.searcher import Searcher from flair.data_fetcher import NLPTaskDataFetcher LOCATION_WIKIPEDIA_DISAMBIGUATION = "../wikidisambiguationpages.txt" ag = Agdistis() ag.agdistisApi = "http://localhost:8080/AGDISTIS" def load_disambiguation(): db = DictDatabase(WordNgramFeatureExtractor(2)) with open(LOCATION_WIKIPEDIA_DISAMBIGUATION) as disambig_file: for line in disambig_file: r = line.replace("_(disambiguation)", "").replace("_", " ").lower() db.add(r.strip()) return Searcher(db, JaccardMeasure()) def process_conll_doc(input_file_name, output_file_name, ner_model,
def __init__(self):
    """Instantiate ``Agdistis`` and ``Fox`` and keep them on the instance.

    Both constructors are called without arguments; the resulting objects
    are stored as ``self.agdistis`` and ``self.fox``.
    """
    # NOTE(review): presumably wrappers around the AGDISTIS and FOX
    # services -- confirm against their definitions.
    self.agdistis = Agdistis()
    self.fox = Fox()