Пример #1
0
from flair.models import SequenceTagger
from flair.data import Sentence, Token
import requests

from simstring.feature_extractor.character_ngram import CharacterNgramFeatureExtractor
from simstring.measure.jaccard import JaccardMeasure
from simstring.feature_extractor.word_ngram import WordNgramFeatureExtractor
from simstring.measure.cosine import CosineMeasure
from simstring.database.dict import DictDatabase
from simstring.searcher import Searcher

from flair.data_fetcher import NLPTaskDataFetcher

# Path of the text file read by load_disambiguation(). It is relative, so it
# is resolved against the current working directory at call time.
LOCATION_WIKIPEDIA_DISAMBIGUATION = "../wikidisambiguationpages.txt"
# NOTE(review): Agdistis is not imported in the visible part of this file --
# confirm where it is defined before relying on this module-level instance.
ag = Agdistis()
# Override the instance's API URL with an endpoint on localhost, port 8080.
ag.agdistisApi = "http://localhost:8080/AGDISTIS"


def load_disambiguation(path=None):
    """Build a similarity searcher over the disambiguation title list.

    Each line of the input file is treated as one entry. The literal
    ``_(disambiguation)`` marker is removed, underscores become spaces, and
    the text is lower-cased and stripped before being added to a
    ``DictDatabase`` built on ``WordNgramFeatureExtractor(2)``.

    Args:
        path: File to read. When ``None`` (the default), the module-level
            ``LOCATION_WIKIPEDIA_DISAMBIGUATION`` is used, looked up at call
            time.

    Returns:
        A ``Searcher`` over the populated database using ``JaccardMeasure``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    if path is None:
        path = LOCATION_WIKIPEDIA_DISAMBIGUATION

    db = DictDatabase(WordNgramFeatureExtractor(2))

    # Decode explicitly as UTF-8: titles may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to handle them.
    with open(path, encoding="utf-8") as disambig_file:
        for line in disambig_file:
            title = (
                line.replace("_(disambiguation)", "")
                .replace("_", " ")
                .lower()
                .strip()
            )
            # Blank lines would otherwise be indexed as empty-string entries.
            if title:
                db.add(title)

    return Searcher(db, JaccardMeasure())


def process_conll_doc(input_file_name, output_file_name, ner_model,
Пример #2
0
 def __init__(self):
     """Create one Agdistis and one Fox object and keep them on the instance.

     NOTE(review): presumably these wrap the AGDISTIS and FOX services --
     confirm against their definitions, which are not visible here.
     """
     # Construction and assignment order is kept: Agdistis first, then Fox.
     agdistis_client = Agdistis()
     self.agdistis = agdistis_client
     fox_client = Fox()
     self.fox = fox_client