class AgdistisWrapper(object):
    """Helper that combines an ``Agdistis`` and a ``Fox`` instance.

    Offers disambiguation of single strings, of table rows and of whole
    tables, returning either the raw results of the underlying objects or
    plain lists of disambiguated URLs.
    """

    def __init__(self):
        """Instantiate ``Agdistis`` and ``Fox`` and keep them on the instance."""
        self.agdistis = Agdistis()
        self.fox = Fox()

    def flatten_urls(self, entities):
        """Return the ``disambiguatedURL`` of every entity that has one.

        entities: iterable of dicts carrying a ``'disambiguatedURL'`` key.
        Entries whose URL is ``None`` are dropped; input order is kept.
        """
        return [
            item['disambiguatedURL']
            for item in entities
            if item['disambiguatedURL'] is not None
        ]

    def disambiguate_entity(self, string):
        """Clean *string* and pass it to ``Agdistis.disambiguateEntity``.

        Example result for the input ``Austria``::

            [{u'disambiguatedURL': u'http://dbpedia.org/resource/Austria',
              u'namedEntity': u'Austria',
              u'offset': 7,
              u'start': 0}]
        """
        string = clear_string(string)
        return self.agdistis.disambiguateEntity(string)

    def disambiguate(self, string):
        """Clean *string* and pass it to ``Agdistis.disambiguate``."""
        string = clear_string(string)
        return self.agdistis.disambiguate(string)

    def disambiguate_table(self, table):
        """Disambiguate every row of ``table.table`` cell by cell.

        Returns one ``disambiguate_row`` result per row, in row order.
        """
        return [self.disambiguate_row(row) for row in table.table]

    def _disambiguate_row(self, row):
        """Concatenate the row and disambiguate it as a single text.

        The cells are joined with single spaces, annotated with
        ``Fox.annotateEntities`` and then disambiguated.  Each result is
        assigned to every cell whose text contains the entity's
        ``namedEntity``; a later match for the same cell replaces an earlier
        one.  Cells without a match keep an empty list.
        """
        # One fresh list per cell: ``[[]] * len(row)`` would place the *same*
        # list object in every slot, so mutating one unmatched cell of the
        # result would silently change all the others.
        r_entities = [[] for _ in range(len(row))]
        row_concat = " ".join(row)
        entities = self.fox.annotateEntities(row_concat)
        d_entities = self.disambiguate(entities)
        for entity in d_entities:
            for cell_i, cell in enumerate(row):
                # Substring test: the entity only has to occur inside the cell.
                if entity["namedEntity"] in cell:
                    r_entities[cell_i] = [entity["disambiguatedURL"]]
        return r_entities

    def disambiguate_row(self, row):
        """Disambiguate the row cell by cell.

        According to the original author this performs better than
        disambiguating the concatenated row.  Returns, for each cell, the
        list of non-``None`` URLs found for that cell.
        """
        return [
            self.flatten_urls(self.disambiguate_entity(cell))
            for cell in row
        ]
class AgdistisIdentifier(object):
    """Helper around an ``Agdistis`` instance for identifying single entities."""

    def __init__(self):
        """Instantiate ``Agdistis`` and keep it on the instance."""
        self.agdistis = Agdistis()

    def flattenUrls(self, entities):
        """Return the ``disambiguatedURL`` of every entity that has one.

        entities: iterable of dicts carrying a ``'disambiguatedURL'`` key.
        Entries whose URL is ``None`` are dropped; input order is kept.
        """
        return [
            item['disambiguatedURL']
            for item in entities
            if item['disambiguatedURL'] is not None
        ]

    def identifyEntity(self, string):
        """Clean *string* and pass it to ``Agdistis.disambiguateEntity``.

        Example result for the input ``Austria``::

            [{u'disambiguatedURL': u'http://dbpedia.org/resource/Austria',
              u'namedEntity': u'Austria',
              u'offset': 7,
              u'start': 0}]
        """
        string = self.clearString(string)
        return self.agdistis.disambiguateEntity(string)

    def clearString(self, string):
        """Return *string* without ``{``, ``}``, ``|`` and space characters.

        The result is additionally stripped of leading and trailing
        whitespace.
        """
        # A regular expression replaces ``string.translate(None, "{}|")``:
        # that two-argument form only exists for Python 2 byte strings and
        # raises TypeError for Python 3 ``str`` and Python 2 ``unicode``.
        string = re.sub('[{}|]', '', string)
        string = re.sub(' ', '', string)
        return string.strip()
from flair.models import SequenceTagger
from flair.data import Sentence, Token
import requests
from simstring.feature_extractor.character_ngram import CharacterNgramFeatureExtractor
from simstring.measure.jaccard import JaccardMeasure
from simstring.feature_extractor.word_ngram import WordNgramFeatureExtractor
from simstring.measure.cosine import CosineMeasure
from simstring.database.dict import DictDatabase
from simstring.searcher import Searcher
from flair.data_fetcher import NLPTaskDataFetcher

# Path of the text file read by load_disambiguation(); being relative, it is
# resolved against the current working directory.
LOCATION_WIKIPEDIA_DISAMBIGUATION = "../wikidisambiguationpages.txt"

# Module-level Agdistis instance whose ``agdistisApi`` attribute is set to a
# localhost URL (presumably a locally running AGDISTIS service - confirm).
ag = Agdistis()
ag.agdistisApi = "http://localhost:8080/AGDISTIS"


def load_disambiguation():
    """Build a simstring ``Searcher`` from the disambiguation file.

    Every line of ``LOCATION_WIKIPEDIA_DISAMBIGUATION`` is normalised (the
    ``_(disambiguation)`` marker removed, underscores turned into spaces,
    lower-cased, surrounding whitespace stripped) and added to a
    ``DictDatabase``.  The database is wrapped in a ``Searcher`` that uses
    ``JaccardMeasure`` and returned.
    """
    # NOTE(review): the argument 2 is presumably the word n-gram size - confirm
    # against the simstring documentation.
    db = DictDatabase(WordNgramFeatureExtractor(2))
    with open(LOCATION_WIKIPEDIA_DISAMBIGUATION) as disambig_file:
        # NOTE(review): presumably one page title per line - confirm.
        for line in disambig_file:
            r = line.replace("_(disambiguation)", "").replace("_", " ").lower()
            # strip() also removes the trailing newline of each line.
            db.add(r.strip())
    return Searcher(db, JaccardMeasure())


# NOTE(review): the signature below is cut off in this view; its remaining
# parameters and its body are not visible here and are left untouched.
def process_conll_doc(input_file_name, output_file_name, ner_model,
def __init__(self):
    """Instantiate ``Agdistis`` and ``Fox`` and keep them on the instance."""
    self.agdistis = Agdistis()
    self.fox = Fox()
# Minimal usage example: disambiguate one entity marked up with <entity> tags
# and print whatever Agdistis.disambiguate returns.
from agdistispy.agdistis import Agdistis

ag = Agdistis()
# The entity to disambiguate is wrapped in <entity>...</entity> inside the text.
entities=ag.disambiguate("<entity>Austria</entity>")
print(entities)
def __init__(self):
    """Instantiate ``Agdistis`` and keep it on the instance."""
    self.agdistis = Agdistis()