Пример #1
0
class AgdistisWrapper(object):
    """Helper that combines an ``Agdistis`` client with a ``Fox`` annotator.

    Strings are passed through the module-level ``clear_string`` helper
    before they are handed to AGDISTIS. Results are lists of entity dicts
    with at least the keys ``disambiguatedURL`` and ``namedEntity`` (see the
    example in :meth:`disambiguate_entity`).
    """

    def __init__(self):
        """Instantiate the ``Agdistis`` and ``Fox`` clients used by the methods."""
        self.agdistis = Agdistis()
        self.fox = Fox()

    def flatten_urls(self, entities):
        """Return the ``disambiguatedURL`` of every entity that has one.

        Args:
            entities: iterable of entity dicts, each with a
                ``disambiguatedURL`` key.

        Returns:
            A list of the URLs in input order; entities whose URL is
            ``None`` are dropped.

        Raises:
            KeyError: if an entity has no ``disambiguatedURL`` key.
        """
        return [
            item['disambiguatedURL']
            for item in entities
            if item['disambiguatedURL'] is not None
        ]

    def disambiguate_entity(self, string):
        """Clean *string* and disambiguate it via ``disambiguateEntity``.

        Example of the result for the input ``Austria``::

            [{u'disambiguatedURL': u'http://dbpedia.org/resource/Austria',
              u'namedEntity': u'Austria',
              u'offset': 7,
              u'start': 0}]
        """
        string = clear_string(string)
        return self.agdistis.disambiguateEntity(string)

    def disambiguate(self, string):
        """Clean *string* and pass it to the client's ``disambiguate`` call."""
        string = clear_string(string)
        return self.agdistis.disambiguate(string)

    def disambiguate_table(self, table):
        """Disambiguate every row of ``table.table`` cell by cell.

        Returns:
            One entry per row, each being the result of
            :meth:`disambiguate_row` for that row.
        """
        return [self.disambiguate_row(row) for row in table.table]

    def _disambiguate_row(self, row):
        """Concatenate the row, annotate it with FOX and disambiguate it at once.

        Args:
            row: sequence of cell strings.

        Returns:
            A list with one entry per cell. A cell whose text contains an
            entity's ``namedEntity`` gets ``[disambiguatedURL]`` of that
            entity (the last matching entity wins, and the URL is stored
            even when it is ``None``); every other cell gets its own empty
            list.
        """
        # One independent list per cell: ``[[]] * n`` would repeat a single
        # shared list object, so mutating one cell's result would leak into
        # every other unmatched cell.
        r_entities = [[] for _ in row]
        row_concat = " ".join(row)
        entities = self.fox.annotateEntities(row_concat)
        d_entities = self.disambiguate(entities)
        for _entity in d_entities:
            for cell_i, cell in enumerate(row):
                if _entity["namedEntity"] in cell:
                    r_entities[cell_i] = [_entity["disambiguatedURL"]]
        return r_entities

    def disambiguate_row(self, row):
        """Disambiguate a row cell by cell.

        The original author notes that this performs better than the
        whole-row variant :meth:`_disambiguate_row`.

        Returns:
            A list with one entry per cell, each the flattened list of
            URLs found for that cell.
        """
        return [
            self.flatten_urls(self.disambiguate_entity(cell))
            for cell in row
        ]
Пример #2
0
class AgdistisIdentifier(object):
    """Helper around an ``Agdistis`` client that cleans strings before lookup."""

    def __init__(self):
        """Instantiate the ``Agdistis`` client used by :meth:`identifyEntity`."""
        self.agdistis = Agdistis()

    def flattenUrls(self, entities):
        """Return the ``disambiguatedURL`` of every entity that has one.

        Args:
            entities: iterable of entity dicts, each with a
                ``disambiguatedURL`` key.

        Returns:
            A list of the URLs in input order; entities whose URL is
            ``None`` are dropped.
        """
        return [
            item['disambiguatedURL']
            for item in entities
            if item['disambiguatedURL'] is not None
        ]

    def identifyEntity(self, string):
        """Clean *string* with :meth:`clearString` and disambiguate it.

        Example of the result for the input ``Austria``::

            [{u'disambiguatedURL': u'http://dbpedia.org/resource/Austria',
              u'namedEntity': u'Austria',
              u'offset': 7,
              u'start': 0}]
        """
        string = self.clearString(string)
        return self.agdistis.disambiguateEntity(string)

    def clearString(self, string):
        """Remove ``{``, ``}``, ``|`` and all space characters from *string*.

        Remaining leading/trailing whitespace (tabs, newlines, ...) is
        stripped afterwards.

        Args:
            string: the text to clean.

        Returns:
            The cleaned text.
        """
        characters = "{}|"
        # ``string.translate(None, characters)`` only exists for Python 2
        # byte strings; plain ``replace`` works for every string type.
        for character in characters:
            string = string.replace(character, '')
        string = re.sub(' ', '', string)
        return string.strip()
Пример #3
0
from flair.models import SequenceTagger
from flair.data import Sentence, Token
import requests

from simstring.feature_extractor.character_ngram import CharacterNgramFeatureExtractor
from simstring.measure.jaccard import JaccardMeasure
from simstring.feature_extractor.word_ngram import WordNgramFeatureExtractor
from simstring.measure.cosine import CosineMeasure
from simstring.database.dict import DictDatabase
from simstring.searcher import Searcher

from flair.data_fetcher import NLPTaskDataFetcher

# Relative path of the text file that load_disambiguation() reads line by line.
LOCATION_WIKIPEDIA_DISAMBIGUATION = "../wikidisambiguationpages.txt"
# Module-level AGDISTIS client, created at import time and pointed at an
# endpoint on localhost:8080.
# NOTE(review): Agdistis is not imported in the visible part of this file —
# confirm the import exists elsewhere.
ag = Agdistis()
ag.agdistisApi = "http://localhost:8080/AGDISTIS"


def load_disambiguation(path=None):
    """Build a simstring searcher over the disambiguation-page titles.

    Each line of the file is normalised by removing the
    ``_(disambiguation)`` marker, turning underscores into spaces,
    lower-casing and stripping surrounding whitespace, and is then added
    to a ``DictDatabase`` created with ``WordNgramFeatureExtractor(2)``.

    Args:
        path: file to read; defaults to
            ``LOCATION_WIKIPEDIA_DISAMBIGUATION`` (looked up at call time).

    Returns:
        A ``Searcher`` over that database using ``JaccardMeasure``.
    """
    if path is None:
        path = LOCATION_WIKIPEDIA_DISAMBIGUATION

    db = DictDatabase(WordNgramFeatureExtractor(2))

    # Decode explicitly instead of relying on the platform locale.
    # NOTE(review): assumes the title list is UTF-8 encoded — confirm.
    with open(path, encoding="utf-8") as disambig_file:
        for line in disambig_file:
            title = line.replace("_(disambiguation)", "").replace("_", " ")
            db.add(title.lower().strip())

    return Searcher(db, JaccardMeasure())


def process_conll_doc(input_file_name, output_file_name, ner_model,
Пример #4
0
 def __init__(self):
     """Instantiate ``Agdistis`` and ``Fox`` and store them on the instance."""
     self.agdistis = Agdistis()
     self.fox = Fox()
Пример #5
0
from agdistispy.agdistis import Agdistis

# Usage example: pass one string whose mention is wrapped in <entity> tags to
# Agdistis.disambiguate() and print whatever it returns.
ag = Agdistis()
entities=ag.disambiguate("<entity>Austria</entity>")
print(entities)
Пример #6
0
 def __init__(self):
     """Instantiate ``Agdistis`` and store it on the instance."""
     self.agdistis = Agdistis()