Пример #1
0
    def __init__(self):
        """Load the knowledge base, phrase table, and category member database.

        All inputs are read from paths under the module-level ``wikidir``.
        The symbols, subject types, and subject properties are resolved
        against the commons store before it is frozen. The elapsed time of
        the knowledge base load and of the phrase table load is printed.
        """
        # Initialize commons store with knowledge base.
        start = time.time()
        self.commons = sling.Store()
        self.commons.lockgc()
        self.commons.load(wikidir + "/kb.sling", snapshot=True)
        self.n_item_member = self.commons['/w/item/member']
        self.n_instance_of = self.commons['P31']
        self.n_wikimedia_category = self.commons['Q4167836']
        self.n_subject = self.commons['subject']
        self.extractor = sling.FactExtractor(self.commons)

        # Add category subject types.
        # items() (rather than iteritems()) works on both Python 2 and 3.
        self.subjects = {
            subject: self.commons[item]
            for subject, item in english_subject_types.items()
        }

        # Add properties for subjects.
        self.subject_properties = [
            self.commons[p] for p in subject_properties
        ]

        # Every lookup above happens before the store is frozen.
        self.commons.freeze()
        end = time.time()
        # Single-argument print() emits the same text on Python 2 and 3.
        print("%s secs loading commons" % (end - start))

        # Load phrase table.
        # TODO(ringgaard): Load language-dependent phrase table.
        start = time.time()
        self.phrasetab = sling.PhraseTable(self.commons,
                                           wikidir + "/en/phrase-table.repo")
        end = time.time()
        print("%s secs loading phrase table" % (end - start))

        # Open category member database.
        self.member_db = sling.RecordDatabase(wikidir +
                                              "/wikipedia-members.rec")
Пример #2
0
 def init(self, task):
     """Set up the knowledge base, fact extractor, and fact matcher.

     The knowledge base comes from load_kb(task); the extractor is built
     on that knowledge base and the matcher on both of them.
     """
     knowledge_base = load_kb(task)
     self.kb = knowledge_base
     fact_extractor = sling.FactExtractor(knowledge_base)
     self.extractor = fact_extractor
     self.matcher = FactMatcher(knowledge_base, fact_extractor)
Пример #3
0
# Look up knowledge base symbols by id once, up front, and bind each to a
# module-level name.
n_lei = kb["P1278"]
n_swift_bic_code = kb["P2627"]
n_subsidiary = kb["P355"]
n_parent = kb["P749"]
n_owned_by = kb["P127"]
n_owner_of = kb["P1830"]
n_starttime = kb["P580"]
n_endtime = kb["P582"]
n_legal_form = kb["P1454"]
n_coord_location = kb["P625"]
n_geo = kb["/w/geo"]
n_lat = kb["/w/lat"]
n_lng = kb["/w/lng"]

# Phrase table loaded from the en/ repository, and a fact extractor, both
# built on kb.
aliases = sling.PhraseTable(kb, "data/e/kb/en/phrase-table.repo")
factex = sling.FactExtractor(kb)

# Taxonomy built from the two types listed below.
# NOTE(review): presumably used to recognize city-like items -- confirm
# against the code that consumes city_types.
city_types = factex.taxonomy([
  "Q486972", # human settlement
  "Q56061",  # administrative territorial entity
])

# Read registers.
bizregs = sling.dataset.bizreg.BusinessRegistries(kb)
regauth = bizregs.by_auth_code()

# Build country and region table.
countries = {}
regions = {}
for item in kb:
  # Value of the n_country_code slot for this item.
  code = item[n_country_code]
Пример #4
0
def increment_key(dictionary, key, delta=1):
    """Add `delta` to the value stored under `key`, treating a missing key as 0.

    Mutates `dictionary` in place and returns None.
    """
    current = dictionary.get(key, 0)
    dictionary[key] = current + delta


def fact_to_text(fact):
    """Render a fact as the names of its elements joined by ": ".

    Args:
      fact: iterable of objects that have a `name` attribute.

    Returns:
      The str() of each element's name, in order, separated by ": ".
      An empty fact yields the empty string.
    """
    return ": ".join(str(f.name) for f in fact)


# Create the module-level store and load the knowledge base into it.
commons = sling.Store()
commons.lockgc()
commons.load(wikidir + "/kb.sling", snapshot=True)
# Resolve the "is" symbol and build the fact extractor and phrase table on
# top of the store.
n_is = commons["is"]
extractor = sling.FactExtractor(commons)
phrasetab = sling.PhraseTable(commons, wikidir + "/en/phrase-table.repo")
# Freeze the store once the lookups and helpers above are in place.
commons.freeze()


class Name:
    """State kept for one document: coverage flags, evokes, matches, skips."""

    def __init__(self, doc):
        """Initialize the per-document state.

        Args:
          doc: document object exposing `frame.store()` and a `tokens`
            sequence whose elements have a `word` attribute.
        """
        self.doc = doc
        self.store = doc.frame.store()
        # One flag per token, all initially False.
        self.covered = [False] * len(doc.tokens)
        self.evokes = {}
        self.matched = set()
        # One flag per token: True when the token's word is in stop_words.
        self.skip = [t.word in stop_words for t in self.doc.tokens]
Пример #5
0
    def index(self, i, j):
        """Return the flat offset for cell (i, j).

        Rows are `self.size` wide; `i` is used as given and `j` is 1-based.
        """
        row_offset = i * self.size
        column_offset = j - 1  # second index is 1-based
        return row_offset + column_offset

    def assign(self, i, j, span):
        """Store `span` in the element addressed by (i, j)."""
        offset = self.index(i, j)
        self.elements[offset] = span

    def get(self, i, j):
        """Return the element addressed by (i, j)."""
        offset = self.index(i, j)
        return self.elements[offset]


# Create the module-level store and load the knowledge base into it.
commons = sling.Store()
commons.lockgc()
commons.load("data/e/wiki/kb.sling")
# Helpers built on the store: phrase table, document schema, fact extractor,
# and the taxonomy the extractor returns when called with no arguments.
phrasetab = sling.PhraseTable(commons, "data/e/wiki/en/phrase-table.repo")
docschema = sling.DocumentSchema(commons)
factex = sling.FactExtractor(commons)
taxonomy = factex.taxonomy()
# Store entries for the three ids listed below, looked up before the freeze.
titles = [
    commons['Q4164871'],  # position
    commons['Q12737077'],  # occupation
    commons['Q216353'],  # title
]
# Freeze the store once the lookups and helpers above are in place.
commons.freeze()

documentids = [
    #'Q5945076', 'Q23883660', 'Q43287478', 'Q2147524',
    #'Q25048736', 'Q6525874', 'Q3851366', 'Q308735', 'Q2184354',
    'Q5337174',
    'Q6218080',
    'Q1606412',
    'Q7264446',