Exemplo n.º 1
0
 def to_hyperedge(self, with_namespaces=True):
     """Return the symbol for this element's token word.

     When ``with_namespaces`` is true the symbol is produced by
     ``sym.build`` from the token word and ``self.namespace``; otherwise
     ``sym.str2symbol`` is applied to the word alone. If ``self.connector``
     is truthy the result is prefixed with ``+``.
     """
     word = self.token.word
     if with_namespaces:
         symbol = sym.build(word, self.namespace)
     else:
         symbol = sym.str2symbol(word)
     return '+%s' % symbol if self.connector else symbol
Exemplo n.º 2
0
 def to_hyperedge(self, with_namespaces=True):
     """Return the hyperedge form of this element.

     If ``self.compound`` is falsy, the result is a tuple holding the
     ``to_hyperedge`` result of every child, in ``self.children()`` order.

     Otherwise the words of ``self.natural_leaf_sequence()`` are joined
     with underscores into one symbol: via ``sym.str2symbol`` when
     ``with_namespaces`` is false, or via ``sym.build`` with
     ``self.namespace`` when it is true. In the latter case
     ``self.generate_namespace()`` is called first if ``self.namespace``
     is falsy.
     """
     if not self.compound:
         parts = [
             child.to_hyperedge(with_namespaces=with_namespaces)
             for child in self.children()
         ]
         return tuple(parts)

     words = [leaf.token.word for leaf in self.natural_leaf_sequence()]
     label = '_'.join(words)
     if not with_namespaces:
         return sym.str2symbol(label)
     if not self.namespace:
         self.generate_namespace()
     return sym.build(label, self.namespace)
Exemplo n.º 3
0
 def command_search(self, params):
     """Print every symbol returned for the search term in ``params``.

     The items of ``params`` are joined with single spaces to form the
     term, which is converted with ``sym.str2symbol`` and passed to
     ``self.hg.symbols_with_root``. Each returned symbol is printed on its
     own line via ``str``.
     """
     term = ' '.join(params)
     root = sym.str2symbol(term)
     # Printing is a side effect, so use a plain loop instead of building
     # a throwaway list of ``None`` values with a comprehension.
     for symbol in self.hg.symbols_with_root(root):
         print(str(symbol))
Exemplo n.º 4
0
    def process_entity(self, entity_id, exclude):
        """Disambiguate one entity of the output tree and set its namespace.

        The entity is fetched from ``self.output.tree`` by ``entity_id``.
        Unless it is a leaf whose token POS is ``ADP`` or ``CONJ``,
        ``self.disamb.best_sense`` is asked for a candidate and its metrics.
        The children are then processed recursively; if any child returns
        metrics that are ``better_than`` the current ones, those metrics
        take over and this entity is not given a namespace of its own.

        Args:
            entity_id: id used to fetch the entity from ``self.output.tree``.
            exclude: sequence that is copied, extended with this entity's
                text and passed on to the recursive calls. It is not read
                for any other purpose in this method.

        Returns:
            The metrics finally selected: this entity's own, or those of
            the last child whose metrics were better than the running best.

        Side effects:
            Writes ``candidates``, ``words1``, ``words2`` and ``time`` under
            ``self.profiling[<entity text>]``; may set ``entity.namespace``
            and ``entity.compound``; may append a ``have_same_lemma`` edge
            to ``self.output.edges``.
        """
        start = time.time()
        entity = self.output.tree.get(entity_id)

        # profiling
        prof_key = entity.as_text()
        self.profiling[prof_key] = {'candidates': 0, 'words1': 0, 'words2': 0}

        # Candidate roots: the literal text plus a lemma-based variant.
        roots = {sym.str2symbol(entity.as_text())}
        if entity.is_leaf():
            roots.add(sym.str2symbol(entity.token.lemma))
        else:
            # Keep every word as-is except the last one, which is replaced
            # by its lemma.
            words = entity.as_label_list()
            lemmas = entity.as_label_list(lemmas=True)
            lemma_at_end = ' '.join(words[:-1] + [lemmas[-1]])
            roots.add(sym.str2symbol(lemma_at_end))
        namespaces = None
        if force_wordnet(entity):
            namespaces = ('wn.', 'lem.wn.')

        if entity.is_leaf() and entity.token.pos in {'ADP', 'CONJ'}:
            # No disambiguation is attempted for these leaf tokens.
            disamb_ent = None
            metrics = CandidateMetrics()
        else:
            disamb_ent, metrics = self.disamb.best_sense(
                roots, self.aux_text, namespaces)
            # profiling
            self.profiling[prof_key]['candidates'] = self.disamb.candidates
            self.profiling[prof_key]['words1'] = self.disamb.words1
            self.profiling[prof_key]['words2'] = self.disamb.words2

        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        logging.info('[disamb] text: %s; entity: %s; metrics: %s',
                     entity.as_text(), disamb_ent, metrics)

        # Work on a copy so the caller's sequence is not mutated.
        exclude = exclude[:]
        exclude.append(entity.as_text())

        make_entity = True
        if entity.is_node():
            for child_id in entity.children_ids:
                m = self.process_entity(child_id, exclude)
                if m.better_than(metrics):
                    # A child beats the running best: adopt its metrics and
                    # do not assign a namespace to this entity itself.
                    make_entity = False
                    metrics = m

        if make_entity:
            if disamb_ent is None:
                entity.generate_namespace()
            else:
                if entity.as_text() == sym.root(disamb_ent):
                    entity.namespace = sym.nspace(disamb_ent)
                # entity with shared lemma
                else:
                    entity.namespace = '%s.%s' % (
                        const.lemma_derived_namespace, sym.nspace(disamb_ent))
                    # additional edge for shared lemma
                    self.output.edges.append(
                        (const.have_same_lemma, entity.to_hyperedge(),
                         disamb_ent))
            if entity.is_node():
                entity.compound = True
        elif entity.is_node() and self.is_compound(entity):
            entity.compound = True

        # profiling
        self.profiling[prof_key]['time'] = time.time() - start

        return metrics
Exemplo n.º 5
0
 def add(self, wikidata_id, label):
     """Insert a ``(wikidata_id, symbol)`` row into the ``entities`` table.

     ``label`` is converted to a symbol with ``sym.str2symbol`` before
     being stored. The cursor comes from ``self.grab_cursor()`` and is
     handed back with ``self.release_cursor()``.

     Args:
         wikidata_id: value stored in the ``wikidata_id`` column.
         label: text converted to the value of the ``symbol`` column.
     """
     cur = self.grab_cursor()
     try:
         symbol = sym.str2symbol(label)
         cur.execute('INSERT INTO entities (wikidata_id, symbol) VALUES (?, ?)',
                     (wikidata_id, symbol))
     finally:
         # Release the cursor even when the conversion or the INSERT
         # raises, so a failure does not leave it grabbed.
         # NOTE(review): assumes release_cursor() is safe to call after a
         # failed execute -- confirm against its implementation.
         self.release_cursor()