Пример #1
0
 def test_edge2str(self):
     """Check the string rendering of flat, numeric and nested edges."""
     cases = (
         (('is', 'graphbrain/1', 'great/1'), '(is graphbrain/1 great/1)'),
         (('size', 'graphbrain/1', 7), '(size graphbrain/1 7)'),
         (('size', 'graphbrain/1', 7.), '(size graphbrain/1 7.0)'),
         (('size', 'graphbrain/1', -7.), '(size graphbrain/1 -7.0)'),
         (('src', 'graphbrain/1', ('is', 'graphbrain/1', 'great/1')),
          '(src graphbrain/1 (is graphbrain/1 great/1))'),
     )
     for edge, expected in cases:
         self.assertEqual(ed.edge2str(edge), expected)
Пример #2
0
 def remove_raw(self, edge):
     """Remove an edge; helper for remove, to be called inside a transaction.

     Does nothing when the edge does not exist. Otherwise the degree of
     every element of the edge is decremented, the edge permutations are
     removed and finally the edge's own string entry is removed.
     """
     if not self.exists(edge):
         return
     for element in edge:
         self.dec_degree(ed.edge2str(element))
     self.remove_edge_permutations(edge)
     self.remove_str(ed.edge2str(edge))
Пример #3
0
 def add_raw(self, edge, timestamp):
     """Add an edge; helper for add, to be called inside a transaction.

     An edge that already exists is returned untouched. Otherwise every
     element of the edge has its degree incremented (or is created via
     add_str when inc_degree reports failure), then the edge itself is
     stored and its permutations are written.

     Returns the edge that was passed in.
     """
     if self.exists(edge):
         return edge
     for element in edge:
         element_str = ed.edge2str(element)
         incremented = self.inc_degree(element_str)
         if not incremented:
             self.add_str(element_str, 1, timestamp)
     self.add_str(ed.edge2str(edge), 0, timestamp)
     self.write_edge_permutations(edge)
     return edge
Пример #4
0
 def star(self, center):
     """Return all the edges that contain the given entity.

     The entity may be atomic or an edge; a list/tuple is converted to its
     string form before being handed to str2perms.
     """
     is_edge = isinstance(center, (list, tuple))
     lookup_key = ed.edge2str(center) if is_edge else center
     return self.str2perms(lookup_key)
Пример #5
0
def do_with_edge_permutations(edge, f):
    """Call f once for every permutation of the given edge.

    f receives a single string: the space-separated string forms of the
    permuted elements, followed by a space and the permutation index.
    """
    total = math.factorial(len(edge))
    for index in range(total):
        parts = [ed.edge2str(item) for item in nthperm(edge, index)]
        f('%s %s' % (' '.join(parts), index))
Пример #6
0
 def degree(self, vertex):
     """Return the degree stored for a vertex, or 0 when no row matches."""
     vertex_id = ed.edge2str(vertex)
     cursor = self.open_cursor()
     query = 'SELECT degree FROM vertices WHERE id=%s' % (self.ph,)
     cursor.execute(query, (vertex_id,))
     for record in cursor:
         # Only the first matching row is used: close and return at once.
         found = record[0]
         cursor.close()
         return found
     # No row matched: release the cursor through the shared helper.
     self.close_cursor(cursor, local=True, commit=False)
     return 0
Пример #7
0
 def timestamp(self, vertex):
     """Return the timestamp stored for a vertex, or -1 when no row matches."""
     vertex_id = ed.edge2str(vertex)
     cursor = self.open_cursor()
     query = 'SELECT timestamp FROM vertices WHERE id=%s' % (self.ph,)
     cursor.execute(query, (vertex_id,))
     for record in cursor:
         # Only the first matching row is used: close and return at once.
         found = record[0]
         cursor.close()
         return found
     # No row matched: release the cursor through the shared helper.
     self.close_cursor(cursor, local=True, commit=False)
     return -1
Пример #8
0
def edge_to_visual(hg, edge, depth):
    """Build the list of HTML fragments that display an edge.

    The first element of the edge is treated as the relationship part (it
    is wrapped in a tuple unless it is itself an edge); the remaining
    elements are the entities. With exactly one relationship and one
    entity, the relationship is rendered first. Otherwise entities and
    relationship parts are interleaved, entity first. When depth is
    greater than zero a zoom-in link to the edge is appended.
    """
    def render(item, is_rel):
        # Every fragment is rendered one level deeper, without degree badge
        # or outer wrapper.
        return edge_html(hg,
                         item,
                         show_degree=False,
                         outer=False,
                         rel=is_rel,
                         depth=depth + 1)

    rels, entities = edge[0], edge[1:]
    if sym.sym_type(rels) != sym.SymbolType.EDGE:
        rels = (rels, )

    if len(entities) == 1 and len(rels) == 1:
        fragments = [render(rels[0], True), render(entities[0], False)]
    else:
        fragments = []
        for position, entity in enumerate(entities):
            fragments.append(render(entity, False))
            if position < len(rels):
                fragments.append(render(rels[position], True))

    if depth > 0:
        target = urllib.parse.quote_plus(ed.edge2str(edge))
        icon = '<span class="glyphicon glyphicon-zoom-in zoom-in" aria-hidden="true" />'
        fragments.append('<a href="/vertex?id=%s">%s</a>' % (target, icon))
    return fragments
Пример #9
0
    def generate_synonyms(self, entity_id):
        """Append synonym edges for an entity and its descendants.

        Children are processed before the entity itself. Generated edges
        are appended to self.output.edges.
        """
        entity = self.output.tree.get(entity_id)

        # Depth-first: handle the children before this entity.
        if entity.is_node():
            for child_id in entity.children_ids:
                self.generate_synonyms(child_id)

        edge = entity.to_hyperedge()
        synonym = entity.to_synonym()
        if synonym:
            self.output.edges.append([cons.are_synonyms, edge, synonym])

        if not (entity.is_node() and entity.children()[0].is_connector()):
            return

        # Nodes whose first child is a connector also get a symbol built
        # from their text, in a namespace derived from the edge string.
        text = entity.as_text()
        namespace = 'gb%s' % sym.hashed(ed.edge2str(edge))
        symbol = sym.build(text, namespace)
        self.output.edges.append([cons.are_synonyms, edge, symbol])
Пример #10
0
    def add_edges(self, edge):
        """Count an edge and, recursively, every element it contains.

        The namespace-free string form of the edge is used as the key in
        self.edge_counts. Atoms (non-edges) are filtered before counting:
        one leading '+' is dropped, and the atom is skipped when the
        remaining string is empty, does not start with an alphanumeric
        character, or when self.parser.make_word(...).prob exceeds
        MAX_PROB.
        """
        is_edge = sym.is_edge(edge)
        if is_edge:
            for item in edge:
                self.add_edges(item)

        edge_str = ed.edge2str(edge, namespaces=False)
        if not is_edge:
            # startswith() is safe on an empty string, whereas indexing
            # edge_str[0] before the emptiness check raised IndexError.
            if edge_str.startswith('+'):
                edge_str = edge_str[1:]
            if not edge_str:
                return
            if not edge_str[0].isalnum():
                return
            if self.parser.make_word(edge_str).prob > MAX_PROB:
                return
        if edge_str not in self.edge_counts:
            self.edge_counts[edge_str] = 0
        self.edge_counts[edge_str] += 1
Пример #11
0
def edge_html(hg, edge, show_degree=False, outer=True, rel=False, depth=0):
    """Return the HTML for an edge or symbol.

    Anything that is not an edge is delegated to symbol_html. An edge is
    rendered as a 'hyperedge' div tagged with its depth; when outer is
    true it is additionally wrapped in an 'outer-hyperedge' div together
    with a zoom-out link and, if show_degree is set, a degree badge.
    """
    if sym.sym_type(edge) != sym.SymbolType.EDGE:
        return symbol_html(edge, rel)

    depth_class = 'depth%s' % str(depth)
    content = ' '.join(edge_to_visual(hg, edge, depth))
    inner = '<div class="hyperedge %s">%s</div>' % (depth_class, content)
    if not outer:
        return inner

    badge = ''
    if show_degree:
        badge = '<span class="badge">%s</span>' % hg.degree(edge)
    icon = '<span class="glyphicon glyphicon-zoom-out zoom-out" aria-hidden="true" />'
    link = '<a href="/vertex?id=%s">%s</a>' % (
        urllib.parse.quote_plus(ed.edge2str(edge)), icon)
    return '<div class="outer-hyperedge">%s%s%s</div>' % (inner, link, badge)
Пример #12
0
    def edge2str(self, edge):
        """Return the namespace-free string for an edge, or None if filtered.

        Edges are always returned as-is. For atoms, one leading '+' is
        dropped and None is returned when the remaining string is empty,
        does not start with an alphanumeric character, or when
        self.parser.make_word(...).prob is not below MAX_PROB.
        """
        s = ed.edge2str(edge, namespaces=False)
        if sym.is_edge(edge):
            return s

        # startswith() instead of s[0]: an empty string must fall through
        # to the emptiness check below rather than raise IndexError.
        if s.startswith('+'):
            s = s[1:]

        if not s or not s[0].isalnum():
            return None

        word = self.parser.make_word(s)
        return s if word.prob < MAX_PROB else None
Пример #13
0
 def print_atom_groups(self):
     """Print every atom group that has more than three sentences.

     Printed groups are numbered consecutively starting at 1; edges are
     shown without namespaces.
     """
     shown = 0
     for group in self.atom_groups.values():
         sentences = group['sentences']
         size = len(sentences)
         if size <= 3:
             continue
         shown += 1
         print('ATOM_GROUP id: %s' % shown)
         print('Base concepts: %s' % group['label'])
         print('size: %s' % size)
         print('sentences:')
         for sentence in sentences:
             print('* %s' % sentence)
         print('edges:')
         for edge_str in group['edges']:
             plain_edge = ed.without_namespaces(ed.str2edge(edge_str))
             print('* %s' % ed.edge2str(plain_edge))
         print()
Пример #14
0
    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse a text and read each of the resulting sentences.

        The parser and the disambiguation helper are created on first use.
        When reset_context is true, self.aux_text is replaced by the text
        (followed by aux_text on a new line, if given).

        Returns a list of tuples pairing the first item of every parse with
        the result of read_sentence for that parse.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)
        nlp_parses = self.parser.parse_text(text)

        if reset_context:
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)
            else:
                self.aux_text = text

        parses = []
        for parse in nlp_parses:
            reading = self.read_sentence(Sentence(parse[1]))
            parses.append((parse[0], reading))

        for parse in parses:
            self.debug_msg('== extra ==')
            for extra_edge in parse[1].edges:
                self.debug_msg(ed.edge2str(extra_edge))

        return parses
Пример #15
0
    def test_all_metrics(self):
        """Check the 'd' attribute reported by all_attributes for each vertex.

        Three edges are added to an empty hypergraph and every vertex is
        rendered as '<vertex string> <d>'. The resulting set must match the
        expected labels exactly. After destroying the hypergraph,
        all_attributes must yield nothing.
        """
        self.hg.destroy()
        self.hg.add(('size', 'graphbrain/1', -7.0))
        self.hg.add(('is', 'graphbrain/1', 'great/1'))
        self.hg.add(('src', 'mary/1', ('is', 'graphbrain/1', 'great/1')))

        labels = {
            '%s %s' % (ed.edge2str(t[0]), t[1]['d'])
            for t in self.hg.all_attributes()
        }
        # The test must assert equality: assertNotEqual would pass for any
        # wrong result and fail for the correct one.
        self.assertEqual(
            labels, {
                'size 1', 'graphbrain/1 2', '-7.0 1', 'is 1', 'great/1 1',
                'src 1', 'mary/1 1', '(size graphbrain/1 -7.0) 0',
                '(is graphbrain/1 great/1) 1',
                '(src mary/1 (is graphbrain/1 great/1)) 0'
            })
        self.hg.destroy()
        labels = set(self.hg.all_attributes())
        self.assertEqual(labels, set())
Пример #16
0
    def similar_edges(self, targ_edge):
        """Return the edges similar to targ_edge, most similar first.

        Every edge of the hypergraph other than targ_edge (and not rejected
        by exclude) is compared with the target; those whose similarity is
        at least self.sim_threshold are kept.

        Returns a list of dicts with the keys 'edge' (string form), 'sim'
        and 'text' (the edge's 'text' string attribute).
        """
        candidates = self.hg.all()
        target_enriched = enrich_edge(self.parser, targ_edge)

        scores = {}
        for candidate in candidates:
            if candidate == targ_edge or exclude(candidate):
                continue
            enriched = enrich_edge(self.parser, candidate)
            score = simil.eedge_similarity(target_enriched, enriched)
            if score >= self.sim_threshold:
                scores[ed.edge2str(candidate)] = score

        ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

        return [
            {'edge': edge_str,
             'sim': score,
             'text': self.hg.get_str_attribute(ed.str2edge(edge_str), 'text')}
            for edge_str, score in ranked
        ]
Пример #17
0
    def edges_with_similar_concepts(self, targ_edge):
        """Return the edges whose concepts are similar to those of targ_edge.

        Every edge of the hypergraph other than targ_edge (and not rejected
        by exclude) is compared with the target; an edge is kept when the
        comparison is complete and its worst similarity is at least
        self.sim_threshold. Results are sorted in descending order of
        (worst_sim, total_sim, matches).

        Returns a list of dicts with the keys 'edge' (string form),
        'worst_sim', 'sim', 'matches' and 'text' (the edge's 'text' string
        attribute).
        """
        candidates = self.hg.all()
        target_enriched = enrich_edge(self.parser, targ_edge)

        scores = {}
        for candidate in candidates:
            if candidate == targ_edge or exclude(candidate):
                continue
            enriched = enrich_edge(self.parser, candidate)
            total_sim, worst_sim, complete, matches = \
                simil.edge_concepts_similarity(target_enriched, enriched)
            if complete and worst_sim >= self.sim_threshold:
                scores[ed.edge2str(candidate)] = (worst_sim, total_sim, matches)

        ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

        return [
            {'edge': edge_str,
             'worst_sim': score[0],
             'sim': score[1],
             'matches': score[2],
             'text': self.hg.get_str_attribute(ed.str2edge(edge_str), 'text')}
            for edge_str, score in ranked
        ]
Пример #18
0
    def generate_atom_groups(self):
        """Cluster the synonym sets into atom groups, stored in self.atom_groups.

        Steps: build a synonym co-occurrence matrix, normalize it, turn its
        stored entries into a weighted igraph graph, run multilevel
        community detection on that graph and, for every community, store
        a dict with the keys 'label', 'syns', 'count', 'sentences' and
        'edges' in self.atom_groups under the community index.

        NOTE(review): extra_edges and edge_data are neither defined in this
        method nor read from self -- presumably module-level names; confirm
        they are in scope wherever this method is called.
        """
        nsyns = len(self.synonym_sets)

        # build co-occurrence sparse matrix: one row/column per synonym set
        synonym_cooc = sps.lil_matrix((nsyns, nsyns))
        for edge in extra_edges:
            co_synonyms = self.find_co_synonyms(edge)
            if len(co_synonyms) > 1:
                for pair in itertools.combinations(co_synonyms, 2):
                    # count the pair in both directions
                    synonym_cooc[pair[0], pair[1]] += 1
                    synonym_cooc[pair[1], pair[0]] += 1

        # normalize matrix (norm='l1' along axis=1)
        synonym_cooc = normalize(synonym_cooc, norm='l1', axis=1, copy=False)

        # iterate matrix, build graph: each stored (row, col, value) entry
        # becomes an edge (row, col) with the value as its weight
        gedges = []
        weights = []
        cx = synonym_cooc.tocoo()
        for i, j, v in zip(cx.row, cx.col, cx.data):
            gedges.append((i, j))
            weights.append(v)
        g = igraph.Graph()
        g.add_vertices(nsyns)
        g.add_edges(gedges)
        g.es['weight'] = weights

        # community detection
        comms = igraph.Graph.community_multilevel(g,
                                                  weights='weight',
                                                  return_levels=False)

        # build atom_groups: one entry per community, keyed by its index
        self.atom_groups = {}
        for i in range(len(comms)):
            comm = comms[i]
            count = 0
            syns = []
            sentences = set()
            edges = []
            for item in comm:
                edges += self.synonym_map[item]['edges']

                # collect the text of every edge_data entry whose
                # namespace-free edge contains one of this synonym's edges
                for atom in self.synonym_map[item]['edges']:
                    for edat in edge_data:
                        if ed.contains(ed.str2edge(
                                ed.edge2str(ed.str2edge(edat['edge']),
                                            namespaces=False)),
                                       ed.str2edge(atom),
                                       deep=True):
                            if edat['text']:
                                sentences.add(edat['text'])
                syns.append(self.synonym_map[item])
                count += self.synonym_map[item]['count']
            label = ', '.join(edges)
            atom_group = {
                'label': label,
                'syns': syns,
                'count': count,
                'sentences': sentences,
                'edges': edges
            }
            self.atom_groups[i] = atom_group
Пример #19
0
def vertex2key(vertex):
    """Return the UTF-8 encoded key for a vertex: 'v' plus its string form."""
    key_text = 'v%s' % ed.edge2str(vertex)
    return key_text.encode('utf-8')
Пример #20
0
from gb.explore.similarity import edge_similarity

if __name__ == '__main__':
    print('creating parser...')
    par = par.Parser()
    print('parser created.')

    edge_data = json_tools.read('edges_similar_concepts.json')

    extra_edges = {}
    for item in edge_data:
        edge = ed.str2edge(item['edge'])
        matched = [ed.str2edge(match[1]) for match in item['matches']]
        for part in edge[1:]:
            if part not in matched:
                key = ed.edge2str(part)
                if key in extra_edges:
                    extra_edges[key] += 1
                else:
                    extra_edges[key] = 1

    sorted_edges = sorted(extra_edges.items(),
                          key=operator.itemgetter(1),
                          reverse=False)
    print(sorted_edges)
    print(len(sorted_edges))

    print('creating distance matrix...')
    size = len(sorted_edges)
    dists = np.zeros((size, size))
    for i in range(size):
Пример #21
0
 def write_edge_permutation(self, perm):
     """Store a permutation under the key 'p' + its string form (UTF-8)."""
     key_text = u'p%s' % ed.edge2str(perm)
     self.db.put(key_text.encode('utf-8'), b'x')
Пример #22
0
 def remove_edge_permutation(self, perm):
     """Delete the entry stored under 'p' + the permutation's string form."""
     key_text = u'p%s' % ed.edge2str(perm)
     self.db.delete(key_text.encode('utf-8'))
Пример #23
0
 def timestamp(self, vertex):
     """Log the call and return the backend's timestamp for a vertex."""
     message = '[hypergraph timestamp()] %s' % ed.edge2str(vertex)
     logging.debug(message)
     return self.backend.timestamp(vertex)
Пример #24
0
 def get_float_attribute(self, vertex, attribute, or_else=None):
     """Log the call and return the attribute as a float from the backend.

     or_else is passed through to the backend unchanged.
     """
     details = (ed.edge2str(vertex), attribute, or_else)
     message = ('[hypergraph get_float_attribute()] %s attribute: %s; or_else: %s'
                % details)
     logging.debug(message)
     return self.backend.get_float_attribute(vertex, attribute, or_else)
Пример #25
0
 def write_edge_permutation(self, perm):
     """Update or insert the 'perms' row whose id is the permutation string."""
     perm_id = ed.edge2str(perm)
     row = {'id': perm_id}
     self.update_or_insert('perms', row, perm_id)
Пример #26
0
 def f(x):
     """Format an item as '<vertex string> <degree>'."""
     vertex_str = ed.edge2str(x['vertex'])
     return '%s %s' % (vertex_str, x['degree'])
Пример #27
0
 def edges_with_symbols(self, symbols, root=None):
     """Find all edges containing the given symbols and, optionally, a root.

     The call is logged and then delegated to the backend.
     """
     message = ('[hypergraph edges_with_symbols()] %s root: %s'
                % (symbols, ed.edge2str(root)))
     logging.debug(message)
     return self.backend.edges_with_symbols(symbols, root)
Пример #28
0
 def pattern2edges(self, pattern):
     """Return all the edges that match a pattern.

     A pattern is a collection of entity ids and wildcards (None). The
     call is logged and then delegated to the backend.
     """
     message = '[hypergraph pattern2edges()] %s' % ed.edge2str(pattern)
     logging.debug(message)
     return self.backend.pattern2edges(pattern)
Пример #29
0
 def exists(self, vertex):
     """Log the call and ask the backend whether the given edge exists."""
     message = '[hypergraph exists()] %s' % ed.edge2str(vertex)
     logging.debug(message)
     return self.backend.exists(vertex)
Пример #30
0
 def dec_attribute(self, vertex, attribute):
     """Log the call and delegate to the backend's dec_attribute."""
     message = ('[hypergraph dec_attribute()] %s attribute: %s'
                % (ed.edge2str(vertex), attribute))
     logging.debug(message)
     return self.backend.dec_attribute(vertex, attribute)
Пример #31
0
 def remove_edge_permutation(self, perm):
     """Delete a permutation's row from the perms table and commit.

     The row is identified by the string form of the permutation. The
     cursor is closed even when the statement or the commit raises; the
     exception itself is left to propagate to the caller.
     """
     eid = ed.edge2str(perm)
     cur = self.conn.cursor()
     try:
         cur.execute('DELETE FROM perms WHERE id=%s' % (self.ph,), (eid,))
         self.conn.commit()
     finally:
         # Always release the cursor, including on failure.
         cur.close()
Пример #32
0
 def degree(self, vertex):
     """Log the call and return the backend's degree for a vertex."""
     message = '[hypergraph degree()] %s' % ed.edge2str(vertex)
     logging.debug(message)
     return self.backend.degree(vertex)
Пример #33
0
 def remove_by_pattern(self, pattern):
     """Remove from the hypergraph every edge that matches the pattern."""
     message = '[hypergraph remove_by_pattern()] %s' % ed.edge2str(pattern)
     logging.debug(message)
     for match in self.pattern2edges(pattern):
         self.remove(match)
Пример #34
0
 def sources(self, edge):
     """Return the set of sources (nodes) that support a statement (edge).

     Looks up the edges matching (const.source, edge, None) and collects
     the third element of each.
     """
     message = '[hypergraph sources()] %s' % ed.edge2str(edge)
     logging.debug(message)
     matches = self.pattern2edges((const.source, edge, None))
     return {match[2] for match in matches}
Пример #35
0
 def symbols_with_root(self, root):
     """Find all symbols with the given root.

     An empty root yields an empty dict without consulting the backend.
     """
     message = '[hypergraph symbols_with_root()] %s' % ed.edge2str(root)
     logging.debug(message)
     if len(root) > 0:
         return self.backend.symbols_with_root(root)
     return {}
Пример #36
0
 def remove(self, edge):
     """Remove an edge from the hypergraph.

     The call is always logged; only lists and tuples are forwarded to
     the backend, anything else is ignored.
     """
     message = '[hypergraph remove()] %s' % ed.edge2str(edge)
     logging.debug(message)
     if not isinstance(edge, (list, tuple)):
         return
     self.backend.remove(edge)
Пример #37
0
def edge2str(edge):
    """Return the namespace-free edge string, passed through unidecode and
    with every '.' removed."""
    plain = unidecode(ed.edge2str(edge, namespaces=False))
    return plain.replace('.', '')
Пример #38
0
 def star(self, center, limit=None):
     """Return all the edges that contain a given entity.

     The entity can be atomic or an edge. The call is logged and then
     delegated to the backend, with limit passed through.
     """
     message = '[hypergraph star()] %s' % ed.edge2str(center)
     logging.debug(message)
     return self.backend.star(center, limit=limit)
Пример #39
0
    concept_sets = [concepts1, concepts2, concepts3]

    # filter edges
    print('before filter: %s' % len(full_edges))
    full_edges = [edge for edge in full_edges if contains_all_concept_sets(edge, concept_sets)]
    print('after filter: %s' % len(full_edges))

    # build graph
    g = Graph(par, full_edges, black_list=synset1+synset2)

    pr_pairs = g.synset_pr_pairs()

    remaining_edges = full_edges[:]
    covered = set()
    for pr_pair in pr_pairs[:50]:
        syn_id = int(pr_pair[0])
        pr = pr_pair[1]
        count = 0
        new_remaining_edges = []
        for full_edge in remaining_edges:
            if g.contains_synonym(full_edge, syn_id):
                count += 1
                covered.add(ed.edge2str(full_edge, namespaces=False))
            else:
                new_remaining_edges.append(full_edge)
        remaining_edges = new_remaining_edges
        if count > 0:
            print('%s [%s]{%s} %.2f%% %s' % (g.meronomy.synonym_label(syn_id), count, len(covered),
                                             (float(len(covered)) / float(len(full_edges))) * 100., pr))
Пример #40
0
 def set_attribute(self, vertex, attribute, value):
     """Log the call and set the value of an attribute through the backend."""
     message = ('[hypergraph set_attribute()] %s %s=%s'
                % (ed.edge2str(vertex), attribute, value))
     logging.debug(message)
     return self.backend.set_attribute(vertex, attribute, value)
Пример #41
0
 def f(x):
     """Format an item as '<vertex string> <degree>'."""
     vertex_str = ed.edge2str(x["vertex"])
     return "%s %s" % (vertex_str, x["degree"])