Exemplo n.º 1
0
#   along with GraphBrain.  If not, see <http://www.gnu.org/licenses/>.

import operator
import numpy as np
from sklearn.cluster import DBSCAN
import gb.tools.json as json_tools
import gb.hypergraph.edge as ed
import gb.nlp.parser as par
from gb.explore.similarity import edge_similarity

if __name__ == '__main__':
    # Script entry point: create the parser, load the edge records, and tally
    # the edge elements that do not appear among each record's matches.
    print('creating parser...')
    # NOTE: this rebinds the module alias `par` to the Parser instance, so the
    # module itself is no longer reachable through this name afterwards.
    par = par.Parser()
    print('parser created.')

    edge_data = json_tools.read('edges_similar_concepts.json')

    # Maps the string form of an unmatched element to its occurrence count.
    extra_edges = {}
    for item in edge_data:
        edge = ed.str2edge(item['edge'])
        # The second entry of every match holds the matched edge as a string.
        matched = [ed.str2edge(match[1]) for match in item['matches']]
        # The first element of the edge is skipped; only the rest is checked.
        for part in edge[1:]:
            if part in matched:
                continue
            key = ed.edge2str(part)
            extra_edges[key] = extra_edges.get(key, 0) + 1

    sorted_edges = sorted(extra_edges.items(),
                          key=operator.itemgetter(1),
Exemplo n.º 2
0
    # read data
    # edge_data = json_tools.read('edges_similar_concepts.json')

    # build extra edges list
    # extra_edges = []
    # full_edges = []
    # for it in edge_data:
    #     e = ed.str2edge(it['edge'])
    #     full_edges.append(e)
    #     matched = [ed.str2edge(match[1]) for match in it['matches']]
    #     for part in e[1:]:
    #         if part not in matched:
    #             extra_edges.append(part)

    # Load the records and turn each record's 'edge' string into an edge with
    # its namespaces removed.
    edge_data = json_tools.read('all.json')
    extra_edges = [
        ed.without_namespaces(ed.str2edge(it['edge']))
        for it in edge_data
    ]
    # Both names refer to the very same list object (no copy is made).
    full_edges = extra_edges

    # Run the AtomGroups stages in order, announcing each one before it runs.
    ag = AtomGroups(par)

    print('set edges')
    ag.set_edges(extra_edges)

    print('generate_atoms')
    ag.generate_atoms()

    print('generate synonyms')
    ag.generate_synonyms()

    print('generate atom groups')
    ag.generate_atom_groups()