Exemplo n.º 1
0
                            2) in voc.word2index and lineSearch.group(
                                3) in voc.word2index:
                        edgeList.append(
                            (lineSearch.group(2), lineSearch.group(3), {
                                "relation": lineSearch.group(1)
                            }))
                        nodeList.add(lineSearch.group(2))
                        nodeList.add(lineSearch.group(3))
                        f.write(line)
                        trimFactNum += 1
                if index % 100000 == 0:
                    print('Trimming processing %d' % index)
    print("after trimmed , there are %d facts left." % trimFactNum)

finally:
    # Cleanup/persistence step: as the body of the `finally` clause, this runs
    # whether or not the preceding code raised, so whatever was collected so
    # far is still saved and reported.
    #
    # nodeList is filled with .add() earlier (presumably a set -- confirm), so
    # it is converted to a list here before being sliced and pickled.
    nodeList = list(nodeList)
    # Quick sanity preview of up to 10 nodes (if nodeList was a set, which
    # 10 are shown is arbitrary).
    print(nodeList[:10])
    # Persist each collected structure through the project's dump_pickle
    # helper. NOTE(review): dump_pickle is defined outside this view;
    # presumably it writes the object to the given file name under ROOTPATH.
    dump_pickle('edgeList.pickle', edgeList, ROOTPATH)
    dump_pickle('errorList.pickle', errorList, ROOTPATH)
    dump_pickle('nodeList.pickle', nodeList, ROOTPATH)
    dump_pickle('relationList.pickle', relationList, ROOTPATH)
    # Record summary counts on the config object (config.add is defined
    # elsewhere; it is called here with a key name and a value).
    config.add('trimFactNum', trimFactNum)
    config.add('nodeNum', len(nodeList))
    config.add('relationNum', len(relationList))
    config.add('edgeNum', len(edgeList))
    # Report the final sizes of the node, relation and edge collections.
    print("there are %d different nodes in graph" % len(nodeList))
    print("there are %d different relations in graph" % len(relationList))
    print('there are %d edges in graph' % len(edgeList))

# Final status message; only reached if no exception propagates out of the
# try/finally statement above.
print("all done")