2) in voc.word2index and lineSearch.group( 3) in voc.word2index: edgeList.append( (lineSearch.group(2), lineSearch.group(3), { "relation": lineSearch.group(1) })) nodeList.add(lineSearch.group(2)) nodeList.add(lineSearch.group(3)) f.write(line) trimFactNum += 1 if index % 100000 == 0: print('Trimming processing %d' % index) print("after trimmed , there are %d facts left." % trimFactNum) finally: nodeList = list(nodeList) print(nodeList[:10]) dump_pickle('edgeList.pickle', edgeList, ROOTPATH) dump_pickle('errorList.pickle', errorList, ROOTPATH) dump_pickle('nodeList.pickle', nodeList, ROOTPATH) dump_pickle('relationList.pickle', relationList, ROOTPATH) config.add('trimFactNum', trimFactNum) config.add('nodeNum', len(nodeList)) config.add('relationNum', len(relationList)) config.add('edgeNum', len(edgeList)) print("there are %d different nodes in graph" % len(nodeList)) print("there are %d different relations in graph" % len(relationList)) print('there are %d edges in graph' % len(edgeList)) print("all done")