def saveToNeo4j(found_pages, found_links):
    """Write each link to the graph as a "links_to" relationship.

    Every item of ``found_links`` is unpacked as a (source, target) pair of
    page titles. Both titles are resolved through the "TitleIndex" node
    index with ``get_or_create`` (keyed on "title"), and a relationship is
    then created from the source node to the target node.

    ``found_pages`` is accepted but not used by this function.
    """
    print("++: saving to neo4j")
    titles = GRAPHDB.get_or_create_index(neo4j.Node, "TitleIndex")

    for count, (source, target) in enumerate(found_links):
        source_node = titles.get_or_create("title", source, {"title": source})
        target_node = titles.get_or_create("title", target, {"title": target})
        GRAPHDB.create((source_node, "links_to", target_node))

        # Progress marker: printed for link 0, 100, 200, ...
        if count % 100 == 0:
            print(count)
def saveToNeo4jBatch(found_pages, found_links, batch_size=100):
    """Create one node per page, then write the links in batches.

    Args:
        found_pages: iterable of page URLs. Each one gets a node with a
            "name" property (from ``getNameFromLink``) and a "url" property.
        found_links: iterable of (source_url, target_url) pairs.
        batch_size: number of queued links sent per ``WriteBatch`` run.
            Defaults to 100, the value that used to be hard-coded.

    Links whose source or target URL is not in ``found_pages`` have no node
    to attach to; they are skipped and counted separately instead of being
    queued with a ``None`` endpoint.
    """
    # The returned index is not used below; the call is kept so the
    # "UrlIndex" lookup/creation still happens as it did before.
    GRAPHDB.get_or_create_index(neo4j.Node, "UrlIndex")

    page_to_node = {}
    for page in found_pages:
        name = getNameFromLink(page)
        # TODO: add labels based on infobox
        page_to_node[page] = GRAPHDB.create({"name": name, "url": page})[0]

    # save links
    created = 0
    skipped = 0
    pending = 0
    batch = neo4j.WriteBatch(GRAPHDB)
    for page_a, page_b in found_links:
        node_a = page_to_node.get(page_a)
        node_b = page_to_node.get(page_b)
        if node_a is None or node_b is None:
            # An endpoint was never saved as a page node: nothing to link.
            skipped += 1
            continue

        batch.get_or_create_path(node_a, "links_to", node_b)
        created += 1
        pending += 1

        # Flush only once a full batch is queued, so the first batch is not
        # a single link and no request is sent for an empty batch.
        if pending >= batch_size:
            print("i: " + str(created))
            batch.run()
            batch = neo4j.WriteBatch(GRAPHDB)
            pending = 0

    # Send whatever is left over from the last partial batch.
    if pending:
        batch.run()

    if skipped:
        print("links skipped (unknown page): " + str(skipped))
    print("total num links created: " + str(created))