def test_pushDataBig(self):
    # Bulk scenario: push five million minimal documents through a
    # non-threaded pusher, flush it, then drop the test database again.
    # NOTE(review): the second constructor argument (1000) is presumably the
    # bulk/batch size -- confirm against mpcouchPusher.
    pusher = mpcouch.mpcouchPusher(
        "http://127.0.0.1:5984/testdb",
        1000,
        threads=False,
    )

    for doc_id in range(5000000):
        pusher.pushData({"id": doc_id})
    pusher.finish()

    # destroyDatabase() is expected to return None.
    destroy_result = pusher.destroyDatabase()
    self.assertEqual(destroy_result, None)
    del pusher
def elementReader(filename):
    """Stream an OSM XML file and push every tree node to CouchDB.

    The file is parsed incrementally with expat. For each ``<node>`` element
    its attributes and the ``k``/``v`` pairs of its ``<tag>`` children are
    collected into a two-item list ``[attrs, tags]``. When the node closes
    and its tags contain ``natural == "tree"``, that list is handed to the
    pusher as ``{"data": [attrs, tags]}``.

    Elements other than ``node`` and ``tag`` are reported on stdout when they
    open and are otherwise skipped. ``<tag>`` elements that are not inside a
    ``<node>`` are ignored.

    The pusher is always finished, even when opening or parsing the file
    fails.

    Args:
        filename: Path of the uncompressed OSM XML file to read.

    Raises:
        OSError: If the file cannot be opened.
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
    """
    couchPusher = mpcouch.mpcouchPusher(
        "http://localhost:5984/osmnodes", 10000, threads=False
    )

    # True only between the opening and the closing of a <node>. Without this
    # guard, <tag> children of other elements would be written into the tag
    # dict of the most recent node, which has possibly been handed to the
    # pusher already.
    insideNode = False

    def gotCompleteEntry(entry):
        # Wrap the collected [attrs, tags] pair into a document and queue it.
        couchPusher.pushData({"data": entry})

    def start_osm_element(name, attrs):
        # NOTE(review): the record under construction still lives in the
        # module global `currentTreeData`, as before, in case other code in
        # this module reads it; a closure variable would be cleaner.
        global currentTreeData
        nonlocal insideNode
        if name == "node":
            # [0] holds the node's attributes, [1] collects its tags.
            currentTreeData = [attrs, {}]
            insideNode = True
        elif name == "tag":
            if insideNode:
                currentTreeData[1][attrs[u"k"]] = attrs[u"v"]
        else:
            print("uncatched element:")
            print(name)

    def end_osm_element(name):
        global currentTreeData
        nonlocal insideNode
        if name == "node":
            insideNode = False
            # The node is complete: keep it only if it is tagged as a tree.
            if currentTreeData[1].get("natural") == "tree":
                gotCompleteEntry(currentTreeData)

    osmParser = xml.parsers.expat.ParserCreate()
    osmParser.StartElementHandler = start_osm_element
    osmParser.EndElementHandler = end_osm_element

    try:
        with open(filename, "rb") as osmFile:
            print("start parsing")
            osmParser.ParseFile(osmFile)
            print("finished parsing")
    finally:
        # Finish the pusher on every path so already queued documents are
        # not left behind when opening or parsing fails.
        couchPusher.finish()
def test_pushDataBig(self):
    # Large-volume run: feed five million one-field documents to a pusher
    # created with threads disabled, finish it, and remove the database.
    # NOTE(review): 1000 is presumably the number of documents per bulk
    # request -- confirm against mpcouchPusher.
    big_pusher = mpcouch.mpcouchPusher(
        "http://127.0.0.1:5984/testdb", 1000, threads=False
    )

    total_docs = 5000000
    for number in range(total_docs):
        big_pusher.pushData({"id": number})
    big_pusher.finish()

    # Dropping the database must yield None.
    outcome = big_pusher.destroyDatabase()
    self.assertEqual(outcome, None)
    del big_pusher
def elementReader(filename):
    """Stream the nodes of an OSM XML file (plain or bz2) into CouchDB.

    The file is parsed incrementally with expat. For each ``<node>`` element
    its attributes and the ``k``/``v`` pairs of its ``<tag>`` children are
    collected into a two-item list ``[attrs, tags]``; when the node closes,
    that list is pushed as ``{'data': [attrs, tags]}``. Every node is pushed;
    the tree-only filters are currently disabled (see the comments in
    ``end_osm_element``).

    Elements other than ``node`` and ``tag`` are reported on stdout when they
    open. ``<tag>`` elements that are not inside a ``<node>`` are ignored.

    As soon as the first ``<way>`` element closes, the pusher is finished and
    the process exits via ``SystemExit``; in that case this function does not
    return to its caller.

    Args:
        filename: Path of the OSM XML file. If its last three characters are
            ``bz2`` the file is opened with ``bz2.open``, otherwise with the
            built-in ``open``.

    Raises:
        SystemExit: When the first ``<way>`` element has been read.
        OSError: If the file cannot be opened.
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
    """
    couchPusher = mpcouch.mpcouchPusher(
        "http://gi88.geoinfo.tuwien.ac.at:5984/osmnodesvienna",
        30000,
        threads=False,
        jobsbuffersizemax=20,
    )

    # True only between the opening and the closing of a <node>. Without this
    # guard the <tag> children of the first <way> would be written into the
    # tag dict of the last node, which has already been handed to the pusher.
    insideNode = False

    def gotCompleteEntry(entry):
        # Wrap the collected [attrs, tags] pair into a document and queue it.
        couchPusher.pushData({'data': entry})

    def start_osm_element(name, attrs):
        # NOTE(review): the record under construction still lives in the
        # module global `currentTreeData`, as before, in case other code in
        # this module reads it; a closure variable would be cleaner.
        global currentTreeData
        nonlocal insideNode
        if name == "node":
            # [0] holds the node's attributes, [1] collects its tags.
            currentTreeData = [attrs, {}]
            insideNode = True
        elif name == "tag":
            if insideNode:
                currentTreeData[1][attrs[u'k']] = attrs[u'v']
        else:
            print("uncatched element: {}".format(name))

    def end_osm_element(name):
        global currentTreeData
        nonlocal insideNode
        if name == "node":
            insideNode = False
            # Disabled filters, kept for reference (indentation reconstructed;
            # they would need an `oldids` list in the enclosing scope).
            #
            # 1) To reduce the number of documents, include non-tree data
            #    only if it represents deleted nodes, and otherwise keep
            #    only trees:
            #
            #    if len(currentTreeData[1]) == 0:
            #        gotCompleteEntry(currentTreeData)
            #    elif 'natural' in currentTreeData[1]:
            #        if currentTreeData[1]['natural'] == 'tree':
            #            if currentTreeData[0]['version'] != 1:
            #                oldids.append(currentTreeData[0]['id'])
            #            gotCompleteEntry(currentTreeData)
            #
            # 2) Include a document anyway if its id is contained in the
            #    list of old versions:
            #
            #    if currentTreeData[0]['id'] in oldids:
            #        print("Got old version: {} of {}, adding.".format(
            #            len(currentTreeData[0]['version']),
            #            currentTreeData[0]['id']))
            #        gotCompleteEntry(currentTreeData)
            gotCompleteEntry(currentTreeData)
        elif name == "way":
            # HACK: stop the whole program once the first way has been read.
            # NOTE(review): presumably relies on all nodes preceding the
            # first way in the file -- confirm. SystemExit is raised directly
            # instead of calling quit(), which is a helper of the `site`
            # module meant for the interactive shell. The pusher is finished
            # by the `finally` clause around the parse call.
            raise SystemExit
        elif name in ("relation", "tag"):
            pass
        else:
            print("Unknown element: {}".format(name))

    osmParser = xml.parsers.expat.ParserCreate()
    osmParser.StartElementHandler = start_osm_element
    osmParser.EndElementHandler = end_osm_element

    # Same parse sequence for both formats; only the opener differs.
    openFile = bz2.open if filename[-3:] == 'bz2' else open
    try:
        with openFile(filename, 'rb') as osmFile:
            print("start parsing")
            osmParser.ParseFile(osmFile)
            print("finished parsing")
    finally:
        # Runs exactly once on every path: normal completion, the early exit
        # on the first way, and open/parse errors.
        couchPusher.finish()