def test_pushDataBig(self):
    """Push five million ``{"id": n}`` documents, then drop the database.

    The pusher is created for ``http://127.0.0.1:5984/testdb`` with ``1000``
    as its second positional argument and ``threads=False`` (the meaning of
    both is defined by ``mpcouch`` -- presumably bulk size and worker mode;
    confirm there). The test passes when ``destroyDatabase()`` returns a
    value equal to ``None``.
    """
    pusher = mpcouch.mpcouchPusher(
        "http://127.0.0.1:5984/testdb", 1000, threads=False
    )

    doc_count = 5000000
    for doc_id in range(doc_count):
        pusher.pushData({"id": doc_id})
    pusher.finish()

    outcome = pusher.destroyDatabase()
    self.assertEqual(outcome, None)

    # Drop the local reference explicitly, as the original test does.
    del pusher
# Example #2
0
def elementReader(filename):
    """Stream an OSM XML file and push every ``natural=tree`` node to CouchDB.

    The file is parsed incrementally with expat. Each ``node`` element is
    collected as a two-item list ``[attrs, tags]``, where ``attrs`` is the
    attribute mapping of the ``node`` element and ``tags`` maps the ``k``
    attribute of every nested ``tag`` element to its ``v`` attribute. When
    the node closes and ``tags["natural"] == "tree"``, the list is handed to
    the pusher as ``{"data": [attrs, tags]}``.

    Any element other than ``node`` and ``tag`` is reported on stdout.

    Args:
        filename: Path of the OSM XML file; it is opened in binary mode.

    Raises:
        OSError: If the file cannot be opened.
        xml.parsers.expat.ExpatError: If the document is not well-formed.
    """
    couchPusher = mpcouch.mpcouchPusher(
        "http://localhost:5984/osmnodes", 10000, threads=False
    )

    # Node currently being collected as [attributes, tags]; None while the
    # parser is outside a <node>. Kept in the closure (not a module global)
    # so parse state cannot leak between calls.
    current_node = None

    def gotCompleteEntry(entry):
        couchPusher.pushData({"data": entry})

    def start_osm_element(name, attrs):
        nonlocal current_node
        if name == "node":
            current_node = [attrs, {}]
        elif name == "tag":
            # Tags that belong to other elements (ways, relations, ...) must
            # not be written into a node that was already completed and
            # possibly queued for upload.
            if current_node is not None:
                current_node[1][attrs["k"]] = attrs["v"]
        else:
            print("uncatched element:")
            print(name)

    def end_osm_element(name):
        nonlocal current_node
        if name != "node" or current_node is None:
            return
        # Detach the finished node before pushing so later tags cannot
        # mutate the object the pusher now holds.
        finished, current_node = current_node, None
        if finished[1].get("natural") == "tree":
            gotCompleteEntry(finished)

    # Character data is deliberately ignored: no CharacterDataHandler is set.
    osmParser = xml.parsers.expat.ParserCreate()
    osmParser.StartElementHandler = start_osm_element
    osmParser.EndElementHandler = end_osm_element

    try:
        with open(filename, "rb") as osmFile:
            print("start parsing")
            osmParser.ParseFile(osmFile)
            print("finished parsing")
    finally:
        # Always let the pusher wind down, even if opening or parsing failed
        # (presumably this flushes buffered documents -- see mpcouch).
        couchPusher.finish()
    def test_pushDataBig(self):
        """Feed 5,000,000 one-key documents to a pusher and destroy the database.

        Uses the database URL ``http://127.0.0.1:5984/testdb``; the remaining
        constructor arguments (``1000``, ``threads=False``) are passed through
        to ``mpcouch.mpcouchPusher`` unchanged. The assertion checks that
        ``destroyDatabase()`` compares equal to ``None``.
        """
        db_url = "http://127.0.0.1:5984/testdb"
        pusher = mpcouch.mpcouchPusher(db_url, 1000, threads=False)

        next_id = 0
        while next_id < 5000000:
            pusher.pushData({"id": next_id})
            next_id += 1
        pusher.finish()

        self.assertEqual(pusher.destroyDatabase(), None)

        # Explicitly release the pusher reference, mirroring the original.
        del pusher
def elementReader(filename):
    """Stream an OSM XML file and push every node to CouchDB.

    The file is parsed incrementally with expat; files whose name ends in
    ``bz2`` are opened through ``bz2.open``, everything else through the
    built-in ``open``. Each ``node`` element is collected as a two-item list
    ``[attrs, tags]`` (``tags`` maps the ``k`` attribute of every nested
    ``tag`` element to its ``v`` attribute) and pushed as
    ``{'data': [attrs, tags]}`` when the node closes.

    HACK kept from the original: as soon as the first ``way`` element closes,
    the function raises ``SystemExit`` to terminate the program, so only the
    part of the file up to and including the first way is processed.

    Args:
        filename: Path (``str``) of the OSM XML file, optionally
            bz2-compressed.

    Raises:
        SystemExit: When the first ``way`` element has been read completely.
        OSError: If the file cannot be opened.
        xml.parsers.expat.ExpatError: If the document is not well-formed.
    """
    couchPusher = mpcouch.mpcouchPusher(
        "http://gi88.geoinfo.tuwien.ac.at:5984/osmnodesvienna",
        30000,
        threads=False,
        jobsbuffersizemax=20,
    )

    # Node currently being collected as [attributes, tags]; None while the
    # parser is outside a <node>. Kept in the closure (not a module global)
    # so parse state cannot leak between calls.
    current_node = None

    def gotCompleteEntry(entry):
        couchPusher.pushData({'data': entry})

    def start_osm_element(name, attrs):
        nonlocal current_node
        if name == "node":
            current_node = [attrs, {}]
        elif name == "tag":
            # Tags of the first <way> are still seen before the parser stops;
            # they must not be written into the last node, which has already
            # been handed to the pusher.
            if current_node is not None:
                current_node[1][attrs['k']] = attrs['v']
        else:
            print("uncatched element: {}".format(name))

    def end_osm_element(name):
        nonlocal current_node
        if name == "node":
            # NOTE: every node is pushed. Two filters were drafted here
            # earlier and are disabled: (1) keep only untagged nodes and
            # natural=tree nodes while remembering the ids of trees whose
            # version is not 1, and (2) keep any node whose id is in that
            # remembered list.
            finished, current_node = current_node, None
            gotCompleteEntry(finished)
        elif name == "way":
            # HACK: stop the whole program after the first way. SystemExit is
            # raised directly because quit() is a `site` convenience that is
            # not guaranteed to exist; the pusher is finished by the
            # `finally` clause below while the exception propagates.
            raise SystemExit
        elif name in ("relation", "tag"):
            pass
        else:
            print("Unknown element: {}".format(name))

    # Character data is deliberately ignored: no CharacterDataHandler is set.
    osmParser = xml.parsers.expat.ParserCreate()
    osmParser.StartElementHandler = start_osm_element
    osmParser.EndElementHandler = end_osm_element

    opener = bz2.open if filename[-3:] == 'bz2' else open
    try:
        with opener(filename, 'rb') as osmFile:
            print("start parsing")
            osmParser.ParseFile(osmFile)
            print("finished parsing")
    finally:
        # Runs exactly once on every path: normal completion, the early
        # SystemExit above, and parse/open failures.
        couchPusher.finish()