Exemplo n.º 1
0
class SampleSpace(object):
    """Scattered 2-D samples with inverse-distance interpolated lookup.

    Samples are stored with ``space[location] = value`` and read back with
    ``space[location]``.  A read blends the values of the nearest stored
    samples reported by the R-tree index, so it also answers for locations
    that were never stored.  Only the first two coordinates of a location
    take part in the distance computation.
    """

    # How many nearest samples a lookup asks the index for.
    _NEIGHBOURS = 3

    def __init__(self):
        self._index = Rtree()   # spatial index: sample number -> location
        self._locations = []    # sample number -> location
        self._values = []       # sample number -> value

    def __setitem__(self, location, value):
        """Append a new sample; earlier samples at *location* are kept."""
        i = len(self._locations)

        self._locations.append(location)
        self._values.append(value)
        self._index.add(i, location)

    def __getitem__(self, location):
        """Return the value interpolated at *location*.

        Returns 0 when the index reports no sample, the stored value when
        only one sample is reported or when a neighbour lies exactly on
        *location*, and otherwise a weighted mean with weights proportional
        to ``((R - d) / (R * d)) ** 2`` where ``d`` is a neighbour's distance
        and ``R`` the largest of those distances.
        """
        js = list(self._index.nearest(location, self._NEIGHBOURS))
        if not js:
            return 0

        if len(js) == 1:
            return self._values[js[0]]

        values = [self._values[j] for j in js]
        ds = [sqrt(sum((self._locations[j][k] - location[k]) ** 2
                       for k in range(2)))
              for j in js]

        # An exact hit wins outright (and would divide by zero below).
        for d, value in zip(ds, values):
            if d == 0:
                return value

        R = max(ds)
        nums = [((R - d) / (R * d)) ** 2 for d in ds]
        den = sum(nums)
        if den == 0:
            # Every neighbour is exactly R away, so every weight is zero and
            # normalising used to raise ZeroDivisionError.  Equal distances
            # mean equal influence: return the plain mean.
            return sum(values) / float(len(values))

        return sum([value * (num / den) for value, num in zip(values, nums)])

    def __iter__(self):
        """Yield ``(location, value)`` pairs in insertion order."""
        for i, value in enumerate(self._values):
            yield self._locations[i], value
Exemplo n.º 2
0
Which points are within this 2-dimensional box?
Rtree helps us speed up other calculations by considering only the relevant
candidate objects for more complex comparisons.

'''

from rtree import Rtree
# Instantiate the Rtree class.
idx = Rtree()
# Create a selection boundary (2-D bounding box).
minx, miny, maxx, maxy = (0.0, 0.0, 1.0, 1.0)
# Add an item to the index.
idx.add(0, (minx, miny, maxx, maxy))
# Intersect another bounding box with the original bounding box.
list(idx.intersection((1.0, 1.0, 2.0, 2.0)))
#[0L]
# Point out the accuracy of the spatial calculation: a query box that starts
# just outside the stored box no longer touches it.
list(idx.intersection((1.0000001, 1.0000001, 2.0, 2.0)))
#[]

# Add another item to the index and show EXACT dimension matching: querying
# with the very box that was stored returns that item's id.  The box is placed
# away from item 0 so the other queries in this example are unaffected.
item_id = 2
left, bottom, right, top = (10.0, 10.0, 11.0, 11.0)
idx.add(item_id, (left, bottom, right, top))
matches = [n for n in idx.intersection((left, bottom, right, top))]
#[2L]

# Find nearest object / bounding box.  Items 0 and 1 share the same box, so
# both are reported even though only one result was requested.
idx.add(1, (minx, miny, maxx, maxy))
list(idx.nearest((1.0000001, 1.0000001, 2.0, 2.0), 1))
#[0L, 1L]

Exemplo n.º 3
0
class MapMatcher():
    """A very nice MapMatching class
    """
    
    def __init__(self):
        """Set up empty indexes, counters and lookup tables.

        The road graph ``self.G`` stays ``None`` until one is built or loaded.
        """
        # GPS helper used by readGPS
        self.GPS = GPS()
        # spatial indexes: ``idx`` holds edges, ``nodeidx`` holds nodes
        self.idx = Rtree()
        self.nodeidx = Rtree()

        self.G = None
        self.gps_points = []

        # running ids used as keys in the two spatial indexes
        self.edgecounter, self.nodecounter = 0, 0

        # lookup tables
        self.edgeindex_edge = {}      # edge index id -> edge
        self.node_counter__node = {}  # node index id -> node
        self.edge_id__count = {}      # edge id -> number of GPS points
        self.distance_matrix = {}     # handed to RouteFinder as euclideanOD
        
    def saveGraph(self, filename):
        """Write the current graph ``self.G`` to *filename* in YAML format."""
        graph = self.G
        nx.write_yaml(graph, filename)
        
    def readGraphFromYAMLFile(self, filename):
        """Replace ``self.G`` with the graph stored in the YAML file *filename*.

        Only the graph attribute is set; the spatial indexes and lookup
        tables of this object are left untouched.
        """
        # NOTE(review): YAML loaders may run arbitrary constructors --
        # presumably only trusted files are read here; confirm.
        loaded = nx.read_yaml(filename)
        self.G = loaded
        # TODO: build up the indexes !!!
        
        
    def addNodeToIndex(self, node):
        """Adds a node to the node index (RTree)
        """
        # self.nodeidx.add(self.nodecounter, (node.getPoint()[0], node.getPoint()[1]), obj=node)
        self.nodeidx.add(self.nodecounter, (node.getPoint()[0], node.getPoint()[1], node.getPoint()[0], node.getPoint()[1]))

        self.node_counter__node[self.nodecounter] = node
        
    
    def addEdgeToIndex(self, edge): 
        """Add an edge to the edhe index.
        """
        self.idx.add(self.edgecounter, (edge.getMinX(), edge.getMinY(), edge.getMaxX(), edge.getMaxY()),obj=edge)
        # print "%d/%d -> %d/%d" % (edge.getMinX(), edge.getMinY(), edge.getMaxX(), edge.getMaxY())
        self.edgeindex_edge[self.edgecounter] = edge
        self.edgecounter = self.edgecounter + 1
        
    def openShape(self, inFile, index=0):
        """Open the OGR data source *inFile* and keep it in ``self.shapeFile``.

        Terminates the process with exit status 1 when the source cannot be
        opened.  The *index* argument is accepted but not used.
        """
        self.shapeFile = ogr.Open(inFile)
        if self.shapeFile is not None:
            print("SHP file successfully read")
            return
        print("Failed to open " + inFile + ".\n")
        sys.exit( 1 )
     
    def getfieldinfo(self, lyr, feature, flds):
        """Return the values of the fields named in *flds* for *feature*.

        The result is a list in the order of *flds*.  *lyr* is not used.
        """
        values = []
        for name in flds:
            position = feature.GetFieldIndex(name)
            values.append(feature.GetField(position))
        return values
     
    def addlyr(self, G,lyr, fields):
        """Add every linestring feature of *lyr* to *G* as an edge.

        For each feature whose OGR geometry type is 2 (linestring) the first
        and the last vertex become the end nodes of the edge.  A node that is
        already indexed within INTERSECTION_MASK/2 of a vertex (in x and in
        y) is reused, otherwise a new Node is created and indexed.  The edge
        is added to *G* with the attributes ``edge`` and ``edgecounter`` and
        registered in the edge index.

        G      -- graph to extend (modified in place)
        lyr    -- OGR layer to read the features from
        fields -- names of the fields copied into the edge attributes

        Returns *G*.
        """
        for findex in xrange(lyr.GetFeatureCount()):
            f = lyr.GetFeature(findex)
            flddata = self.getfieldinfo(lyr, f, fields)
            g = f.geometry()
            attributes = dict(zip(fields, flddata))
            attributes["ShpName"] = lyr.GetName()

            if g.GetGeometryType() != 2: # only linestrings become edges
                continue

            edge_id = attributes.get(self.shapeFileUniqueId)
            # The from-node is resolved first, so an edge whose end coincides
            # with its start reuses the node that was just created.
            pfrom = self._lookupOrCreateNode(g.GetPoint_2D(0), 'from_edge', edge_id)
            pto = self._lookupOrCreateNode(g.GetPoint_2D(g.GetPointCount() - 1),
                                           'to_edge', edge_id)

            shly_geom = shapely.wkt.loads(g.ExportToWkt())
            e = Edge(pfrom, pto, attributes, geometry = shly_geom)

            # the graph edge and the index entry share the same counter value
            G.add_edge(pfrom, pto, edge=e, edgecounter=self.edgecounter)
            self.addEdgeToIndex(e)

        return G

    def _lookupOrCreateNode(self, point, edge_key, edge_id):
        """Return the node indexed around *point*, creating it when missing.

        A new node gets the attributes ``{edge_key: edge_id, "nodecounter":
        <id>}``, is stored in ``node_counter__node`` and in the node index,
        and advances ``self.nodecounter``.
        """
        half = INTERSECTION_MASK/2
        intersection_mask = (point[0]-half, point[1]-half,
                             point[0]+half, point[1]+half)
        results = list(self.nodeidx.intersection(intersection_mask))
        if results:
            return self.node_counter__node[results[0]]

        node = Node(point, attributes={edge_key: edge_id,
                                       "nodecounter": self.nodecounter})
        self.node_counter__node[self.nodecounter] = node
        self.nodeidx.add(self.nodecounter, (point[0], point[1],
                                            point[0], point[1]))
        self.nodecounter = self.nodecounter + 1
        return node
            
    def shapeToGraph(self, inFile, uniqueId="FID"):
        """Loads a shapefile and builds the graph.

        inFile   -- path of the shapefile; it is opened with openShape() when
                    no data source has been opened on this object yet.  A
                    data source opened earlier is used as it is.
        uniqueId -- name of a unique field in the shape file; stored in
                    ``self.shapeFileUniqueId``.

        Sets ``self.G`` to a new MultiGraph filled from every layer of the
        data source and creates ``self.routefinder`` for it.
        """
        # *inFile* used to be ignored, which made a prior openShape() call a
        # hidden requirement (AttributeError otherwise).
        if getattr(self, "shapeFile", None) is None:
            self.openShape(inFile)

        self.shapeFileUniqueId = uniqueId
        self.G = nx.MultiGraph()

        # multiple layers indicate a directory
        for lyrindex in xrange(self.shapeFile.GetLayerCount()):
            lyr = self.shapeFile.GetLayerByIndex(lyrindex)
            flds = [x.GetName() for x in lyr.schema]
            self.G = self.addlyr(self.G, lyr, flds)

        self.routefinder = RouteFinder(self.G, euclideanOD=self.distance_matrix)

    def readGPS(self, inFile):
        """Load the GPS track from the shapefile *inFile*.

        The file is handed to ``self.GPS`` and the points it reports are
        cached in ``self.gps_points``.
        """
        gps = self.GPS
        gps.readFromShapeFile(inFile)
        self.gps_points = gps.getGPSPoints()
                
    def maxGPSDistance(self):
        """Calculate the maximum distance of two consecutive GPS Points
        """
        # TODO check whether GPS points are already there
        # TODO: move into sl.gps.GPS()
        maxDistance = 0
        gps_point = self.gps_points[0]
        for gpspoint in self.gps_points:
            distance = gpspoint.getGeometry().distance(gps_point.getGeometry())
            gps_point = gps_point
            
            if distance > maxDistance:
                maxDistance = distance
                
        return maxDistance
        

    def nearPoints(self):
        """Sums up the gps point per edge segment. Stores in self.edge_id__count

        Every edge of ``self.G`` that carries an ``edge`` attribute starts at
        zero; then each GPS point adds one to the edge nearest to it.  The
        counters are keyed by the edge's ``self.shapeFileUniqueId``
        attribute, the key addPointCountToEdge increments.
        """
        # Initialize the edge counters.  edges(data=True) hands out the
        # attribute dict of every edge; indexing self.G[u][v] on a MultiGraph
        # returns an edge-key -> attributes mapping instead, so the former
        # ``.get("edgecounter")`` lookup always produced None.
        for _from_node, _to_node, data in self.G.edges(data=True):
            edge = data.get("edge")
            if edge is not None:
                edge_id = edge.getAttributes().get(self.shapeFileUniqueId)
                self.edge_id__count[edge_id] = 0

        for point in self.gps_points:
            self.addPointCountToEdge(self.getNearestEdge(point))
            
    def addPointCountToEdge(self, edge):
        """Increments the point counter for the given edge by one.
        """
        attributes = edge.getAttributes()
        if self.edge_id__count.has_key(attributes.get(self.shapeFileUniqueId)):
            self.edge_id__count[attributes.get(self.shapeFileUniqueId)] = self.edge_id__count[attributes.get(self.shapeFileUniqueId)] + 1
        else:
            self.edge_id__count[attributes.get(self.shapeFileUniqueId)] = 1
        edge.setAttributes(attributes)
    
    def getNearestEdge(self, point):
        """Returns the edge closes to a Shapely entity given (point) 
        """
        edge = mm.idx.nearest((point.getPoint().x, point.getPoint().y), objects=True)
        edges = [e.object for e in edge]
        if len(edges) == 1:
            result = edges[0]
        else:
            dist = 99999999999999999999999999999999999999999
            for edge in edges:
                distance = point.getPoint().distance(edge.getGeometry())
                if distance < dist:
                    dist = distance
                    result = edge
        return result
    
    
    def getNearestNode(self, point):
        """Returns the closest node to a GPS point.
        """
        nodes = list(mm.nodeidx.nearest((point.getPoint().x, point.getPoint().y)))
        return self.node_counter__node.get(nodes[0])
    
    def find_all_paths(self, graph, start, end, path=[]):
        path = path + [start]
        if start == end:
            return [path]
        if not graph.has_key(start):
            return []
        paths = []
        for node in graph[start]:
            if node not in path:
                newpaths = self.find_all_paths(graph, node, end, path)
                for newpath in newpaths:
                    paths.append(newpath)
        return paths 
    
    def findRoutes2(self):
        """Pick a route between the first and the last GPS point exhaustively.

        All cycle-free node paths between the node nearest to the first GPS
        point and the node nearest to the last one are enumerated.  Each path
        becomes a Route made of the shortest edge between every pair of
        consecutive nodes.  Routes that collect more than one GPS point
        (according to ``self.edge_id__count``) are scored by points per unit
        of length.

        Returns the Route with the highest score -- the same criterion
        findRoute applies -- or None when no route qualifies.
        """
        start_node = self.getNearestNode(self.gps_points[0])
        end_node = self.getNearestNode(self.gps_points[-1])

        print("preparing python graph")

        # plain adjacency mapping: node id -> ids of directly reachable nodes
        graph = {}
        for node in self.node_counter__node.values():
            graph[node.getNodeID()] = [n.getOutNode2(node).getNodeID() for n in node.getOutEdges()]

        import pprint
        pprint.pprint( graph )

        print(" ".join(["From ", str(start_node.getNodeID()), " to ", str(end_node.getNodeID()), "."]))
        results = self.find_all_paths(graph, start_node.getNodeID(), end_node.getNodeID())

        # let us find the edges
        route_list = []
        for result in results:
            edges = []
            for from_id, to_id in zip(result, result[1:]):
                node_from = self.node_counter__node.get(from_id)
                node_to = self.node_counter__node.get(to_id)
                # self.G[u][v] maps each parallel edge's key to its
                # attributes; take the shortest of the parallel edges.  (The
                # former ``keys() > 1`` test compared a list with an int.)
                candidates = [data['edge'] for data in self.G[node_from][node_to].values()]
                edges.append(min(candidates, key=lambda candidate: candidate.getLength()))

            route_list.append(Route(edges))

        factor__selected_route = {}

        for route in route_list:
            number_of_points = 0
            length = 0
            for edge in route.getEdges():
                length = length + edge.getLength()
                edge_id = edge.getAttributes().get(self.shapeFileUniqueId)
                number_of_points = number_of_points + self.edge_id__count.get(edge_id, 0)

            # float() guards against integer division; a route without
            # length cannot be scored
            if number_of_points > 1 and length > 0:
                factor__selected_route[float(number_of_points)/length] = route

        if not factor__selected_route:
            return None

        # Most points per unit of length wins.  Sorting the scores ascending
        # and taking the first one used to return the weakest route.
        return factor__selected_route[max(factor__selected_route)]
        
        
    def findRoute(self, returnNonSelection=False):
        """Finds a route from the node closest to the first GPS point to
        the node closest to the latest GPS point.

        A new RouteFinder for ``self.G`` proposes candidate routes (labels).
        Each label is scored by the number of GPS points on its edges
        (``self.edge_id__count``) per unit of length; labels with fewer than
        two points are ignored.

        Returns the label with the highest score, or None when no label
        qualifies.  With *returnNonSelection* set the method returns None, as
        it always has.
        """
        # pick the start and end GPS points # TODO: sort GPS Points first
        start_node = self.getNearestNode(self.gps_points[0])
        end_node = self.getNearestNode(self.gps_points[-1])

        # the start and end nodes returned by the index are not in the graph,
        # therefore we need to look them up ....
        start_node = self.node_counter__node.get(start_node.getAttributes().get("nodecounter"))
        end_node = self.node_counter__node.get(end_node.getAttributes().get("nodecounter"))

        # A misspelt attribute name (``routfinder``) used to leave the fresh
        # finder unused.
        self.routefinder = RouteFinder(self.G, euclideanOD=self.distance_matrix)
        label_list = self.routefinder.findroutes(start_node, end_node)

        score = 0
        selected = None

        for label in label_list:
            # we sum up the number of points and relate them to the length of the route
            print(label)
            number_of_points = 0
            for edge in label.getEdges():
                edge_id = edge.getAttributes().get(self.shapeFileUniqueId)
                number_of_points = number_of_points + self.edge_id__count.get(edge_id, 0)
                print(" ".join(["      ", str(number_of_points)]))

            if number_of_points <= 1:
                continue
            length = label.getLength()
            if not length > 0:
                # a label without length cannot be scored
                continue

            # float() guards against integer division
            label_score = float(number_of_points)/length
            if label_score > score:
                selected = label
                score = label_score

        if returnNonSelection:
            return None
        return selected
        
    def eliminiateEmptyEdges(self, distance = 100):
        """Loops through the GPS pointset, keeps the edges the edge index
        reports for a box of <distance> meters around each point and removes
        every other edge from ``self.G``.

        Edges are identified by the id they were stored under in the edge
        index, which addlyr also writes into the ``edgecounter`` attribute of
        the graph edge.  Assumes ``self.G`` is the MultiGraph built by
        shapeToGraph.
        """
        print("Edge elimination started")

        selected_edge_ids = set()
        half = distance/2

        # NOTE(review): nearest() reports the closest entries to the box, not
        # everything inside it -- presumably intersection() was meant; confirm.
        for point in self.gps_points:
            location = point.getPoint()
            results = self.idx.nearest((location.x-half,
                                        location.y-half,
                                        location.x+half,
                                        location.y+half), objects=True)
            for result in results:
                selected_edge_ids.add(result.id)
        print(str(len(selected_edge_ids)) + " edges found to keep.")

        # Read the counter from each edge's own attribute dict and remove by
        # key.  Looking it up through self.G.edge[u][v] returned the mapping
        # of parallel edges, so the counter was always None and nothing was
        # eliminated.
        elimination_counter = 0
        for from_node, to_node, key, data in list(self.G.edges(keys=True, data=True)):
            if data.get("edgecounter") not in selected_edge_ids:
                self.G.remove_edge(from_node, to_node, key)
                elimination_counter =  elimination_counter + 1

        print(str(elimination_counter) + " edges eliminated.")
        
    def dumpPointShape(self, filename, original_coverage=None):
        """Write all known nodes to a new point shapefile *filename*.

        Every node of ``self.node_counter__node`` becomes one point feature
        with two fields: ``node_count`` (the node's ``nodecounter``
        attribute) and ``edge_list`` (the ``ID_NR`` of each outgoing edge,
        every entry followed by ", ").  An existing data source of the same
        name is deleted first.

        Does nothing when *filename* is empty.  Terminates the process with
        exit status 1 when the driver, the data source, the layer or a field
        cannot be created.  *original_coverage* is accepted but not used.
        """
        if not filename:
            return

        driverName = "ESRI Shapefile"
        drv = ogr.GetDriverByName( driverName )
        if drv is None:
            # carrying on without a driver only crashed a line later
            print("%s driver not available.\n" % driverName)
            sys.exit( 1 )

        drv.DeleteDataSource(filename)
        ds = drv.CreateDataSource( filename)
        if ds is None:
            print("Creation of output file failed.\n")
            sys.exit( 1 )

        lyr = ds.CreateLayer( "blabla", None, ogr.wkbPoint )
        if lyr is None:
            print("Layer creation failed.\n")
            sys.exit( 1 )

        field_defn = ogr.FieldDefn( "node_count", ogr.OFTInteger )
        if lyr.CreateField ( field_defn ) != 0:
            print("Creating Name field failed.\n")
            sys.exit( 1 )

        field_defn = ogr.FieldDefn( "edge_list", ogr.OFTString )
        field_defn.SetWidth( 1024)
        if lyr.CreateField ( field_defn ) != 0:
            print("Creating Name field failed.\n")
            sys.exit( 1 )

        for node in self.node_counter__node.values():
            feat = ogr.Feature( lyr.GetLayerDefn() )

            nc = node.getAttributes().get("nodecounter")
            print(nc)
            feat.SetField( "node_count", nc )

            # same text as before (every id is followed by ", "), built in
            # one pass
            s = "".join([str(int(edge.getAttributes().get("ID_NR"))) + ", "
                         for edge in node.getOutEdges()])
            feat.SetField( "edge_list", s )

            node_entity = ogr.Geometry(ogr.wkbPoint)
            node_entity.SetPoint_2D(0,node.getGeometry().x, node.getGeometry().y)
            feat.SetGeometry(node_entity)

            lyr.CreateFeature(feat)
            feat.Destroy()

        print("Shapefile (%s) written." % filename)
Exemplo n.º 4
0
class IntRtreeIndex(BaseIndex):
    """Avoids the slower Rtree query object=True interface

    Only an integer id per item is stored in the R-tree; the items
    themselves live in BTrees and query hits are mapped back through
    ``bwd``.
    """
    _v_nextuid = None
    family = BTrees.family32

    def clear(self):
        """Reset the index to an empty R-tree and empty mappings."""
        self.fwd = Rtree()                      # R-tree of intids
        self.bwd = self.family.OO.BTree()       # intid -> item
        self.keys = self.family.IO.BTree()      # intid -> key
        self.intids = self.family.OI.BTree()    # key -> intid
        self.ids = self.family.OO.BTree()       # item id -> set of intids

    def __init__(self):
        self.clear()

    def key(self, item):
        """Return the lookup key of *item*.

        Normally ``(item['id'], bbox)``; when that cannot be built, the
        tuple of the item's ``items()`` is used instead.
        """
        try:
            return item['id'], tuple(self.bbox(item))
        except Exception:
            # Deliberate best effort.  This used to be a bare ``except:``,
            # which also swallowed KeyboardInterrupt and SystemExit.
            return tuple(item.items())

    def fid(self, item):
        """Return the id stored in *item*."""
        return item['id']

    def intid(self, item):
        """Return the integer id of *item*, picking an unused one if needed.

        The id is only chosen here; index_item is what records it.
        """
        # Get and track next available key using zope.intid algorithm
        # Item might be already registered
        uid = self.intids.get(self.key(item))
        if uid is not None:
            return uid
        # But if not registered
        nextuid = getattr(self, '_v_nextuid', None)
        while True:
            if nextuid is None:
                nextuid = random.randrange(0, self.family.maxint)
            uid = nextuid
            if uid not in self.keys:
                # remember the successor as the first candidate next time
                nextuid += 1
                if nextuid > self.family.maxint:
                    nextuid = None
                self._v_nextuid = nextuid
                return uid
            # candidate is taken: draw a new random one
            nextuid = None

    def intersection(self, bbox):
        """Return an iterator over Items that intersect with the bbox"""
        for hit in self.fwd.intersection(bbox, objects=False):
            yield self.bwd[int(hit)]

    def nearest(self, bbox, limit=1):
        """Return an iterator over the nearest N=limit Items to the bbox"""
        for hit in self.fwd.nearest(bbox, num_results=limit, objects=False):
            yield self.bwd[int(hit)]

    def item(self, fid, bbox):
        """Return the item registered under the key ``(fid, bbox)``."""
        return self.bwd[self.intids[(fid, bbox)]]

    def items(self, fid):
        """Return the list of all items registered under the id *fid*."""
        return [self.bwd[intid] for intid in self.ids[fid]]

    def index_item(self, itemid, bbox, item):
        """Add an Item to the index

        An item without an ``id`` gets a fresh UUID string.  Errors
        propagate to the caller; a handler that dropped into the debugger
        has been removed.
        """
        if itemid in self.bwd:
            self.unindex_item(itemid, bbox)
        # Store an id for the item if it has None
        item.update(id=item.get('id') or str(uuid.uuid4()))
        key = self.key(item)
        sid = self.fid(item)

        # Map keys <-> intids
        intid = self.intid(item)
        self.keys[intid] = key
        self.intids[key] = intid

        if sid not in self.ids:
            self.ids[sid] = IISet([])
        self.ids[sid].add(intid)

        self.bwd[intid] = item
        self.fwd.add(intid, bbox)

    def unindex_item(self, itemid, bbox):
        """Remove an Item from the index

        Does nothing when no key is recorded for ``int(itemid)``.
        """
        intid = int(itemid)
        key = self.keys.get(intid)
        if key is None:
            return
        self.ids[key[0]].remove(intid)
        del self.keys[intid]
        del self.intids[key]
        del self.bwd[intid]
        self.fwd.delete(intid, bbox)

    def batch(self, changeset):
        """Apply *changeset* through BaseIndex.batch."""
        BaseIndex.batch(self, changeset)

    def commit(self):
        """Commit the transaction, then close and reopen the R-tree storage."""
        transaction.commit()
        rtree_storage = self.fwd.properties.filename
        self.fwd.close()
        self.fwd = Rtree(rtree_storage)

    def close(self):
        """Close the R-tree."""
        self.fwd.close()
Exemplo n.º 5
0
class MapMatcher():
    """A very nice MapMatching class
    """
    
    def __init__(self):
        """Create an empty matcher: no graph, empty indexes and tables."""
        # helper objects: GPS reader, edge R-tree, node R-tree
        self.GPS = GPS()
        self.idx = Rtree()
        self.nodeidx = Rtree()

        # the graph is created later
        self.G = None
        self.gps_points = []

        # ids handed out to index entries, plus a result counter
        self.edgecounter = self.nodecounter = self.result_counter = 0

        # lookup tables
        self.edgeindex_edge = {}      # edge index id -> edge
        self.edge_id__count = {}
        self.node_counter__node = {}  # node index id -> node
        
    def saveGraph(self, filename):
        """Dump ``self.G`` as YAML into the file *filename*."""
        target, graph = filename, self.G
        nx.write_yaml(graph, target)
        
    def readGraphFromYAMLFile(self, filename):
        """Read a graph from the YAML file *filename* into ``self.G``.

        Nothing but ``self.G`` is changed.
        """
        # NOTE(review): presumably only trusted YAML files are loaded here,
        # since YAML loaders may run arbitrary constructors; confirm.
        graph = nx.read_yaml(filename)
        self.G = graph
        # TODO: build up the indexes !!!
        
        
    def addNodeToIndex(self, node):
        """Adds a node to the node index (RTree)
        """
        # self.nodeidx.add(self.nodecounter, (node.getPoint()[0], node.getPoint()[1]), obj=node)
        self.nodeidx.add(self.nodecounter, (node.getPoint()[0], node.getPoint()[1], node.getPoint()[0], node.getPoint()[1]))

        self.node_counter__node[self.nodecounter] = node
        
    
    def addEdgeToIndex(self, edge): 
        """Add an edge to the edhe index.
        """
        self.idx.add(self.edgecounter, (edge.getMinX(), edge.getMinY(), edge.getMaxX(), edge.getMaxY()),obj=edge)
        # print "%d/%d -> %d/%d" % (edge.getMinX(), edge.getMinY(), edge.getMaxX(), edge.getMaxY())
        self.edgeindex_edge[self.edgecounter] = edge
        self.edgecounter = self.edgecounter + 1
        
    def openShape(self, inFile, index=0):
        """Open *inFile* with OGR and store the result in ``self.shapeFile``.

        Exits the process with status 1 if OGR cannot open the file.  The
        *index* argument is not used.
        """
        source = ogr.Open(inFile)
        self.shapeFile = source
        if source is None:
            print("Failed to open " + inFile + ".\n")
            sys.exit( 1 )
        print("SHP file successfully read")
     
    def getfieldinfo(self, lyr, feature, flds):
        """Look up the value of every field in *flds* on *feature*.

        Returns the values as a list ordered like *flds*; *lyr* is ignored.
        """
        def value_of(field_name):
            return feature.GetField(feature.GetFieldIndex(field_name))

        return [value_of(field_name) for field_name in flds]
     
    def addlyr(self, G,lyr, fields):
        """Add every linestring feature of *lyr* to *G* as an edge.

        For each feature whose OGR geometry type is 2 (linestring) the first
        and the last vertex become the end nodes of the edge.  A node that is
        already indexed within INTERSECTION_MASK/2 of a vertex (in x and in
        y) is reused, otherwise a new Node is created and indexed; either
        case is reported on standard output.  The edge is added to *G* with
        the attributes ``edge`` and ``edgecounter`` and registered in the
        edge index.

        G      -- graph to extend (modified in place)
        lyr    -- OGR layer to read the features from
        fields -- names of the fields copied into the edge attributes

        Returns *G*.
        """
        for findex in xrange(lyr.GetFeatureCount()):
            f = lyr.GetFeature(findex)
            flddata = self.getfieldinfo(lyr, f, fields)
            g = f.geometry()
            attributes = dict(zip(fields, flddata))
            attributes["ShpName"] = lyr.GetName()

            if g.GetGeometryType() != 2: # only linestrings become edges
                continue

            # The from-node is resolved first, so an edge whose end coincides
            # with its start reuses the node that was just created.
            pfrom = self._nodeForEndpoint(g.GetPoint_2D(0), "from", attributes)
            pto = self._nodeForEndpoint(g.GetPoint_2D(g.GetPointCount() - 1),
                                        "to", attributes)

            shly_geom = shapely.wkt.loads(g.ExportToWkt())
            e = Edge(pfrom, pto, attributes, geometry = shly_geom)

            # the graph edge and the index entry share the same counter value
            G.add_edge(pfrom, pto, edge=e, edgecounter=self.edgecounter)
            self.addEdgeToIndex(e)

        return G

    def _nodeForEndpoint(self, point, role, attributes):
        """Return the node for *point*, reusing an indexed one when possible.

        *role* is ``"from"`` or ``"to"``; it selects the attribute name
        (``from_edge`` / ``to_edge``) given to a new node and the wording of
        the progress messages.  *attributes* are the attributes of the edge
        that is being added.
        """
        half = INTERSECTION_MASK/2
        intersection_mask = (point[0]-half, point[1]-half,
                             point[0]+half, point[1]+half)
        results = list(self.nodeidx.intersection(intersection_mask))
        edge_label = str(attributes.get("ID_NR"))

        if results:
            if role == "from":
                # the hit count has only ever been reported for from-nodes
                print(len(results))
            print(role.capitalize() + "-node " + str(results[0]) + " recycled for edge " + edge_label + ".")
            return self.node_counter__node[results[0]]

        print("New " + role + "-node " + str(self.nodecounter) + " for edge " + edge_label + ".")
        node = Node(point, attributes={role + "_edge": attributes.get(self.shapeFileUniqueId),
                                       "nodecounter": self.nodecounter})
        self.node_counter__node[self.nodecounter] = node
        self.nodeidx.add(self.nodecounter, (point[0], point[1],
                                            point[0], point[1]))
        self.nodecounter = self.nodecounter + 1
        return node
            
    def shapeToGraph(self, inFile, uniqueId="FID"):
        """Build the network graph from the layers of self.shapeFile.

        Each layer is passed through self.addlyr together with the names of
        its schema fields; the graph returned by addlyr replaces self.G.
        Once all layers are processed a RouteFinder is created on the graph.

        inFile   -- not read by this method; the layers come from
                    self.shapeFile
        uniqueId -- name of a unique field in the shape file, remembered as
                    self.shapeFileUniqueId
        """
        self.G = nx.MultiGraph()
        self.shapeFileUniqueId = uniqueId

        # more than one layer indicates a directory
        for layer_no in range(self.shapeFile.GetLayerCount()):
            layer = self.shapeFile.GetLayerByIndex(layer_no)
            field_names = [field.GetName() for field in layer.schema]
            self.G = self.addlyr(self.G, layer, field_names)

        self.routefinder = RouteFinder(self.G)

    def readGPS(self, inFile):
        """Read the GPS track from the shapefile inFile.

        The file is handed to self.GPS.readFromShapeFile; the points then
        reported by self.GPS.getGPSPoints() are kept in self.gps_points.
        """
        gps = self.GPS
        gps.readFromShapeFile(inFile)
        self.gps_points = gps.getGPSPoints()
                
    def maxGPSDistance(self):
        """Calculate the maximum distance of two consecutive GPS points.

        Walks self.gps_points in order and measures the geometry of every
        point against the geometry of the point directly before it.

        Returns the largest distance found, or 0 when there are fewer than
        two points.
        """
        # TODO check whether GPS points are already there
        # TODO: move into sl.gps.GPS()
        maxDistance = 0
        previous = None
        for gpspoint in self.gps_points:
            if previous is not None:
                distance = gpspoint.getGeometry().distance(previous.getGeometry())
                if distance > maxDistance:
                    maxDistance = distance
            # move the trailing point forward so that the next comparison is
            # made against the direct predecessor, not against the first point
            previous = gpspoint

        return maxDistance
        

    def nearPoints(self):
        """Sum up the GPS points per edge segment in self.edge_id__count.

        First every graph edge gets a zero entry, keyed by the value stored
        under "edgecounter" for that node pair.  Then, for each GPS point,
        the nearest edge is looked up and passed to addPointCountToEdge.

        NOTE(review): the zero entries are keyed by "edgecounter", whereas
        addPointCountToEdge keys its counts by the edge attribute named in
        self.shapeFileUniqueId -- confirm that both key spaces agree.
        """
        counts = self.edge_id__count
        graph = self.G

        # start every known edge at zero
        for node_pair in graph.edges():
            counts[graph[node_pair[0]][node_pair[1]].get("edgecounter")] = 0

        # credit each GPS point to the edge closest to it
        for gps_point in self.gps_points:
            self.addPointCountToEdge(self.getNearestEdge(gps_point))
            
    def addPointCountToEdge(self, edge):
        """Increment the point counter for the given edge by one.

        The counter lives in self.edge_id__count under the value of the
        edge attribute named by self.shapeFileUniqueId; an edge that has no
        entry yet starts at 1.  The (unchanged) attributes are written back
        to the edge afterwards via setAttributes.
        """
        attributes = edge.getAttributes()
        edge_id = attributes.get(self.shapeFileUniqueId)
        # dict.get with a default replaces the has_key() branch, which no
        # longer exists on Python 3 dictionaries
        self.edge_id__count[edge_id] = self.edge_id__count.get(edge_id, 0) + 1
        edge.setAttributes(attributes)
    
    def getNearestEdge(self, point):
        """Return the edge closest to the given GPS point.

        point must offer getPoint(), returning an object with x, y and a
        distance() method.  The edge index self.idx is asked for its nearest
        entries; when it reports several candidates, the one whose geometry
        is closest to the point wins (the first one on ties).

        Returns None when the index yields no candidate at all.
        """
        location = point.getPoint()
        # use this instance's own index instead of a module level object
        hits = self.idx.nearest((location.x, location.y), objects=True)
        candidates = [hit.object for hit in hits]

        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        return min(candidates,
                   key=lambda edge: location.distance(edge.getGeometry()))
    
    
    def getNearestNode(self, point):
        """Return the closest node to a GPS point.

        The node index is queried with the point's x/y position; the first
        id it reports is resolved through self.node_counter__node.

        Returns None when the index reports nothing or the id is unknown.

        NOTE(review): the index is read from self.nodeidx rather than from
        a module level object -- confirm the attribute name against the
        code that fills the node index.
        """
        location = point.getPoint()
        node_ids = list(self.nodeidx.nearest((location.x, location.y)))
        if not node_ids:
            return None
        return self.node_counter__node.get(node_ids[0])
        
    def findRoute(self, returnNonSelection=False):
        """Find a route from the node closest to the first GPS point to the
        node closest to the last GPS point.

        All candidate routes (labels) reported by the route finder are
        scored by the number of GPS points counted on their edges (see
        self.edge_id__count) divided by the route length.  The label with
        the highest score above zero is selected.

        returnNonSelection -- when true nothing is returned (None); handing
                              back the non-selected labels is not
                              implemented.

        Returns the selected label, or None when no label scores above zero.
        """
        # pick the start and end GPS points # TODO: sort GPS Points first
        start_point = self.gps_points[0]
        end_point = self.gps_points[-1]

        start_node = self.getNearestNode(start_point)
        end_node = self.getNearestNode(end_point)

        # the start and end nodes returned by the index are not in the
        # graph, therefore we need to look them up
        start_node = self.node_counter__node.get(start_node.getAttributes().get("nodecounter"))
        end_node = self.node_counter__node.get(end_node.getAttributes().get("nodecounter"))

        # always search on a finder built from the current graph
        self.routefinder = RouteFinder(self.G)
        label_list = self.routefinder.findroutes(start_node, end_node)

        selected = None
        best_score = 0
        for label in label_list:
            # sum up the GPS points lying on the edges of this route
            number_of_points = 0
            for edge in label.getEdges():
                edge_id = edge.getAttributes().get(self.shapeFileUniqueId)
                number_of_points += self.edge_id__count.get(edge_id, 0)

            # relate the point count to the route length; float() keeps the
            # ratio from being truncated by integer division
            score = float(number_of_points) / label.getLength()
            if score > best_score:
                selected = label
                best_score = score

        if returnNonSelection:
            return None
        return selected
        
    def eliminiateEmptyEdges(self, distance = 100):
        """Remove graph edges that no GPS point selects.

        For every GPS point a square box with an edge length of <distance>,
        centred on the point, is passed to the edge index.  The edges
        reported for any point are kept; every other edge is removed from
        self.G.

        NOTE(review): the index is queried with nearest() on the box, not
        with intersection() -- confirm that this matches the intended
        "edges within <distance>" selection.
        """
        print("Edge elimination started")

        # float division: an odd integer distance must not be truncated
        half = distance / 2.0

        selected_edge_ids = set()
        for point in self.gps_points:
            location = point.getPoint()
            results = self.idx.nearest((location.x - half,
                                        location.y - half,
                                        location.x + half,
                                        location.y + half), objects=True)
            for result in results:
                from_node = self.node_counter__node.get(result.object.from_node.getAttributes().get("nodecounter"))
                to_node = self.node_counter__node.get(result.object.to_node.getAttributes().get("nodecounter"))
                selected_edge_ids.add(self.G.edge[from_node][to_node].get("edgecounter"))
        print(str(len(selected_edge_ids)) + " edges found to keep.")

        elimination_counter = 0
        # iterate over a snapshot, edges are removed while walking the list
        for edge in list(self.G.edges()):
            edge_data = self.G.edge[edge[0]][edge[1]]
            if edge_data.get("edgecounter") not in selected_edge_ids:
                stored_edge = edge_data.get("edge")
                self.G.remove_edge(stored_edge.from_node, stored_edge.to_node)
                elimination_counter += 1

        print(str(elimination_counter) + " edges eliminated.")
Exemplo n.º 6
0
class Graph:
    def __init__(self, all_trips):
        """Create an empty graph for the given collection of trips."""
        # the trips the graph is derived from
        self.all_trips = all_trips

        # cluster seeds: seed id -> seed, the next free seed id and a
        # spatial index over the seed positions
        self.cluster_seeds, self.cluster_seed_id = {}, 0
        self.cluster_seed_index = Rtree()

        # graph edges: edge id -> edge, the next free edge id and a lookup
        # keyed by "location1_id,location2_id"
        self.graph_edges, self.graph_edge_id = {}, 0
        self.graph_edge_lookup = {}

    def cluster_traces(self):
        self._create_all_trip_edges()
        self._generate_cluster_seeds()
        self._cluster_seeds_with_traces()
        self._generate_graph_edges()
        self._output_graph_to_db()

    def _generate_graph_edges(self):

        sys.stdout.write("Generating graph edges... ")
        sys.stdout.flush()

        # iterate through all trips
        for trip in self.all_trips:

            # grab trip edges
            trip_edges = trip.edges.values()

            # put trip edges in order
            trip_edges.sort(key=lambda x: x.id)

            # storage for previous cluster
            prev_cluster = None

            # iterate through trip edges
            for trip_edge in trip_edges:

                # if the current trip edge is clustered
                if trip_edge.cluster is not None:

                    # create a graph edge between the previous cluster and the current cluster
                    self._create_graph_edge(prev_cluster, trip_edge.cluster)

                    # update previous cluster with current cluster
                    prev_cluster = trip_edge.cluster

        # output graph edges
        self._write_graph_edges_to_file()

        print "done."

    def _create_graph_edge(self, in_node, out_node):

        # if in_node or out_node is None
        if (in_node is None) or (out_node is None):

            # return without doing anything
            return

        # see if we can find an existing graph edge with the same nodes
        existing_graph_edge = self._find_graph_edge(in_node, out_node)

        # if there is no existing graph edge with the same nodes
        if existing_graph_edge is None:

            # create new graph edge object
            new_graph_edge = Edge(self.graph_edge_id, in_node, out_node)

            # add new graph edge to graph edge dictionary
            self.graph_edges[new_graph_edge.id] = new_graph_edge

            # add new graph edge to graph edge lookup dictionary
            self.graph_edge_lookup[str(in_node.id) + "," + str(out_node.id)] = new_graph_edge

            # increment graph edge id
            self.graph_edge_id += 1

    def _find_graph_edge(self, node1, node2):

        # generate edge lookup key
        edge_lookup_key = str(node1.id) + "," + str(node2.id)

        # if edge is in lookup table
        if edge_lookup_key in self.graph_edge_lookup.keys():

            # return the matching edge
            return self.graph_edge_lookup[edge_lookup_key]

        # if the edge wasn't in the lookup table
        return None

    def _cluster_seeds_with_traces(self):
        """Repeat the clustering pass until the seeds settle.

        A pass (_find_points_on_traces) reports how far the cluster seeds
        moved; passes are repeated while that value is at least
        cluster_distance_moved_threshold.  The seeds are written to file
        after every pass.
        """
        # start above any threshold so that at least one pass is made
        moved = float("infinity")

        while moved >= cluster_distance_moved_threshold:
            moved = self._find_points_on_traces()
            self._write_cluster_seeds_to_file("edelkamp_cluster_seeds_clustered.txt")

    def _find_points_on_traces(self):

        # counter for cluster seeds
        seed_counter = 1

        # storage for total cluster distance moved
        total_cluster_distance_moved = 0.0

        # iterate through all cluster seeds
        for cluster_seed in self.cluster_seeds.values():

            # clear current trace points from cluster
            cluster_seed.clear_trace_points()

            sys.stdout.write(
                "\rFinding intersecting points with cluster "
                + str(seed_counter)
                + "/"
                + str(len(self.cluster_seeds))
                + "... "
            )
            sys.stdout.flush()

            # increment seed counter
            seed_counter += 1

            # determine leftward cluster bearing
            leftward_bearing = math.fmod((cluster_seed.bearing - 90.0) + 360.0, 360.0)

            # determine rightward cluster bearing
            rightward_bearing = math.fmod((cluster_seed.bearing + 90.0) + 360.0, 360.0)

            # storage for candidate trace points
            candidate_trace_points = []

            # iterate through all trips
            for trip in self.all_trips:

                # find leftward intersection points with trip
                candidate_trace_points.extend(self._find_intersection_points(trip, cluster_seed, leftward_bearing))

                # find rightward intersection points with trip
                candidate_trace_points.extend(self._find_intersection_points(trip, cluster_seed, rightward_bearing))

            # add candidate trace points to cluster
            cluster_seed.add_trace_points(candidate_trace_points)

            # recompute cluster centroid
            total_cluster_distance_moved += cluster_seed.recompute_cluster_centroid()

            # clear current trace points from cluster
            cluster_seed.clear_trace_points()

            # add candidate trace points to cluster, again
            cluster_seed.add_trace_points(candidate_trace_points)

        # normalize total cluster distance moved by number of seeds
        total_cluster_distance_moved = total_cluster_distance_moved / len(self.cluster_seeds.values())

        # and we're done!
        print "done (clusters moved an average of " + str(total_cluster_distance_moved) + " meters)."

        # return total cluster distance moved
        return total_cluster_distance_moved

    def _find_intersection_points(self, trip, cluster, cluster_bearing):
        """Collect the points where a ray from the cluster meets the trip.

        Only trip edges near the cluster (box size edge_bounding_box_size)
        are examined.  For each of them the intersection between the edge
        (from its in_node along its bearing) and the cluster (along
        cluster_bearing) is computed; it counts when it lies no further
        from the edge's in_node than the edge is long.

        Returns a list of TracePoint objects, one per valid intersection.
        """
        trace_points = []

        nearby_ids = self._find_nearby_trip_edge_ids(cluster, edge_bounding_box_size, trip.edge_index)
        for edge_id in nearby_ids:
            edge = trip.edges[edge_id]

            crossing = self._intersection_point(edge.in_node, edge.bearing, cluster, cluster_bearing)
            if crossing is None:
                # the two lines do not intersect
                continue

            # how far along the edge (measured from its in_node) is the hit?
            offset = self._distance_coords(
                edge.in_node.latitude, edge.in_node.longitude, crossing[0], crossing[1]
            )
            if offset <= edge.length:
                trace_points.append(TracePoint(crossing[0], crossing[1], edge.bearing, edge))

        return trace_points

    def _generate_cluster_seeds(self):

        # iterate through all trips
        for i in range(0, len(self.all_trips)):

            sys.stdout.write("\rCluster seeding trip " + str(i + 1) + "/" + str(len(self.all_trips)) + "... ")
            sys.stdout.flush()

            # grab current trip
            trip = self.all_trips[i]

            # set last cluster seed distance to zero for first trip location
            trip.locations[0].last_cluster_seed_distance = 0.0

            # iterate through all trip locations
            for j in range(1, len(trip.locations)):

                # drop cluster seeds along current edge every 50 meters
                self._drop_cluster_seeds_along_edge(trip.locations[j - 1], trip.locations[j])

        print "done (generated " + str(len(self.cluster_seeds)) + " cluster seeds)."

        # write cluster seeds to file
        self._write_cluster_seeds_to_file("edelkamp_cluster_seeds_initial.txt")

    def _drop_cluster_seeds_along_edge(self, in_node, out_node):
        """Place cluster seeds along the edge from in_node to out_node.

        Seed positions are spaced cluster_seed_interval apart, continuing
        the spacing from in_node.last_cluster_seed_distance.  At each
        position a new seed is created unless one of the (up to 25) nearest
        existing seeds is within cluster_seed_interval and the cosine of the
        bearing difference is at least cluster_bearing_difference_limit.

        out_node.last_cluster_seed_distance is updated: to the distance
        between the last position and out_node, or, when no position falls
        on this edge, to the distance carried over from in_node plus the
        edge length.
        """
        edge_length = self._distance(in_node, out_node)

        # offsets (measured from in_node) of the seed positions on this edge
        seed_offsets = []
        next_offset = cluster_seed_interval - in_node.last_cluster_seed_distance
        while next_offset <= edge_length:
            seed_offsets.append(next_offset)
            next_offset += cluster_seed_interval

        edge_bearing = self._path_bearing(in_node, out_node)

        # no position on this edge: only carry the distance forward
        if not seed_offsets:
            out_node.last_cluster_seed_distance = in_node.last_cluster_seed_distance + edge_length
            return

        for seed_offset in seed_offsets:

            # coordinates of the position along the edge
            (seed_latitude, seed_longitude) = self._point_along_line(
                in_node, out_node, seed_offset / edge_length
            )

            # ids of the nearest seeds already known
            neighbour_ids = list(
                self.cluster_seed_index.nearest((seed_longitude, seed_latitude), 25)
            )

            # look for an existing seed that already covers this position
            matched_cluster_seed = None
            for neighbour_id in neighbour_ids:
                neighbour = self.cluster_seeds[neighbour_id]

                distance = self._distance_coords(
                    seed_latitude, seed_longitude, neighbour.latitude, neighbour.longitude
                )

                # cosine of the angle between the edge and the seed bearing
                bearing_difference = math.cos(math.radians(edge_bearing - neighbour.bearing))

                if (distance <= cluster_seed_interval) and (bearing_difference >= cluster_bearing_difference_limit):
                    matched_cluster_seed = neighbour
                    break

            # nothing suitable nearby (or nothing nearby at all): new seed
            if matched_cluster_seed is None:
                self._create_new_cluster_seed(seed_latitude, seed_longitude, edge_bearing)

            # remaining distance from this position to the end of the edge
            out_node.last_cluster_seed_distance = self._distance_coords(
                seed_latitude, seed_longitude, out_node.latitude, out_node.longitude
            )

    def _create_new_cluster_seed(self, latitude, longitude, bearing):
        """Create, register and return a cluster seed at the given position.

        The seed receives the next free seed id, is stored in
        self.cluster_seeds and inserted into the spatial index as a
        (longitude, latitude) point.
        """
        seed = ClusterSeed(self.cluster_seed_id, latitude, longitude, bearing)

        # make the seed reachable by id and by position
        self.cluster_seeds[seed.id] = seed
        self.cluster_seed_index.insert(seed.id, (seed.longitude, seed.latitude))

        # the id is used up
        self.cluster_seed_id += 1

        return seed

    def _create_all_trip_edges(self):
        """Give every trip its edges and a spatial index over them.

        Consecutive locations of a trip are joined by an Edge; the edges
        are numbered from 0 within each trip, stored in trip.edges and
        indexed in trip.edge_index.
        """
        sys.stdout.write("Creating and indexing edges for all trips... ")
        sys.stdout.flush()

        for trip in self.all_trips:

            # fresh edge storage and edge index for this trip
            trip.edges = {}
            trip.edge_index = Rtree()

            # edge k joins location k with location k + 1
            for edge_id in range(len(trip.locations) - 1):
                edge = Edge(edge_id, trip.locations[edge_id], trip.locations[edge_id + 1])
                trip.edges[edge_id] = edge
                self._index_trip_edge(edge, trip.edge_index)

        print("done.")

    def _index_trip_edge(self, edge, edge_index):

        # determine edge minx, miny, maxx, maxy values
        edge_minx = min(edge.in_node.longitude, edge.out_node.longitude)
        edge_miny = min(edge.in_node.latitude, edge.out_node.latitude)
        edge_maxx = max(edge.in_node.longitude, edge.out_node.longitude)
        edge_maxy = max(edge.in_node.latitude, edge.out_node.latitude)

        # insert edge into spatial index
        edge_index.insert(edge.id, (edge_minx, edge_miny, edge_maxx, edge_maxy))

    def _find_nearby_trip_edge_ids(self, location, distance, edge_index):
        """Return the ids of the indexed edges near location.

        A box centred on location is intersected with edge_index; half of
        distance is converted to degrees with the METERS_PER_DEGREE_*
        constants of spatialfunclib and applied to each side.
        """
        half_distance = distance / 2.0
        lon_offset = half_distance / spatialfunclib.METERS_PER_DEGREE_LONGITUDE
        lat_offset = half_distance / spatialfunclib.METERS_PER_DEGREE_LATITUDE

        # box corners as (minx, miny, maxx, maxy)
        west = location.longitude - lon_offset
        south = location.latitude - lat_offset
        east = location.longitude + lon_offset
        north = location.latitude + lat_offset

        return list(edge_index.intersection((west, south, east, north)))

    def _intersection_point(self, location1, location1_bearing, location2, location2_bearing):
        """Delegate to spatialfunclib.intersection_point.

        Each location contributes its latitude and longitude, followed by
        the bearing given for it.
        """
        first = (location1.latitude, location1.longitude, location1_bearing)
        second = (location2.latitude, location2.longitude, location2_bearing)
        return spatialfunclib.intersection_point(*(first + second))

    def _point_along_line(self, location1, location2, fraction_along):
        """Delegate to spatialfunclib.point_along_line.

        The latitude/longitude pairs of both locations are passed along
        with fraction_along.
        """
        start = (location1.latitude, location1.longitude)
        end = (location2.latitude, location2.longitude)
        return spatialfunclib.point_along_line(start[0], start[1], end[0], end[1], fraction_along)

    def _path_bearing(self, location1, location2):
        """Delegate to spatialfunclib.path_bearing with both locations'
        latitude/longitude pairs."""
        coordinates = (
            location1.latitude,
            location1.longitude,
            location2.latitude,
            location2.longitude,
        )
        return spatialfunclib.path_bearing(*coordinates)

    def _distance(self, location1, location2):
        """Delegate to spatialfunclib.distance with both locations'
        latitude/longitude pairs."""
        coordinates = (
            location1.latitude,
            location1.longitude,
            location2.latitude,
            location2.longitude,
        )
        return spatialfunclib.distance(*coordinates)

    def _distance_coords(self, location1_latitude, location1_longitude, location2_latitude, location2_longitude):
        """Delegate to spatialfunclib.distance with raw coordinates."""
        first = (location1_latitude, location1_longitude)
        second = (location2_latitude, location2_longitude)
        return spatialfunclib.distance(*(first + second))

    def _write_cluster_seeds_to_file(self, filename="edelkamp_cluster_seeds.txt"):

        # open graph file
        graph_file = open(filename, "w")

        # iterate through all cluster_seeds
        for cluster_seed in self.cluster_seeds.values():

            # output cluster seed to file
            graph_file.write(
                str(cluster_seed.latitude) + "," + str(cluster_seed.longitude) + "," + str(cluster_seed.bearing) + "\n"
            )

        # close graph file
        graph_file.close()

    def _write_graph_edges_to_file(self):

        # open graph file
        graph_file = open("edelkamp_cluster_edges.txt", "w")

        # iterate through all graph_edges
        for graph_edge in self.graph_edges.values():

            # output edge to file
            graph_file.write(str(graph_edge.in_node.latitude) + "," + str(graph_edge.in_node.longitude) + "\n")
            graph_file.write(str(graph_edge.out_node.latitude) + "," + str(graph_edge.out_node.longitude) + "\n\n")

        # close graph file
        graph_file.close()

    def _output_graph_to_db(self):

        # output that we are starting the database writing process...
        sys.stdout.write("\nOutputting graph to database... ")
        sys.stdout.flush()

        # connect to database
        conn = sqlite3.connect("edelkamp_graph.db")

        # grab cursor
        cur = conn.cursor()

        # create nodes table
        cur.execute("CREATE TABLE nodes (id INTEGER, latitude FLOAT, longitude FLOAT)")

        # create edges table
        cur.execute("CREATE TABLE edges (id INTEGER, in_node INTEGER, out_node INTEGER)")

        # remove values from nodes table
        # cur.execute("DELETE FROM nodes")

        # remove values from edges table
        # cur.execute("DELETE FROM edges")

        # commit creates
        conn.commit()

        # iterate through all cluster seeds
        for cluster_seed in self.cluster_seeds.values():

            # insert cluster seed into nodes table
            cur.execute(
                "INSERT INTO nodes VALUES ("
                + str(cluster_seed.id)
                + ","
                + str(cluster_seed.latitude)
                + ","
                + str(cluster_seed.longitude)
                + ")"
            )

        # iterate through all graph edges
        for graph_edge in self.graph_edges.values():

            # insert graph edge into edges table
            cur.execute(
                "INSERT INTO edges VALUES ("
                + str(graph_edge.id)
                + ","
                + str(graph_edge.in_node.id)
                + ","
                + str(graph_edge.out_node.id)
                + ")"
            )

        # commit inserts
        conn.commit()

        # close database connection
        conn.close()

        print "done."
Exemplo n.º 7
0
class OSMDB:
    """SQLite-backed store of OSM nodes and ways with an optional Rtree index.

    The database holds a ``nodes`` table and a ``ways`` table (see
    ``setup``) and, once ``create_and_populate_edges_table`` has run, an
    ``edges`` table.  When ``rtree_index`` is true an ``Rtree`` opened on
    the same name is kept in ``self.index``; otherwise ``self.index`` is
    ``None`` and spatial lookups fall back to SQL.
    """

    # Upper bound on bound parameters per statement used by nearest_of();
    # kept below SQLite's historical default limit of 999 host parameters.
    _MAX_SQL_PARAMS = 500

    def __init__(self, dbname, overwrite=False, rtree_index=True):
        """Open (or recreate) the database stored in the file ``dbname``.

        With ``overwrite`` the existing file is removed first and the
        empty ``nodes``/``ways`` tables are created afterwards.
        """
        self.dbname = dbname

        if overwrite:
            try:
                os.remove(dbname)
            except OSError:
                # nothing to remove: the database did not exist yet
                pass

        self.conn = sqlite3.connect(dbname)

        if rtree_index:
            self.index = Rtree(dbname)
        else:
            self.index = None

        if overwrite:
            self.setup()

    def get_cursor(self):
        """Return a cursor on the current connection.

        If we've found ourselves in a different thread than that which the
        connection was made in (sqlite3 raises ProgrammingError), re-make
        the connection and retry.
        """
        try:
            ret = self.conn.cursor()
        except sqlite3.ProgrammingError:
            self.conn = sqlite3.connect(self.dbname)
            ret = self.conn.cursor()

        return ret

    def setup(self):
        """Create the empty ``nodes`` and ``ways`` tables and commit."""
        c = self.get_cursor()
        c.execute(
            "CREATE TABLE nodes (id TEXT UNIQUE, tags TEXT, lat FLOAT, lon FLOAT, endnode_refs INTEGER DEFAULT 1)"
        )
        c.execute("CREATE TABLE ways (id TEXT UNIQUE, tags TEXT, nds TEXT)")
        self.conn.commit()
        c.close()

    def create_indexes(self):
        """Create the SQL indexes on nodes (id, lon, lat) and ways (id)."""
        c = self.get_cursor()
        c.execute("CREATE INDEX nodes_id ON nodes (id)")
        c.execute("CREATE INDEX nodes_lon ON nodes (lon)")
        c.execute("CREATE INDEX nodes_lat ON nodes (lat)")
        c.execute("CREATE INDEX ways_id ON ways (id)")
        self.conn.commit()
        c.close()

    def populate(self,
                 osm_filename,
                 dryrun=False,
                 accept=lambda tags: True,
                 reporter=None,
                 create_indexes=True):
        """Parse the OSM XML file ``osm_filename`` into the database.

        Every ``node`` element is stored; a ``way`` element is stored only
        when ``accept(tags)`` is true.  With ``dryrun`` the elements are
        counted (``self.n_nodes`` / ``self.n_ways``) but nothing is
        inserted.  ``reporter`` is accepted for interface compatibility and
        is not used.  Unless ``dryrun`` is set or ``create_indexes`` is
        false, the SQL indexes are created afterwards.
        """
        # local import: only needed for the newline-free progress message
        import sys

        print("importing %s osm from XML to sqlite database" % osm_filename)

        c = self.get_cursor()

        self.n_nodes = 0
        self.n_ways = 0

        superself = self

        class OSMHandler(xml.sax.ContentHandler):
            """SAX handler that builds Node/Way objects and stores them."""

            def __init__(self):
                xml.sax.ContentHandler.__init__(self)
                # element currently being assembled (a Node or a Way)
                self.currElem = None

            def startElement(self, name, attrs):
                if name == 'node':
                    self.currElem = Node(attrs['id'], float(attrs['lon']),
                                         float(attrs['lat']))
                elif name == 'way':
                    self.currElem = Way(attrs['id'])
                elif name == 'tag':
                    # tags outside a node/way have nothing to attach to
                    if self.currElem is not None:
                        self.currElem.tags[attrs['k']] = attrs['v']
                elif name == 'nd':
                    if self.currElem is not None:
                        self.currElem.nd_ids.append(attrs['ref'])

            def endElement(self, name):
                if name == 'node':
                    if superself.n_nodes % 5000 == 0:
                        print("node %d" % superself.n_nodes)
                    superself.n_nodes += 1
                    if not dryrun:
                        superself.add_node(self.currElem, c)
                    self.currElem = None
                elif name == 'way':
                    if superself.n_ways % 5000 == 0:
                        print("way %d" % superself.n_ways)
                    superself.n_ways += 1
                    if not dryrun and accept(self.currElem.tags):
                        superself.add_way(self.currElem, c)
                    self.currElem = None

        try:
            xml.sax.parse(osm_filename, OSMHandler())
            self.conn.commit()
        finally:
            c.close()

        if not dryrun and create_indexes:
            # no newline: "done" below completes this line
            sys.stdout.write("indexing primary tables... ")
            sys.stdout.flush()
            self.create_indexes()

        print("done")

    def set_endnode_ref_counts(self):
        """Populate nodes.endnode_refs. Necessary for splitting ways into single-edge sub-ways.

        Counts how often each node id occurs in the ``nds`` lists of all
        ways and stores the count on the node row when it is greater
        than one (other rows keep their current value).
        """

        print("counting end-node references to find way split-points")

        c = self.get_cursor()

        try:
            endnode_ref_counts = {}

            c.execute("SELECT nds from ways")

            print("...counting")
            for i, (nds_str, ) in enumerate(c):
                if i % 5000 == 0:
                    print(i)

                for nd in json.loads(nds_str):
                    endnode_ref_counts[nd] = endnode_ref_counts.get(nd, 0) + 1

            print("...updating nodes table")
            for i, (node_id, ref_count) in enumerate(
                    endnode_ref_counts.items()):
                if i % 5000 == 0:
                    print(i)

                if ref_count > 1:
                    c.execute("UPDATE nodes SET endnode_refs = ? WHERE id=?",
                              (ref_count, node_id))

            self.conn.commit()
        finally:
            c.close()

    def index_endnodes(self):
        """Add every node with ``endnode_refs > 1`` to the rtree index.

        Does nothing when the database was opened without an rtree index.
        """
        if self.index is None:
            return

        print("indexing endpoint nodes into rtree")

        c = self.get_cursor()

        try:
            #TODO index endnodes if they're at the end of oneways - which only have one way ref, but are still endnodes
            c.execute("SELECT id, lat, lon FROM nodes WHERE endnode_refs > 1")

            for node_id, lat, lon in c:
                # a point is indexed as a degenerate bounding box
                self.index.add(int(node_id), (lon, lat, lon, lat))
        finally:
            c.close()

    def create_and_populate_edges_table(self, tolerant=False):
        """Split every way at shared nodes and store the pieces as edges.

        Runs ``set_endnode_ref_counts`` and ``index_endnodes`` first, then
        creates the ``edges`` table.  A way is cut at each internal node
        whose ``endnode_refs`` is greater than one; each piece is inserted
        with id ``"<way id>-<piece number>"``.  An IndexError while
        processing a way (for example from ``node()`` for an unknown node
        id) is re-raised unless ``tolerant`` is true, in which case the
        rest of that way is skipped.
        """
        # local import: only needed for the newline-free progress message
        import sys

        self.set_endnode_ref_counts()
        self.index_endnodes()

        print("splitting ways and inserting into edge table")

        c = self.get_cursor()

        try:
            c.execute(
                "CREATE TABLE edges (id TEXT, parent_id TEXT, start_nd TEXT, end_nd TEXT, dist FLOAT, geom TEXT)"
            )

            for way_number, way in enumerate(self.ways()):
                try:
                    if way_number % 5000 == 0:
                        print(way_number)

                    subways = []
                    # add first node to the current subway
                    curr_subway = [way.nds[0]]
                    # for every internal node of the way
                    for nd in way.nds[1:-1]:
                        curr_subway.append(nd)
                        # node reference count is greater than one, node is
                        # shared by two ways
                        if self.node(nd)[4] > 1:
                            subways.append(curr_subway)
                            curr_subway = [nd]
                    # add the last node to the current subway, and store the
                    # subway
                    curr_subway.append(way.nds[-1])
                    subways.append(curr_subway)

                    #insert into edge table
                    for subway_number, subway in enumerate(subways):
                        coords = []
                        for nd in subway:
                            node_row = self.node(nd)
                            # node rows are (id, tags, lat, lon, endnode_refs)
                            coords.append((node_row[3], node_row[2]))
                        packt = pack_coords(coords)
                        dist = sum([
                            vincenty(lat1, lng1, lat2, lng2)
                            for (lng1, lat1), (lng2, lat2) in cons(coords)
                        ])
                        c.execute(
                            "INSERT INTO edges VALUES (?, ?, ?, ?, ?, ?)",
                            ("%s-%s" % (way.id, subway_number), way.id,
                             subway[0], subway[-1], dist, packt))
                except IndexError:
                    if not tolerant:
                        raise

            # no newline: "done" below completes this line
            sys.stdout.write("indexing edges... ")
            sys.stdout.flush()
            c.execute("CREATE INDEX edges_id ON edges (id)")
            c.execute("CREATE INDEX edges_parent_id ON edges (parent_id)")
            print("done")

            self.conn.commit()
        finally:
            c.close()

    def edge(self, id):
        """Return the edge with the given id, joined with its way's tags.

        The result is ``(id, parent_id, start_nd, end_nd, dist, coords,
        tags)`` with the geometry run through ``unpack_coords`` and the
        tags decoded from JSON.

        Raises IndexError when no such edge exists.
        """
        c = self.get_cursor()

        try:
            c.execute(
                "SELECT edges.*, ways.tags FROM edges, ways WHERE ways.id = edges.parent_id AND edges.id = ?",
                (id, ))
            ret = c.fetchone()
        finally:
            c.close()

        if ret is None:
            raise IndexError("Database does not have an edge with id '%s'" %
                             id)

        way_id, parent_id, from_nd, to_nd, dist, geom, tags = ret
        return (way_id, parent_id, from_nd, to_nd, dist,
                unpack_coords(geom), json.loads(tags))

    def edges(self):
        """Yield every edge in the same shape as ``edge`` returns."""
        c = self.get_cursor()

        try:
            c.execute(
                "SELECT edges.*, ways.tags FROM edges, ways WHERE ways.id = edges.parent_id"
            )

            for way_id, parent_id, from_nd, to_nd, dist, geom, tags in c:
                yield (way_id, parent_id, from_nd, to_nd, dist,
                       unpack_coords(geom), json.loads(tags))
        finally:
            # also runs when the consumer abandons the generator early
            c.close()

    def add_way(self, way, curs=None):
        """Insert ``way`` (id, tags, nd_ids) unless its id already exists.

        When ``curs`` is given it is used as-is and the caller is
        responsible for committing; otherwise a private cursor is used,
        committed and closed.
        """
        if curs is None:
            curs = self.get_cursor()
            close_cursor = True
        else:
            close_cursor = False

        curs.execute(
            "INSERT OR IGNORE INTO ways (id, tags, nds) VALUES (?, ?, ?)",
            (way.id, json.dumps(way.tags), json.dumps(way.nd_ids)))

        if close_cursor:
            self.conn.commit()
            curs.close()

    def add_node(self, node, curs=None):
        """Insert ``node`` (id, tags, lat, lon) unless its id already exists.

        When ``curs`` is given it is used as-is and the caller is
        responsible for committing; otherwise a private cursor is used,
        committed and closed.
        """
        if curs is None:
            curs = self.get_cursor()
            close_cursor = True
        else:
            close_cursor = False

        curs.execute(
            "INSERT OR IGNORE INTO nodes (id, tags, lat, lon) VALUES (?, ?, ?, ?)",
            (node.id, json.dumps(node.tags), node.lat, node.lon))

        if close_cursor:
            self.conn.commit()
            curs.close()

    def nodes(self):
        """Yield every row of the ``nodes`` table."""
        c = self.get_cursor()

        try:
            c.execute("SELECT * FROM nodes")

            for node_row in c:
                yield node_row
        finally:
            # also runs when the consumer abandons the generator early
            c.close()

    def node(self, id):
        """Return the ``nodes`` row with the given id.

        Raises IndexError when no such node exists.
        """
        c = self.get_cursor()

        try:
            c.execute("SELECT * FROM nodes WHERE id = ?", (id, ))
            ret = c.fetchone()
        finally:
            c.close()

        if ret is None:
            raise IndexError("Database does not have node with id '%s'" % id)

        return ret

    def nearest_node(self, lat, lon, range=0.005):
        """Return ``(id, lat, lon, distance)`` of the node nearest to a point.

        The rtree index is asked first.  When there is no index, or it
        returns no hit, nodes with ``endnode_refs > 1`` inside the square
        of half-width ``range`` around the point are searched instead.
        The distance is the plain Euclidean distance in coordinate units.
        Returns ``(None, None, None, None)`` when nothing is found.
        """
        c = self.get_cursor()

        try:
            nearest_ids = []
            if self.index is not None:
                nearest_ids = list(self.index.nearest((lon, lat), 1))

            if nearest_ids:
                c.execute("SELECT id, lat, lon FROM nodes WHERE id = ?",
                          (nearest_ids[0], ))
            else:
                c.execute(
                    "SELECT id, lat, lon FROM nodes WHERE endnode_refs > 1 AND lat > ? AND lat < ? AND lon > ? AND lon < ?",
                    (lat - range, lat + range, lon - range, lon + range))

            dists = [(nid, nlat, nlon,
                      ((nlat - lat)**2 + (nlon - lon)**2)**0.5)
                     for nid, nlat, nlon in c]
        finally:
            c.close()

        if len(dists) == 0:
            return (None, None, None, None)

        return min(dists, key=lambda x: x[3])

    def nearest_of(self, lat, lon, nodes):
        """Return ``(id, lat, lon, distance)`` of the closest of ``nodes``.

        ``nodes`` is an iterable of node ids; each is converted with
        ``str`` and bound as a query parameter.  The lookup is done in
        batches so the number of bound parameters per statement stays
        small.  Returns ``(None, None, None, None)`` when none of the ids
        is in the database.
        """
        node_ids = [str(x) for x in nodes]
        batch_size = self._MAX_SQL_PARAMS

        c = self.get_cursor()

        try:
            dists = []
            start = 0
            while start < len(node_ids):
                batch = node_ids[start:start + batch_size]
                start += batch_size

                c.execute("SELECT id, lat, lon FROM nodes WHERE id IN (%s)" %
                          ",".join("?" * len(batch)), batch)

                dists.extend(
                    (nid, nlat, nlon,
                     ((nlat - lat)**2 + (nlon - lon)**2)**0.5)
                    for nid, nlat, nlon in c)
        finally:
            c.close()

        if len(dists) == 0:
            return (None, None, None, None)

        return min(dists, key=lambda x: x[3])

    def way(self, id):
        """Return a ``WayRecord`` built from the ``ways`` row with this id.

        Raises Exception when no such way exists.
        """
        c = self.get_cursor()

        try:
            c.execute("SELECT id, tags, nds FROM ways WHERE id = ?", (id, ))
            row = c.fetchone()
        finally:
            c.close()

        if row is None:
            raise Exception("OSMDB has no way with id '%s'" % id)

        way_id, tags_str, nds_str = row
        return WayRecord(way_id, tags_str, nds_str)

    def way_nds(self, id):
        """Return the decoded node-id list of the way with this id.

        Raises StopIteration when no such way exists (unchanged from the
        previous behaviour so existing callers keep working).
        """
        c = self.get_cursor()

        try:
            c.execute("SELECT nds FROM ways WHERE id = ?", (id, ))
            (nds_str, ) = next(c)
        finally:
            c.close()

        return json.loads(nds_str)

    def ways(self):
        """Yield a ``WayRecord`` for every row of the ``ways`` table."""
        c = self.get_cursor()

        try:
            c.execute("SELECT id, tags, nds FROM ways")

            for way_id, tags_str, nds_str in c:
                yield WayRecord(way_id, tags_str, nds_str)
        finally:
            # also runs when the consumer abandons the generator early
            c.close()

    def count_ways(self):
        """Return the number of rows in the ``ways`` table."""
        c = self.get_cursor()

        try:
            c.execute("SELECT count(*) FROM ways")
            ret = c.fetchone()[0]
        finally:
            c.close()

        return ret

    def count_edges(self):
        """Return the number of rows in the ``edges`` table."""
        c = self.get_cursor()

        try:
            c.execute("SELECT count(*) FROM edges")
            ret = c.fetchone()[0]
        finally:
            c.close()

        return ret

    def delete_way(self, id):
        """Delete the way with this id.

        The deletion is not committed here; the caller commits on
        ``self.conn`` when appropriate.
        """
        c = self.get_cursor()

        try:
            c.execute("DELETE FROM ways WHERE id = ?", (id, ))
        finally:
            c.close()

    def bounds(self):
        """Return ``(min lon, min lat, max lon, max lat)`` over all nodes."""
        c = self.get_cursor()

        try:
            c.execute(
                "SELECT min(lon), min(lat), max(lon), max(lat) FROM nodes")
            ret = c.fetchone()
        finally:
            c.close()

        return ret

    def execute(self, sql, args=None):
        """Yield the result rows of an arbitrary SQL statement.

        This is a generator: the statement is only executed once the
        caller starts iterating.  ``args`` are bound as query parameters
        when given.
        """
        c = self.get_cursor()

        try:
            if args:
                rows = c.execute(sql, args)
            else:
                rows = c.execute(sql)

            for row in rows:
                yield row
        finally:
            # also runs when the consumer abandons the generator early
            c.close()

    def cursor(self):
        """Return a new cursor; same as ``get_cursor``."""
        return self.get_cursor()
Exemplo n.º 8
0
class Graph:
    """Builds a graph from GPS trips by seeding and clustering points on traces.

    ``cluster_traces`` runs the whole pipeline: trip edges are created and
    indexed, cluster seeds are dropped along the trips, the seeds are
    iteratively moved using the traces they intersect, graph edges are
    created between consecutive clusters, and the result is written to
    text files and to a SQLite database.
    """

    def __init__(self, all_trips):
        """Store the trips and create empty seed/edge containers."""
        # trips
        self.all_trips = all_trips

        # cluster seeds
        self.cluster_seeds = {}
        self.cluster_seed_id = 0
        self.cluster_seed_index = Rtree()

        # graph edges
        self.graph_edges = {}  # indexed by "edge id"
        self.graph_edge_id = 0
        self.graph_edge_lookup = {}  # indexed by "location1_id,location2_id"

    def cluster_traces(self):
        """Run every stage of the pipeline in order."""
        self._create_all_trip_edges()
        self._generate_cluster_seeds()
        self._cluster_seeds_with_traces()
        self._generate_graph_edges()
        self._output_graph_to_db()

    def _generate_graph_edges(self):
        """Connect consecutive clusters met along each trip with graph edges.

        Trip edges are visited in id order; edges without a cluster are
        skipped.  The resulting graph edges are written to file.
        """

        sys.stdout.write("Generating graph edges... ")
        sys.stdout.flush()

        # iterate through all trips
        for trip in self.all_trips:

            # grab trip edges in id order (sorted() also works where
            # dict.values() is not a list)
            trip_edges = sorted(trip.edges.values(), key=lambda x: x.id)

            # storage for previous cluster
            prev_cluster = None

            # iterate through trip edges
            for trip_edge in trip_edges:

                # if the current trip edge is clustered
                if trip_edge.cluster is not None:

                    # create a graph edge between the previous cluster and the current cluster
                    self._create_graph_edge(prev_cluster, trip_edge.cluster)

                    # update previous cluster with current cluster
                    prev_cluster = trip_edge.cluster

        # output graph edges
        self._write_graph_edges_to_file()

        print("done.")

    def _create_graph_edge(self, in_node, out_node):
        """Create a graph edge from in_node to out_node unless one exists.

        Does nothing when either node is None.
        """

        # if in_node or out_node is None, return without doing anything
        if (in_node is None) or (out_node is None):
            return

        # if there is already a graph edge with the same nodes, we are done
        if self._find_graph_edge(in_node, out_node) is not None:
            return

        # create new graph edge object
        new_graph_edge = Edge(self.graph_edge_id, in_node, out_node)

        # add new graph edge to graph edge dictionary
        self.graph_edges[new_graph_edge.id] = new_graph_edge

        # add new graph edge to graph edge lookup dictionary
        self.graph_edge_lookup[str(in_node.id) + "," +
                               str(out_node.id)] = new_graph_edge

        # increment graph edge id
        self.graph_edge_id += 1

    def _find_graph_edge(self, node1, node2):
        """Return the graph edge from node1 to node2, or None if absent."""

        # generate edge lookup key
        edge_lookup_key = str(node1.id) + "," + str(node2.id)

        # direct dictionary lookup; None when the edge is not in the table
        return self.graph_edge_lookup.get(edge_lookup_key)

    def _cluster_seeds_with_traces(self):
        """Move the cluster seeds until their average movement is small.

        Repeats ``_find_points_on_traces`` while its result is at least
        ``cluster_distance_moved_threshold`` and writes the seeds to file
        after every pass.
        """

        # storage for total cluster distance moved
        total_cluster_distance_moved = float('infinity')

        # iterate until total cluster distance moved below threshold
        while (total_cluster_distance_moved >=
               cluster_distance_moved_threshold):

            # find all points on traces and move clusters
            total_cluster_distance_moved = self._find_points_on_traces()

            # write cluster seeds to file
            self._write_cluster_seeds_to_file(
                "edelkamp_cluster_seeds_clustered.txt")

    def _find_points_on_traces(self):
        """Do one clustering pass and return the average seed movement.

        For every cluster seed, the intersection points with all trips
        (looking left and right of the seed's bearing) are collected, the
        seed's centroid is recomputed from them, and the points are
        attached to the seed again afterwards.  Returns 0.0 when there
        are no cluster seeds.
        """

        # nothing to move (and nothing to average over)
        if not self.cluster_seeds:
            return 0.0

        seed_count = len(self.cluster_seeds)

        # storage for total cluster distance moved
        total_cluster_distance_moved = 0.0

        # iterate through all cluster seeds, counting from one for the
        # progress message
        for seed_counter, cluster_seed in enumerate(
                self.cluster_seeds.values(), 1):

            # clear current trace points from cluster
            cluster_seed.clear_trace_points()

            sys.stdout.write("\rFinding intersecting points with cluster " +
                             str(seed_counter) + "/" + str(seed_count) +
                             "... ")
            sys.stdout.flush()

            # determine leftward cluster bearing
            leftward_bearing = math.fmod((cluster_seed.bearing - 90.0) + 360.0,
                                         360.0)

            # determine rightward cluster bearing
            rightward_bearing = math.fmod(
                (cluster_seed.bearing + 90.0) + 360.0, 360.0)

            # storage for candidate trace points
            candidate_trace_points = []

            # iterate through all trips
            for trip in self.all_trips:

                # find leftward intersection points with trip
                candidate_trace_points.extend(
                    self._find_intersection_points(trip, cluster_seed,
                                                   leftward_bearing))

                # find rightward intersection points with trip
                candidate_trace_points.extend(
                    self._find_intersection_points(trip, cluster_seed,
                                                   rightward_bearing))

            # add candidate trace points to cluster
            cluster_seed.add_trace_points(candidate_trace_points)

            # recompute cluster centroid
            total_cluster_distance_moved += (
                cluster_seed.recompute_cluster_centroid())

            # clear current trace points from cluster
            cluster_seed.clear_trace_points()

            # add candidate trace points to cluster, again
            cluster_seed.add_trace_points(candidate_trace_points)

        # normalize total cluster distance moved by number of seeds
        total_cluster_distance_moved = (total_cluster_distance_moved /
                                        seed_count)

        # and we're done!
        print("done (clusters moved an average of " +
              str(total_cluster_distance_moved) + " meters).")

        # return total cluster distance moved
        return total_cluster_distance_moved

    def _find_intersection_points(self, trip, cluster, cluster_bearing):
        """Return trace points where the cluster's ray crosses edges of trip.

        Only trip edges near the cluster are considered, and an
        intersection counts only when it lies no further from the edge's
        in_node than the edge is long.
        """

        # find all nearby trip edge id's
        nearby_trip_edge_ids = self._find_nearby_trip_edge_ids(
            cluster, edge_bounding_box_size, trip.edge_index)

        # storage for intersection points
        intersection_points = []

        # iterate through all nearby edge id's
        for edge_id in nearby_trip_edge_ids:

            # grab current edge
            edge = trip.edges[edge_id]

            # determine intersection point between edge and cluster
            intersection_point = self._intersection_point(
                edge.in_node, edge.bearing, cluster, cluster_bearing)

            # no intersection point: nothing to record for this edge
            if intersection_point is None:
                continue

            # determine distance from edge in_node to intersection point
            intersection_distance = self._distance_coords(
                edge.in_node.latitude, edge.in_node.longitude,
                intersection_point[0], intersection_point[1])

            # if intersection distance is less than edge length
            if intersection_distance <= edge.length:

                # this edge has a valid intersection point
                intersection_points.append(
                    TracePoint(intersection_point[0], intersection_point[1],
                               edge.bearing, edge))

        # return all intersection points for this trip
        return intersection_points

    def _generate_cluster_seeds(self):
        """Drop cluster seeds along every trip and write them to file.

        Trips without any location are skipped.
        """

        trip_count = len(self.all_trips)

        # iterate through all trips
        for trip_number, trip in enumerate(self.all_trips, 1):

            sys.stdout.write("\rCluster seeding trip " + str(trip_number) +
                             "/" + str(trip_count) + "... ")
            sys.stdout.flush()

            # a trip without locations has no edges to seed
            if not trip.locations:
                continue

            # set last cluster seed distance to zero for first trip location
            trip.locations[0].last_cluster_seed_distance = 0.0

            # iterate through all trip locations
            for j in range(1, len(trip.locations)):

                # drop cluster seeds along current edge
                self._drop_cluster_seeds_along_edge(trip.locations[j - 1],
                                                    trip.locations[j])

        print("done (generated " + str(len(self.cluster_seeds)) +
              " cluster seeds).")

        # write cluster seeds to file
        self._write_cluster_seeds_to_file("edelkamp_cluster_seeds_initial.txt")

    def _drop_cluster_seeds_along_edge(self, in_node, out_node):
        """Drop cluster seeds every ``cluster_seed_interval`` along an edge.

        The spacing continues from ``in_node.last_cluster_seed_distance``.
        At each drop position a new seed is created unless a matching
        seed already exists nearby.  ``out_node.last_cluster_seed_distance``
        is set to the distance remaining after the last drop position, or
        to the accumulated distance when no position fell on this edge.
        """

        # determine edge length
        edge_length = self._distance(in_node, out_node)

        # storage for relative cluster seed intervals
        rel_cluster_seed_intervals = []

        # distance along edge for first cluster seed
        curr_cluster_seed_distance = (cluster_seed_interval -
                                      in_node.last_cluster_seed_distance)

        # determine the relative cluster seed intervals needed for this edge
        while curr_cluster_seed_distance <= edge_length:
            rel_cluster_seed_intervals.append(curr_cluster_seed_distance)
            curr_cluster_seed_distance += cluster_seed_interval

        # if no cluster seeds are due along this edge, carry the distance on
        if not rel_cluster_seed_intervals:
            out_node.last_cluster_seed_distance = (
                in_node.last_cluster_seed_distance + edge_length)
            return

        # determine bearing of current edge
        edge_bearing = self._path_bearing(in_node, out_node)

        # create cluster seeds for edge
        for rel_cluster_seed_interval in rel_cluster_seed_intervals:

            # determine fraction along current edge to drop cluster seed
            fraction_along = rel_cluster_seed_interval / edge_length

            # determine point along line to drop cluster seed
            (new_cluster_seed_latitude,
             new_cluster_seed_longitude) = self._point_along_line(
                 in_node, out_node, fraction_along)

            # create a new cluster seed unless an existing one matches
            if self._find_matching_cluster_seed(new_cluster_seed_latitude,
                                                new_cluster_seed_longitude,
                                                edge_bearing) is None:
                self._create_new_cluster_seed(new_cluster_seed_latitude,
                                              new_cluster_seed_longitude,
                                              edge_bearing)

        # update last cluster seed distance from the last drop position
        out_node.last_cluster_seed_distance = self._distance_coords(
            new_cluster_seed_latitude, new_cluster_seed_longitude,
            out_node.latitude, out_node.longitude)

    def _find_matching_cluster_seed(self, latitude, longitude, bearing):
        """Return the first nearby seed matching position and bearing, or None.

        Up to 25 nearest seeds from the spatial index are checked in the
        order the index returns them.  A seed matches when it is at most
        ``cluster_seed_interval`` away and the cosine of the bearing
        difference is at least ``cluster_bearing_difference_limit``.
        """

        # locate nearest existing cluster seeds
        closest_cluster_seeds = list(
            self.cluster_seed_index.nearest((longitude, latitude), 25))

        # iterate through closest existing cluster seeds
        for curr_cluster_seed_id in closest_cluster_seeds:

            # grab current cluster seed
            curr_cluster_seed = self.cluster_seeds[curr_cluster_seed_id]

            # compute distance to current cluster seed
            distance = self._distance_coords(latitude, longitude,
                                             curr_cluster_seed.latitude,
                                             curr_cluster_seed.longitude)

            # determine bearing difference between edge and current cluster seed
            bearing_difference = math.cos(
                math.radians(bearing - curr_cluster_seed.bearing))

            # close enough and heading in a similar direction: stop searching
            if ((distance <= cluster_seed_interval)
                    and (bearing_difference >=
                         cluster_bearing_difference_limit)):
                return curr_cluster_seed

        # there was not a matched cluster seed
        return None

    def _create_new_cluster_seed(self, latitude, longitude, bearing):
        """Create, register and spatially index a new cluster seed."""

        # create a new cluster seed
        new_cluster_seed = ClusterSeed(self.cluster_seed_id, latitude,
                                       longitude, bearing)

        # add new cluster seed to the cluster seeds dictionary
        self.cluster_seeds[new_cluster_seed.id] = new_cluster_seed

        # insert new cluster seed into spatial index
        self.cluster_seed_index.insert(
            new_cluster_seed.id,
            (new_cluster_seed.longitude, new_cluster_seed.latitude))

        # increment cluster seed id
        self.cluster_seed_id += 1

        # return new cluster seed
        return new_cluster_seed

    def _create_all_trip_edges(self):
        """Give every trip an ``edges`` dict and an ``edge_index`` Rtree.

        One edge is created per pair of consecutive trip locations; edge
        ids count up from zero within each trip.
        """

        sys.stdout.write("Creating and indexing edges for all trips... ")
        sys.stdout.flush()

        # iterate through all trips
        for trip in self.all_trips:

            # add edge storage to trip
            trip.edges = {}

            # add edge index to trip
            trip.edge_index = Rtree()

            # one edge per pair of consecutive locations; the edge id is the
            # position of its first location
            for trip_edge_id in range(len(trip.locations) - 1):

                # create new edge
                new_edge = Edge(trip_edge_id, trip.locations[trip_edge_id],
                                trip.locations[trip_edge_id + 1])

                # insert edge into dictionary
                trip.edges[trip_edge_id] = new_edge

                # insert edge into index
                self._index_trip_edge(new_edge, trip.edge_index)

        # done
        print("done.")

    def _index_trip_edge(self, edge, edge_index):
        """Insert the bounding box of ``edge`` into ``edge_index``."""

        # determine edge minx, miny, maxx, maxy values
        edge_minx = min(edge.in_node.longitude, edge.out_node.longitude)
        edge_miny = min(edge.in_node.latitude, edge.out_node.latitude)
        edge_maxx = max(edge.in_node.longitude, edge.out_node.longitude)
        edge_maxy = max(edge.in_node.latitude, edge.out_node.latitude)

        # insert edge into spatial index
        edge_index.insert(edge.id,
                          (edge_minx, edge_miny, edge_maxx, edge_maxy))

    def _find_nearby_trip_edge_ids(self, location, distance, edge_index):
        """Return ids of indexed edges intersecting a box around location.

        The box is centred on ``location`` and extends ``distance / 2`` in
        each direction, converted to degrees with the spatialfunclib
        METERS_PER_DEGREE constants.
        """

        # define longitude/latitude offset
        lon_offset = ((distance / 2.0) /
                      spatialfunclib.METERS_PER_DEGREE_LONGITUDE)
        lat_offset = ((distance / 2.0) /
                      spatialfunclib.METERS_PER_DEGREE_LATITUDE)

        # create bounding box
        bounding_box = (location.longitude - lon_offset,
                        location.latitude - lat_offset,
                        location.longitude + lon_offset,
                        location.latitude + lat_offset)

        # return nearby edge id's inside bounding box
        return list(edge_index.intersection(bounding_box))

    def _intersection_point(self, location1, location1_bearing, location2,
                            location2_bearing):
        """Delegate to ``spatialfunclib.intersection_point`` for two locations."""
        return spatialfunclib.intersection_point(
            location1.latitude, location1.longitude, location1_bearing,
            location2.latitude, location2.longitude, location2_bearing)

    def _point_along_line(self, location1, location2, fraction_along):
        """Delegate to ``spatialfunclib.point_along_line`` for two locations."""
        return spatialfunclib.point_along_line(location1.latitude,
                                               location1.longitude,
                                               location2.latitude,
                                               location2.longitude,
                                               fraction_along)

    def _path_bearing(self, location1, location2):
        """Delegate to ``spatialfunclib.path_bearing`` for two locations."""
        return spatialfunclib.path_bearing(location1.latitude,
                                           location1.longitude,
                                           location2.latitude,
                                           location2.longitude)

    def _distance(self, location1, location2):
        """Delegate to ``spatialfunclib.distance`` for two locations."""
        return spatialfunclib.distance(location1.latitude, location1.longitude,
                                       location2.latitude, location2.longitude)

    def _distance_coords(self, location1_latitude, location1_longitude,
                         location2_latitude, location2_longitude):
        """Delegate to ``spatialfunclib.distance`` for raw coordinates."""
        return spatialfunclib.distance(location1_latitude, location1_longitude,
                                       location2_latitude, location2_longitude)

    def _write_cluster_seeds_to_file(self,
                                     filename="edelkamp_cluster_seeds.txt"):
        """Write one ``latitude,longitude,bearing`` line per cluster seed."""

        # open graph file; the with-block closes it even if a write fails
        with open(filename, 'w') as graph_file:

            # iterate through all cluster_seeds
            for cluster_seed in self.cluster_seeds.values():

                # output cluster seed to file
                graph_file.write(
                    str(cluster_seed.latitude) + "," +
                    str(cluster_seed.longitude) + "," +
                    str(cluster_seed.bearing) + "\n")

    def _write_graph_edges_to_file(self):
        """Write every graph edge to ``edelkamp_cluster_edges.txt``.

        Each edge is two ``latitude,longitude`` lines (in_node, then
        out_node) followed by a blank line.
        """

        # open graph file; the with-block closes it even if a write fails
        with open('edelkamp_cluster_edges.txt', 'w') as graph_file:

            # iterate through all graph_edges
            for graph_edge in self.graph_edges.values():

                # output edge to file
                graph_file.write(
                    str(graph_edge.in_node.latitude) + "," +
                    str(graph_edge.in_node.longitude) + "\n")
                graph_file.write(
                    str(graph_edge.out_node.latitude) + "," +
                    str(graph_edge.out_node.longitude) + "\n\n")

    def _output_graph_to_db(self):
        """Write the cluster seeds and graph edges to ``edelkamp_graph.db``.

        Every value of ``self.cluster_seeds`` becomes a row of the ``nodes``
        table (id, latitude, longitude) and every value of
        ``self.graph_edges`` becomes a row of the ``edges`` table
        (id, in_node id, out_node id).

        The tables are created when missing and emptied before writing, so
        the method can be run repeatedly against the same database file
        (previously a second run failed on ``CREATE TABLE``).  All rows are
        written in one transaction and the connection is closed even when
        an error occurs.
        """

        # output that we are starting the database writing process...
        sys.stdout.write("\nOutputting graph to database... ")
        sys.stdout.flush()

        # connect to database
        conn = sqlite3.connect("edelkamp_graph.db")

        try:
            # grab cursor
            cur = conn.cursor()

            # create nodes and edges tables unless a previous run already did
            cur.execute(
                "CREATE TABLE IF NOT EXISTS nodes "
                "(id INTEGER, latitude FLOAT, longitude FLOAT)")
            cur.execute(
                "CREATE TABLE IF NOT EXISTS edges "
                "(id INTEGER, in_node INTEGER, out_node INTEGER)")

            # remove values left behind by a previous run
            cur.execute("DELETE FROM nodes")
            cur.execute("DELETE FROM edges")

            # insert all cluster seeds into nodes table; bound parameters keep
            # the full float precision instead of going through str()
            cur.executemany(
                "INSERT INTO nodes VALUES (?, ?, ?)",
                ((cluster_seed.id, cluster_seed.latitude,
                  cluster_seed.longitude)
                 for cluster_seed in self.cluster_seeds.values()))

            # insert all graph edges into edges table
            cur.executemany(
                "INSERT INTO edges VALUES (?, ?, ?)",
                ((graph_edge.id, graph_edge.in_node.id,
                  graph_edge.out_node.id)
                 for graph_edge in self.graph_edges.values()))

            # commit deletes and inserts together
            conn.commit()
        finally:
            # close database connection, also on failure
            conn.close()

        print("done.")
Exemplo n.º 9
0
class OSMDB:
    def __init__(self, dbname,overwrite=False,rtree_index=True):
        if overwrite:
            try:
                os.remove( dbname )
            except OSError:
                pass
            
        self.conn = sqlite3.connect(dbname)
        
        if rtree_index:
            self.index = Rtree( dbname )
        else:
            self.index = None
        
        if overwrite:
            self.setup()
        
    def setup(self):
        c = self.conn.cursor()
        c.execute( "CREATE TABLE nodes (id TEXT, tags TEXT, lat FLOAT, lon FLOAT, endnode_refs INTEGER DEFAULT 1)" )
        c.execute( "CREATE TABLE ways (id TEXT, tags TEXT, nds TEXT)" )
        self.conn.commit()
        c.close()
        
    def create_indexes(self):
        c = self.conn.cursor()
        c.execute( "CREATE INDEX nodes_id ON nodes (id)" )
        c.execute( "CREATE INDEX nodes_lon ON nodes (lon)" )
        c.execute( "CREATE INDEX nodes_lat ON nodes (lat)" )
        c.execute( "CREATE INDEX ways_id ON ways (id)" )
        self.conn.commit()
        c.close()
        
    def populate(self, osm_filename, accept=lambda tags: True, reporter=None):
        print "importing osm from XML to sqlite database"
        
        c = self.conn.cursor()
        
        self.n_nodes = 0
        self.n_ways = 0
        
        superself = self

        class OSMHandler(xml.sax.ContentHandler):
            @classmethod
            def setDocumentLocator(self,loc):
                pass

            @classmethod
            def startDocument(self):
                pass

            @classmethod
            def endDocument(self):
                pass

            @classmethod
            def startElement(self, name, attrs):
                if name=='node':
                    self.currElem = Node(attrs['id'], float(attrs['lon']), float(attrs['lat']))
                elif name=='way':
                    self.currElem = Way(attrs['id'])
                elif name=='tag':
                    self.currElem.tags[attrs['k']] = attrs['v']
                elif name=='nd':
                    self.currElem.nd_ids.append( attrs['ref'] )

            @classmethod
            def endElement(self,name):
                if name=='node':
                    if superself.n_nodes%5000==0:
                        print "node %d"%superself.n_nodes
                    superself.n_nodes += 1
                    superself.add_node( self.currElem, c )
                elif name=='way':
                    if superself.n_ways%5000==0:
                        print "way %d"%superself.n_ways
                    superself.n_ways += 1
                    superself.add_way( self.currElem, c )

            @classmethod
            def characters(self, chars):
                pass

        xml.sax.parse(osm_filename, OSMHandler)
        
        self.conn.commit()
        c.close()
        
        print "indexing primary tables...",
        self.create_indexes()
        print "done"
        
    def set_endnode_ref_counts( self ):
        """Populate ways.endnode_refs. Necessary for splitting ways into single-edge sub-ways"""
        
        print "counting end-node references to find way split-points"
        
        c = self.conn.cursor()
        
        endnode_ref_counts = {}
        
        c.execute( "SELECT nds from ways" )
        
        print "...counting"
        for i, (nds_str,) in enumerate(c):
            if i%5000==0:
                print i
                
            nds = json.loads( nds_str )
            for nd in nds:
                endnode_ref_counts[ nd ] = endnode_ref_counts.get( nd, 0 )+1
        
        print "...updating nodes table"
        for i, (node_id, ref_count) in enumerate(endnode_ref_counts.items()):
            if i%5000==0:
                print i
            
            if ref_count > 1:
                c.execute( "UPDATE nodes SET endnode_refs = ? WHERE id=?", (ref_count, node_id) )
            
        self.conn.commit()
        c.close()
    
    def index_endnodes( self ):
        print "indexing endpoint nodes into rtree"
        
        c = self.conn.cursor()
        
        #TODO index endnodes if they're at the end of oneways - which only have one way ref, but are still endnodes
        c.execute( "SELECT id, lat, lon FROM nodes WHERE endnode_refs > 1" )
        
        for id, lat, lon in c:
            self.index.add( int(id), (lon, lat, lon, lat) )
            
        c.close()
    
    def create_and_populate_edges_table( self, tolerant=False ):
        self.set_endnode_ref_counts()
        self.index_endnodes()
        
        print "splitting ways and inserting into edge table"
        
        c = self.conn.cursor()
        
        c.execute( "CREATE TABLE edges (id TEXT, parent_id TEXT, start_nd TEXT, end_nd TEXT, dist FLOAT, geom TEXT)" )
        
        for i, way in enumerate(self.ways()):
            try:
                if i%5000==0:
                    print i
                
                subways = []
                curr_subway = [ way.nds[0] ] # add first node to the current subway
                for nd in way.nds[1:-1]:     # for every internal node of the way
                    curr_subway.append( nd )
                    if self.node(nd)[4] > 1: # node reference count is greater than one, node is shared by two ways
                        subways.append( curr_subway )
                        curr_subway = [ nd ]
                curr_subway.append( way.nds[-1] ) # add the last node to the current subway, and store the subway
                subways.append( curr_subway );
                
                #insert into edge table
                for i, subway in enumerate(subways):
                    coords = [(lambda x:(x[3],x[2]))(self.node(nd)) for nd in subway]
                    packt = pack_coords( coords )
                    dist = sum([vincenty(lat1, lng1, lat2, lng2) for (lng1, lat1), (lng2, lat2) in cons(coords)])
                    c.execute( "INSERT INTO edges VALUES (?, ?, ?, ?, ?, ?)", ("%s-%s"%(way.id, i),
                                                                               way.id,
                                                                               subway[0],
                                                                               subway[-1],
                                                                               dist,
                                                                               packt) )
            except IndexError:
                if tolerant:
                    continue
                else:
                    raise
        
        print "indexing edges...",
        c.execute( "CREATE INDEX edges_id ON edges (id)" )
        c.execute( "CREATE INDEX edges_parent_id ON edges (parent_id)" )
        print "done"
        
        self.conn.commit()
        c.close()
        
    def edge(self, id):
        c = self.conn.cursor()
        
        c.execute( "SELECT edges.*, ways.tags FROM edges, ways WHERE ways.id = edges.parent_id AND edges.id = ?", (id,) )
        
        try:
            ret = c.next()
            way_id, parent_id, from_nd, to_nd, dist, geom, tags = ret
            return (way_id, parent_id, from_nd, to_nd, dist, unpack_coords( geom ), json.loads(tags))
        except StopIteration:
            c.close()
            raise IndexError( "Database does not have an edge with id '%s'"%id )
            
        c.close()
        return ret
        
    def edges(self):
        c = self.conn.cursor()
        
        c.execute( "SELECT edges.*, ways.tags FROM edges, ways WHERE ways.id = edges.parent_id" )
        
        for way_id, parent_id, from_nd, to_nd, dist, geom, tags in c:
            yield (way_id, parent_id, from_nd, to_nd, dist, unpack_coords(geom), json.loads(tags))
            
        c.close()
        
        
    def add_way( self, way, curs=None ):
        if curs is None:
            curs = self.conn.cursor()
            close_cursor = True
        else:
            close_cursor = False
            
        curs.execute("INSERT INTO ways (id, tags, nds) VALUES (?, ?, ?)", (way.id, json.dumps(way.tags), json.dumps(way.nd_ids) ))
        
        if close_cursor:
            self.conn.commit()
            curs.close()
            
    def add_node( self, node, curs=None ):
        if curs is None:
            curs = self.conn.cursor()
            close_cursor = True
        else:
            close_cursor = False
            
        curs.execute("INSERT INTO nodes (id, tags, lat, lon) VALUES (?, ?, ?, ?)", ( node.id, json.dumps(node.tags), node.lat, node.lon ) )
        
        if close_cursor:
            self.conn.commit()
            curs.close()
        
    def nodes(self):
        c = self.conn.cursor()
        
        c.execute( "SELECT * FROM nodes" )
        
        for node_row in c:
            yield node_row
            
        c.close()
        
    def node(self, id):
        c = self.conn.cursor()
        
        c.execute( "SELECT * FROM nodes WHERE id = ?", (id,) )
        
        try:
            ret = c.next()
        except StopIteration:
            c.close()
            raise IndexError( "Database does not have node with id '%s'"%id )
            
        c.close()
        return ret
    
    def nearest_node(self, lat, lon, range=0.005):
        c = self.conn.cursor()
        
        if self.index:
            print "YOU'RE USING THE INDEX"
            id = list(self.index.nearest( (lon, lat), 1 ))[0]
            print "THE ID IS %d"%id
            c.execute( "SELECT id, lat, lon FROM nodes WHERE id = ?", (id,) )
        else:
            c.execute( "SELECT id, lat, lon FROM nodes WHERE endnode_refs > 1 AND lat > ? AND lat < ? AND lon > ? AND lon < ?", (lat-range, lat+range, lon-range, lon+range) )
        
        dists = [(nid, nlat, nlon, ((nlat-lat)**2+(nlon-lon)**2)**0.5) for nid, nlat, nlon in c]
            
        if len(dists)==0:
            return (None, None, None, None)
            
        return min( dists, key = lambda x:x[3] )

    def nearest_of( self, lat, lon, nodes ):
        c = self.conn.cursor()
        
        c.execute( "SELECT id, lat, lon FROM nodes WHERE id IN (%s)"%",".join([str(x) for x in nodes]) )
        
        dists = [(nid, nlat, nlon, ((nlat-lat)**2+(nlon-lon)**2)**0.5) for nid, nlat, nlon in c]
            
        if len(dists)==0:
            return (None, None, None, None)
            
        return min( dists, key = lambda x:x[3] )
        
    def way(self, id):
        c = self.conn.cursor()
        
        c.execute( "SELECT id, tags, nds FROM ways WHERE id = ?", (id,) )
       
        try: 
          id, tags_str, nds_str = c.next()
          ret = WayRecord(id, tags_str, nds_str)
        except StopIteration:
          raise Exception( "OSMDB has no way with id '%s'"%id )
        finally:
          c.close()
        
        return ret
        
    def way_nds(self, id):
        c = self.conn.cursor()
        c.execute( "SELECT nds FROM ways WHERE id = ?", (id,) )
        
        (nds_str,) = c.next()
        c.close()
        
        return json.loads( nds_str )
        
    def ways(self):
        c = self.conn.cursor()
        
        c.execute( "SELECT id, tags, nds FROM ways" )
        
        for id, tags_str, nds_str in c:
            yield WayRecord( id, tags_str, nds_str )
            
        c.close()
        
    def count_ways(self):
        c = self.conn.cursor()
        
        c.execute( "SELECT count(*) FROM ways" )
        ret = c.next()[0]
        
        c.close()
        
        return ret
        
    def count_edges(self):
        c = self.conn.cursor()
        
        c.execute( "SELECT count(*) FROM edges" )
        ret = c.next()[0]
        
        c.close()
        
        return ret
        
    def delete_way(self, id):
        c = self.conn.cursor()
        
        c.execute("DELETE FROM ways WHERE id = ?", (id,))
        
        c.close()
        
    def bounds(self):
        c = self.conn.cursor()
        c.execute( "SELECT min(lon), min(lat), max(lon), max(lat) FROM nodes" )
        
        ret = c.next()
        c.close()
        return ret
    
    def execute(self,sql,args=None):
        c = self.conn.cursor()
        if args:
            for row in c.execute(sql,args):
                yield row
        else:
            for row in c.execute(sql):
                yield row
        c.close()
    
    def cursor(self):
        return self.conn.cursor()    
class Graph:
    def __init__(self, bus_trips):
        """Start an empty graph for the given trips.

        bus_trips -- the trips that generate_graph() iterates over
        """
        self.bus_trips = bus_trips

        # nodes, indexed by "location_id"
        self.graph_nodes = {}

        # edges: next id to hand out, edges indexed by "edge id", and a
        # lookup indexed by "location1_id,location2_id"
        self.graph_edge_id = 0
        self.graph_edges = {}
        self.graph_edge_lookup = {}

        # spatial index over the edges
        self.graph_edge_index = Rtree()

    def generate_graph(self):
        print "Running graph generation algorithm..."

        # initialize trip counter
        trip_count = 1

        for trip in self.bus_trips:

            # initialize location counter
            location_count = 1

            # storage for previous node
            prev_node = None

            for location in trip.locations:
                sys.stdout.write("\rAnalyzing location " +
                                 str(location_count) + "/" +
                                 str(len(trip.locations)) + " for trip " +
                                 str(trip_count) + "/" +
                                 str(len(self.bus_trips)) + "... ")
                sys.stdout.flush()

                # find closest edges in graph to location
                closest_edges = self._find_closest_edges_in_graph(
                    location, 100)

                # flag variable for whether we merged location
                did_merge_location = False

                # iterate through candidate edge ids
                for candidate_edge_id in closest_edges:

                    # grab candidate edge from graph edge dictionary
                    candidate_edge = self.graph_edges[candidate_edge_id]

                    # determine whether we should merge with candidate edge
                    if (self._should_merge_location_with_edge(
                            location, candidate_edge) is True):

                        # merge location with edge, update previous node
                        prev_node = self._merge_location_with_edge(
                            location, candidate_edge, prev_node)

                        # update merge flag variable
                        did_merge_location = True

                        # no need to look at further edges, break out of candidate edges loop
                        break

                # if we did not merge the location with any edge
                if (did_merge_location is False):

                    # add location to graph
                    self._add_location_to_graph(location, prev_node)

                    # update previous node with current location
                    prev_node = location

                # increment location counter
                location_count += 1

            # done with current trip locations
            print "done."

            # increment trip counter
            trip_count += 1

        # write graph edges to file
        self._write_graph_edges_to_file()

        # create graph database
        self._output_graph_to_db()

    def _merge_location_with_edge(self, location, edge, prev_node):
        """Merge *location* into *edge* and return the edge node it maps to.

        The location is represented by whichever end node of the edge is
        closer to it.  Without a previous node only the edge's volume grows.
        With one, the volume of every edge on the path from the previous
        node to that end node grows; when no such path is found, a new edge
        from the previous node to the end node is created instead.
        """
        edge_node = self._get_closest_edge_node(location, edge)

        # first location of a trip: only this edge gains volume
        if prev_node is None:
            edge.volume += 1
            return edge_node

        path = self._find_path(prev_node, edge_node, max_path_length)

        # not connected (within the length limit): link the nodes directly
        if path is None:
            self._create_graph_edge(prev_node, edge_node)
            return edge_node

        # bump the volume of each consecutive node pair along the path
        for in_node, out_node in zip(path, path[1:]):
            graph_edge = self._find_graph_edge(in_node, out_node)
            graph_edge.volume += 1

        return edge_node

    def _get_closest_edge_node(self, location, edge):
        """Return the end node of *edge* that is closer to *location*.

        The out_node wins when both distances are equal.
        """
        distance_to_in = self._distance(location, edge.in_node)
        distance_to_out = self._distance(location, edge.out_node)

        if distance_to_in < distance_to_out:
            return edge.in_node
        return edge.out_node

    def _find_path(self, source, destination, max_length):

        # reset all node visited flags
        self._reset_node_visited_flags()

        # get a breath-first search path from source to destination
        path = self._bfs_path(source, destination)

        # if there is a path from source to destination
        if (path is not None):

            # and if the path length is less than or equal to the maximum length
            if (len(path) <= max_length):

                # return the path
                return path

        # otherwise, return None
        return None

    def _bfs_path(self, source, destination):

        # storage for breadth-first search parents
        bfs_parent = {}  # key is current node, value is parent node

        # source node has no breadth-first search parent
        bfs_parent[source] = None

        # node queue for breadth-first search
        bfs_queue = []

        # enqueue source node
        bfs_queue.append(source)

        # mark source node as visited
        source.visited = True

        # while the queue is not empty
        while (len(bfs_queue) > 0):

            # dequeue the first node in the queue
            curr_node = bfs_queue.pop(0)

            # if the current node is the destination
            if (curr_node is destination):

                # create storage for breadth-first search path
                bfs_path = []

                # add the current node to the breadth-first search path
                bfs_path.insert(0, curr_node)

                # grab the parent of the current node
                parent = bfs_parent[curr_node]

                # iterate through breadth-first search parents
                while (parent is not None):

                    # add the parent to the breadth-first search path
                    bfs_path.insert(0, parent)

                    # grab the next parent
                    parent = bfs_parent[parent]

                # return the breadth-first search path
                return bfs_path

            # if the current node is not the destination
            else:

                # iterate through the current node's out_nodes
                for out_node in curr_node.out_nodes:

                    # if the out_node has not been visited
                    if (out_node.visited is False):

                        # mark the out_node as visited
                        out_node.visited = True

                        # enqueue the out_node
                        bfs_queue.append(out_node)

                        # store curr_node as out_node's breadth-first search parent
                        bfs_parent[out_node] = curr_node

        # if we reached here, no path was found
        return None

    def _should_merge_location_with_edge(self, location, edge):
        """Decide whether *location* should be merged into *edge*.

        Returns True only when the location projects onto the edge itself
        (projection fraction within [0, 1]), lies closer to it than
        ``location_projection_distance_limit`` and the cosine of the angle
        between the edge bearing and the location bearing exceeds
        ``location_bearing_difference_limit``.  Returns False otherwise.
        """
        (projection, fraction, distance) = self._projection_onto_line(
            edge.in_node, edge.out_node, location)

        # the projection falls outside the segment: cannot merge
        if (fraction < 0.0 or fraction > 1.0):
            return False

        # cosine of the angle between the edge direction and the direction
        # of travel at the location
        edge_bearing = self._path_bearing(edge.in_node, edge.out_node)
        bearing_difference = math.cos(
            math.radians(edge_bearing - self._location_bearing(location)))

        # close enough and heading the same way
        if (distance < location_projection_distance_limit
                and bearing_difference > location_bearing_difference_limit):
            return True

        return False

    def _add_location_to_graph(self, location, prev_node):

        # add an out_nodes list to location
        location.out_nodes = []

        # add an in_nodes list to location
        location.in_nodes = []

        # add a visited flag to location
        location.visited = False

        # add location to graph nodes list
        self.graph_nodes[location.id] = location

        # if prev_node is not None
        if (prev_node is not None):

            # create a new graph edge between prev_node and location
            self._create_graph_edge(prev_node, location)

    def _create_graph_edge(self, in_node, out_node):

        # see if we can find an existing graph edge with the same nodes
        existing_graph_edge = self._find_graph_edge(in_node, out_node)

        # if there is no existing graph edge with the same nodes
        if (existing_graph_edge is None):

            # create new graph edge object
            new_graph_edge = Edge(self.graph_edge_id, in_node, out_node)

            # add new graph edge to graph edge dictionary
            self.graph_edges[new_graph_edge.id] = new_graph_edge

            # add new graph edge to graph edge lookup dictionary
            self.graph_edge_lookup[str(in_node.id) + "," +
                                   str(out_node.id)] = new_graph_edge

            # add new graph edge to graph edges spatial index
            self._add_graph_edge_to_index(new_graph_edge)

            # increment graph edge id
            self.graph_edge_id += 1

            # store out_node in in_nodes's out_nodes list
            in_node.out_nodes.append(out_node)

            # store in_node in out_node's in_nodes list
            out_node.in_nodes.append(in_node)

    def _find_graph_edge(self, node1, node2):

        # generate edge lookup key
        edge_lookup_key = str(node1.id) + "," + str(node2.id)

        # if edge is in lookup table
        if (edge_lookup_key in self.graph_edge_lookup.keys()):

            # return the matching edge
            return self.graph_edge_lookup[edge_lookup_key]

        # if the edge wasn't in the lookup table
        return None

    def _add_graph_edge_to_index(self, graph_edge):

        # determine graph edge minx, miny, maxx, maxy values
        graph_edge_minx = min(graph_edge.in_node.longitude,
                              graph_edge.out_node.longitude)
        graph_edge_miny = min(graph_edge.in_node.latitude,
                              graph_edge.out_node.latitude)
        graph_edge_maxx = max(graph_edge.in_node.longitude,
                              graph_edge.out_node.longitude)
        graph_edge_maxy = max(graph_edge.in_node.latitude,
                              graph_edge.out_node.latitude)

        # insert graph edge into spatial index
        self.graph_edge_index.insert(graph_edge.id,
                                     (graph_edge_minx, graph_edge_miny,
                                      graph_edge_maxx, graph_edge_maxy))

    def _location_bearing(self, location):

        # if location has a previous neighbor and a next neighbor
        if ((location.prev_location is not None)
                and (location.next_location is not None)):

            # determine bearing using previous and next neighbors
            return self._path_bearing(location.prev_location,
                                      location.next_location)

        # if location has no previous neighbor, but has a next neighbor
        elif ((location.prev_location is None)
              and (location.next_location is not None)):

            # determine bearing using current location and next neighbor
            return self._path_bearing(location, location.next_location)

        # if location has a previous neighbor, but not a next neighbor
        elif ((location.prev_location is not None)
              and (location.next_location is None)):

            # determine bearing using previous neighbor and current location
            return self._path_bearing(location.prev_location, location)

        # if we reach here, there is an error
        return None

    def _find_closest_edges_in_graph(self, location, number_of_edges):
        return self.graph_edge_index.nearest(
            (location.longitude, location.latitude), number_of_edges)

    def _projection_onto_line(self, location1, location2, location3):
        """Project location3 onto the line through location1 and location2.

        Thin wrapper that forwards the latitude/longitude of the three
        locations to spatialfunclib.projection_onto_line.
        """
        line_start = (location1.latitude, location1.longitude)
        line_end = (location2.latitude, location2.longitude)
        point = (location3.latitude, location3.longitude)

        return spatialfunclib.projection_onto_line(
            line_start[0], line_start[1], line_end[0], line_end[1],
            point[0], point[1])

    def _path_bearing(self, location1, location2):
        """Return spatialfunclib.path_bearing from location1 to location2."""
        start_lat, start_lon = location1.latitude, location1.longitude
        end_lat, end_lon = location2.latitude, location2.longitude

        return spatialfunclib.path_bearing(start_lat, start_lon,
                                           end_lat, end_lon)

    def _distance(self, location1, location2):
        """Return spatialfunclib.distance between the two locations."""
        start_lat, start_lon = location1.latitude, location1.longitude
        end_lat, end_lon = location2.latitude, location2.longitude

        return spatialfunclib.distance(start_lat, start_lon,
                                       end_lat, end_lon)

    def _output_graph_to_db(self):

        # output that we are starting the database writing process...
        sys.stdout.write("\nOutputting graph to database... ")
        sys.stdout.flush()

        # connect to database
        conn = sqlite3.connect("cao_graph.db")

        # grab cursor
        cur = conn.cursor()

        # create nodes table
        cur.execute(
            "CREATE TABLE nodes (id INTEGER, latitude FLOAT, longitude FLOAT)")

        # create edges table
        cur.execute(
            "CREATE TABLE edges (id INTEGER, in_node INTEGER, out_node INTEGER)"
        )

        # remove values from nodes table
        #cur.execute("DELETE FROM nodes")

        # remove values from edges table
        #cur.execute("DELETE FROM edges")

        # commit creates
        conn.commit()

        # iterate through all graph nodes
        for graph_node in self.graph_nodes.values():

            # insert graph node into nodes table
            cur.execute("INSERT INTO nodes VALUES (" + str(graph_node.id) +
                        "," + str(graph_node.latitude) + "," +
                        str(graph_node.longitude) + ")")

        # iterate through all graph edges
        for graph_edge in self.graph_edges.values():

            # if the graph edge has volume greater than or equal to 3
            if (graph_edge.volume >= min_graph_edge_volume):

                # insert graph edge into edges table
                cur.execute("INSERT INTO edges VALUES (" + str(graph_edge.id) +
                            "," + str(graph_edge.in_node.id) + "," +
                            str(graph_edge.out_node.id) + ")")

        # commit inserts
        conn.commit()

        # close database connection
        conn.close()

        print "done."

    def _write_graph_edges_to_file(self):

        # output that we are starting the writing process
        sys.stdout.write("\nWriting graph edges to file... ")
        sys.stdout.flush()

        # open graph file
        graph_file = open('cao_edges.txt', 'w')

        # iterate through all graph_edges
        for graph_edge in self.graph_edges.values():

            # if the graph edge has volume greater than or equal to 3
            if (graph_edge.volume >= min_graph_edge_volume):

                # output edge to file
                graph_file.write(
                    str(graph_edge.in_node.latitude) + "," +
                    str(graph_edge.in_node.longitude) + "\n")
                graph_file.write(
                    str(graph_edge.out_node.latitude) + "," +
                    str(graph_edge.out_node.longitude) + "," +
                    str(graph_edge.volume) + "\n\n")

        # close graph file
        graph_file.close()

        print "done."

    def _reset_node_visited_flags(self):

        # iterate through all graph nodes
        for graph_node in self.graph_nodes.values():

            # set visited flag to False
            graph_node.visited = False
class Graph:
    """Incrementally builds a directed graph from a collection of bus trips.

    Each trip location is either merged into an existing nearby edge (the
    traversed edges get their ``volume`` incremented) or added to the graph
    as a new node connected to the previously processed node.

    The class relies on names defined elsewhere in the module: ``Rtree``,
    ``Edge``, ``spatialfunclib`` and the tuning constants
    ``max_path_length``, ``location_projection_distance_limit``,
    ``location_bearing_difference_limit`` and ``min_graph_edge_volume``.
    """

    def __init__(self, bus_trips):
        """Store the trips and create empty node/edge containers.

        bus_trips -- sequence of trips; each trip exposes ``locations``.
        """
        self.bus_trips = bus_trips
        self.graph_nodes = {}  # indexed by "location_id"
        self.graph_edge_id = 0  # id handed to the next edge that is created
        self.graph_edges = {}  # indexed by "edge id"
        self.graph_edge_lookup = {}  # indexed by "location1_id,location2_id"
        self.graph_edge_index = Rtree()  # spatial index of edge bounding boxes

    def generate_graph(self, max_candidate_edges=100):
        """Run the graph generation algorithm over all bus trips.

        For every location the nearest edges are fetched from the spatial
        index; the location is merged with the first candidate accepted by
        ``_should_merge_location_with_edge``, otherwise it becomes a new
        graph node.  When all trips are processed the edges are written to a
        text file and the graph to a SQLite database.

        max_candidate_edges -- how many nearest edges to consider per
            location (default 100, the value that used to be hard-coded).
        """
        print("Running graph generation algorithm...")

        trip_total = len(self.bus_trips)

        for trip_count, trip in enumerate(self.bus_trips, 1):

            location_total = len(trip.locations)

            # node produced by the previous location of this trip, if any
            prev_node = None

            for location_count, location in enumerate(trip.locations, 1):
                sys.stdout.write(
                    "\rAnalyzing location %d/%d for trip %d/%d... "
                    % (location_count, location_total, trip_count, trip_total))
                sys.stdout.flush()

                # candidate edge ids, nearest first as returned by the index
                closest_edges = self._find_closest_edges_in_graph(
                    location, max_candidate_edges)

                did_merge_location = False

                for candidate_edge_id in closest_edges:
                    candidate_edge = self.graph_edges[candidate_edge_id]

                    if self._should_merge_location_with_edge(location,
                                                             candidate_edge):
                        # merging returns the edge node that stands in for
                        # this location from now on
                        prev_node = self._merge_location_with_edge(
                            location, candidate_edge, prev_node)
                        did_merge_location = True

                        # first acceptable edge wins
                        break

                if not did_merge_location:
                    # no edge accepted the location: it becomes a new node
                    self._add_location_to_graph(location, prev_node)
                    prev_node = location

            # done with current trip locations
            print("done.")

        # write graph edges to file
        self._write_graph_edges_to_file()

        # create graph database
        self._output_graph_to_db()

    def _merge_location_with_edge(self, location, edge, prev_node):
        """Merge ``location`` into ``edge`` and return the matched edge node.

        The matched node is whichever end of ``edge`` is closer to
        ``location``.  With no ``prev_node`` only ``edge.volume`` is
        incremented.  Otherwise, if a path of at most ``max_path_length``
        nodes leads from ``prev_node`` to the matched node, every edge on
        that path gets its volume incremented; if not, a new edge from
        ``prev_node`` to the matched node is created.
        """
        edge_node = self._get_closest_edge_node(location, edge)

        if prev_node is None:
            # first location of the trip: only this edge is reinforced
            edge.volume += 1
            return edge_node

        path = self._find_path(prev_node, edge_node, max_path_length)

        if path is None:
            # nothing (short enough) connects the two nodes yet
            self._create_graph_edge(prev_node, edge_node)
            return edge_node

        # walk consecutive node pairs of the path and reinforce their edges
        for in_node, out_node in zip(path, path[1:]):
            graph_edge = self._find_graph_edge(in_node, out_node)
            graph_edge.volume += 1

        return edge_node

    def _get_closest_edge_node(self, location, edge):
        """Return the end node of ``edge`` nearest to ``location``.

        Ties go to ``edge.out_node``.
        """
        in_distance = self._distance(location, edge.in_node)
        out_distance = self._distance(location, edge.out_node)

        if in_distance < out_distance:
            return edge.in_node

        return edge.out_node

    def _find_path(self, source, destination, max_length):
        """Return a BFS path from ``source`` to ``destination`` or None.

        The path is a list of nodes including both end points.  None is
        returned when no path exists or when the path holds more than
        ``max_length`` nodes.
        """
        # the search relies on the per-node visited flags being clear
        self._reset_node_visited_flags()

        path = self._bfs_path(source, destination)

        if path is not None and len(path) <= max_length:
            return path

        return None

    def _bfs_path(self, source, destination):
        """Breadth-first search along ``out_nodes`` links.

        Returns the list of nodes from ``source`` to ``destination``
        (inclusive), or None when ``destination`` is unreachable.  Nodes are
        marked through their ``visited`` attribute, so callers must reset the
        flags beforehand (``_find_path`` does).
        """
        # local import keeps this block self-contained; deque gives O(1)
        # pops from the left where list.pop(0) is O(n)
        from collections import deque

        # key is a reached node, value is the node it was reached from
        bfs_parent = {source: None}

        bfs_queue = deque([source])
        source.visited = True

        while bfs_queue:
            curr_node = bfs_queue.popleft()

            if curr_node is destination:
                # follow parent links back to the source, then flip the
                # list instead of inserting at the front repeatedly
                bfs_path = []
                step = curr_node
                while step is not None:
                    bfs_path.append(step)
                    step = bfs_parent[step]
                bfs_path.reverse()
                return bfs_path

            for out_node in curr_node.out_nodes:
                if not out_node.visited:
                    out_node.visited = True
                    bfs_parent[out_node] = curr_node
                    bfs_queue.append(out_node)

        # if we reached here, no path was found
        return None

    def _should_merge_location_with_edge(self, location, edge):
        """Decide whether ``location`` can be merged into ``edge``.

        Returns True only when all of the following hold:
          * the projection of the location onto the edge falls within the
            edge (projection fraction in [0, 1]);
          * the projection distance is below
            ``location_projection_distance_limit``;
          * the cosine of the difference between the edge bearing and the
            location bearing exceeds ``location_bearing_difference_limit``.

        A location without any neighbour has no bearing and is never merged.
        """
        (_, projection_fraction, projection_distance) = \
            self._projection_onto_line(edge.in_node, edge.out_node, location)

        # projection falls outside the edge
        if projection_fraction < 0.0 or projection_fraction > 1.0:
            return False

        # too far from the edge; checked before the bearing so the bearing
        # computations are skipped for distant edges
        if not (projection_distance < location_projection_distance_limit):
            return False

        location_bearing = self._location_bearing(location)

        # an isolated location has no travel direction to compare; without
        # this guard the subtraction below would raise TypeError
        if location_bearing is None:
            return False

        edge_bearing = self._path_bearing(edge.in_node, edge.out_node)

        # cosine of the angular difference: larger means better aligned
        bearing_difference = math.cos(
            math.radians(edge_bearing - location_bearing))

        if bearing_difference > location_bearing_difference_limit:
            return True

        return False

    def _add_location_to_graph(self, location, prev_node):
        """Turn ``location`` into a graph node, linked from ``prev_node``.

        The location object itself becomes the node: it receives empty
        ``out_nodes``/``in_nodes`` lists and a ``visited`` flag, and is
        registered under ``location.id``.  When ``prev_node`` is given, an
        edge from it to the new node is created.
        """
        location.out_nodes = []
        location.in_nodes = []
        location.visited = False

        self.graph_nodes[location.id] = location

        if prev_node is not None:
            self._create_graph_edge(prev_node, location)

    def _edge_lookup_key(self, in_node, out_node):
        """Build the "in_id,out_id" key used by ``graph_edge_lookup``."""
        return str(in_node.id) + "," + str(out_node.id)

    def _create_graph_edge(self, in_node, out_node):
        """Create the directed edge in_node -> out_node unless it exists.

        A new edge is stored in the edge dictionary, the lookup table and
        the spatial index, and both nodes' adjacency lists are updated.
        Nothing happens when the edge is already present.
        """
        if self._find_graph_edge(in_node, out_node) is not None:
            return

        new_graph_edge = Edge(self.graph_edge_id, in_node, out_node)

        self.graph_edges[new_graph_edge.id] = new_graph_edge
        self.graph_edge_lookup[
            self._edge_lookup_key(in_node, out_node)] = new_graph_edge
        self._add_graph_edge_to_index(new_graph_edge)

        # next edge gets a fresh id
        self.graph_edge_id += 1

        # keep adjacency lists in step with the edge set
        in_node.out_nodes.append(out_node)
        out_node.in_nodes.append(in_node)

    def _find_graph_edge(self, node1, node2):
        """Return the edge node1 -> node2, or None when there is none."""
        # dict.get is a single hash lookup; 'key in d.keys()' scans a list
        # on Python 2
        return self.graph_edge_lookup.get(self._edge_lookup_key(node1, node2))

    def _add_graph_edge_to_index(self, graph_edge):
        """Insert the bounding box of ``graph_edge`` into the spatial index.

        The box is (min longitude, min latitude, max longitude,
        max latitude) of the edge's two end nodes.
        """
        in_node = graph_edge.in_node
        out_node = graph_edge.out_node

        bounding_box = (min(in_node.longitude, out_node.longitude),
                        min(in_node.latitude, out_node.latitude),
                        max(in_node.longitude, out_node.longitude),
                        max(in_node.latitude, out_node.latitude))

        self.graph_edge_index.insert(graph_edge.id, bounding_box)

    def _location_bearing(self, location):
        """Return the travel bearing at ``location`` or None.

        The bearing is taken from the previous to the next neighbour; when
        one neighbour is missing, ``location`` itself takes its place.
        None is returned when the location has neither neighbour.
        """
        prev_location = location.prev_location
        next_location = location.next_location

        # no neighbours at all: bearing is undefined
        if prev_location is None and next_location is None:
            return None

        start = location if prev_location is None else prev_location
        end = location if next_location is None else next_location

        return self._path_bearing(start, end)

    def _find_closest_edges_in_graph(self, location, number_of_edges):
        """Query the spatial index for the edge ids nearest to ``location``."""
        return self.graph_edge_index.nearest(
            (location.longitude, location.latitude), number_of_edges)

    def _projection_onto_line(self, location1, location2, location3):
        """Project ``location3`` onto the line location1 -> location2.

        Thin wrapper around ``spatialfunclib.projection_onto_line``; callers
        unpack the result as (projection, fraction, distance).
        """
        return spatialfunclib.projection_onto_line(
            location1.latitude, location1.longitude,
            location2.latitude, location2.longitude,
            location3.latitude, location3.longitude)

    def _path_bearing(self, location1, location2):
        """Bearing from ``location1`` to ``location2`` (spatialfunclib)."""
        return spatialfunclib.path_bearing(
            location1.latitude, location1.longitude,
            location2.latitude, location2.longitude)

    def _distance(self, location1, location2):
        """Distance between two locations (spatialfunclib)."""
        return spatialfunclib.distance(
            location1.latitude, location1.longitude,
            location2.latitude, location2.longitude)

    def _output_graph_to_db(self):
        """Write all nodes and the sufficiently travelled edges to SQLite.

        Creates tables ``nodes`` and ``edges`` in 'cao_graph.db' and fills
        them.  Only edges whose ``volume`` is at least
        ``min_graph_edge_volume`` are stored; all nodes are stored.

        The tables are created unconditionally, so running this against a
        database that already contains them fails with a sqlite3 error.
        """
        # output that we are starting the database writing process...
        sys.stdout.write("\nOutputting graph to database... ")
        sys.stdout.flush()

        conn = sqlite3.connect("cao_graph.db")

        # make sure the connection is released even when a statement fails
        try:
            cur = conn.cursor()

            cur.execute("CREATE TABLE nodes "
                        "(id INTEGER, latitude FLOAT, longitude FLOAT)")
            cur.execute("CREATE TABLE edges "
                        "(id INTEGER, in_node INTEGER, out_node INTEGER)")

            # commit creates
            conn.commit()

            # bound parameters avoid hand-built SQL text and pass the
            # coordinates to SQLite without going through str()
            cur.executemany(
                "INSERT INTO nodes VALUES (?, ?, ?)",
                ((graph_node.id, graph_node.latitude, graph_node.longitude)
                 for graph_node in self.graph_nodes.values()))

            cur.executemany(
                "INSERT INTO edges VALUES (?, ?, ?)",
                ((graph_edge.id, graph_edge.in_node.id, graph_edge.out_node.id)
                 for graph_edge in self.graph_edges.values()
                 if graph_edge.volume >= min_graph_edge_volume))

            # commit inserts
            conn.commit()
        finally:
            conn.close()

        print("done.")

    def _write_graph_edges_to_file(self):
        """Dump every sufficiently travelled graph edge to 'cao_edges.txt'.

        An edge is written only when its ``volume`` is at least
        ``min_graph_edge_volume``.  Each written edge occupies two lines
        followed by a blank line:

            <in_node latitude>,<in_node longitude>
            <out_node latitude>,<out_node longitude>,<volume>
        """
        # output that we are starting the writing process
        sys.stdout.write("\nWriting graph edges to file... ")
        sys.stdout.flush()

        # the context manager closes the file even if a write raises
        with open('cao_edges.txt', 'w') as graph_file:

            for graph_edge in self.graph_edges.values():

                # skip edges below the minimum volume threshold
                if graph_edge.volume < min_graph_edge_volume:
                    continue

                in_node = graph_edge.in_node
                out_node = graph_edge.out_node

                # '%s' formats through str(), matching plain concatenation
                graph_file.write("%s,%s\n" % (in_node.latitude,
                                              in_node.longitude))
                graph_file.write("%s,%s,%s\n\n" % (out_node.latitude,
                                                   out_node.longitude,
                                                   graph_edge.volume))

        print("done.")

    def _reset_node_visited_flags(self):
        """Set ``visited`` to False on every node of the graph."""
        for graph_node in self.graph_nodes.values():
            graph_node.visited = False