def restorePathMatch(dbServer, networkName, userName, password, shapePath, pathMatchFilename, useDirectDist=True):
    """
    Restores the state of an earlier path-match run: connects to the VISTA database, reads
    the network topology and the GTFS shapes, and reads the matched nodes back from a
    path-match dump file. Shapes that have no entry in the dump are dropped.

    @type dbServer: str
    @type networkName: str
    @type userName: str
    @type password: str
    @param shapePath: Passed to gtfs.fillShapes() to locate the GTFS shape data.
    @param pathMatchFilename: File that is read by path_engine.readStandardDump().
    @param useDirectDist: Passed through unchanged to vista_network.fillGraph().
    @return The VISTA graph, the GTFS shapes that remain after filtering, the matched nodes
            keyed by shape ID, and the set of shape IDs that were removed because they had
            no entry in the path-match file.
    @rtype (graph.GraphLib, dict, dict<int, list<path_engine.PathEnd>>, set)
    """
    # Get the database connected:
    print("INFO: Connect to database...", file = sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file = sys.stderr)
    vistaGraph = vista_network.fillGraph(database, useDirectDist)

    # Read in the shapefile information:
    print("INFO: Read GTFS shapefile...", file = sys.stderr)
    gtfsShapes = gtfs.fillShapes(shapePath, vistaGraph.gps)

    # Read the path-match file:
    print("INFO: Read the path-match file '%s'..." % pathMatchFilename, file = sys.stderr)
    with open(pathMatchFilename, 'r') as inFile:
        gtfsNodes = path_engine.readStandardDump(vistaGraph, gtfsShapes, inFile)
        # gtfsNodes: dict<int, list<path_engine.PathEnd>>

    # Filter out the unused shapes, remembering which ones were dropped so that the returned
    # set actually reflects them. The key list is a snapshot because gtfsShapes is modified
    # inside the loop.
    unusedShapeIDs = set()
    for shapeID in compat.listkeys(gtfsShapes):
        if shapeID not in gtfsNodes:
            del gtfsShapes[shapeID]
            unusedShapeIDs.add(shapeID)

    return (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs)
def filterSimilarity(gtfsShapes):
    """
    Compares among all entries and figures out which ones are enough of duplicates. Keeps the
    longer lists.

    Two shapes count as duplicates when the difflib.SequenceMatcher ratio of their
    (lat, lng) sequences exceeds SEQUENCE_CUTOFF. Of such a pair, the shape with fewer
    entries is excluded; on equal length the one encountered first is kept.

    @param gtfsShapes: Shape entries keyed by shape ID. This mapping is not modified.
    @type gtfsShapes: dict<int, list<gtfs.ShapesEntry>>
    @return A shallow copy of gtfsShapes without the shapes identified as duplicates.
    @rtype dict<int, list<gtfs.ShapesEntry>>
    """
    shapeIDs = compat.listkeys(gtfsShapes)

    # Build every shape's coordinate sequence once instead of once per comparison:
    coords = {}
    for shapeID in shapeIDs:
        coords[shapeID] = [(shapeEntry.lat, shapeEntry.lng) for shapeEntry in gtfsShapes[shapeID]]

    # excludedIDs: set<int> of shape IDs judged to be duplicates of a kept shape.
    excludedIDs = set()

    for origID in shapeIDs:
        if origID in excludedIDs:
            continue
        for targetID in shapeIDs:
            # Only compare against shapes that are still candidates and not longer than origID:
            if targetID == origID or targetID in excludedIDs \
                    or len(coords[origID]) < len(coords[targetID]):
                continue
            similarity = difflib.SequenceMatcher(None, coords[origID], coords[targetID]).ratio()
            if similarity > SEQUENCE_CUTOFF:
                print("INFO: Shape ID %d is kept, where Shape ID %d is a duplicate (%.3g)" \
                      % (origID, targetID, similarity), file = sys.stderr)
                # Record the duplicate; without this nothing is ever filtered out.
                excludedIDs.add(targetID)

    ret = dict(gtfsShapes)
    for shapeID in excludedIDs:
        del ret[shapeID]
    return ret
def arcgiscsvReport(gtfsNodes, vistaGraph, outFile=sys.stdout):
    """
    Takes a node set and outputs VISTA table files that report the link matches for the ArcGIS CSV GPS track set.

    One CSV row is written per link in each node's routeInfo; nodes with an empty routeInfo
    produce no output.

    @param gtfsNodes: Matched nodes keyed by datafile ID.
    @type gtfsNodes: dict<?, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    @param outFile: File-like object that receives the rows.
    """
    # NOTE(review): no header row is visible for this report in the source; confirm whether one is expected.
    for datafileID in compat.listkeys(gtfsNodes):
        for node in gtfsNodes[datafileID]:
            # node: path_engine.PathEnd
            if len(node.routeInfo) > 0:
                # Convert the matched point on the link back to GPS coordinates:
                (vistaLat, vistaLng) = vistaGraph.GPS.feet2gps(node.pointOnLink.pointX,
                                                                node.pointOnLink.pointY)

                for link in node.routeInfo:
                    # Nine fields; the link ID and latitude arguments were missing from the
                    # tuple and are restored to match the format string.
                    outStr = "%d,%s,%d,%s,%d,%g,%g,%g,%g" % (
                        node.shapeEntry.shapeSeq, node.shapeEntry.shapeID, link.id,
                        node.shapeEntry.time.strftime('%m/%d/%Y %H:%M:%S'),
                        1 if node.restart else 0, node.shapeEntry.lat,
                        node.shapeEntry.lng, vistaLat, vistaLng)
                    print(outStr, file=outFile)
def pathMatch(dbServer, networkName, userName, password, filename, limitMap = None):
    """
    Connects to the VISTA database, reads the network topology and the GPS tracks from
    filename, and runs the path engine over each track.

    @type dbServer: str
    @type networkName: str
    @type userName: str
    @type password: str
    @param filename: GPS track file that is read by fillFromFile().
    @param limitMap: If not None, only datafile IDs contained in it are processed; a warning
        is printed for each of its IDs that is absent from the track file.
    @return The result of PathEngine.constructPath() for each processed datafile ID.
    @rtype dict<str, list<path_engine.PathEnd>>
    """
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    pointSearchRadius = 1000    # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 350    # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200  # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 3800      # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 3500      # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500    # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0        # "f_d": Cost multiplier for Linear path distance
    driftFactor = 1.5           # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5        # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 8      # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 6  # "q_e": Number of proposed paths to maintain during pathfinding stage
    maxHops = 12                # Maximum number of VISTA links to pursue in a path-finding operation

    # Get the database connected:
    print("INFO: Connect to database...", file = sys.stderr)
    database = vista_network.connect(dbServer, userName, password, networkName)

    # Read in the topology from the VISTA database:
    print("INFO: Read topology from database...", file = sys.stderr)
    vistaGraph = vista_network.fillGraph(database)

    # Read in the GPS track information:
    print("INFO: Read GDB GPS track...", file = sys.stderr)
    gpsTracks = fillFromFile(filename, vistaGraph.gps)

    # Initialize the path-finder:
    pathFinder = path_engine.PathEngine(pointSearchRadius, pointSearchPrimary, pointSearchSecondary, limitLinearDist,
                            limitDirectDist, limitDirectDistRev, distanceFactor, driftFactor, nonPerpPenalty, limitClosestPoints,
                            limitSimultaneousPaths)
    pathFinder.maxHops = maxHops

    # Begin iteration through each shape:
    datafileIDs = compat.listkeys(gpsTracks)
    # nodesResults: dict<str, list<path_engine.PathEnd>>
    nodesResults = {}

    if limitMap is not None:
        for datafileID in limitMap:
            if datafileID not in datafileIDs:
                # %s rather than %d: datafile IDs are formatted as strings elsewhere in this function.
                print("WARNING: Limit datafile ID %s is not found in the shape file." % datafileID, file = sys.stderr)

    for datafileID in datafileIDs:
        # Skip tracks that the caller did not ask for:
        if limitMap is not None and datafileID not in limitMap:
            continue

        print("INFO: -- Datafile %s --" % datafileID, file = sys.stderr)

        # Find the path for the given shape:
        gtfsNodes = pathFinder.constructPath(gpsTracks[datafileID], vistaGraph)

        # File this away as a result for later output:
        nodesResults[datafileID] = gtfsNodes
    return nodesResults
def dumpGPS(gtfsNodes, vistaGraph, outFile=sys.stdout):
    """
    Takes a GTFS node set and outputs a CSV format of GPS points and other information.

    One row is written per node, with ten fields matching the format string below.

    @param gtfsNodes: Matched nodes keyed by shape ID.
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    @param outFile: File-like object that receives the rows.
    """
    # NOTE(review): no header row is visible for this variant in the source; confirm whether one is expected.
    for shapeID in compat.listkeys(gtfsNodes):
        for gtfsNode in gtfsNodes[shapeID]:
            # gtfsNode: path_engine.PathEnd
            # Convert the matched point on the link back to GPS coordinates:
            (vistaLat, vistaLng) = vistaGraph.GPS.feet2gps(gtfsNode.pointOnLink.pointX,
                                                            gtfsNode.pointOnLink.pointY)

            # NOTE(review): the link ID, GTFS latitude and the two trailing node coordinates were
            # missing from the tuple; they are reconstructed as the matched link's ID and its
            # origin node's GPS position -- confirm against the intended column layout.
            outStr = "%d,%d,%d,%g,%g,%g,%g,%g,%g,%g" % (
                gtfsNode.shapeEntry.shapeID, gtfsNode.shapeEntry.shapeSeq,
                gtfsNode.pointOnLink.link.id, gtfsNode.pointOnLink.dist,
                gtfsNode.shapeEntry.lat, gtfsNode.shapeEntry.lng, vistaLat, vistaLng,
                gtfsNode.pointOnLink.link.origNode.gpsLat,
                gtfsNode.pointOnLink.link.origNode.gpsLng)
            print(outStr, file=outFile)
def problemReport(gtfsNodes, vistaGraph, showLinks=False, outFile=sys.stdout):
    """
    Takes a GTFS node set and outputs a CSV format of GPS points where there are indications of problems.

    Each trackpoint row carries a problemCode: 1 for a restart, 2 for a perpendicular match
    whose refDist exceeds PERP_DIST, 3 for a non-perpendicular match whose refDist exceeds
    NONPERP_DIST, and 0 otherwise.

    @param gtfsNodes: Matched nodes keyed by shape ID.
    @type gtfsNodes: dict<?, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    @param showLinks: Place markers at starts of links in addition to those at trackpoints. These output problemCode 4.
    @type showLinks: bool
    @param outFile: File-like object that receives the rows.
    """
    for shapeID in compat.listkeys(gtfsNodes):
        prevSeq = -1
        for gtfsNode in gtfsNodes[shapeID]:
            # gtfsNode: path_engine.PathEnd
            (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(gtfsNode.pointOnLink.pointX,
                                                            gtfsNode.pointOnLink.pointY)

            # Determine whether we have a problem to report:
            problemCode = 0
            if gtfsNode.restart:
                problemCode = 1
            elif not gtfsNode.pointOnLink.nonPerpPenalty and gtfsNode.pointOnLink.refDist > PERP_DIST:
                problemCode = 2
            elif gtfsNode.pointOnLink.nonPerpPenalty and gtfsNode.pointOnLink.refDist > NONPERP_DIST:
                problemCode = 3

            if showLinks and gtfsNode.routeInfo:
                # Link markers get fractional sequence numbers placed after the previous
                # trackpoint's sequence number. The divisor is a power of ten larger than the
                # link count so the fractions stay below one whole step.
                divisor = 10**int(math.log10(len(gtfsNode.routeInfo) + 1) + 1)
                # Float division on purpose: integer division would make the increment zero.
                increment = 1.0 / divisor
                seqCtr = prevSeq + increment
                for routeInfo in gtfsNode.routeInfo:
                    # routeInfo: graph.GraphLink
                    # NOTE(review): link ID and longitude arguments were missing from the tuple
                    # and are reconstructed; both coordinate columns use the link's origin node.
                    originCoords = str(routeInfo.origNode.gpsLat) + " " + str(routeInfo.origNode.gpsLng)
                    outStr = "%s,%g,%d,%g,%d,%s,%s" % (
                        str(gtfsNode.shapeEntry.shapeID), seqCtr, routeInfo.id,
                        0, 4, originCoords, originCoords)
                    print(outStr, file=outFile)
                    seqCtr += increment

            # NOTE(review): the link expression and GTFS latitude were missing from this tuple
            # and are reconstructed from the surviving "is not None else -1" fragment.
            matchedLink = gtfsNode.pointOnLink.link
            outStr = "%s,%d,%d,%g,%d,%s,%s" % (
                str(gtfsNode.shapeEntry.shapeID), gtfsNode.shapeEntry.shapeSeq,
                matchedLink.id if matchedLink is not None else -1,
                gtfsNode.pointOnLink.dist, problemCode,
                str(gtfsNode.shapeEntry.lat) + " " + str(gtfsNode.shapeEntry.lng),
                str(vistaLat) + " " + str(vistaLng))
            print(outStr, file=outFile)
            prevSeq = gtfsNode.shapeEntry.shapeSeq
def main(argv):
    """
    Command-line entry point: restores an earlier path match, optionally reads a hint file
    and a route restriction file, and refines the matched paths.

    @param argv: Argument vector. argv[1] through argv[6] are dbServer, networkName, userName,
        password, shapePath and pathMatchFilename. They may be followed by "-h <hintFile>"
        and/or "-r <routeRestrictFile>".
    @type argv: list<str>
    """
    # Initialize from command-line parameters:
    if len(argv) < 7:
        # NOTE(review): the original body of this guard is not visible in the source; exiting
        # with a usage message is assumed.
        print("ERROR: Expected arguments: dbServer network user password shapePath pathMatchFile "
              "[-h hintFile] [-r routeRestrictFile]", file = sys.stderr)
        sys.exit(1)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    shapePath = argv[5]
    pathMatchFilename = argv[6]
    hintFilename = None
    routeRestrictFilename = None

    # Optional switches; each one consumes the argument that follows it:
    i = 7
    while i < len(argv):
        if argv[i] == "-h" and i < len(argv) - 1:
            hintFilename = argv[i + 1]
            i += 1
        elif argv[i] == "-r" and i < len(argv) - 1:
            routeRestrictFilename = argv[i + 1]
            i += 1
        i += 1

    # Restore the stuff that was built with path_match:
    (vistaGraph, gtfsShapes, gtfsNodes, unusedShapeIDs) = transit_gtfs.restorePathMatch(dbServer, networkName,
        userName, password, shapePath, pathMatchFilename)

    # Restore the hint file if it is specified. Without one, use an empty mapping so that
    # fillHints() is never called with a filename of None.
    if hintFilename is not None:
        print("INFO: Read hint file...", file = sys.stderr)
        hintEntries = fillHints(hintFilename, shapePath, gtfsShapes, vistaGraph.gps, unusedShapeIDs)
    else:
        print("INFO: No hint file was specified.", file = sys.stderr)
        hintEntries = {}
    # hintEntries: dict<int, path_engine.ShapesEntry>

    # Filter down the routes that we're interested in:
    if routeRestrictFilename is not None:
        gtfsNodes = filterRoutes(gtfsNodes, shapePath, gtfsShapes, routeRestrictFilename)

    print("INFO: Refining paths.", file = sys.stderr)
    gtfsNodesResults = pathsRefine(gtfsNodes, hintEntries, vistaGraph)
    # gtfsNodesResults: dict<int, list<path_engine.PathEnd>>

    print("INFO: -- Final --", file = sys.stderr)
    print("INFO: Print output...", file = sys.stderr)

    shapeIDs = compat.listkeys(gtfsNodesResults)
    for shapeID in shapeIDs:
        # NOTE(review): the per-shape output statement is not visible in the source, so this
        # loop currently emits nothing. Restore the intended dump call here.
        pass
    print("INFO: Done.", file = sys.stderr)
def pathsRefine(gtfsNodes, hintEntries, vistaGraph):
    """
    Runs the path engine's refinement over every shape in gtfsNodes, supplying the hints
    registered for that shape (or an empty list when there are none).

    @param gtfsNodes: Previously matched nodes keyed by shape ID.
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @param hintEntries: Hints keyed by shape ID; shapes without an entry get no hints.
    @type vistaGraph: graph.GraphLib
    @return The result of PathEngine.refinePath() for each shape ID.
    @rtype dict<int, list<path_engine.PathEnd>>
    """
    # Default parameters, with explanations and cross-references to Perrine et al., 2015:
    hintRefactorRadius = 1000  # Radius (ft) to invalidate surrounding found points.
    termRefactorRadius = 3000  # Radius (ft) to invalidate found points at either end of a restart.
    pointSearchRadius = 1600  # "k": Radius (ft) to search from GTFS point to perpendicular VISTA links
    pointSearchPrimary = 1600  # "k_p": Radius (ft) to search from GTFS point to new VISTA links
    pointSearchSecondary = 200  # "k_s": Radius (ft) to search from VISTA perpendicular point to previous point
    limitLinearDist = 6200  # Path distance (ft) to allow new proposed paths from one point to another
    limitDirectDist = 6200  # Radius (ft) to allow new proposed paths from one point to another
    limitDirectDistRev = 500  # Radius (ft) to allow backtracking on an existing link (e.g. parking lot)
    distanceFactor = 1.0  # "f_d": Cost multiplier for Linear path distance
    driftFactor = 1.5  # "f_r": Cost multiplier for distance from GTFS point to its VISTA link
    nonPerpPenalty = 1.5  # "f_p": Penalty multiplier for GTFS points that aren't perpendicular to VISTA links
    limitClosestPoints = 25  # "q_p": Number of close-proximity points that are considered for each GTFS point
    limitSimultaneousPaths = 25  # "q_e": Number of proposed paths to maintain during pathfinding stage

    maxHops = 8  # Maximum number of VISTA links to pursue in a path-finding operation
    limitHintClosest = 4  # Number of hint closest points and closest previous track points

    # Initialize the path-finder:
    pathFinder = path_engine.PathEngine(pointSearchRadius, pointSearchPrimary,
                                        pointSearchSecondary, limitLinearDist,
                                        limitDirectDist, limitDirectDistRev,
                                        distanceFactor, driftFactor,
                                        nonPerpPenalty, limitClosestPoints,
                                        limitSimultaneousPaths)
    pathFinder.setRefineParams(hintRefactorRadius, termRefactorRadius)
    pathFinder.maxHops = maxHops
    pathFinder.limitHintClosest = limitHintClosest

    # Begin iteration through each shape:
    gtfsNodesResults = {}
    for shapeID in compat.listkeys(gtfsNodes):
        print("INFO: -- Shape ID %s --" % str(shapeID), file=sys.stderr)

        # Find the path for the given shape, with its hints if any exist:
        shapeHints = hintEntries[shapeID] if shapeID in hintEntries else list()
        gtfsNodesRevised = pathFinder.refinePath(gtfsNodes[shapeID], vistaGraph, shapeHints)

        # File this away as a result for later output:
        gtfsNodesResults[shapeID] = gtfsNodesRevised
    return gtfsNodesResults
def main(argv):
    """
    Command-line entry point: reads GTFS shapes, optionally filters them with an exclusion
    file, removes near-duplicate shapes, and prints the remaining shape points as CSV to
    standard output.

    @param argv: Argument vector. argv[1] is the shape path; it may be followed by
        "-x <routeRestrictFile>".
    @type argv: list<str>
    """
    # Initialize from command-line parameters. argv[1] must exist before it is inspected,
    # and lower() must be called for the help comparison to ever match.
    if (len(argv) < 2) or (argv[1].lower() == "-h") or (argv[1].lower() == "--help"):
        # NOTE(review): the original body of this guard is not visible in the source; exiting
        # with a usage message is assumed.
        print("ERROR: Expected arguments: shapePath [-x routeRestrictFile]", file=sys.stderr)
        sys.exit(1)
    shapePath = argv[1]
    routeRestrictFilename = None

    # Optional switches; each one consumes the argument that follows it:
    i = 2
    while i < len(argv):
        if argv[i] == "-x" and i < len(argv) - 1:
            routeRestrictFilename = argv[i + 1]
            i += 1
        i += 1

    # Create a fake GPS coordinate. The local must not be called "graph": that would shadow
    # the graph module inside this function and raise UnboundLocalError on this very line.
    fakeGraph = graph.GraphLib(0, 0)

    # Read in the shapefile information:
    print("INFO: Read GTFS shapefile...", file=sys.stderr)
    gtfsShapes = gtfs.fillShapes(shapePath, fakeGraph.GPS)

    # Filter shapes according to exclusion file:
    if routeRestrictFilename is not None:
        gtfsShapes = path_refine.filterRoutes(gtfsShapes, shapePath,
                                              routeRestrictFilename, True)

    # Similarity search:
    gtfsShapes = filterSimilarity(gtfsShapes)

    # Extract useful information:
    print("INFO: Print output...", file=sys.stderr)
    for shapeID in compat.listkeys(gtfsShapes):
        for shapeEntry in gtfsShapes[shapeID]:
            # shapeEntry: gtfs.ShapesEntry
            print("%d,%f,%f,%d," % (shapeEntry.shapeID, shapeEntry.lat,
                                    shapeEntry.lng, shapeEntry.shapeSeq))
def dumpGPS(gtfsNodes, vistaGraph, outFile = sys.stdout):
    """
    Takes a GTFS node set and outputs a CSV format of GPS points and other information.

    A header row is written first, followed by one row per node.

    @param gtfsNodes: Matched nodes keyed by shape ID.
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaGraph: graph.GraphLib
    @param outFile: File-like object that receives the header and rows.
    """
    print("shapeID,shapeSeq,linkID,linkDist,gtfsLat,gtfsLng,vistaLat,vistaLng,vistaNodeLat,vistaNodeLng", file = outFile)

    for shapeID in compat.listkeys(gtfsNodes):
        for gtfsNode in gtfsNodes[shapeID]:
            # gtfsNode: path_engine.PathEnd
            # Convert the matched point on the link back to GPS coordinates:
            (vistaLat, vistaLng) = vistaGraph.GPS.feet2gps(gtfsNode.pointOnLink.pointX, gtfsNode.pointOnLink.pointY)

            # NOTE(review): the linkID, gtfsLat and vistaNode* arguments were missing from the
            # tuple; they are reconstructed from the header columns as the matched link's ID
            # and its origin node's GPS position -- confirm the node choice.
            matchedLink = gtfsNode.pointOnLink.link
            outStr = "%d,%d,%d,%g,%g,%g,%g,%g,%g,%g" % (gtfsNode.shapeEntry.shapeID, gtfsNode.shapeEntry.shapeSeq,
                            matchedLink.id, gtfsNode.pointOnLink.dist,
                            gtfsNode.shapeEntry.lat, gtfsNode.shapeEntry.lng,
                            vistaLat, vistaLng,
                            matchedLink.origNode.gpsLat, matchedLink.origNode.gpsLng)
            print(outStr, file = outFile)
def dumpBusRoutes(gtfsTrips, userName, networkName, outFile = sys.stdout):
    """
    dumpBusRoutes dumps out a public.bus_route.csv file contents.

    One row is written per trip: the trip ID and a name built from the route's short name
    plus optional suffixes.

    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type userName: str
    @type networkName: str
    @type outFile: file
    """
    _outHeader("public.bus_route", userName, networkName, outFile)
    print("\"id\",\"name\",", file = outFile)

    # Remember, we are treating each route as a trip.
    for tripID in compat.listkeys(gtfsTrips):
        trip = gtfsTrips[tripID]
        append = ""
        # NOTE(review): the attribute tested and appended here was missing from the source;
        # the route's long name is assumed -- confirm against gtfs.RoutesEntry.
        if len(trip.route.longName) > 0:
            append = ": " + trip.route.longName
        if len(trip.tripHeadsign) > 0:
            append += " " + trip.tripHeadsign
        print("\"%d\",\"%s\"" % (tripID, trip.route.shortName + append),
                file = outFile)
def main(argv):
    """
    Command-line entry point: runs pathMatch() on a GPS track file against a VISTA network.

    @param argv: Argument vector. argv[1] through argv[5] are dbServer, networkName, userName,
        password and the GPS track filename.
    @type argv: list<str>
    """
    # Initialize from command-line parameters:
    if len(argv) < 6:
        # NOTE(review): the original body of this guard is not visible in the source; exiting
        # with a usage message is assumed.
        print("ERROR: Expected arguments: dbServer network user password filename", file = sys.stderr)
        sys.exit(1)
    dbServer = argv[1]
    networkName = argv[2]
    userName = argv[3]
    password = argv[4]
    filename = argv[5]

    gtfsNodesResults = pathMatch(dbServer, networkName, userName, password, filename)

    # Extract useful information:
    print("INFO: -- Final --", file = sys.stderr)
    print("INFO: Print output...", file = sys.stderr)

    datafileIDs = compat.listkeys(gtfsNodesResults)
    for datafileID in datafileIDs:
        # NOTE(review): the per-datafile output statement is not visible in the source, so
        # this loop currently emits nothing. Restore the intended dump call here.
        pass
    Takes a node set and outputs VISTA table files that report the link matches for the GDB GPS track set.
    @type gtfsNodes: list<path_engine.PathEnd>
    @type vistaGraph: graph.GraphLib  
    print("objID,datafileID,linkID,time,speed,dist,restart,lat,lng,vistaLat,vistaLng", file = outFile)

    datafileIDs = compat.listkeys(gtfsNodes)
    for datafileID in datafileIDs:
        gtfsNodeList = gtfsNodes[datafileID]
        "@type gtfsNodeList: list<path_engine.PathEnd>"
        for node in gtfsNodeList:
            "@type node: path_engine.PathEnd"
            if len(node.routeInfo) > 0:
                (vistaLat, vistaLng) = vistaGraph.gps.feet2gps(node.pointOnLink.pointX, node.pointOnLink.pointY) 

                for link in node.routeInfo:            
                    outStr = "%d,%s,%d,%s,%g,%g,%d,%g,%g,%g,%g" % (node.shapeEntry.shapeSeq, node.shapeEntry.shapeID,,
                                node.shapeEntry.time.strftime('%H:%M:%S'), node.shapeEntry.speed, node.pointOnLink.dist,
                                1 if node.restart else 0,, node.shapeEntry.lng, vistaLat, vistaLng)
                    print(outStr, file = outFile)
def dumpBusRouteLinks(gtfsTrips,
    dumpBusRouteLinks dumps out a public.bus_route_link.csv file contents. This also will remove all stop times and trips
    that fall outside of the valid evaluation interval as dictated by the exclusion parameters.
    @type gtfsTrips: dict<int, gtfs.TripsEntry>
    @type gtfsStopTimes: dict<TripsEntry, list<StopTimesEntry>>
    @type gtfsNodes: dict<int, list<path_engine.PathEnd>>
    @type vistaNetwork: graph.GraphLib
    @type stopSearchRadius: float
    @type excludeUpstream: boolean
    @type userName: str
    @type networkName: str
    @type startTime: datetime
    @type endTime: datetime
    @type widenBegin: bool
    @type widenEnd: bool
    @type excludeBegin: bool
    @type excludeEnd: bool
    @type outFile: file
    @return A mapping of stopID to points-on-links plus the start and end times adjusted for
            warm-up and cool-down (if widenBegin or widenEnd is True)
    @rtype (dict<int, graph.PointOnLink>, datetime, datetime)
    _outHeader("public.bus_route_link", userName, networkName, outFile)
    print('"route","sequence","link","stop","dwelltime",', file=outFile)

    # Set up the output:
    ret = {}
    "@type ret: dict<int, graph.PointOnLink>"

    warmupStartTime = startTime
    cooldownEndTime = endTime

    # Initialize the path engine for use later:
    pathEngine = path_engine.PathEngine(stopSearchRadius, stopSearchRadius,
                                        stopSearchRadius, sys.float_info.max,
                                        sys.float_info.max, stopSearchRadius,
                                        1, 1, 1, sys.maxsize, sys.maxsize)
    pathEngine.limitClosestPoints = 8
    pathEngine.limitSimultaneousPaths = 6
    pathEngine.maxHops = 12
    pathEngine.logFile = None  # Suppress the log outputs for the path engine; enough stuff will come from other sources.

    problemReportNodes = {}
    "@type problemReportNodes: dict<?, path_engine.PathEnd>"

    tripIDs = compat.listkeys(gtfsTrips)
    for tripID in tripIDs:
        if gtfsTrips[tripID].shapeEntries[0].shapeID not in gtfsNodes:
            # This happens if the incoming files contain a subset of all available topology.
                "WARNING: Skipping route for trip %d because no points are available."
                % tripID,

        treeNodes = gtfsNodes[gtfsTrips[tripID].shapeEntries[0].shapeID]
        "@type treeNodes: list<path_engine.PathEnd>"

        # Step 1: Find the longest distance of contiguous valid links within the shape for each trip:
        startIndex = -1
        curIndex = 0
        linkCount = 0
        totalLinks = 0

        longestStart = -1
        longestEnd = len(treeNodes)
        longestDist = sys.float_info.min
        longestLinkCount = 0

        while curIndex <= len(treeNodes):
            if (curIndex == len(treeNodes)) or (
                    curIndex == 0) or treeNodes[curIndex].restart:
                totalLinks += 1
                linkCount += 1
                if (curIndex > startIndex) and (startIndex >= 0):
                    # We have a contiguous interval.  See if it wins:
                    if treeNodes[curIndex - 1].totalDist - treeNodes[
                            startIndex].totalDist > longestDist:
                        longestStart = startIndex
                        longestEnd = curIndex
                        longestDist = treeNodes[
                            curIndex -
                            1].totalDist - treeNodes[startIndex].totalDist
                        longestLinkCount = linkCount
                        linkCount = 0

                # This happens if it is time to start a new interval:
                startIndex = curIndex
                totalLinks += len(treeNodes[curIndex].routeInfo)
                linkCount += len(treeNodes[curIndex].routeInfo)
            curIndex += 1

        if longestStart >= 0:
            # We have a valid path.  See if it had been trimmed down and report it.
            if (longestStart > 0) or (longestEnd < len(treeNodes)):
                print("WARNING: For shape ID %s from seq. %d through %d, %.2g%% of %d links will be used." \
                      % (str(treeNodes[longestStart].shapeEntry.shapeID), treeNodes[longestStart].shapeEntry.shapeSeq,
                         treeNodes[longestEnd - 1].shapeEntry.shapeSeq, 100 * float(longestLinkCount) / float(totalLinks),
                         totalLinks), file = sys.stderr)

            # Step 2: Ignore routes that are entirely outside our valid time interval.
            flag = False
            if len(gtfsStopTimes[gtfsTrips[tripID]]) == 0:
                # This will happen if we don't have stops defined. In this case, we want to go ahead and process the bus_route_link
                # outputs because we don't know if the trip falls in or out of the valid time range.
                flag = True
                for stopEntry in gtfsStopTimes[gtfsTrips[tripID]]:
                    if stopEntry.arrivalTime >= startTime and stopEntry.arrivalTime <= endTime:
                        flag = True
            if not flag:
                # This will be done silently because (depending upon the valid interval) there could be
                # hundreds of these in a GTFS set.

            # Step 3: Match up stops to that contiguous list:
            # At this point, we're doing something with this.
            print("INFO: -- Matching stops for trip %d --" % tripID,

            stopTimes = gtfsStopTimes[gtfsTrips[tripID]]
            "@type stopTimes: list<gtfs.StopTimesEntry>"

            # Isolate the relevant VISTA tree nodes: (Assume from above that this is a non-zero length array)
            ourGTFSNodes = treeNodes[longestStart:longestEnd]

            # We are going to recreate a small VISTA network from ourGTFSNodes and then match up the stops to that.
            # First, prepare the small VISTA network:
            vistaSubset = graph.GraphLib(vistaNetwork.gps.latCtr,
            vistaNodePrior = None
            "@type vistaNodePrior: graph.GraphNode"

            # Build a list of links:
            outLinkIDList = []
            "@type outLinkList: list<int>"

            # Plop in the start node:
            vistaNodePrior = graph.GraphNode(

            # Link together nodes as we traverse through them:
            for ourGTFSNode in ourGTFSNodes:
                "@type ourGTFSNode: path_engine.PathEnd"
                # There should only be one destination link per VISTA node because this comes form our tree.
                # If there is no link or we're repeating the first one, then there were no new links assigned.
                if (len(ourGTFSNode.routeInfo) < 1) or ((len(outLinkIDList) == 1) \
                        and (ourGTFSNode.routeInfo[0].id == ourGTFSNodes[0]
                for link in ourGTFSNode.routeInfo:
                    "@type link: graph.GraphLink"

                    if not in vistaNetwork.linkMap:
                            "WARNING: In finding bus route links, link ID %d is not found in the VISTA network."
                    origVistaLink = vistaNetwork.linkMap[]
                    "@type origVistaLink: graph.GraphLink"

                    if not in vistaSubset.nodeMap:
                        # Create a new node:
                        vistaNode = graph.GraphNode(
                        # The path evidently crosses over itself.  Reuse an existing node.
                        vistaNode = vistaSubset.nodeMap[

                    # We shall label our links as indices into the stage we're at in ourGTFSNodes links.  This will allow for access later.
                    if outLinkIDList[-1] not in vistaSubset.linkMap:
                            graph.GraphLink(outLinkIDList[-1], vistaNodePrior,
                    vistaNodePrior = vistaNode

            # And then finish off the graph with the last link:
            if not in vistaSubset.nodeMap:
                vistaNode = graph.GraphNode(
            if outLinkIDList[-1] not in vistaSubset.linkMap:
                    graph.GraphLink(outLinkIDList[-1], vistaNodePrior,

            # Then, prepare the stops as GTFS shapes entries:
            print("INFO: Mapping stops to VISTA network...", file=sys.stderr)
            gtfsShapes = []
            gtfsStopsLookup = {}
            "@type gtfsStopsLookup: dict<int, gtfs.StopTimesEntry>"

            # Append an initial dummy shape to force routing through the path start:
                    -1, -1, ourGTFSNodes[0],

            # Append all of the stops:
            for gtfsStopTime in stopTimes:
                "@type gtfsStopTime: gtfs.StopTimesEntry"
                    gtfs.ShapesEntry(-1, gtfsStopTime.stopSeq,
                gtfsStopsLookup[gtfsStopTime.stopSeq] = gtfsStopTime

            # Append a trailing dummy shape to force routing through the path end:
                    -1, -1, ourGTFSNodes[-1],

            # Find a path through our prepared node map subset:
            resultTree = pathEngine.constructPath(gtfsShapes, vistaSubset)
            "@type resultTree: list<path_engine.PathEnd>"

            # Strip off the dummy ends:
            del resultTree[-1]
            del resultTree[0]
            if len(resultTree) > 0:
                resultTree[0].prevTreeNode = None

            # So now we should have one tree entry per matched stop.

            # Deal with Problem Report:
            # TODO: The Problem Report will include all nodes on each path regardless of valid time interval;
            # However; we will not have gotten here if the trip was entirely outside of it.
            if problemReport:
                revisedNodeList = {}
                prevNode = None
                "@type revisedNodeList = list<path_engine.PathEnd>"
                for stopNode in resultTree:
                    # Reconstruct a tree node in terms of the original network.
                    newShape = gtfs.ShapesEntry(
                        stopNode.shapeEntry.lng, False)
                    origLink = vistaNetwork.linkMap[
                    newPointOnLink = graph.PointOnLink(
                        origLink, stopNode.pointOnLink.dist,
                    newNode = path_engine.PathEnd(newShape, newPointOnLink)
                    newNode.restart = False
                    newNode.totalCost = stopNode.totalCost
                    newNode.totalDist = stopNode.totalDist
                    newNode.routeInfo = []
                    for link in stopNode.routeInfo:
                    newNode.prevTreeNode = prevNode
                    prevNode = newNode
                    revisedNodeList[stopNode.shapeEntry.shapeSeq] = newNode
                                   shapeID] = revisedNodeList

            # Walk through our output link list and see where the resultTree entries occur:
            resultIndex = 0
            stopMatches = []
            "@type stopMatches: list<StopMatch>"
            rejectFlag = False
            for linkID in outLinkIDList:
                curResultIndex = resultIndex
                # This routine will advance resultIndex only if a stop is found for linkID, and will exit out when
                # no more stops are found for linkID.
                stopMatch = StopMatch(linkID)
                "@type stopMatch: StopMatch"
                while curResultIndex < len(resultTree):
                    if resultTree[
                            curResultIndex] == linkID:
                        # Only pay attention to this stop if it is within the valid time range:
                        gtfsStopTime = gtfsStopsLookup[
                        if excludeBegin and gtfsStopTime.arrivalTime < startTime or excludeEnd and gtfsStopTime.arrivalTime > endTime:
                            # Throw away this entire route because it is excluded and part of it falls outside:
                                "INFO: Excluded because of activity outside of the valid time range.",
                            del stopMatches[:]
                            rejectFlag = True
                        elif (widenBegin
                              or gtfsStopTime.arrivalTime >= startTime) and (
                                  or gtfsStopTime.arrivalTime <= endTime):
                            if (stopMatch.bestTreeEntry is None) \
                                    or (resultTree[resultIndex].pointOnLink.refDist < stopMatch.bestTreeEntry.pointOnLink.refDist):
                                # Log the best match:
                                stopMatch.bestTreeEntry = resultTree[
                            stopMatch.matchCtr += 1
                        resultIndex = curResultIndex + 1
                    curResultIndex += 1
                    if (stopMatch and stopMatch.matchCtr == 0) \
                            or ((curResultIndex < len(resultTree)) and (resultTree[resultIndex] == linkID)):
                    # We have gotten to the end of matched link(s).
                if rejectFlag:

            # Then, write the results out if we are supposed to.
            foundStopSet = set()
            "@type foundStopSet: set<int>"
            if not rejectFlag:
                outSeqCtr = longestStart
                minTime = warmupStartTime
                maxTime = cooldownEndTime
                foundValidStop = False
                for stopMatch in stopMatches:
                    if stopMatch.matchCtr > 1:
                        # Report duplicates:
                        print("WARNING: %d stops have been matched for TripID %d, LinkID %d. Keeping Stop %d, Stop Seq %d" \
                            % (stopMatch.matchCtr, tripID, stopMatch.linkID, gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,
                            stopMatch.bestTreeEntry.shapeEntry.shapeSeq), file = sys.stderr)
                        # TODO: This is a problem because VISTA only allows one stop per link. So, the stop that is closest to
                        # the link wins and the rest are ignored. We don't yet do anything intelligent with dwell
                        # times, etc.
                    if stopMatch.matchCtr > 0:
                        # Report the best match:
                        )  # Check off this stop sequence.
                        foundValidStop = True
                            '"%d","%d","%d","%d","%d",' %
                            (tripID, outSeqCtr, stopMatch.linkID,
                        if gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID in ret \
                                and ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID] \
                            print("WARNING: stopID %d is attempted to be assigned to linkID %d, but it had already been assigned to linkID %d." \
                                % (gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID,,
                                   ret[gtfsStopsLookup[stopMatch.bestTreeEntry.shapeEntry.shapeSeq].stop.stopID], file = sys.stderr)
                            # TODO: This is a tricky problem. It means that, among multiple bus routes, the same stop has been
                            # found to best fit two different links. I don't know the best way to resolve this, other
                            # than (for NMC analyses) to create a "fake" stop that's tied to the new link.
                                stopID] = stopMatch.bestTreeEntry.pointOnLink

                        # Check on the minimum/maximum time range:
                        gtfsStopTime = gtfsStopsLookup[
                        minTime = min(gtfsStopTime.arrivalTime, minTime)
                        maxTime = max(gtfsStopTime.arrivalTime, maxTime)
                        # The linkID has nothing to do with any points in consideration.  Report it without a stop:
                        if foundValidStop or not excludeUpstream:
                            print('"%d","%d","%d",,,' %
                                  (tripID, outSeqCtr, stopMatch.linkID),
                    outSeqCtr += 1
                    # TODO: For start time estimation (as reported in the public.bus_frequency.csv output), it may be
                    # ideal to keep track of linear distance traveled before the first valid stop.

                # Widen out the valid interval if needed:
                warmupStartTime = min(minTime, warmupStartTime)
                cooldownEndTime = max(maxTime, cooldownEndTime)

            # Are there any stops left over?  If so, report them to say that they aren't in the output file.
            startGap = -1
            endGap = -1
            for gtfsStopTime in stopTimes:
                "@type gtfsStopTime: gtfs.StopTimesEntry"
                flag = False
                if gtfsStopTime.stopSeq not in foundStopSet:
                    # This stop is unaccounted for:
                    if startGap < 0:
                        startGap = gtfsStopTime.stopSeq
                    endGap = gtfsStopTime.stopSeq

                    # The old per-stop message is very noisy, especially if the underlying topology is a subset of the shapefile's
                    # geographic area and there are many such stops. That's why the new range message shown below is used instead.
                    # print("WARNING: Trip tripID %d, stopID %d stop seq. %d will not be in the bus_route_link file." % (tripID,
                    #    gtfsStopTime.stop.stopID, gtfsStopTime.stopSeq), file = sys.stderr)

                    if problemReport:
                        revisedNodeList = problemReportNodes[
                        if gtfsStopTime.stopSeq not in revisedNodeList:
                            # Make a dummy "error" node for reporting.
                            newShape = gtfs.ShapesEntry(
                                gtfsStopTime.stopSeq, gtfsStopTime.stop.gpsLat,
                                gtfsStopTime.stop.gpsLng, False)
                            newPointOnLink = graph.PointOnLink(None, 0)
                            newPointOnLink.pointX = gtfsStopTime.stop.pointX
                            newPointOnLink.pointY = gtfsStopTime.stop.pointY
                            newNode = path_engine.PathEnd(
                                newShape, newPointOnLink)
                            newNode.restart = True
                            revisedNodeList[gtfsStopTime.stopSeq] = newNode
                    flag = True
                if (flag or gtfsStopTime.stopSeq
                        == stopTimes[-1].stopSeq) and startGap >= 0:
                    subStr = "Seqs. %d-%d" % (
                        endGap) if startGap != endGap else "Seq. %d" % startGap
                        "WARNING: Trip ID %d, Stop %s will not be in the bus_route_link file."
                        % (tripID, subStr),
                    startGap = -1
            print("WARNING: No links for tripID %d." % tripID, file=sys.stderr)

    # Deal with Problem Report:
    if problemReport:
        print("INFO: Output problem report CSV...", file=sys.stderr)
        problemReportNodesOut = {}
        for shapeID in problemReportNodes:
            seqs = compat.listkeys(problemReportNodes[shapeID])
            ourTgtList = []
            for seq in seqs:
            problemReportNodesOut[shapeID] = ourTgtList
        problem_report.problemReport(problemReportNodesOut, vistaNetwork)

    return (ret, warmupStartTime, cooldownEndTime)
