Example #1
def read_realtime_logs(filenames):
    # Yield records from each log file, skipping files that fail to parse
    for fn in filenames:
        try:
            for r in commons.zipjson_load(fn):
                yield r
        except json.decoder.JSONDecodeError as e:
            commons.logger.warning("Could not read {} ({})".format(fn, e))
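
Every example on this page centers on commons.zipjson_load, whose source is not shown. The sketch below is a guess at its behavior, inferred from usage elsewhere on the page (Example 10 expects gzip-compressed JSON, an EOFError on empty files, and a RuntimeError from insist=True on uncompressed input); treat all details as assumptions.

import gzip
import json

def zipjson_load(fn, insist=False):
    # Hypothetical stand-in for commons.zipjson_load (assumption)
    with open(fn, 'rb') as fd:
        raw = fd.read()
    if not raw:
        raise EOFError("{} is empty".format(fn))
    try:
        raw = gzip.decompress(raw)
    except OSError:
        # Not gzip data; plain JSON is accepted unless insist=True
        if insist:
            raise RuntimeError("{} is not compressed".format(fn))
    return json.loads(raw.decode('utf-8'))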
Example #2
def img_transit():

    # Render each transit-map JSON file into a PNG of the same name
    for fn in commons.ls(OFILE['transit_map'].format(uuid="*", ext="json")):
        # Load first, so a bad JSON file does not truncate an existing image
        J = commons.zipjson_load(fn)
        with open(commons.reformat(OFILE['transit_map'], fn, {'ext': "png"}), 'wb') as fd:
            fd.write(make_transit_img(J))
Example #3
def segment_by_route():

    # A "case" is the result of "RUN_KEY", i.e. a pair (routeid, direction)

    commons.logger.info("Collecting cases...")

    # Associate to each case a list of files that contain instances of it
    # case_directory : run_key --> list of filenames
    case_directory = {
        case: set(busfile for (_, busfile) in g)
        for (case, g) in groupby(
            sorted(
                (RUN_KEY(s), busfile)
                for busfile in commons.ls(IFILE['segment_by_bus'].format(busid="*", ext="json"))
                for s in commons.zipjson_load(busfile)
            ),
            key=(lambda r: r[0])
        )
    }
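
The comprehension above is compact but dense; it is equivalent to this plain loop (a sketch reusing commons, IFILE and RUN_KEY from the example):

case_directory = {}
for busfile in commons.ls(IFILE['segment_by_bus'].format(busid="*", ext="json")):
    for s in commons.zipjson_load(busfile):
        case_directory.setdefault(RUN_KEY(s), set()).add(busfile)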
Example #4
    def keep_ttfile(fn):
        # Early 'return True' disables the bbox filter below (unreachable code)
        return True

        J = commons.zipjson_load(fn)

        # "Inner" Kaohsiung
        bbox = (120.2593, 22.5828, 120.3935, 22.6886)
        (left, bottom, right, top) = bbox

        # Stop coordinates as numpy arrays
        (lat, lon) = map(
            np.asarray,
            zip(*map(
                commons.inspect(
                    {'StopPosition': ('PositionLat', 'PositionLon')}),
                J['route']['Stops'])))

        # Built-in all() cannot reduce boolean arrays; use np.all instead
        return bool(np.all([bottom <= lat, lat <= top, left <= lon, lon <= right]))
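
commons.inspect appears throughout these examples but is not defined here; judging from calls like inspect({'StopName': 'Zh_tw'}) and the one above, it builds an extractor for nested fields. A guessed minimal equivalent:

def inspect(spec):
    # Hypothetical reconstruction (assumption): a single outer key mapping
    # to one inner key, or to a tuple of inner keys, to extract
    ((outer, inner),) = spec.items()
    if isinstance(inner, tuple):
        return (lambda d: tuple(d[outer][k] for k in inner))
    return (lambda d: d[outer][inner])

# inspect({'StopName': 'Zh_tw'})(stop) would give stop['StopName']['Zh_tw']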
Example #5
def generate_timetables():
    motc_routes = commons.index_dicts_by_key(
        commons.zipjson_load(IFILE['MOTC_routes'].format(City="Kaohsiung")),
        ROUTE_KEY)

    run_files = commons.ls(IFILE['segment_by_route'].format(scenario="**",
                                                            routeid="*",
                                                            dir="*"))
    print("Found {} route files.".format(len(run_files)))

    for run_file in run_files:
        print("===")
        print("Analyzing route file {}.".format(run_file))

        (scenario, routeid, dir) = re.fullmatch(
            IFILE['segment_by_route'].format(scenario="(.*)",
                                             routeid="(.*)",
                                             dir="(.*)"), run_file).groups()
        case = {'scenario': scenario, 'routeid': routeid, 'dir': int(dir)}
        print("Route: {routeid}, direction: {dir} (from scenario: {scenario})".
              format(**case))

        # # DEBUG: trouble case
        # if not ((routeid, int(dir)) == ('KHH116', 1)) : continue

        fn_output = commons.makedirs(OFILE['timetable_json'].format(**case))

        if os.path.isfile(fn_output):
            print("Output file exists ({}) -- skipping".format(fn_output))
            continue

        # Load all bus run segments for this case
        runs = commons.zipjson_load(run_file)
        print("Number of runs: {} ({})".format(len(runs), "total"))

        runs = [run for run in runs if (run.get('quality') == "+")]

        print("Number of runs: {} ({})".format(len(runs), "quality"))

        try:
            route = motc_routes[(case['routeid'], case['dir'])]
            stops = route['Stops']
        except KeyError:
            print(
                "Warning: No stops info for route {routeid}, direction {dir}".
                format(**case))
            continue

        # ETA table of Busrun x Stop
        ETA = np.vstack(
            Parallel(n_jobs=PARAM['n_parallel_jobs'])(
                delayed(bus_at_stops)(run, stops) for run in progressbar(runs))
            # bus_at_stops(run, stops) for run in progressbar(runs)
        )

        # 2018-12-12: pandas does not digest dt.datetime with timezones
        # https://github.com/pandas-dev/pandas/issues/13287
        # Note: datetime64 automatically converts to UTC,
        # i.e. these are the same:
        #     np.datetime64(dt.datetime.utcnow())
        #     np.datetime64(dt.datetime.now().astimezone())
        ETA = ETA.astype('datetime64[ms]')

        # Timetable as DataFrame
        df = pd.DataFrame(data=ETA, columns=[s['StopUID'] for s in stops])

        J = {
            'case': case,
            'route': route,
            'run_file': run_file,
            'timetable_df': df.to_json(),
        }

        with open(fn_output, 'w') as fd:
            json.dump(J, fd)
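
Reading a dumped timetable back might look like this (a usage sketch; fn_output and the key names as above, with a to_json/read_json round-trip assumed):

with open(fn_output, 'r') as fd:
    J = json.load(fd)

# Columns are StopUIDs; entries are the ETAs as epoch milliseconds (UTC)
df = pd.read_json(J['timetable_df'])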
Example #6
def match_routes() :

	# MOTC route info
	motc_routes = commons.index_dicts_by_key(commons.zipjson_load(IFILE['MOTC_routes']), (lambda r: r['SubRouteUID']))

	# for (route_id, route) in route_stops.items() :
	# 	stops = dict(zip(route['Direction'], route['Stops']))

	OSM = pickle.load(open(IFILE['OSM'], 'rb'))

	for (route_id, route) in OSM['rels']['route'].items():

		# Skip non-bus routes
		if not (route['t'].get('route') == 'bus'): continue

		# Note: most routes have relations in route['r']

		(route_tags, route_stops, route_ways) = (route['t'], route['n'], route['w'])

		# https://wiki.openstreetmap.org/wiki/Buses
		route_name = route_tags['name']

		# Common routines

		def strip_brackets(s):
			# Keep the leading word, dropping a trailing bracketed suffix
			return re.match(r'(?P<name>\w+)[ ]*(?P<extra>\(\w+\))*', s).group('name')

		def matchratio_stop_names(name1, name2):
			return difflib.SequenceMatcher(None, strip_brackets(name1), strip_brackets(name2)).ratio()

		# Method 0: Match route names

		# The six best matches among the MOTC routes, by route name
		top_namematch_motc_ids = sorted(
			motc_routes.keys(),
			key=(lambda j : matchratio_stop_names(route_name, motc_routes[j]['RouteName']['Zh_tw'])),
			reverse=True
		)[0:6]

		#print("Route {} best matches: {}".format(route_name, ",".join([motc_routes[j]['RouteName']['Zh_tw'] for j in top_namematch_motc_ids])))


		# Method 1: Match route start/end stops

		def zip_listify(a, b) :
			return zip(a, b) if (type(a) is list) else zip([a], [b])

		try :
			(route_a, route_b) = (route_tags['from'], route_tags['to'])

			def matchratio_ab(motc_route) :
				# motc_name = motc_route['RouteName']['Zh_tw']
				for (dir, stops) in zip_listify(motc_route['Direction'], motc_route['Stops']) :

					(motc_a, motc_b) = map(commons.inspect({'StopName' : 'Zh_tw'}), [stops[0], stops[-1]])

					ab_ratio = (matchratio_stop_names(route_a, motc_a) + matchratio_stop_names(route_b, motc_b)) / 2
					assert((0 <= ab_ratio) and (ab_ratio <= 1))

					yield (ab_ratio, { 'SubRouteUID' : motc_route['SubRouteUID'], 'Direction' : dir })

			ab_matchratios = sorted(
				chain.from_iterable([
					matchratio_ab(motc_routes[j]) for j in top_namematch_motc_ids
				]),
				key=(lambda p: p[0]), reverse=True
			)

			print(route_name, ab_matchratios)

		except KeyError :
			# Raised when the route lacks 'from'/'to' tags
			#print("Method 1 failed on route {}".format(route_name))
			continue

		#print(route_tags)

		# Note: this unconditional 'continue' disables everything below (Method 2)
		continue


		if (len(route_stops) < 2) :
			#print("Route {} has fewer than two stops".format(route_name))
			#print(route_ways)
			continue

		# Method 2: Match all stops

		# Get stop info
		if not all(OSM['node_tags'].get(i) for i in route_stops) :
			print("Nodes of route {} not found".format(route_tags['name']))
			continue

		route_stops = {
			i : OSM['node_tags'].get(i)
			for i in route_stops
		}

		print(route_stops)


		#print(route['n'])
		#time.sleep(1)

	#
	# 	route_name = route['t'].get('name')
	#
	# 	route_ref = route['t']['ref']
	# 	#if (route_ref == '88') :
	# 	print(route_name, route_id, route['t'])
	# exit(39)


	return
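
For reference, the matching score behind matchratio_stop_names is difflib's ratio, 2*M/T for M matched characters out of T total characters, always in [0, 1]:

import difflib

# Two names differing in one trailing character: M = 3, T = 8
difflib.SequenceMatcher(None, "abcd", "abce").ratio()  # -> 0.75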
Example #7
    for ((scenario, routeid, dir), files) in files_by_case.items():
        commons.logger.info("===")
        commons.logger.info(
            "Mapping route {}, direction {} (from scenario '{}')...".format(
                routeid, dir, scenario))

        try:

            if not files:
                commons.logger.warning("No mapmatch files to distill")
                continue

            # Load map-matched variants
            sources = {
                fn: preprocess_source(commons.zipjson_load(fn))
                for fn in files
            }

            commons.logger.info(
                "Number of sources before quality filter: {}".format(
                    len(sources)))

            # Quality filter
            def is_qualified(src):
                return len(src['waypoints_used']) >= PARAM['quality_min_wp/src']

            # Filter quality
            sources = {
Example #8
def mapmatch_all():

    commons.seed()

    PARAM['graph_bbox'] = maps.bbox_for_points(
        nx.get_node_attributes(
            trim_graph_to_busable(
                pickle.load(open(IFILE['OSM_graph_file'],
                                 'rb'))['main_component_with_knn']['g']),
            'pos').values())

    for route_file_template in IFILE['segment_by_route']:

        route_files = commons.ls(
            route_file_template.format(scenario="**",
                                       routeid="*",
                                       direction="*"))

        commons.logger.info(
            "Route file template: {}".format(route_file_template))
        commons.logger.info("Found {} route files".format(len(route_files)))

        for route_file in route_files:
            # time.sleep(2)

            commons.logger.info("===")
            commons.logger.info("Analyzing route file {}.".format(route_file))

            case = commons.unformat(route_file_template, route_file)
            commons.logger.info(
                "Route: {routeid}, direction: {direction} (from scenario: {scenario})"
                .format(**case))

            # # DEBUG
            # if not ("KHH239-0" == "{routeid}-{direction}".format(**case)) :
            # 	continue

            # Load all bus run segments for this case
            runs = commons.zipjson_load(route_file)
            commons.logger.info("Number of runs: {} ({})".format(
                len(runs), "total"))

            # Check that the file indeed contains only one type of route
            assert ({(case['routeid'], int(case['direction']))
                     } == set(RUN_KEY(r) for r in runs))

            # Remove runs that have a negative quality flag
            runs = [run for run in runs if not (run.get('quality') == "-")]
            commons.logger.info("Number of runs: {} ({})".format(
                len(runs), "not marked as bad quality"))

            # Keep only runs within the map
            runs = [
                run for run in runs if all(
                    is_in_map(*p) for p in run[KEYS.pos])
            ]
            commons.logger.info("Number of runs: {} ({})".format(
                len(runs), "within the map bbox"))

            if (len(runs) > PARAM['max_runs_to_mapmatch']):
                commons.logger.info(
                    "Out of {} available runs, will mapmatch only random {}".
                    format(len(runs), PARAM['max_runs_to_mapmatch']))
                runs = commons.random_subset(runs,
                                             k=PARAM['max_runs_to_mapmatch'])

            if (len(runs) < PARAM['min_runs_to_mapmatch']):
                commons.logger.warning("Skipping mapmatch: too few runs.")
                continue

            # Q: clustering here?

            # Existing mapmatched runs for this route
            existing = commons.ls(OFILE['mapmatched'].format(**case,
                                                             mapmatch_uuid="*",
                                                             ext="json"))

            if existing:
                commons.logger.warning(
                    "Skipping mapmatch: {} mapmatched files found".format(
                        len(existing)))
                continue

            try:

                mapmatch_runs(case['scenario'], runs)

            except Exception as e:

                commons.logger.error("Mapmatch failed ({}) \n{}".format(
                    e, traceback.format_exc()))
                commons.logger.warning(
                    "Mapmatch incomplete on route {routeid}-{direction} from scenario '{scenario}'"
                    .format(**case))
                time.sleep(5)
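
commons.unformat, which recovers the case fields from the file name above, is presumably the inverse of str.format on a template with named fields; Example 5 does the same by hand with re.fullmatch. A minimal sketch of the idea (all details assumed):

import re
import string

def unformat(template, s):
    # Hypothetical reconstruction (assumption): turn each {field} into a
    # named capture group, then match the whole string against it
    fields = [f for (_, f, _, _) in string.Formatter().parse(template) if f]
    pattern = re.escape(template)
    for f in fields:
        pattern = pattern.replace(re.escape("{" + f + "}"), "(?P<{}>.*)".format(f))
    return re.fullmatch(pattern, s).groupdict()

# unformat("run_{routeid}_{direction}.json", "run_KHH239_0.json")
# -> {'routeid': 'KHH239', 'direction': '0'}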
Example #9
def vis1() :

	# OSM = pickle.load(open(IFILE['OSM'], 'rb'))
	# for (route_id, route) in OSM['rels']['route'].items():
	# 	# Skip non-bus routes
	# 	if not (route['t'].get('route') == 'bus'): continue
	#
	# 	route_name = route['t'].get('name')
	#
	# 	route_ref = route['t']['ref']
	# 	#if (route_ref == '88') :
	# 	print(route_name, route_id, route['t'])
	# exit(39)

	routeid_of = (lambda r: r['SubRouteUID'])

	# List of filenames, one file per physical bus, identified by plate number
	bus_files = commons.ls(IFILE['busses'].format(busid="*"))

	# Refile bus runs by their route ID
	runs_by_route = defaultdict(list)
	for fn in bus_files :
		runs = commons.zipjson_load(fn)
		for run in runs :
			runs_by_route[routeid_of(run)].append(run)

	#
	route_stops = commons.index_dicts_by_key(commons.zipjson_load(IFILE['route-stops']), routeid_of)

	# Are these valid route IDs that can be found among the routes?
	unknown_route_ids = sorted(set(runs_by_route.keys()) - set(route_stops.keys()))

	if unknown_route_ids :
		print("The following route IDs from bus records are unknown:")
		print(", ".join(unknown_route_ids))
		raise KeyError("Unknown route IDs in bus records")

	#

	route_uid = 'KHH24'

	runs = runs_by_route[route_uid]
	route = route_stops[route_uid]

	# Kaohsiung (left, bottom, right, top)
	bbox = (120.2593, 22.5828, 120.3935, 22.6886)
	(left, bottom, right, top) = bbox

	# Download the background map
	i = maps.get_map_by_bbox(bbox, token=PARAM['mapbox_api_token'])

	# Show the background map
	(fig, ax) = plt.subplots()
	plt.ion()
	ax.axis([left, right, bottom, top])
	ax.imshow(i, extent=(left, right, bottom, top), interpolation='quadric')

	#fig.canvas.draw_idle()

	plt.pause(0.1)


	stops_by_direction = dict(zip(route['Direction'], route['Stops']))

	# Draw stops for both route directions
	for (dir, stops) in stops_by_direction.items() :

		# Stop locations
		(y, x) = zip(*[
			commons.inspect({'StopPosition': ('PositionLat', 'PositionLon')})(stop)
			for stop in stops
		])

		# Plot as dots
		ax.scatter(x, y, c=('b' if dir else 'g'), marker='o', s=4)


	# Show bus location

	for run in runs :

		# Trace bus
		(y, x) = (run['PositionLat'], run['PositionLon'])
		h1 = ax.plot(x, y, '--+', c='r', linewidth=1)
		h2 = ax.plot(x[0], y[0], 'o', c='r')
		h3 = ax.plot(x[-1], y[-1], 's', c='r')

		plt.title(run['PlateNumb'])

		#plt.savefig("{}.png".format(route_uid), dpi=180)
		plt.pause(0.1)

		bus_at_stops(run, stops_by_direction[run['Direction']])

		plt.pause(0.1)
		for h in [h1, h2, h3] :
			h[0].remove()

	return
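
commons.index_dicts_by_key (used here and in Examples 5, 6 and 12) is also not shown on this page. Below is a sketch consistent with its usage: dicts sharing a key are merged field-by-field, repeated identical values collapse to one, and differing values stay as lists (hence route['Direction'] and route['Stops'] above being zippable lists). All details are guesses:

def index_dicts_by_key(dicts, key_func, collapse_repetitive=True):
    # Hypothetical reconstruction (assumption), not the actual helper
    groups = {}
    for d in dicts:
        groups.setdefault(key_func(d), []).append(d)
    merged = {}
    for (k, group) in groups.items():
        keys = {f for d in group for f in d}
        fields = {f: [d[f] for d in group if f in d] for f in keys}
        if collapse_repetitive:
            fields = {
                f: (v[0] if all(x == v[0] for x in v) else v)
                for (f, v) in fields.items()
            }
        merged[k] = fields
    return merged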
Example #10
def compress():
    realtime_files = commons.ls(IFILE['realtime'].format(city=PARAM['city'],
                                                         date="*",
                                                         time="*"))
    #commons.logger.debug(realtime_files)

    # Allow for pending write operations
    time.sleep(1)

    if True:

        # Brutal compression step
        commons.logger.info("COMPRESSION 0: Zip all")

        for fn in commons.progressbar(realtime_files):
            try:
                # See if file is in a valid format
                commons.zipjson_load(fn)

                try:
                    commons.zipjson_load(fn, insist=True)
                    # commons.logger.info("File {}: compressed already".format(fn))
                except RuntimeError:
                    commons.zipjson_dump(commons.zipjson_load(fn), fn)
                    commons.logger.info("File {}: compressed".format(fn))
                except Exception:
                    commons.logger.exception(
                        "File {}: unexpected error".format(fn))

            except Exception:
                commons.logger.warning("File {}: reading error".format(fn))

    if False:

        commons.logger.info(
            "COMPRESSION I: Remove duplicates in back-to-back records")

        for (fn1, fn2) in zip(realtime_files[:-1], realtime_files[1:]):

            def hashable(J):
                assert (type(J) is list)
                return list(map(json.dumps, J))

            def unhashable(J):
                assert (type(J) is list)
                return list(map(json.loads, J))

            try:
                J1 = set(hashable(commons.zipjson_load(fn1)))
                J2 = set(hashable(commons.zipjson_load(fn2)))
            except EOFError:
                # Raised by zipjson_load if a file is empty
                continue
            except Exception as e:
                commons.logger.warning("Cannot open {}/{} ({})".format(
                    fn1, fn2, e))
                continue

            if not J1.intersection(J2):
                continue

            J1 = J1.difference(J2)

            J1 = list(unhashable(list(J1)))
            J2 = list(unhashable(list(J2)))

            commons.logger.info("Compressing {}".format(fn1))
            commons.zipjson_dump(J1, fn1)

    if False:

        commons.logger.info(
            "COMPRESSION II: Remove redundancies from individual records")

        unknown_subroutes = set()

        # Route meta
        R = commons.zipjson_load(IFILE['routes'].format(city=PARAM['city']))

        # Reindex by subroute-direction
        S = defaultdict(dict)
        for r in R:
            for s in r['SubRoutes']:
                sid = s['SubRouteUID']
                dir = s['Direction']
                assert (dir not in S[sid])
                S[sid][dir] = s
        #
        S = dict(S)

        # Reindex by RouteUID
        assert (commons.all_distinct([g['RouteUID'] for g in R]))
        R = {g['RouteUID']: g for g in R}

        def remove_single_route_redundancies(j):

            subroute_id = j['SubRouteUID']

            if not (subroute_id in S):
                if not (subroute_id in unknown_subroutes):
                    commons.logger.warning(
                        "Unknown subroute {} [warning will not be repeated]".
                        format(subroute_id))
                    unknown_subroutes.add(subroute_id)
                return j

            assert (j['Direction'] in S[subroute_id])
            s = S[subroute_id][j['Direction']]

            for key in ['SubRouteName', 'SubRouteID']:
                if key in j:
                    if (j[key] == s[key]):
                        del j[key]
                    else:
                        # commons.logger.warning("Unexpected attribute value {}={}".format(key, j[key]))
                        pass

            if ('RouteUID' in j):
                route_id = j['RouteUID']
                assert (route_id in R)
                r = R[route_id]

                for key in ['RouteName', 'RouteID']:
                    if key in j:
                        if not (j[key] == r[key]):
                            commons.logger.warning(
                                "Unexpected attribute value {}={}".format(
                                    key, j[key]))
                        else:
                            del j[key]

                if (j['RouteUID'] == j['SubRouteUID']):
                    del j['RouteUID']

            assert ('GPSTime' in j)

            for key in ['SrcUpdateTime', 'UpdateTime']:
                if key in j:
                    del j[key]

            # Note:
            #  - we keep the 'OperatorID' field, even if s['OperatorIDs'] has length 1
            #  - of the time stamps, we keep 'GPSTime' which is the bus on-board time

            return j

        for fn in realtime_files:
            try:
                J = commons.zipjson_load(fn)
            except EOFError:
                commons.logger.warning("{} appears empty".format(fn))
                continue
            except Exception:
                commons.logger.warning("Failed to open {}".format(fn))
                continue

            b = len(json.dumps(J))  # Before compression

            try:
                J = list(map(remove_single_route_redundancies, J))
            except ValueError as e:
                commons.logger.exception("ValueError at {} -- {}".format(
                    fn, e))
                continue
            except AssertionError as e:
                commons.logger.exception("Assertion error at {} -- {}".format(
                    fn, e))
                continue
            except Exception as e:
                commons.logger.exception(
                    "Warning: Compression attempt failed for {} -- {}".format(
                        fn, e))
                continue

            # J = remove_global_route_redundancies(J)
            a = len(json.dumps(J))  # After compression

            assert (a <= b)
            if (a == b): continue

            commons.logger.info("Compressing {}".format(fn))
            commons.zipjson_dump(J, fn)

    commons.logger.info("DONE")
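
The hashable()/unhashable() pair in COMPRESSION I makes dicts set-compatible by round-tripping through their JSON text; the same trick in miniature:

import json

records = [{'a': 1}, {'a': 1}, {'b': 2}]
unique = list(map(json.loads, set(map(json.dumps, records))))
# -> [{'a': 1}, {'b': 2}] in some order; note this relies on identical
# key order (json.dumps(..., sort_keys=True) would be more robust)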
Example #11
    # # Filter to specific routes (DEBUG)
    # commons.logger.warning("Filtering the case directory")
    # case_directory = {
    # 	case : files
    # 	for (case, files) in case_directory.items()
    # 	# DEBUG:
    # 	if (case[0] in ["KHH239"])
    # }

    #
    for (case, files) in sorted(case_directory.items(),
                                key=(lambda cf: -len(cf[1]))):
        # Tag each segment of this case with a quality flag
        segments = [
            {**s, PARAM['quality_key']: ("+" if is_run_acceptable(s) else "-")}
            for busfile in files
            for s in commons.zipjson_load(busfile)
            if (RUN_KEY(s) == case)
        ]

        if not segments:
            commons.logger.warning(
                "No valid bus runs found for {}".format(case))
            continue

        fn = OFILE['segment_by_route'].format(
            ext="json",
            **{k: segments[0].get(K) for (k, K) in KEYS.items()})

        with open(fn, 'w') as fd:
            json.dump(segments, fd)
Example #12
def debug_compare_two():
    uuids = [
        "16b767f12ac841fea47ad9b735df1504", "69e47ef6a81a4a3aae0529b8b974896b"
    ]
    (J1, J2) = (commons.zipjson_load(OFILE['transit_map'].format(uuid=uuid,
                                                                 ext="json"))
                for uuid in uuids)

    o = tuple(J1['origin']['x'])
    assert (J1['origin'] == J2['origin'])

    (H1, H2) = ({}, {})
    (O1, O2) = ({}, {})
    for (J, H, O) in zip([J1, J2], [H1, H2], [O1, O2]):
        # Location --> Transit time in minutes ; keep track of duplicates
        J['gohere'] = commons.index_dicts_by_key(
            J['gohere'],
            key_func=(lambda __: tuple(__['x'])),
            collapse_repetitive=False)
        # Keep the *time* field
        H.update({x: attr['s'] for (x, attr) in J['gohere'].items()})
        # Keep the *origin* field
        O.update({x: attr['o'] for (x, attr) in J['gohere'].items()})

    # The two datasets cover the same geo-locations
    assert (set(H1) == set(H2))

    X = sorted([x for x in H1 if (set(H1[x]) != set(H2[x]))],
               key=(lambda x: sum(H1[x]) + sum(H2[x])))
    # commons.logger.debug("Earliest differing location: {}".format(X[0]))

    for x in X[0:4]:

        g1 = nx.DiGraph()
        g2 = nx.DiGraph()

        def retrace(O, g, x):
            # Rebuild the predecessor subgraph that leads into location x
            for o in O[x]:
                if o:
                    o = tuple(o)
                    if not g.has_edge(o, x):
                        g.add_edge(o, x)
                        retrace(O, g, o)
            g.nodes[x]['xy'] = ll2xy(x)

        retrace(O1, g1, x)
        retrace(O2, g2, x)

        commons.logger.debug("Graph 1: {}".format(g1.nodes))
        commons.logger.debug("Graph 2: {}".format(g2.nodes))

        import matplotlib as mpl
        mpl.use("TkAgg")

        import matplotlib.pyplot as plt
        (fig, ax) = plt.subplots()

        # # "Inner" Kaohsiung
        # bbox = (120.2593, 22.5828, 120.3935, 22.6886)
        # # Set plot view to the bbox
        # ax.axis(maps.mb2ax(*bbox))
        # ax.autoscale(enable=False)

        nx.draw_networkx(g1,
                         ax=ax,
                         pos=nx.get_node_attributes(g1, 'xy'),
                         edge_color='b',
                         node_size=1,
                         with_labels=False)
        nx.draw_networkx(g2,
                         ax=ax,
                         pos=nx.get_node_attributes(g2, 'xy'),
                         edge_color='g',
                         node_size=1,
                         with_labels=False)

        plt.show()
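
ll2xy, used by retrace above, is not defined on this page; presumably it just turns a (lat, lon) tuple into (x, y) plot coordinates, something like:

# Hypothetical stand-in (assumption): matplotlib wants x=lon, y=lat
ll2xy = (lambda latlon: (latlon[1], latlon[0]))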