def read_json_network(network_path: str, epsg: str):
    """
    Reads Network graph from JSON file.
    :param network_path: path to json or geojson network file
    :param epsg: projection for the network, e.g. 'epsg:27700'
    :return: genet.Network object
    """
    logging.info(f'Reading Network from {network_path}')
    with open(network_path) as json_file:
        json_data = json.load(json_file)

    # Node geometry is discarded; nodes carry their coordinates in other fields.
    for node_data in json_data['nodes'].values():
        node_data.pop('geometry', None)

    for link_data in json_data['links'].values():
        # Link geometry is stored as an encoded polyline; decode it back to a
        # shapely LineString when present.
        if 'geometry' in link_data:
            link_data['geometry'] = spatial.decode_polyline_to_shapely_linestring(link_data['geometry'])
        # Modes are serialised as a comma-separated string; recover the set.
        if 'modes' in link_data:
            link_data['modes'] = set(link_data['modes'].split(','))

    network = core.Network(epsg=epsg)
    network.add_nodes(json_data['nodes'])
    network.add_links(json_data['links'])
    network.change_log = change_log.ChangeLog()
    return network
def read_geojson_network(nodes_path: str, links_path: str, epsg: str):
    """
    Reads Network graph from GeoJSON nodes and links files.
    :param nodes_path: path to geojson network nodes file
    :param links_path: path to geojson network links file
    :param epsg: projection for the network, e.g. 'epsg:27700'
    :return: genet.Network object
    """
    logging.info(f'Reading Network nodes from {nodes_path}')
    nodes = gpd.read_file(nodes_path)
    # the geometry column is dropped; node coordinates presumably come from
    # the remaining attribute columns (e.g. x/y) — TODO confirm against writer
    nodes = nodes.drop('geometry', axis=1)
    # normalise IDs to strings (round-trip via int strips float artefacts like '1.0')
    nodes['id'] = nodes['id'].astype(int).astype(str)
    nodes = nodes.set_index('id', drop=False)
    if 'index' in nodes.columns:
        nodes = nodes.drop('index', axis=1)

    logging.info(f'Reading Network links from {links_path}')
    # NOTE(review): only links are re-projected to `epsg`; nodes are not —
    # verify this asymmetry is intended
    links = gpd.read_file(links_path).to_crs(epsg)
    # modes are serialised as a comma-separated string; recover the set
    links['modes'] = links['modes'].apply(lambda x: set(x.split(',')))
    links['id'] = links['id'].astype(int).astype(str)
    links = links.set_index('id', drop=False)
    if 'index' in links.columns:
        links = links.drop('index', axis=1)

    n = core.Network(epsg=epsg)
    n.add_nodes(nodes.T.to_dict())
    n.add_links(links.T.to_dict())
    n.change_log = change_log.ChangeLog()
    return n
def schedule_graph():
    """Builds a fully-specified Schedule DiGraph test fixture.

    Two services, each with two single-hop routes:
      - service2: routes '3' and '4' (mode 'rail') over stops 3 -> 4 -> 5
      - service1: routes '1' and '2' (mode 'bus') over stops 0 -> 1 -> 2

    Route/service metadata lives on the graph's attributes; stops are nodes
    and consecutive stop pairs are directed edges.

    :return: networkx DiGraph with schedule data attached
    """
    graph = DiGraph(
        name='Schedule Graph',
        routes={'4': {'ordered_stops': ['4', '5'], 'route_short_name': 'route4', 'mode': 'rail',
                      'trips': {'trip_id': ['route4_05:40:00'], 'trip_departure_time': ['05:40:00'],
                                'vehicle_id': ['veh_0_bus']},
                      'arrival_offsets': ['00:00:00', '00:03:00'], 'departure_offsets': ['00:00:00', '00:05:00'],
                      'route_long_name': '', 'id': '4', 'route': ['4', '5'], 'await_departure': []},
                '3': {'ordered_stops': ['3', '4'], 'route_short_name': 'route3', 'mode': 'rail',
                      'trips': {'trip_id': ['route3_04:40:00'], 'trip_departure_time': ['04:40:00'],
                                'vehicle_id': ['veh_1_bus']},
                      'arrival_offsets': ['00:00:00', '00:02:00'], 'departure_offsets': ['00:00:00', '00:02:00'],
                      'route_long_name': '', 'id': '3', 'route': ['3', '4'], 'await_departure': []},
                '1': {'ordered_stops': ['0', '1'], 'route_short_name': 'route1', 'mode': 'bus',
                      'trips': {'trip_id': ['route1_04:40:00'], 'trip_departure_time': ['04:40:00'],
                                'vehicle_id': ['veh_2_bus']},
                      'arrival_offsets': ['00:00:00', '00:02:00'], 'departure_offsets': ['00:00:00', '00:02:00'],
                      'route_long_name': '', 'id': '1', 'route': ['0', '1'], 'await_departure': []},
                '2': {'ordered_stops': ['1', '2'], 'route_short_name': 'route2', 'mode': 'bus',
                      'trips': {'trip_id': ['route2_05:40:00'], 'trip_departure_time': ['05:40:00'],
                                'vehicle_id': ['veh_3_bus']},
                      'arrival_offsets': ['00:00:00', '00:03:00'], 'departure_offsets': ['00:00:00', '00:05:00'],
                      'route_long_name': '', 'id': '2', 'route': ['1', '2'], 'await_departure': []}},
        services={'service2': {'id': 'service2', 'name': 'route3'},
                  'service1': {'id': 'service1', 'name': 'route1'}},
        route_to_service_map={'1': 'service1', '2': 'service1', '3': 'service2', '4': 'service2'},
        service_to_route_map={'service1': ['1', '2'], 'service2': ['3', '4']},
        crs='epsg:27700'
    )
    # Stop nodes; each records which services/routes pass through it plus
    # spatial attributes in both projected (x/y, epsg:27700) and lat/lon form.
    nodes = {'4': {'services': {'service2'}, 'routes': {'3', '4'}, 'id': '4', 'x': 529350.7866124967,
                   'y': 182388.0201078112, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.52560003323918,
                   'lon': -0.13682698708848137, 's2_id': 5221390668558830581,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '4'},
             '5': {'services': {'service2'}, 'routes': {'4'}, 'id': '5', 'x': 529350.7866124967,
                   'y': 182388.0201078112, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.52560003323918,
                   'lon': -0.13682698708848137, 's2_id': 5221390668558830581,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '5'},
             '3': {'services': {'service2'}, 'routes': {'3'}, 'id': '3', 'x': 529455.7452394223,
                   'y': 182401.37630677427, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.525696033239186,
                   'lon': -0.13530998708775874, 's2_id': 5221390668020036699,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '3'},
             '1': {'services': {'service1'}, 'routes': {'2', '1'}, 'id': '1', 'x': 529350.7866124967,
                   'y': 182388.0201078112, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.52560003323918,
                   'lon': -0.13682698708848137, 's2_id': 5221390668558830581,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '1'},
             '2': {'services': {'service1'}, 'routes': {'2'}, 'id': '2', 'x': 529350.7866124967,
                   'y': 182388.0201078112, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.52560003323918,
                   'lon': -0.13682698708848137, 's2_id': 5221390668558830581,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '2'},
             '0': {'services': {'service1'}, 'routes': {'1'}, 'id': '0', 'x': 529455.7452394223,
                   'y': 182401.37630677427, 'epsg': 'epsg:27700', 'name': '', 'lat': 51.525696033239186,
                   'lon': -0.13530998708775874, 's2_id': 5221390668020036699,
                   'additional_attributes': {'linkRefId'}, 'linkRefId': '0'}}
    # Directed stop-to-stop edges annotated with the services/routes/modes
    # that traverse them.
    edges = [('4', '5', {'services': {'service2'}, 'routes': {'4'}, 'modes': {'rail'}}),
             ('3', '4', {'services': {'service2'}, 'routes': {'3'}, 'modes': {'rail'}}),
             ('1', '2', {'services': {'service1'}, 'routes': {'2'}, 'modes': {'bus'}}),
             ('0', '1', {'services': {'service1'}, 'routes': {'1'}, 'modes': {'bus'}})]
    # add_nodes_from on a dict adds its keys; attributes are attached separately below
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    set_node_attributes(graph, nodes)
    graph.graph['change_log'] = change_log.ChangeLog()
    return graph
def read_csv(path_to_network_nodes: str, path_to_network_links: str, epsg: str):
    """
    Reads CSV data into a genet.Network object
    :param path_to_network_nodes: CSV file describing nodes. Should at least include columns:
    - id: unique ID for the node
    - x: spatial coordinate in given epsg
    - y: spatial coordinate in given epsg
    :param path_to_network_links: CSV file describing links. Should at least include columns:
    - from - source Node ID
    - to - target Node ID
    Optional columns, but strongly encouraged
    - id - unique ID for link
    - length - link length in metres
    - freespeed - meter/seconds speed
    - capacity - vehicles/hour
    - permlanes - number of lanes
    - modes - set of modes
    :param epsg: projection for the network, e.g. 'epsg:27700'
    :return: genet.Network object
    :raises NetworkSchemaError: if the nodes CSV has no `id` column
    """
    logging.info(f'Reading nodes from {path_to_network_nodes}')
    df_nodes = pd.read_csv(path_to_network_nodes)
    # prefer the explicit `id` column; a leftover `index` column is dropped
    if {'index', 'id'}.issubset(set(df_nodes.columns)):
        df_nodes = df_nodes.drop('index', axis=1)
    elif 'id' not in df_nodes.columns:
        raise NetworkSchemaError('Expected `id` column in the nodes.csv is missing. This need to be the IDs to which '
                                 'links.csv refers to in `from` and `to` columns.')
    # normalise IDs to strings (round-trip via int strips float artefacts like '1.0')
    df_nodes['id'] = df_nodes['id'].astype(int).astype(str)
    df_nodes = df_nodes.set_index('id', drop=False)
    try:
        df_nodes = df_nodes.drop('geometry', axis=1)
    except KeyError:
        pass

    # BUG FIX: previously logged the *nodes* path here
    logging.info(f'Reading links from {path_to_network_links}')
    df_links = pd.read_csv(path_to_network_links)
    if {'index', 'id'}.issubset(set(df_links.columns)):
        df_links = df_links.drop('index', axis=1)
    elif 'id' not in df_links.columns:
        if 'index' in df_links.columns:
            if not df_links['index'].duplicated().any():
                # the `index` column is unique — adopt it as the link ID
                df_links['id'] = df_links['index']
            else:
                df_links = df_links.drop('index', axis=1)
                # BUG FIX: this path previously left the frame without an `id`
                # column, crashing on the astype below — generate IDs instead
                df_links['id'] = range(len(df_links))
        else:
            df_links['id'] = range(len(df_links))
    df_links['id'] = df_links['id'].astype(int).astype(str)
    df_links['from'] = df_links['from'].astype(int).astype(str)
    df_links['to'] = df_links['to'].astype(int).astype(str)
    df_links = df_links.set_index('id', drop=False)
    # recover encoded geometry
    try:
        df_links['geometry'] = df_links['geometry'].apply(lambda x: spatial.decode_polyline_to_shapely_linestring(x))
    except KeyError:
        pass
    # `attributes` and `modes` are optional (see docstring) — guard them the
    # same way as `geometry` instead of crashing when absent
    try:
        df_links['attributes'] = _literal_eval_col(df_links['attributes'])
    except KeyError:
        pass
    try:
        df_links['modes'] = _literal_eval_col(df_links['modes'])
    except KeyError:
        pass

    n = core.Network(epsg=epsg)
    n.add_nodes(df_nodes.T.to_dict())
    n.add_links(df_links.T.to_dict())
    n.change_log = change_log.ChangeLog()
    return n
def correct_schedule_graph_data_from_test_gtfs():
    """Expected schedule-graph attribute data for the test GTFS fixture.

    Mirrors the output shape of `gtfs_db_to_schedule_graph`: graph-level
    metadata (crs, route<->service maps, change log) plus per-route and
    per-service attribute dicts for one bus route ('1001_0') and one rail
    route ('1002_0').

    :return: dict of graph attributes
    """
    return {'name': 'Schedule graph', 'crs': {'init': 'epsg:4326'},
            'route_to_service_map': {'1001_0': '1001', '1002_0': '1002'},
            'service_to_route_map': {'1001': ['1001_0'], '1002': ['1002_0']},
            'change_log': change_log.ChangeLog(),
            'routes': {'1001_0': {'arrival_offsets': ['00:00:00', '00:02:00'], 'route_color': 'CE312D',
                                  'ordered_stops': ['BSE', 'BSN'], 'mode': 'bus', 'route_type': 3,
                                  'departure_offsets': ['00:00:00', '00:02:00'],
                                  'route_long_name': 'Bus Test Route', 'route_short_name': 'BTR',
                                  'trips': {'trip_id': ['BT1'], 'trip_departure_time': ['03:21:00'],
                                            'vehicle_id': ['veh_0']},
                                  'service_id': '1001', 'id': '1001_0'},
                       '1002_0': {'arrival_offsets': ['00:00:00', '00:02:00'], 'route_color': 'CE312D',
                                  'ordered_stops': ['RSN', 'RSE'], 'mode': 'rail', 'route_type': 2,
                                  'departure_offsets': ['00:00:00', '00:02:00'],
                                  'route_long_name': 'Rail Test Route', 'route_short_name': 'RTR',
                                  'trips': {'trip_id': ['RT1'], 'trip_departure_time': ['03:21:00'],
                                            'vehicle_id': ['veh_1']},
                                  'service_id': '1002', 'id': '1002_0'}},
            'services': {'1001': {'id': '1001', 'name': 'BTR'},
                         '1002': {'id': '1002', 'name': 'RTR'}}}
def gtfs_db_to_schedule_graph(stop_times_db, stops_db, trips_db, routes_db, services):
    """Builds a schedule DiGraph from GTFS tables.

    :param stop_times_db: GTFS stop_times table (trip_id, stop_id, arrival/departure times, stop_sequence)
    :param stops_db: GTFS stops table (stop_id, stop_lat, stop_lon, stop_name)
    :param trips_db: GTFS trips table (route_id, trip_id, service_id)
    :param routes_db: GTFS routes table (route_id, route_type, names, colour)
    :param services: service IDs to keep; trips outside these are dropped
    :return: networkx DiGraph — stops as nodes, consecutive stop pairs as
        edges, with route/service metadata stored in graph attributes
    """
    def get_time(time):
        # return time as datetime.datetime, account for 24 in %H
        # (GTFS allows hours >= 24 for trips running past midnight; fold the
        # overflow into whole days and add it back as a timedelta)
        time_list = time.split(':')
        if int(time_list[0]) >= 24:
            days = int(time_list[0]) // 24
            time_list[0] = int(time_list[0]) % 24
            if time_list[0] < 10:
                time_list[0] = '0{}'.format(time_list[0])
            else:
                time_list[0] = str(time_list[0])
            return datetime.strptime(':'.join(time_list), '%H:%M:%S') + timedelta(days=days)
        else:
            return datetime.strptime(time, '%H:%M:%S')

    def timedelta_to_hms(td):
        # format a timedelta as 'HH:MM:SS', discarding any 'N days' prefix
        return str(td).split('days')[-1].strip(' ')

    def generate_stop_sequence(group):
        # one group per trip_id: flatten its stop_times rows into a single
        # row carrying the ordered stop list and time offsets
        group = group.sort_values(by='stop_sequence')
        # remove stops that are loopy (consecutively duplicated)
        unique_stops_mask = group['stop_id'].shift() != group['stop_id']
        if not unique_stops_mask.all():
            logging.warning(
                'Your GTFS has (a) looooop edge(s)! A zero link between a node and itself, edge affected '
                '\nThis edge will not be considered for computation, the stop will be deleted and the '
                f'schedule will be changed. Affected stops: {group[~unique_stops_mask]["stop_id"].to_list()}'
            )
        group = group.loc[unique_stops_mask]
        flattened = group.iloc[0, :][list(
            set(group.columns) - {
                'trip_id', 'stop_sequence', 'stop_id', 'arrival_time',
                'departure_time'
            })]
        # offsets are measured from arrival at the first stop
        departure_time = group.iloc[0, :]['arrival_time']
        flattened['trip_departure_time'] = departure_time.strftime("%H:%M:%S")
        flattened['ordered_stops'] = group['stop_id'].to_list()
        # 'stops_str' is a hashable key for grouping identical stop sequences
        flattened['stops_str'] = ','.join(group['stop_id'].to_list())
        flattened['arrival_offsets'] = [
            timedelta_to_hms(t - departure_time) for t in group['arrival_time']
        ]
        flattened['departure_offsets'] = [
            timedelta_to_hms(t - departure_time) for t in group['departure_time']
        ]
        return flattened

    def generate_trips(group):
        # one group per (route_id, stops_str): collect its trips into a
        # single 'trips' dict of parallel lists
        flattened = group.iloc[0, :][list(
            set(group.columns) - {
                'route_id', 'stops_str', 'trip_id', 'vehicle_id',
                'trip_departure_time'
            })]
        trip_id = group['trip_id'].to_list()
        trip_departure_time = group['trip_departure_time'].to_list()
        vehicle_id = group['vehicle_id'].to_list()
        flattened['trips'] = {
            'trip_id': trip_id,
            'trip_departure_time': trip_departure_time,
            'vehicle_id': vehicle_id
        }
        return flattened

    def generate_routes(group):
        # one group per service_id: assign sequential '{service_id}_{i}' route IDs
        service_id = group.iloc[0, :]['service_id']
        group['route_id'] = [f'{service_id}_{i}' for i in range(len(group))]
        return group

    # keep only trips belonging to the requested services
    trips_db = trips_db[trips_db['service_id'].isin(services)]
    df = trips_db[['route_id', 'trip_id']].merge(routes_db[[
        'route_id', 'route_type', 'route_short_name', 'route_long_name',
        'route_color'
    ]], on='route_id', how='left')
    df['mode'] = df['route_type'].apply(lambda x: get_mode(x))
    df = df.merge(stop_times_db[[
        'trip_id', 'stop_id', 'arrival_time', 'departure_time',
        'stop_sequence'
    ]], on='trip_id', how='left')
    df['arrival_time'] = df['arrival_time'].apply(lambda x: get_time(x))
    df['departure_time'] = df['departure_time'].apply(lambda x: get_time(x))
    df = df.groupby('trip_id').apply(generate_stop_sequence).reset_index()
    # drop stop sequences that are single stops
    df = df[df['ordered_stops'].str.len() > 1]
    df['vehicle_id'] = [f'veh_{i}' for i in range(len(df))]
    df = df.groupby(['route_id', 'stops_str']).apply(generate_trips).reset_index()
    df = df.drop('stops_str', axis=1)
    # the GTFS route_id becomes the service ID; fresh route IDs are generated below
    df['service_id'] = df['route_id'].astype(str)
    df = df.groupby(['service_id']).apply(generate_routes)

    g = nx.DiGraph(name='Schedule graph')
    g.graph['crs'] = {'init': 'epsg:4326'}
    g.graph['route_to_service_map'] = df.set_index(
        'route_id')['service_id'].T.to_dict()
    g.graph['service_to_route_map'] = df.groupby(
        'service_id')['route_id'].apply(list).to_dict()
    g.graph['change_log'] = change_log.ChangeLog()
    df['id'] = df['route_id']
    g.graph['routes'] = df.set_index('route_id').T.to_dict()
    df['id'] = df['service_id']
    df = df.rename(columns={'route_short_name': 'name'})
    g.graph['services'] = df[['service_id', 'id', 'name'
                              ]].groupby('service_id').first().T.to_dict()
    # finally nodes
    # explode each route's ordered stop list into one row per (route, stop)
    stops = pd.DataFrame({
        col: np.repeat(df[col].values, df['ordered_stops'].str.len())
        for col in {'route_id', 'service_id'}
    }).assign(stop_id=np.concatenate(df['ordered_stops'].values))
    stop_groups = stops.groupby('stop_id')
    stops = set(stop_groups.groups)
    g.add_nodes_from(stops)
    stops_db = stops_db.rename(columns={
        'stop_lat': 'lat',
        'stop_lon': 'lon',
        'stop_name': 'name'
    })
    stops_db['id'] = stops_db['stop_id']
    stops_db['x'] = stops_db['lon']
    stops_db['y'] = stops_db['lat']
    stops_db['epsg'] = 'epsg:4326'
    stops_db['s2_id'] = stops_db.apply(lambda x: spatial.generate_index_s2(
        lat=float(x['lat']), lng=float(x['lon'])), axis=1)
    # attach stop attributes, then the set of routes/services at each stop
    nx.set_node_attributes(
        g,
        stops_db[stops_db['stop_id'].isin(stops)].set_index(
            'stop_id').T.to_dict())
    nx.set_node_attributes(
        g,
        pd.DataFrame(
            stop_groups['route_id'].apply(set)).rename(columns={
                'route_id': 'routes'
            }).T.to_dict())
    nx.set_node_attributes(
        g,
        pd.DataFrame(stop_groups['service_id'].apply(set)).rename(
            columns={
                'service_id': 'services'
            }).T.to_dict())
    # and edges
    # turn each stop list into consecutive (from, to) pairs
    df['ordered_stops'] = df['ordered_stops'].apply(
        lambda x: list(zip(x[:-1], x[1:])))
    stop_cols = np.concatenate(df['ordered_stops'].values)
    edges = pd.DataFrame({
        col: np.repeat(df[col].values, df['ordered_stops'].str.len())
        for col in {'route_id', 'service_id'}
    }).assign(from_stop=stop_cols[:, 0], to_stop=stop_cols[:, 1])
    edge_groups = edges.groupby(['from_stop', 'to_stop'])
    g.add_edges_from(edge_groups.groups)
    nx.set_edge_attributes(
        g,
        pd.DataFrame(
            edge_groups['route_id'].apply(set)).rename(columns={
                'route_id': 'routes'
            }).T.to_dict())
    nx.set_edge_attributes(
        g,
        pd.DataFrame(edge_groups['service_id'].apply(set)).rename(
            columns={
                'service_id': 'services'
            }).T.to_dict())
    return g