def test_feed_edge_types():
    """All edges are 'transit' by default; imputing walk transfers adds 'walk' edges."""
    feed = get_representative_feed(fixture('samtrans-2017-11-28.zip'))
    window_start = 7 * 60 * 60
    window_end = 10 * 60 * 60

    # Base case: every edge in the graph should be a transit edge
    base_graph = load_feed_as_graph(feed, window_start, window_end)
    assert all(
        attrs['mode'] == 'transit'
        for _, _, attrs in base_graph.edges(data=True))

    # Second pass: imputing walk transfers should introduce walk edges
    # alongside the transit edges
    walk_graph = load_feed_as_graph(
        feed, window_start, window_end, impute_walk_transfers=True)
    modes = [attrs['mode'] for _, _, attrs in walk_graph.edges(data=True)]

    # And make sure the correct number of each type were made
    assert modes.count('transit') == 1940
    assert modes.count('walk') == 864
def test_generate_summary_graph_elements():
    """Summary graph elements should be well-formed with multiprocessing on or off."""
    feed_1 = get_representative_feed(fixture('samtrans-2017-11-28.zip'))
    start = 7 * 60 * 60
    end = 10 * 60 * 60
    interpolate_times = True

    # Results should hold regardless of the multiprocessing flag
    for use_multiprocessing in (True, False):
        (summary_edge_costs,
         wait_times_by_stop) = generate_summary_graph_elements(
            feed_1, start, end, FALLBACK_STOP_COST_DEFAULT,
            interpolate_times, use_multiprocessing)

        # The summary edge cost dataframe carries the expected columns
        for col in ('edge_cost', 'from_stop_id', 'to_stop_id'):
            assert col in summary_edge_costs.columns

        # Each (from, to) stop pair appears exactly once - no duplicate
        # edges in the returned dataframe (each should be its own summary)
        pairs = list(zip(summary_edge_costs.from_stop_id,
                         summary_edge_costs.to_stop_id))
        assert len(set(pairs)) == len(pairs)

        # The wait times dataframe carries the expected columns
        for col in ('avg_cost', 'stop_id'):
            assert col in wait_times_by_stop.columns

        # Sanity check edge costs: none may be negative
        negative_mask = (wait_times_by_stop.avg_cost < 0)
        assert len(wait_times_by_stop[negative_mask]) == 0

        # Stop ids must be unique
        unique_stops = wait_times_by_stop.stop_id.unique()
        assert len(unique_stops) == len(wait_times_by_stop)

        # Another sanity check: stops whose departure times were null in
        # the fixture should have received linearly imputed arrival and
        # departure times and thus still be present in the edge list
        null_times = feed_1.stop_times.departure_time.isnull()
        null_stop_ids = feed_1.stop_times[null_times].stop_id.unique()
        in_summary = summary_edge_costs.from_stop_id.isin(null_stop_ids)
        preserved_from_nulls = summary_edge_costs.from_stop_id[
            in_summary].unique()
        assert len(preserved_from_nulls) == 205
def test_parsing_when_just_on_trip_during_target_window():
    """A feed with a single trip inside the window yields a minimal graph."""
    feed = get_representative_feed(
        fixture('highdesertpointorus-2018-03-20.zip'))
    start = 7 * 60 * 60  # 7:00 AM
    end = 8 * 60 * 60  # 8:00 AM (the original comment misstated 10:00 AM)
    G = load_feed_as_graph(feed, start, end)

    # Only one trip means just two stops joined by a single edge
    assert len(list(G.nodes())) == 2
    assert len(list(G.edges())) == 1
def test_loading_in_too_small_timeframes():
    """Loading a time frame that yields no valid results should raise."""
    feed_1 = get_representative_feed(fixture('caltrain-2017-07-24.zip'))

    # A one-second window at midnight cannot produce any summary results
    start = 0
    end = 1
    with pytest.raises(InsufficientSummaryResults):
        load_feed_as_graph(feed_1, start, end)
def test_feed_to_graph_plot():
    """Smoke test: plotting a loaded feed graph should not raise."""
    feed = get_representative_feed(fixture('caltrain-2017-07-24.zip'))
    morning_start = 7 * 60 * 60
    morning_end = 10 * 60 * 60
    G = load_feed_as_graph(feed, morning_start, morning_end)

    # We only care that a figure/axis pair comes back without error
    fig, ax = generate_plot(G)
def test_save_and_read_zip():
    """Round-trip a graph through a zip file and verify it is preserved.

    Fix: the temporary zip is now removed in a ``finally`` block, so a
    failure in ``save_graph_to_zip``/``graph_from_zip`` can no longer
    leave ``foobar.zip`` on disk and impact later tests.
    """
    path_1 = fixture('caltrain-2017-07-24.zip')
    feed_1 = get_representative_feed(path_1)
    start = 7 * 60 * 60
    end = 10 * 60 * 60
    G1 = load_feed_as_graph(feed_1, start, end, 'foo')

    # Get counts as a measure to compare with save-read results
    nodes_len_g1 = len(list(G1.nodes()))
    edges_len_g1 = len(list(G1.edges()))

    # Save the graph to a zip, then immediately read it back in; the
    # zip is always removed so it's not hanging around afterwards
    zip_fpath = 'foobar.zip'
    try:
        save_graph_to_zip(G1, zip_fpath)
        G2 = graph_from_zip(zip_fpath)
    finally:
        if os.path.exists(zip_fpath):
            os.remove(zip_fpath)

    # Get new lengths; they should both match the ones from G1
    nodes_len_g2 = len(list(G2.nodes()))
    edges_len_g2 = len(list(G2.edges()))
    assert nodes_len_g1 == nodes_len_g2
    assert edges_len_g1 == edges_len_g2

    # Make sure same numbers of unique nodes are present
    set_n1 = set(list(G1.nodes()))
    set_n2 = set(list(G2.nodes()))
    assert len(set_n1) == len(set_n2)

    # Make sure that all nodes are accounted for
    for n in set_n1:
        assert n in set_n2

    # Do the same for the edges
    e1 = list(G1.edges())
    e2 = list(G2.edges())
    for edge_pair in e1:
        assert edge_pair in e2

    # Also make sure the basic attributes are preserved
    for node_id, node in G2.nodes(data=True):
        for key in ['boarding_cost', 'modes', 'x', 'y']:
            assert key in node.keys()
    for from_id, to_id, edge in G2.edges(data=True):
        for key in ['length', 'mode']:
            assert key in edge.keys()
def test_feeds_with_no_direction_id():
    """Feeds whose trips lack a direction_id should still load cleanly."""
    feed = get_representative_feed(fixture('samtrans-2017-11-28.zip'))

    # Blank out the direction id column to simulate a feed without it
    feed.trips['direction_id'] = np.nan

    start = 7 * 60 * 60
    end = 10 * 60 * 60
    G = load_feed_as_graph(feed, start, end)

    # Every node must still receive a numeric boarding cost
    for _, node in G.nodes(data=True):
        assert not np.isnan(node['boarding_cost'])
def test_feed_to_graph_performance():
    """Replicate the graph-creation workflow step by step so each major
    function is exposed for benchmarking/performance profiling."""
    start = 7 * 60 * 60
    end = 10 * 60 * 60
    interpolate_times = True
    use_multiprocessing = False

    print('Running time profiles on each major '
          'function in graph generation workflow')

    t0 = time()
    path = fixture('samtrans-2017-11-28.zip')
    feed = get_representative_feed(path)
    elapsed = round(time() - t0, 2)
    print('Perf of get_representative_feed: {}s'.format(elapsed))

    print('Iteration on {} routes.'.format(len(feed.routes)))

    t0 = time()
    (all_edge_costs, all_wait_times) = generate_edge_and_wait_values(
        feed, start, end, interpolate_times, use_multiprocessing)
    elapsed = round(time() - t0, 2)
    print('Perf of generate_edge_and_wait_values: {}s'.format(elapsed))

    t0 = time()
    summary_edge_costs = generate_summary_edge_costs(all_edge_costs)
    elapsed = round(time() - t0, 2)
    print('Perf of generate_summary_edge_costs: {}s'.format(elapsed))

    t0 = time()
    wait_times_by_stop = generate_summary_wait_times(
        all_wait_times, FALLBACK_STOP_COST_DEFAULT)
    elapsed = round(time() - t0, 2)
    print('Perf of generate_summary_wait_times: {}s'.format(elapsed))

    t0 = time()
    G = generate_empty_md_graph('foo')
    elapsed = round(time() - t0, 2)
    print('Perf of generate_empty_md_graph: {}s'.format(elapsed))

    t0 = time()
    G = populate_graph(G, 'bar', feed, wait_times_by_stop,
                       summary_edge_costs, 50, 4.5)
    elapsed = round(time() - t0, 2)
    print('Perf of populate_graph: {}s'.format(elapsed))
def test_convert_multidigraph_to_digraph():
    """Converting a MultiDiGraph yields a DiGraph of identical size."""
    feed = get_representative_feed(fixture('samtrans-2017-11-28.zip'))

    # One-hour window keeps this test quick
    start = 7 * 60 * 60
    end = 8 * 60 * 60
    Gmdg = load_feed_as_graph(feed, start, end, name='foobar')

    # Run the conversion operation
    Gdg = convert_to_digraph(Gmdg)
    assert isinstance(Gdg, nx.DiGraph)
    assert len(Gdg.edges()) == len(Gmdg.edges())
    assert len(Gdg.nodes()) == len(Gmdg.nodes())
def test_loading_in_invalid_timeframes():
    """Invalid time brackets must be rejected with InvalidTimeBracket."""
    feed_1 = get_representative_feed(fixture('caltrain-2017-07-24.zip'))

    # Start after end, then two zero-length windows - each should raise
    for bad_start, bad_end in ((500, 100), (0, 0), (1000, 1000)):
        with pytest.raises(InvalidTimeBracket):
            load_feed_as_graph(feed_1, bad_start, bad_end)
def test_simplify_graph():
    """Simplification should coalesce the graph to the expected shape."""
    feed = get_representative_feed(fixture('samtrans-2017-11-28.zip'))

    # Shorter amount of time to speed up the test
    start = 7 * 60 * 60
    end = 8 * 60 * 60
    G = load_feed_as_graph(feed, start, end, name='foobar')

    # Run simplification
    Gs = simplify_graph(G)

    # TODO: We have this ongoing issue where we can't consistently test
    # by index for edges, so we need to figure out _how_ to test for a
    # specific edge
    assert len(Gs.nodes()) == 298
    assert len(Gs.edges()) == 451

    # Collect candidate edges: tag every edge dict with its endpoints,
    # then keep only the longer ones that were coalesced from other
    # internal ways (i.e. those carrying a geometry object)
    candidates = []
    for edge_from, edge_to, attrs in Gs.edges(data=True):
        attrs['from'] = edge_from
        attrs['to'] = edge_to
        if attrs['length'] > 110 and 'geometry' in attrs.keys():
            candidates.append(attrs)

    # The target is the candidate whose LineString has the most coordinates
    target_edge = max(
        candidates, key=lambda e: len(e['geometry'].coords.xy[0]))
    assert target_edge['length'] == 5114.0
    assert target_edge['mode'] == 'transit'
    assert target_edge['from'] == 'foobar_351008'
    assert target_edge['to'] == 'foobar_334008'
    assert len(target_edge['geometry'].coords.xy[0]) == 49
def test_extract_valid_feed():
    """Reading a valid zip with no optional arguments yields a feed object."""
    feed = get_representative_feed(fixture('caltrain-2017-07-24.zip'))
    assert isinstance(feed, ptg.gtfs.feed)
def test_empty_feed():
    """An empty GTFS zip must be rejected as invalid."""
    with pytest.raises(InvalidGTFS):
        get_representative_feed(fixture('empty.zip'))
def test_feed_to_graph_path():
    """Chain two GTFS feeds plus a synthetic network into a single graph."""
    feed_1 = get_representative_feed(fixture('caltrain-2017-07-24.zip'))
    start = 7 * 60 * 60
    end = 10 * 60 * 60
    G = load_feed_as_graph(feed_1, start, end, 'foo')

    # Assume no route has a segment exceeding this many seconds
    max_reasonable_segment_length = 60 * 60
    _check_unreasonable_lengths(G, max_reasonable_segment_length)

    # Baseline sizes for the sanity check that the graph grows on merge
    orig_node_len = len(G.nodes())
    orig_edge_len = len(G.edges())
    orig_node_list = list(G.nodes())

    # Merge a second feed onto the existing graph
    feed_2 = get_representative_feed(fixture('samtrans-2017-11-28.zip'))
    G = load_feed_as_graph(feed_2, start, end, 'bar', G)
    assert isinstance(G, nx.MultiDiGraph)
    _check_unreasonable_lengths(G, max_reasonable_segment_length)

    # Part 2 of the sanity check: node and edge counts both went up
    node_len_2 = len(G.nodes())
    edge_len_2 = len(G.edges())
    assert node_len_2 > orig_node_len
    assert edge_len_2 > orig_edge_len

    connector_edge_count = 0
    for from_node, to_node, edge in G.edges(data=True):
        # Every edge in the merged graph must carry a non-negative
        # float length measure
        assert 'length' in edge.keys()
        assert isinstance(edge['length'], float)
        assert edge['length'] >= 0

        # An edge bridging the two feeds has exactly one endpoint in the
        # first feed's node set (exclusive-or on membership)
        from_in_orig = from_node in orig_node_list
        to_in_orig = to_node in orig_node_list
        if from_in_orig != to_in_orig:
            connector_edge_count += 1

    # We know that there should be 9 new edges created to connect
    # the two GTFS feeds in the joint graph
    assert connector_edge_count == 9

    # Now reload in the synthetic graph geojson
    geojson_path = fixture('synthetic_san_bruno.geojson')
    with open(geojson_path, 'r') as gjf:
        reference_geojson = json.load(gjf)

    # Then load it onto the graph, as well
    G = load_synthetic_network_as_graph(reference_geojson, existing_graph=G)

    # And make sure it connected correctly
    node_len_3 = len(G.nodes())
    edge_len_3 = len(G.edges())
    assert node_len_3 - node_len_2 == 74
    assert edge_len_3 - edge_len_2 == 80