-
Notifications
You must be signed in to change notification settings - Fork 1
/
tfl.py
127 lines (102 loc) · 4.51 KB
/
tfl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -*- coding: utf-8 -*-
"""
Created on Sun May 8 11:19:10 2016
@author: andyjones
"""
import networkx as nx
import scipy as sp
import json
import pandas as pd
import requests
import os
# Base URL for all TfL Unified API calls.
ROOT = 'https://api.tfl.gov.uk/'
# API credentials, merged into every request's query parameters by call_api.
# NOTE(review): loaded eagerly at import time — importing this module raises
# if data/tfl_keys.json is missing; confirm that is intended.
KEYS = json.load(open('data/tfl_keys.json'))
# Directory where fetched timetables/stop-points and computed pickles are cached.
CACHE_PATH = 'cache'
# Default journey origin (NaPTAN stop code) used by get_travel_times.
# NOTE(review): presumably an Underground station code — verify which one.
DEFAULT_ORIGIN = '940GZZLUGPK'
def call_api(endpoint, **kwargs):
    """Call a TfL API endpoint and return the decoded JSON payload.

    Parameters
    ----------
    endpoint : str
        Path relative to ROOT, e.g. ``'Line/Route'``.
    **kwargs
        Extra query parameters; merged with the credentials in KEYS.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    """
    result = requests.get(ROOT + endpoint, params=dict(KEYS, **kwargs))
    # Fail loudly on HTTP errors rather than attempting to JSON-decode an
    # HTML error page (the original would raise an opaque decode error).
    result.raise_for_status()
    # result.json() honours the response's declared charset, unlike
    # json.loads on the raw byte content.
    return result.json()
def get_routes():
    """Fetch every line's route sections from the TfL API.

    Returns a DataFrame with one row per route section, carrying the line
    id/name/mode plus the section's origin, destination and direction.
    """
    lines = call_api('Line/Route')
    records = [
        {
            'route_id': line['id'],
            'mode': line['modeName'],
            'route_name': line['name'],
            'destination_id': sec['destination'],
            'destination_name': sec['destinationName'],
            'origin_id': sec['originator'],
            'origin_name': sec['originationName'],
            'section_name': sec['name'],
            'direction': sec['direction'],
        }
        for line in lines
        for sec in line['routeSections']
    ]
    return pd.DataFrame(records)
def get_timetable(route_id, origin, destination):
    """Return the TfL timetable JSON for ``route_id`` between two stops.

    On a cache miss the timetable is fetched from the API and written to
    ``CACHE_PATH/timetables``; later calls read straight from disk.

    Parameters
    ----------
    route_id, origin, destination : str
        Line id and NaPTAN stop codes, as used by the TfL Timetable endpoint.
    """
    path = os.path.join(CACHE_PATH, 'timetables', '{}-{}-{}.json'.format(route_id, origin, destination))
    if not os.path.exists(path):
        print('Fetching {}-{}-{}'.format(route_id, origin, destination))
        data = call_api('Line/{}/Timetable/{}/to/{}'.format(route_id, origin, destination))
        # Create the cache directory on first use; the original crashed if
        # 'cache/timetables' did not already exist.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Context managers close the handles even if json.dump/load raises;
        # the original leaked both file objects.
        with open(path, 'w') as f:
            json.dump(data, f)
    with open(path) as f:
        return json.load(f)
def walk_timetables(routes):
    """Yield the timetable JSON for each row of *routes* that has any routes.

    Responses lacking a 'timetable' key, or whose timetable contains an
    empty route list, are skipped.
    """
    for _, route in routes.iterrows():
        timetable = get_timetable(route['route_id'], route['origin_id'], route['destination_id'])
        if 'timetable' not in timetable:
            continue
        if len(timetable['timetable']['routes']) > 0:
            yield timetable
def get_stops(route_id):
    """Return the list of stop points served by line ``route_id``.

    On a cache miss the stop points are fetched from the API and written to
    ``CACHE_PATH/stoppoints``; later calls read straight from disk.
    """
    path = os.path.join(CACHE_PATH, 'stoppoints', '{}.json'.format(route_id))
    if not os.path.exists(path):
        print('Fetching {}'.format(route_id))
        data = call_api('Line/{}/StopPoints'.format(route_id))
        # Create the cache directory on first use; the original crashed if
        # 'cache/stoppoints' did not already exist.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Context managers close the handles even if json.dump/load raises;
        # the original leaked both file objects.
        with open(path, 'w') as f:
            json.dump(data, f)
    with open(path) as f:
        return json.load(f)
def walk_stops(routes):
    """Yield the stop-point list for every route row in *routes*."""
    for _, route in routes.iterrows():
        yield get_stops(route['route_id'])
def get_locations(routes):
    """Build a DataFrame of unique stop locations, indexed by NaPTAN code.

    Columns: id, station_naptan, hub_naptan (empty string when absent),
    name, latitude, longitude. Duplicate NaPTAN codes are dropped, keeping
    the first occurrence.
    """
    records = [
        {
            'id': stop['id'],
            'naptan': stop['naptanId'],
            'station_naptan': stop.get('stationNaptan', ''),
            'hub_naptan': stop.get('hubNaptanCode', ''),
            'name': stop['commonName'],
            'latitude': stop['lat'],
            'longitude': stop['lon'],
        }
        for stops in walk_stops(routes)
        for stop in stops
    ]
    frame = pd.DataFrame(records)
    return frame.drop_duplicates('naptan').set_index('naptan')
def get_edges(routes):
    """Compute mean hop times between adjacent stops across all timetables.

    For every timetable interval, successive 'timeToArrival' values are
    cumulative, so consecutive differences give each hop's travel time.
    Returns a DataFrame indexed by (origin, destination) with the mean
    'time' over every timetable that contains that edge.

    Note: the original used ``sp.diff``/``sp.array`` — deprecated NumPy
    aliases that have been removed from the SciPy main namespace; a plain
    pairwise difference does the same job with no array round-trip.
    """
    results = []
    for timetable in walk_timetables(routes):
        origin = timetable['timetable']['departureStopId']
        for route in timetable['timetable']['routes']:
            for intervals in route['stationIntervals']:
                stops = [origin] + [x['stopId'] for x in intervals['intervals']]
                times = [0] + [x['timeToArrival'] for x in intervals['intervals']]
                # zip over shifted copies pairs each edge (s, t) with its
                # hop duration (curr - prev).
                for s, t, prev, curr in zip(stops, stops[1:], times, times[1:]):
                    results.append([s, t, curr - prev])
    results = pd.DataFrame(results, columns=['origin', 'destination', 'time'])
    # Average duplicated edges (same hop appearing in several timetables).
    return results.groupby(['origin', 'destination']).mean()
def get_travel_times(edges, locations, origin=DEFAULT_ORIGIN, transit_time=5):
    """Shortest-path travel time from *origin* to every reachable stop.

    Builds an undirected weighted graph from the (origin, destination, time)
    edges, links each stop to its hub NaPTAN with a fixed ``transit_time``
    penalty, and runs Dijkstra from *origin*. Returns a Series mapping stop
    id to total travel time.
    """
    graph = nx.Graph()
    weighted_edges = [tuple(row) for row in edges.reset_index().values]
    graph.add_weighted_edges_from(weighted_edges)
    # Connect stops to their interchange hub so transfers are possible.
    hub_links = [
        (naptan, location.hub_naptan, transit_time)
        for naptan, location in locations.iterrows()
        if location.hub_naptan != ''
    ]
    graph.add_weighted_edges_from(hub_links)
    times = nx.single_source_dijkstra_path_length(graph, origin, weight='weight')
    return pd.Series(times)
def cache():
    """Return the (edges, locations) DataFrames, building them on first use.

    Both frames are pickled under CACHE_PATH; when either pickle is missing
    the routes are re-fetched and both caches are rewritten.
    """
    edge_cache = os.path.join(CACHE_PATH, 'edges.pkl')
    location_cache = os.path.join(CACHE_PATH, 'locations.pkl')
    # Rebuild when EITHER pickle is absent: the original only checked the
    # edge cache, so a present edges.pkl with a missing locations.pkl made
    # the read below crash.
    if not (os.path.exists(edge_cache) and os.path.exists(location_cache)):
        routes = get_routes()
        get_edges(routes).to_pickle(edge_cache)
        get_locations(routes).to_pickle(location_cache)
    return pd.read_pickle(edge_cache), pd.read_pickle(location_cache)