def dfBystop(stop, graph, allStations):
    """Return a dataframe of raw and smoothed timelines for stations near ``stop``.

    The neighbourhood is the ego graph of ``stop`` in ``graph`` (radius 5,
    measured with the ``'weight'`` edge attribute, edge direction ignored).
    For every neighbourhood node that is also a key of ``allStations`` two
    columns are produced: the original timeline under the station id and a
    filtered copy under ``<station id>_filtered``.

    Args:
        stop: Node id of the centre station.
        graph: networkx graph describing the network.
        allStations: Mapping from station id to that station's timeline.
            Each value must support ``.interpolate()`` (presumably a pandas
            Series -- confirm against the caller).

    Returns:
        A ``pandas.DataFrame`` holding the per-station columns side by side,
        or an empty ``DataFrame`` when no nearby station has a timeline.
    """
    subgraph = networkx.ego_graph(graph, stop, radius=5, undirected=True,
                                  distance='weight')

    # Collect the per-station frames and concatenate once at the end;
    # concatenating inside the loop re-copies the growing frame every time.
    frames = []
    for station in subgraph.nodes():
        if station not in allStations:
            continue
        series = allStations[station]
        # Interpolate gaps, then back-fill whatever is still missing, before
        # filtering. ``b`` and ``a`` are the module-level filter coefficients.
        filtered = filtfilt(b, a, series.interpolate().bfill())
        frames.append(pd.DataFrame({station: series,
                                    station + '_filtered': filtered}))

    if not frames:
        # No station in the neighbourhood has data: return an empty frame
        # rather than failing on an unbound local variable.
        return pd.DataFrame()
    return pd.concat(frames, axis=1)
for station in subgraph.nodes(): if station in allStations: filtered = filtfilt(b, a, allStations[station].interpolate().fillna(method='backfill')) stationDf = pd.DataFrame({station: allStations[station], (station + '_filtered'): filtered}) try: tmpDf = pd.concat([tmpDf, stationDf], axis=1) except NameError: tmpDf = stationDf return tmpDf if __name__ == "__main__": fulldata = util.pickle_load('data/fulldata_NOnearest_9-22.pkl') graph = util.pickle_load('subwaydata/NYCsubway_network_graph.pkl') mapFromId, mapToId = distances.getMappings(graph) allStations = processHistoricalData(fulldata, graph) station = mapToId['W 4 St'] stationDf = dfBystop(station, graph, allStations) result = fitAR(stationDf, station, makePlot=True, order=(1, 1, 0), showTest=True) stationDf = dfBystop(station, graph, allStations) result = fitGP(stationDf, station, showTest=True) # run GP for all stations and write to dB test_scores = {} for station in graph.nodes(): if station in allStations: stationDf = dfBystop(station, graph, allStations)