def CompareMultiDayRidersToYearlyAveFrom(startDate, endDate, source1, hour1, year1, minStations, minRiders, interval):
    """
    Compare each weekday's riders from a source station to the yearly average.

    Walks from startDate to endDate (inclusive) in steps of ``interval`` days.
    Days whose ``weekday()`` is 5 or 6 (Saturday/Sunday) are skipped. For every
    remaining day that returns data, the day is paired with the yearly average,
    scrubbed with BartLibs.ScrubRiders, tested with BartLibs.TestMultipleRoutes
    and plotted with PlotTwoSetsTrueProp.

    :param startDate: start date for query (must support weekday(), strftime() and timedelta addition)
    :param endDate: end date for query (inclusive)
    :param source1: source station
    :param hour1: hour to query
    :param year1: year to average
    :param minStations: min stations to intersect
    :param minRiders: min riders to include per station
    :param interval: query skip interval in days; must be positive
    :raises ValueError: if interval is zero or negative (the date would never advance)
    """
    # A non-positive step would keep currentDate <= endDate forever.
    if interval <= 0:
        raise ValueError("interval must be a positive number of days, got {0!r}".format(interval))

    yearlyAvg = BARTQueries.GetYearlyAverageDailyRidersFromSource(source1, hour1, year1)
    step = timedelta(days=interval)
    currentDate = startDate
    while currentDate <= endDate:
        # weekday() < 5 keeps Monday..Friday only.
        if currentDate.weekday() < 5:
            sDate = currentDate.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(source1, hour1, sDate)
            if len(da) > 0:
                dayYearPair = [da, yearlyAvg]
                # NOTE(review): minRiders is passed as both the second and the
                # fourth argument; the sibling Compare* functions pass a separate
                # minNumber in the fourth slot - confirm this is intended.
                allStations, allStationsComplete = BartLibs.ScrubRiders(
                    dayYearPair, minRiders, minStations, minRiders)
                rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
                title = "{0}, Stats: {1}RejectHO: {4}\nPVal: {2:.5f} Date {3}".format(
                    source1, len(da), pVal, sDate, rejectHO)
                PlotTwoSetsTrueProp(allStationsComplete, sDate, year1, 2, title)
        currentDate += step
def PlotMeanRidersPerStation(df, allStationsComplete, sourceStation):
    """
    Plot the mean riders for each destination with confidence-limit error bars.

    One point is drawn per unique value of the ``dest`` column. The x position
    is the mean of that destination's ``riders`` values and the horizontal
    error bar is the distance from the mean to the first limit returned by
    BartLibs.ConfidenceIntervalT.

    :param df: dataframe for all routes (its ``dest`` and ``riders`` columns are read)
    :param allStationsComplete: all routes as list type (not used by this function)
    :param sourceStation: station name placed in the plot title
    """
    destinations = df.dest.unique()
    meanRiders = []
    halfWidths = []
    for station in destinations:
        riders = df[df['dest'] == station].riders.tolist()
        average = np.mean(riders)
        meanRiders.append(average)
        # Only the first returned limit is used; the bar is drawn symmetrically.
        firstLimit, _secondLimit = BartLibs.ConfidenceIntervalT(riders)
        halfWidths.append(abs(average - firstLimit))

    # Destinations are laid out on the y axis at integer positions.
    positions = list(range(len(destinations)))
    plt.figure()
    plt.errorbar(meanRiders, positions, xerr=halfWidths, fmt='o', color='k')
    plt.yticks(positions, destinations)
    plt.title("Mean Riders from {0} to Destinations".format(sourceStation))
    plt.xlabel("Mean Riders")
    plt.ylabel("Destinations with Confidence Limits 95%")
    plt.show()
def CompareMultipleDayRidersTo(startDate, endDate, dest, hour, minStations, minRiders, minNumber, dayInterval):
    """
    Compares multiple routes into a destination station over a time frame.

    Collects the daily riders to ``dest`` for every weekday visited between
    startDate and endDate (inclusive, stepping by ``dayInterval`` days). When
    more than one day returned data, the routes are scrubbed and intersected,
    tested with BartLibs.TestMultipleRoutes and plotted. Otherwise
    "No Stations Found" is printed.

    :param startDate: Start date for route query (must support weekday(), strftime() and timedelta addition)
    :param endDate: End date for route query (inclusive)
    :param dest: The destination station
    :param hour: The hour to query
    :param minStations: Min number of stations to intersect to be considered in test table
    :param minRiders: Min riders to consider for each route station (min must be > 5)
    :param minNumber: Min number of total riders for train to be considered
    :param dayInterval: Skip day interval; must be positive
    :raises ValueError: if dayInterval is zero or negative (the date would never advance)
    """
    # A non-positive step would keep currentDate <= endDate forever.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days, got {0!r}".format(dayInterval))

    propList = []
    step = timedelta(days=dayInterval)
    currentDate = startDate
    while currentDate <= endDate:
        # weekday() < 5 keeps Monday..Friday only.
        if currentDate.weekday() < 5:
            sDate = currentDate.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersTo(dest, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        currentDate += step

    # At least two days are needed to compare routes against each other.
    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)
    stations = len(allStationsComplete[0])
    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    # The query is for riders *to* dest, so the title says "To".
    # NOTE(review): "Tuesday" is hard-coded; it is only accurate when startDate
    # is a Tuesday and dayInterval is a multiple of 7.
    title = "Tuesday To {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(
        dest, len(allStations), pVal, rejectHO, stations)
    PlotMultiSetsTo(allStationsComplete, 1, title)
    # Return value is not used here; the call is kept in case it reports
    # the dropped-rider percentage itself (not visible from this function).
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)
    Plot3DRoutesTo(allStationsComplete, 1, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 1, title)
def RunBARTTimeSeries2(source, hour, year):
    """
    Run the full time-series analysis for one station/hour/year.

    Plots the raw and smoothed series, the FFT of the smoothed series, then
    runs the decomposition and ACF helpers, an Augmented Dickey-Fuller test
    and an UnobservedComponents model with a fixed intercept and one seasonal
    term (period 50, 5 harmonics). Results are printed and plotted.

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    rawSeries = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    # NOTE(review): the title always says "AM", whatever hour is passed.
    caption = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)
    PlotTimeSeriesWithLimitBars(rawSeries, caption)

    smoothed = BartLibs.Smooth_1StandardDeviation(rawSeries)
    PlotTimeSeriesWithLimitBars(smoothed, caption)
    PlotTimeSeriesFFT(smoothed, caption)
    BartLibs.Decomposition(smoothed, 5)
    BartLibs.ACF(smoothed, 10)

    print("\n\nRQ1 - TIME SERIES AutoCorrelation -----------------------------")
    # Augmented Dickey-Fuller test; element 1 of the result is the p-value.
    adfResult = adfuller(smoothed, autolag='AIC')
    pValue = adfResult[1]
    print("\n\n\nAugmented Dickey-Fuller Test: pval = {0}\n\n\n".format(pValue))
    verdict = ("Failed to Reject Ho - Time Series is Non-Stationary"
               if pValue > 0.05
               else "Reject Ho - Time Series is Stationary")
    print(verdict)

    seasonalTerms = [{'period': 50, 'harmonics': 5}]
    ucModel = sm.tsa.UnobservedComponents(smoothed, level='fixed intercept', freq_seasonal=seasonalTerms)
    fitted = ucModel.fit(disp=False)
    print(fitted.summary())
    # The first state variable holds the intercept estimate; take its last value.
    interceptTail = fitted.smoother_results.smoothed_state[0, -1:]
    print("fixed intercept estimated as {0:.3f}".format(interceptTail[0]))
    print("\n\nRQ1 --------------------------------------")
    fitted.plot_components()
    plt.show()
def RunBARTTimeSeriesZoomed(source, hour, year):
    """
    Plot the raw rider series, then the smoothed series with the zoomed plot.

    No statistical tests are run here; only the two plots are produced.

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    rawSeries = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    # NOTE(review): the title always says "AM", whatever hour is passed.
    caption = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)
    PlotTimeSeriesWithLimitBars(rawSeries, caption)
    PlotTimeSeriesWithLimitBarsZoomed(
        BartLibs.Smooth_1StandardDeviation(rawSeries), caption, False)
def PlotTimeSeriesFFT(smoothData, title):
    """
    Plot the scaled FFT amplitudes of a time series.

    The series mean is subtracted before the transform. Each FFT coefficient
    is passed through BartLibs.SumSquares, scaled by 2/N (N = number of
    coefficients), and the first third of the scaled values is plotted.

    :param smoothData: time series data
    :param title: title to place on plot
    """
    seriesMean = statistics.mean(smoothData)
    centered = [sample - seriesMean for sample in smoothData]
    spectrum = np.fft.fft(centered)
    amplitudes = [BartLibs.SumSquares(coefficient) for coefficient in spectrum]

    count = len(amplitudes)
    scale = 2.0 / count
    scaledAmplitudes = [scale * amplitude for amplitude in amplitudes]

    # Only the first third of the spectrum is shown.
    cutoff = int(count / 3.0)
    plt.plot(scaledAmplitudes[:cutoff])
    plt.suptitle(title)
    plt.show()
def CompareMultipleDayRidersFrom(startDate, endDate, origin, hour, minStations, minRiders, minNumber, dayInterval,
                                 histStation='EMBR', histTitle="EMBR Station Riders 2019"):
    """
    Complete run of multiple routes from an origin station.

    Collects the daily riders from ``origin`` for every weekday visited between
    startDate and endDate (inclusive, stepping by ``dayInterval`` days). When
    more than one day returned data, the routes are scrubbed and intersected,
    tested (BartLibs.TestMultipleRoutes and TestMultipleRoutesAnova) and
    plotted. Otherwise "No Stations Found" is printed.

    :param startDate: Start Date to query (must support weekday(), strftime() and timedelta addition)
    :param endDate: End Date to query (inclusive)
    :param origin: source station
    :param hour: hour to query
    :param minStations: minimum station intersections
    :param minRiders: min riders per station (must be at least 5)
    :param minNumber: min number total riders for route to be included
    :param dayInterval: interval for query or skip level; must be positive
    :param histStation: station passed to the final histogram plot (default 'EMBR', the previously hard-coded value)
    :param histTitle: title of the final histogram plot (default is the previously hard-coded title)
    :raises ValueError: if dayInterval is zero or negative (the date would never advance)
    """
    # A non-positive step would keep currentDate <= endDate forever.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days, got {0!r}".format(dayInterval))

    propList = []
    step = timedelta(days=dayInterval)
    currentDate = startDate
    while currentDate <= endDate:
        # weekday() < 5 keeps Monday..Friday only.
        if currentDate.weekday() < 5:
            sDate = currentDate.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(origin, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        currentDate += step

    # At least two days are needed to compare routes against each other.
    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)
    stations = len(allStationsComplete[0])
    routesDF = BartLibs.AllStationsToDF(allStationsComplete)
    PlotMeanRidersPerStation(routesDF, allStationsComplete, origin)
    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    PlotRouteDestinations(routesDF, origin, startDate, endDate)
    TestMultipleRoutesAnova(routesDF)
    # NOTE(review): "Tuesday" is hard-coded; it is only accurate when startDate
    # is a Tuesday and dayInterval is a multiple of 7.
    title = "Tuesday From {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(
        origin, len(allStations), pVal, rejectHO, stations)
    PlotMultiSetsTo(allStationsComplete, 2, title)
    # Return value is not used here; the call is kept in case it reports
    # the dropped-rider percentage itself (not visible from this function).
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)
    Plot3DRoutesTo(allStationsComplete, 2, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 2, title)
    PlotStationHistrogram(routesDF, histStation, histTitle)