Exemplo n.º 1
0
def CompareMultiDayRidersToYearlyAveFrom(startDate, endDate, source1, hour1, year1, minStations, minRiders, interval):
    """
    Compare each weekday's riders from a source station to the yearly average

    Walks from startDate to endDate (both inclusive) in steps of ``interval``
    days. For every visited weekday the daily riders from ``source1`` at
    ``hour1`` are queried, paired with the yearly average for ``year1``,
    scrubbed, tested with BartLibs.TestMultipleRoutes and plotted with
    PlotTwoSetsTrueProp. Days for which the query returns nothing are skipped.

    :param startDate: start date for query
    :param endDate: end date for query
    :param source1: source station
    :param hour1: hour to query
    :param year1: year to average
    :param minStations: min stations to intersect
    :param minRiders: min riders to include per station
    :param interval: query skip interval in days (must be positive)
    :raises ValueError: if interval is zero or negative
    """
    # A zero or negative step never moves past endDate, so the loop below
    # would not terminate.
    if interval <= 0:
        raise ValueError("interval must be a positive number of days")

    yearlyAvg = BARTQueries.GetYearlyAverageDailyRidersFromSource(source1, hour1, year1)

    current = startDate
    step = timedelta(days=interval)
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekends are skipped.
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(source1, hour1, sDate)
            if len(da) > 0:
                dayYearPair = [da, yearlyAvg]
                # NOTE(review): minRiders is passed as both the 2nd and 4th
                # argument; the multi-day comparison functions in this file
                # pass a separate minNumber as the 4th - confirm intended.
                allStations, allStationsComplete = BartLibs.ScrubRiders(dayYearPair, minRiders, minStations, minRiders)
                rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
                # NOTE(review): there is no separator between {1} and
                # "RejectHO" in the title text - confirm this is intended.
                title = "{0}, Stats: {1}RejectHO: {4}\nPVal: {2:.5f} Date {3}".format(source1,
                                                                                      len(da), pVal,
                                                                                      sDate,
                                                                                      rejectHO)
                PlotTwoSetsTrueProp(allStationsComplete, sDate, year1, 2, title)

        current += step
Exemplo n.º 2
0
def PlotMeanRidersPerStation(df, allStationsComplete, sourceStation):
    """
    Draw the mean riders per destination as a horizontal error-bar chart

    One point is plotted for every distinct value of the 'dest' column; the
    error bar is the distance between the mean and the first bound returned
    by BartLibs.ConfidenceIntervalT, applied symmetrically.

    :param df: dataframe for all routes ('dest' and 'riders' columns are read)
    :param allStationsComplete: all routes as list type (not used here)
    :param sourceStation: station name inserted into the plot title
    """
    destinations = df.dest.unique()
    means = []
    halfWidths = []
    for station in destinations:
        riders = df[df['dest'] == station].riders.tolist()
        centre = np.mean(riders)
        means.append(centre)
        lowerBound, _ = BartLibs.ConfidenceIntervalT(riders)
        halfWidths.append(abs(centre - lowerBound))

    # One y position per destination, in order of first appearance in df.
    positions = list(range(len(destinations)))

    plt.figure()
    plt.errorbar(means, positions, xerr=halfWidths, fmt='o', color='k')
    plt.yticks(positions, destinations)
    plt.title("Mean Riders from {0} to Destinations".format(sourceStation))
    plt.xlabel("Mean Riders")
    plt.ylabel("Destinations with Confidence Limits 95%")
    plt.show()
Exemplo n.º 3
0
def CompareMultipleDayRidersTo(startDate, endDate, dest, hour, minStations, minRiders, minNumber, dayInterval):
    """
    Compares multiple routes over time frame
    Cleans stations, intersects and create route contingency table
    Produces plots, goodness of fit tests

    Weekdays between startDate and endDate (both inclusive) are visited in
    steps of ``dayInterval`` days; days whose query returns nothing are
    skipped. If fewer than two days have data, or scrubbing leaves no routes,
    "No Stations Found" is printed and nothing is plotted.

    :param startDate: Start date for route query
    :param endDate:  End date for route query
    :param dest: The destination station
    :param hour: The hour to query
    :param minStations: Min number of stations to intersect to be considered in test table
    :param minRiders: Min riders to consider for each route station (min must be > 5)
    :param minNumber: Min number of total riders for train to be considered
    :param dayInterval: Skip day interval (must be positive)
    :raises ValueError: if dayInterval is zero or negative
    """
    # A zero or negative step never moves past endDate, so the loop below
    # would not terminate.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days")

    propList = []
    current = startDate
    step = timedelta(days=dayInterval)
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekends are skipped.
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersTo(dest, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        current += step

    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)

    # Indexing [0] below would raise IndexError if scrubbing removed every route.
    if len(allStationsComplete) == 0:
        print("No Stations Found")
        return

    stations = len(allStationsComplete[0])
    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    # NOTE(review): the title hard-codes "Tuesday From" although this function
    # queries riders *to* dest on every visited weekday - confirm wording.
    title = "Tuesday From {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(dest,
                                                                                               len(allStations),
                                                                                               pVal, rejectHO,
                                                                                               stations)
    PlotMultiSetsTo(allStationsComplete, 1, title)
    # Result is not used here; the call is kept in case it reports on its own.
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)

    Plot3DRoutesTo(allStationsComplete, 1, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 1, title)
Exemplo n.º 4
0
def RunBARTTimeSeries2(source, hour, year):
    """
    Run the full time series analysis for one station/hour/year

    Plots the raw and smoothed series, the FFT, the decomposition and the
    ACF, prints the Augmented Dickey-Fuller verdict, then fits and plots an
    unobserved-components model with a fixed intercept and one seasonal term.

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    rawSeries = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    plotTitle = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)
    PlotTimeSeriesWithLimitBars(rawSeries, plotTitle)

    smoothed = BartLibs.Smooth_1StandardDeviation(rawSeries)
    PlotTimeSeriesWithLimitBars(smoothed, plotTitle)
    PlotTimeSeriesFFT(smoothed, plotTitle)

    BartLibs.Decomposition(smoothed, 5)
    BartLibs.ACF(smoothed, 10)

    print("\n\nRQ1 - TIME SERIES AutoCorrelation -----------------------------")

    # ADF test for stationarity; element 1 of the result is the p-value.
    adfResult = adfuller(smoothed, autolag='AIC')
    adfPValue = adfResult[1]
    print("\n\n\nAugmented Dickey-Fuller Test: pval = {0}\n\n\n".format(adfPValue))
    verdict = ("Failed to Reject Ho - Time Series is Non-Stationary"
               if adfPValue > 0.05
               else "Reject Ho - Time Series is Stationary")
    print(verdict)

    seasonalSpec = [{'period': 50, 'harmonics': 5}]
    ucModel = sm.tsa.UnobservedComponents(smoothed,
                                          level='fixed intercept',
                                          freq_seasonal=seasonalSpec)
    fitted = ucModel.fit(disp=False)
    print(fitted.summary())

    # The first state variable holds the intercept estimate; take its last value.
    interceptTail = fitted.smoother_results.smoothed_state[0, -1:]
    print("fixed intercept estimated as {0:.3f}".format(interceptTail[0]))
    print("\n\nRQ1 --------------------------------------")

    fitted.plot_components()
    plt.show()
Exemplo n.º 5
0
def RunBARTTimeSeriesZoomed(source, hour, year):
    """
    Plot the raw weekday series, then a zoomed plot of the smoothed series

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    weekdayRiders = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    chartTitle = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)

    # Raw series first, then the smoothed series in the zoomed view.
    PlotTimeSeriesWithLimitBars(weekdayRiders, chartTitle)
    PlotTimeSeriesWithLimitBarsZoomed(
        BartLibs.Smooth_1StandardDeviation(weekdayRiders),
        chartTitle,
        False,
    )
Exemplo n.º 6
0
def PlotTimeSeriesFFT(smoothData, title):
    """
    Plot the scaled FFT amplitudes of a mean-centred time series

    The series mean is subtracted, the FFT is taken, BartLibs.SumSquares is
    applied to every coefficient, the values are scaled by 2/N and only the
    first third of them is plotted.

    :param smoothData: time series data
    :param title: title to place on plot
    """
    centre = statistics.mean(smoothData)
    centred = [sample - centre for sample in smoothData]

    spectrum = np.fft.fft(centred)
    amplitudes = [BartLibs.SumSquares(coeff) for coeff in spectrum]

    count = len(amplitudes)
    scale = 2.0 / count
    scaled = [scale * amp for amp in amplitudes]

    # Only the first third of the spectrum is shown.
    plt.plot(scaled[:count // 3])
    plt.suptitle(title)
    plt.show()
Exemplo n.º 7
0
def CompareMultipleDayRidersFrom(startDate, endDate, origin, hour, minStations, minRiders, minNumber, dayInterval,
                                 histStation='EMBR', histTitle="EMBR Station Riders 2019"):
    """
    Complete run of multiple routes destination format

    Weekdays between startDate and endDate (both inclusive) are visited in
    steps of ``dayInterval`` days; days whose query returns nothing are
    skipped. If fewer than two days have data, or scrubbing leaves no routes,
    "No Stations Found" is printed and nothing is plotted.

    :param startDate: Start Date to query
    :param endDate:  End Date to query
    :param origin: source station
    :param hour: hour to query
    :param minStations: minimum station intersections
    :param minRiders: min riders per station (must be at least 5)
    :param minNumber: min number total riders for route to be included
    :param dayInterval: interval for query or skip level (must be positive)
    :param histStation: second argument forwarded to PlotStationHistrogram
        (presumably the station to histogram - confirm); defaults to the
        previously hard-coded 'EMBR'
    :param histTitle: title forwarded to PlotStationHistrogram; defaults to
        the previously hard-coded text
    :raises ValueError: if dayInterval is zero or negative
    """
    # A zero or negative step never moves past endDate, so the loop below
    # would not terminate.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days")

    propList = []
    current = startDate
    step = timedelta(days=dayInterval)
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekends are skipped.
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(origin, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        current += step

    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)

    # Indexing [0] below would raise IndexError if scrubbing removed every route.
    if len(allStationsComplete) == 0:
        print("No Stations Found")
        return

    stations = len(allStationsComplete[0])

    df = BartLibs.AllStationsToDF(allStationsComplete)
    PlotMeanRidersPerStation(df, allStationsComplete, origin)

    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    PlotRouteDestinations(df, origin, startDate, endDate)
    TestMultipleRoutesAnova(df)
    # NOTE(review): the title hard-codes "Tuesday" although every visited
    # weekday is included - confirm wording.
    title = "Tuesday From {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(origin,
                                                                                               len(allStations),
                                                                                               pVal, rejectHO,
                                                                                               stations)
    PlotMultiSetsTo(allStationsComplete, 2, title)
    # Result is not used here; the call is kept in case it reports on its own.
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)

    Plot3DRoutesTo(allStationsComplete, 2, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 2, title)

    PlotStationHistrogram(df, histStation, histTitle)