示例#1
0
def CompareMultiDayRidersToYearlyAveFrom(startDate, endDate, source1, hour1, year1, minStations, minRiders, interval):
    """
    Compare each sampled weekday's riders from a source station to the yearly average

    Walks from startDate to endDate (both inclusive) in steps of ``interval`` days.
    For every sampled date that is a Monday-Friday and has rider data, the day's
    riders are paired with the yearly average, scrubbed, tested, and plotted.

    :param startDate: start date for query
    :param endDate: end date for query
    :param source1: source station
    :param hour1: hour to query
    :param year1: year to average
    :param minStations: min stations to intersect
    :param minRiders: min riders to include per station
    :param interval: query skip interval in days, must be greater than 0
    :raises ValueError: if interval is not positive (the date loop could never finish)
    """
    # Validate before doing any query work: a zero or negative step would keep
    # the loop below from ever advancing past endDate.
    if interval <= 0:
        raise ValueError("interval must be a positive number of days, got {0!r}".format(interval))

    yearlyAvg = BARTQueries.GetYearlyAverageDailyRidersFromSource(source1, hour1, year1)

    step = timedelta(days=interval)
    current = startDate
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekend dates are skipped
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(source1, hour1, sDate)
            if len(da) > 0:
                dayYearPair = [da, yearlyAvg]
                # NOTE(review): minRiders is passed as both the 2nd and 4th argument;
                # other callers in this file pass a separate minNumber as the 4th.
                # Confirm this is intended.
                allStations, allStationsComplete = BartLibs.ScrubRiders(dayYearPair, minRiders, minStations, minRiders)
                rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
                title = "{0}, Stats: {1}RejectHO: {4}\nPVal: {2:.5f} Date {3}".format(source1,
                                                                                      len(da), pVal,
                                                                                      sDate,
                                                                                      rejectHO)
                PlotTwoSetsTrueProp(allStationsComplete, sDate, year1, 2, title)

        current += step
示例#2
0
def PlotTotalRidersPerMonth():
    """
    Show monthly rider totals for 2016-2019 as a time series with a fitted trend

    Draws the limit-bar time series plot, prints an OLS regression of riders on
    rMonth, then plots the observed riders with the regression line on top.
    """
    _, monthly = BARTQueries.GetTotalRidersPerMonth()

    # Keep only rows whose year is strictly between 2015 and 2020
    inWindow = (monthly['year'] < 2020) & (monthly['year'] > 2015)
    monthly = monthly[inWindow]

    chartTitle = "Riders per Month 2016 to 2019"
    PlotTimeSeriesWithLimitBars(monthly['riders'], chartTitle, False)

    print("\n\nRQ4 - TOTAL RIDERS REGRESSION -----------------------------")

    # Fit a linear regression of riders against the month index with statsmodels
    fitted = ols('riders ~ rMonth', data=monthly).fit()
    print(fitted.params)
    print(fitted.summary())
    trendLine = fitted.predict()

    print("\n\nRQ4 - ----------------------------------------------------")

    monthAxis = monthly['rMonth']
    plt.plot(monthAxis, monthly['riders'])  # observed riders
    plt.plot(monthAxis, trendLine, 'r', linewidth=2)  # fitted regression line
    plt.xlabel('Months 2016 to 2019')
    plt.ylabel('Riders')
    plt.title(chartTitle)

    plt.show()
示例#3
0
def PlotRidersOnMap(year):
    """
    Plot overlaid bubble chart on street map for Source routes, then a bar chart

    Reads the Mapbox access token from the ``.mapbox_token`` file (relative
    path), shows a scatter-mapbox figure with bubbles sized by riders, and then
    shows a horizontal bar chart restricted to rows with more than 4000 riders.

    :param year: year to plot
    """
    # Use a context manager so the token file handle is closed right away
    # instead of being left open until garbage collection.
    with open(".mapbox_token") as tokenFile:
        px.set_mapbox_access_token(tokenFile.read())

    # First argument is fixed at 7 (presumably the departure hour; confirm
    # against BARTQueries).
    _, df = BARTQueries.GetTotalRidersInNetworkByHourFrom(7, year)

    fig = px.scatter_mapbox(df,
                            lat='lat', lon='long', size='riders',
                            color_continuous_scale=px.colors.cyclical.IceFire,
                            size_max=15, zoom=10)

    fig.show()

    busiest = df[df['riders'] > 4000]

    # barh places the first argument (stations) on the y-axis and the bar
    # lengths (riders) on the x-axis, so the axis labels must follow that.
    plt.barh(busiest['source'], busiest['riders'])
    plt.suptitle('Total Riders Departing Station : {0}'.format(year))
    plt.xlabel('Riders')
    plt.ylabel('Stations')
    plt.xticks(rotation=45)
    plt.show()
示例#4
0
def CompareMultipleDayRidersFrom(startDate, endDate, origin, hour, minStations, minRiders, minNumber, dayInterval):
    """
    Complete run of multiple routes departing from one origin station

    Collects the riders for every sampled weekday between startDate and endDate
    (both inclusive), scrubs the days into a common station set, runs the route
    tests, and produces the plots. Prints "No Stations Found" when fewer than
    two sampled days returned data.

    :param startDate: Start Date to query
    :param endDate:  End Date to query
    :param origin: source station
    :param hour: hour to query
    :param minStations: minimum station intersections
    :param minRiders: min riders per station (must be at least 5)
    :param minNumber: min number total riders for route to be included
    :param dayInterval: interval for query or skip level in days, must be greater than 0
    :raises ValueError: if dayInterval is not positive (the date loop could never finish)
    """
    # A zero or negative step would keep the loop below from ever advancing
    # past endDate, so reject it up front.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days, got {0!r}".format(dayInterval))

    propList = []
    step = timedelta(days=dayInterval)
    current = startDate
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekend dates are skipped
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersFrom(origin, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        current += step

    # At least two days are needed for a comparison
    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)
    stations = len(allStationsComplete[0])

    df = BartLibs.AllStationsToDF(allStationsComplete)
    PlotMeanRidersPerStation(df, allStationsComplete, origin)

    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    PlotRouteDestinations(df, origin, startDate, endDate)
    TestMultipleRoutesAnova(df)
    title = "Tuesday From {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(origin,
                                                                                               len(allStations),
                                                                                               pVal, rejectHO,
                                                                                               stations)
    PlotMultiSetsTo(allStationsComplete, 2, title)
    # The return value is not used here; the call is kept in its original position.
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)

    Plot3DRoutesTo(allStationsComplete, 2, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 2, title)

    # NOTE(review): station and caption are hard-coded to EMBR / 2019 regardless
    # of origin and the date range. Confirm this is intended.
    PlotStationHistrogram(df, 'EMBR', "EMBR Station Riders 2019")
示例#5
0
def TwoWayAnova(source, year):
    """
    Run and print a two-way ANOVA of riders by hour and ISO day of week

    The model includes both main effects and their interaction term.

    :param source: Source Station
    :param year: Year
    """
    _, stationData = BARTQueries.GetTotalRidersPerHourPerDOWForStationTEXT(source, year)

    # Main effects for hour and day of week plus their interaction
    anovaFormula = 'riders ~ C(hour) + C(isodow) + C(hour):C(isodow)'
    fittedModel = ols(anovaFormula, data=stationData).fit()
    anovaTable = sm.stats.anova_lm(fittedModel, typ=2)

    print("\n\nRQ1 - TWO WAY ANOVA --------------------------------------")
    print(anovaTable)
    print("\n\nRQ1 ------------------------------------------------------")
示例#6
0
def PlotAverageRidersByHour(year):
    """
    Show a bar chart of riders per departure hour for a given year

    The data comes from BARTQueries.GetTotalRidersPerHour.

    :param year: Year to plot
    """
    _, ridersByHour = BARTQueries.GetTotalRidersPerHour(year)
    hourAxis = ridersByHour['hour']
    riderCounts = ridersByHour['riders']

    plt.bar(hourAxis, riderCounts)
    plt.suptitle('Total Riders : {0}'.format(year))
    plt.xlabel('Departure Hour')
    plt.ylabel('Riders')
    plt.xticks(rotation=0)
    plt.show()
示例#7
0
def RunBARTTimeSeriesZoomed(source, hour, year):
    """
    Plot the weekday rider time series for a station, then a zoomed smoothed version

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    weekdayRiders = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    # NOTE(review): the caption always appends "AM", whatever hour is passed
    chartTitle = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)

    # Raw series first
    PlotTimeSeriesWithLimitBars(weekdayRiders, chartTitle)

    # Then the smoothed series in the zoomed plot
    PlotTimeSeriesWithLimitBarsZoomed(BartLibs.Smooth_1StandardDeviation(weekdayRiders), chartTitle, False)
示例#8
0
def PlotTotalRidersByHourBySource(source, year):
    """
    Show a bar chart of riders per departure hour for one station and year

    :param source: station passed to the per-station hourly query
    :param year: year to plot
    """
    queryResult = BARTQueries.GetTotalRidersPerHourForStation(source, year)
    # The query returns a pair; only the second element (the data frame) is plotted
    stationFrame = queryResult[1]

    plt.bar(stationFrame['hour'], stationFrame['riders'])
    plt.suptitle('Total Riders : {0}'.format(year))
    plt.xlabel('Departure Hour')
    plt.ylabel('Riders')
    plt.xticks(rotation=0)
    plt.show()
示例#9
0
def GetTotalRidersPerHourPerDayForStation(source, year):
    """
    Produce a bar plot of riders per departure hour for a station and year

    Despite the name, this function has no return statement; it only plots.

    :param source: Departure Station
    :param year: Year to compare
    """
    _, hourlyFrame = BARTQueries.GetTotalRidersPerHourForStation(source, year)
    departureHours = hourlyFrame['hour']
    riderTotals = hourlyFrame['riders']

    plt.bar(departureHours, riderTotals)
    plt.suptitle('Total Riders : {0}'.format(year))
    plt.xlabel('Departure Hour')
    plt.ylabel('Riders')
    plt.xticks(rotation=0)
    plt.show()
示例#10
0
def PlotYearlySumRidersPerOrigin(origin, year):
    """
    Show a bar chart of summed yearly riders per hour for one station

    Each row of the query result supplies the bar height at index 0 and the
    category label at index 1.

    :param origin: station passed to the yearly-sum query
    :param year: year to summarize
    """
    rows = BARTQueries.GetSumYearRidersPerHour(origin, year)
    hourLabels = [row[1] for row in rows]
    riderSums = [row[0] for row in rows]

    plt.bar(hourLabels, riderSums)
    plt.suptitle('Total Riders : {0}'.format(year))
    plt.xlabel('Hour')
    plt.ylabel('Riders')
    plt.xticks(rotation=90)
    plt.show()
示例#11
0
def CompareRidersPerISODOW(year):
    """
    Compares riders for each weekday over a year
    produces bar plot

    :param year: Year to compare
    """
    _, df = BARTQueries.GetTotalRidersPerDOW(year)

    # assumes the query returns exactly five rows ordered Monday-Friday;
    # TODO confirm against BARTQueries.GetTotalRidersPerDOW
    labels = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri']

    plt.bar(labels, df['riders'])
    plt.suptitle('Total Riders by Day : {0}'.format(year))
    # The bars are days of the week, not departure hours
    plt.xlabel('Day of Week')
    plt.ylabel('Riders')
    plt.xticks(rotation=0)
    plt.show()
示例#12
0
def CompareMultipleDayRidersTo(startDate, endDate, dest, hour, minStations, minRiders, minNumber, dayInterval):
    """
    Compares multiple routes over time frame
    Cleans stations, intersects and create route contingency table
    Produces plots, goodness of fit tests

    Only sampled dates that fall on Monday-Friday are queried. Prints
    "No Stations Found" when fewer than two sampled days returned data.

    :param startDate: Start date for route query
    :param endDate:  End date for route query
    :param dest: The destination station
    :param hour: The hour to query
    :param minStations: Min number of stations to intersect to be considered in test table
    :param minRiders: Min riders to consider for each route station (min must be > 5)
    :param minNumber: Min number of total riders for train to be considered
    :param dayInterval: Skip day interval in days, must be greater than 0
    :raises ValueError: if dayInterval is not positive (the date loop could never finish)
    """
    # A zero or negative step would keep the loop below from ever advancing
    # past endDate, so reject it up front.
    if dayInterval <= 0:
        raise ValueError("dayInterval must be a positive number of days, got {0!r}".format(dayInterval))

    propList = []
    step = timedelta(days=dayInterval)
    current = startDate
    while current <= endDate:
        # weekday() is 0-4 for Monday-Friday; weekend dates are skipped
        if current.weekday() < 5:
            sDate = current.strftime("%m-%d-%Y")
            da, _ = BARTQueries.GetDailyRidersTo(dest, hour, sDate)
            if len(da) > 0:
                propList.append(da)
        current += step

    # At least two days are needed for a comparison
    if len(propList) <= 1:
        print("No Stations Found")
        return

    BartLibs.PrintRoutes(propList)
    allStations, allStationsComplete = BartLibs.ScrubRiders(propList, minRiders, minStations, minNumber)

    stations = len(allStationsComplete[0])
    rejectHO, pVal = BartLibs.TestMultipleRoutes(allStations)
    # Riders are queried *to* dest, so the caption says "To" rather than "From"
    title = "Tuesday To {0}, RejectHO: {3}\n PVal: {2:.5f}, Days: {1}, Stations:{4} ".format(dest,
                                                                                             len(allStations),
                                                                                             pVal, rejectHO,
                                                                                             stations)
    PlotMultiSetsTo(allStationsComplete, 1, title)
    # The return value is not used here; the call is kept in its original position.
    BartLibs.CalcDroppedRiders(propList, allStationsComplete)
    BartLibs.PrintRoutes(allStationsComplete)

    Plot3DRoutesTo(allStationsComplete, 1, title)
    PlotTimeSeriesRoutesTo(allStationsComplete, 1, title)
示例#13
0
def CompareRidersPerISODOWForStation2(source, year):
    """
    Show a box plot of riders per ISO day of week for one station and year

    One box is drawn for each isodow value 1 through 5, labelled Mon-Fri in
    that order. Outliers are hidden.

    :param source: station passed to the per-station day-of-week query
    :param year: year to compare
    """
    _, dowFrame = BARTQueries.GetTotalRidersPerDOWForStation(source, year)

    dayNames = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri']
    # One list of rider values per isodow value 1..5
    ridersPerDay = [dowFrame[dowFrame['isodow'] == dow].riders.tolist() for dow in range(1, 6)]

    plt.boxplot(ridersPerDay, labels=dayNames, showfliers=False)
    plt.title("Riders by DOW, Station: {0}, Year:{1}".format(source, year))
    plt.xlabel('DOW')
    plt.ylabel('Riders')
    plt.show()
示例#14
0
def RunBARTTimeSeries2(source, hour, year):
    """
    Run the full time series analysis for a station and print/plot the results

    Plots the raw and smoothed series, the FFT, decomposition and ACF, prints an
    Augmented Dickey-Fuller stationarity verdict at the 0.05 level, then fits an
    unobserved-components model and plots its components.

    :param source: Station to test
    :param hour: Hour of day
    :param year: year
    """
    rawSeries = BARTQueries.GetAveragedWeekdayRidersFromSource(source, hour, year)
    chartTitle = "Daily Riders for {0} at {1}:00AM in {2}".format(source, hour, year)
    PlotTimeSeriesWithLimitBars(rawSeries, chartTitle)

    smoothed = BartLibs.Smooth_1StandardDeviation(rawSeries)
    PlotTimeSeriesWithLimitBars(smoothed, chartTitle)

    PlotTimeSeriesFFT(smoothed, chartTitle)

    BartLibs.Decomposition(smoothed, 5)
    BartLibs.ACF(smoothed, 10)

    print("\n\nRQ1 - TIME SERIES AutoCorrelation -----------------------------")

    # ADF test for stationarity; index 1 of the result is used as the p-value
    adfResult = adfuller(smoothed, autolag='AIC')
    adfPValue = adfResult[1]
    print("\n\n\nAugmented Dickey-Fuller Test: pval = {0}\n\n\n".format(adfPValue))
    verdict = ("Failed to Reject Ho - Time Series is Non-Stationary"
               if adfPValue > 0.05
               else "Reject Ho - Time Series is Stationary")
    print(verdict)

    seasonalSpec = [{'period': 50, 'harmonics': 5}]
    ucModel = sm.tsa.UnobservedComponents(smoothed,
                                          level='fixed intercept',
                                          freq_seasonal=seasonalSpec)
    fitResult = ucModel.fit(disp=False)
    print(fitResult.summary())

    # The first state variable holds the intercept estimate; take its last value
    interceptEstimate = fitResult.smoother_results.smoothed_state[0, -1:][0]
    print("fixed intercept estimated as {0:.3f}".format(interceptEstimate))
    print("\n\nRQ1 --------------------------------------")

    fitResult.plot_components()
    plt.show()
示例#15
0
def CompareRidersPerHourPerDayForStation(source, year):
    """
    Show a box plot of riders per departure hour for a given station and year

    One box is drawn for each hour from 4 through 20. Outliers are hidden.

    :param source: Source or departure station
    :param year: year to compare
    """
    _, hourlyFrame = BARTQueries.GetTotalRidersPerHourPerDOWForStation(source, year)

    hours = range(4, 21)
    hourLabels = [str(h) for h in hours]
    # One list of rider values per hour
    ridersPerHour = [hourlyFrame[hourlyFrame['hour'] == h].riders.tolist() for h in hours]

    plt.boxplot(ridersPerHour, labels=hourLabels, showfliers=False)
    plt.title("Riders by Hour, Station: {0}, Year:{1}".format(source, year))
    plt.xlabel('Departure Hour')
    plt.ylabel('Riders')
    plt.show()