def entries_day_line(turnstile_weather):
    """Plot total turnstile entries per day of week as a dashed line.

    The frame is first passed through ``time_period``, then
    ``ENTRIESn_hourly`` is summed per ``day_week`` with pandasql and the
    totals (in millions) are drawn on a new matplotlib figure.

    Args:
        turnstile_weather: pandas DataFrame with an ``ENTRIESn_hourly``
            column. The ``day_week`` column is presumably added by
            ``time_period`` -- TODO confirm against that helper.

    Returns:
        The ``plt`` (matplotlib.pyplot) module, with the new figure current.
    """
    # Add Time Period
    turnstile_weather = time_period(turnstile_weather)

    # ORDER BY makes the row order explicit: the weekday labels below are
    # attached by position and the line is drawn in row order, so relying on
    # the engine's incidental GROUP BY ordering would be fragile.
    q = """
    select day_week as Day_Week, sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by day_week
    order by day_week
    """

    # Execute the SQL command against the pandas frame. Only the one table
    # the query names is exposed, rather than every local variable.
    entries_day = pandasql.sqldf(q, {'turnstile_weather': turnstile_weather})

    plt.figure()
    plt.title('Turnstile Entries by Day (5/1/2011 - 5/31/2011)')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.xlabel('Day of Week')

    # Float divisor so the scaling can never fall back to integer division.
    y = entries_day['ENTRIESn_hourly'] / 1000000.0
    x = entries_day['Day_Week']

    # Labels are matched to tick positions by index (ascending day_week,
    # assumed to run Monday..Sunday -- verify against time_period).
    labels = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']
    if len(x) == len(labels):
        plt.xticks(x, labels)
    else:
        # A frame that does not cover all seven days cannot be labelled
        # positionally; show the raw day_week values instead of failing
        # or mislabelling.
        plt.xticks(x)

    plt.xlim(-1, 7)
    plt.ylim(0, 25)
    plt.plot(x, y, marker='.', linestyle='--')
    return plt
def entries_day_line(turnstile_weather):
    """Plot total turnstile entries per day of week as a dashed line.

    The frame is first passed through ``time_period``, then
    ``ENTRIESn_hourly`` is summed per ``day_week`` with pandasql and the
    totals (in millions) are drawn on a new matplotlib figure.

    Args:
        turnstile_weather: pandas DataFrame with an ``ENTRIESn_hourly``
            column. The ``day_week`` column is presumably added by
            ``time_period`` -- TODO confirm against that helper.

    Returns:
        The ``plt`` (matplotlib.pyplot) module, with the new figure current.
    """
    # Add Time Period
    turnstile_weather = time_period(turnstile_weather)

    # ORDER BY makes the row order explicit: the weekday labels below are
    # attached by position and the line is drawn in row order, so relying on
    # the engine's incidental GROUP BY ordering would be fragile.
    q = """
    select day_week as Day_Week, sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by day_week
    order by day_week
    """

    # Execute the SQL command against the pandas frame. Only the one table
    # the query names is exposed, rather than every local variable.
    entries_day = pandasql.sqldf(q, {'turnstile_weather': turnstile_weather})

    plt.figure()
    plt.title('Turnstile Entries by Day (5/1/2011 - 5/31/2011)')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.xlabel('Day of Week')

    # Float divisor so the scaling can never fall back to integer division.
    y = entries_day['ENTRIESn_hourly'] / 1000000.0
    x = entries_day['Day_Week']

    # Labels are matched to tick positions by index (ascending day_week,
    # assumed to run Monday..Sunday -- verify against time_period).
    labels = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']
    if len(x) == len(labels):
        plt.xticks(x, labels)
    else:
        # A frame that does not cover all seven days cannot be labelled
        # positionally; show the raw day_week values instead of failing
        # or mislabelling.
        plt.xticks(x)

    plt.xlim(-1, 7)
    plt.ylim(0, 25)
    plt.plot(x, y, marker='.', linestyle='--')
    return plt
def entries_time_period_bar(turnstile_weather):
    """Plot total turnstile entries per time period as a bar chart.

    The frame is first passed through ``time_period``, then
    ``ENTRIESn_hourly`` is summed per ``time_period`` value with pandasql
    and the totals (in millions) are drawn on a new matplotlib figure.

    Args:
        turnstile_weather: pandas DataFrame with an ``ENTRIESn_hourly``
            column. The ``time_period`` column is presumably added by the
            ``time_period`` helper -- TODO confirm.

    Returns:
        The ``plt`` (matplotlib.pyplot) module, with the new figure current.
    """
    # Add Time Period
    turnstile_weather = time_period(turnstile_weather)

    # ORDER BY makes the row order explicit: the period labels below are
    # attached to the bars by position, so relying on the engine's
    # incidental GROUP BY ordering would be fragile.
    q = """
    select time_period as Time_Period, sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by time_period
    order by time_period
    """

    # Execute the SQL command against the pandas frame. Only the one table
    # the query names is exposed, rather than every local variable.
    entries_time_period = pandasql.sqldf(
        q, {'turnstile_weather': turnstile_weather})

    plt.figure()
    plt.title('Turnstile Entries by Time Period')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.xlabel('Time Period')

    # Float divisor so the scaling can never fall back to integer division.
    y = entries_time_period['ENTRIESn_hourly'] / 1000000.0
    x = entries_time_period['Time_Period']

    # Labels are matched to tick positions by index, in ascending
    # time_period order (the code -> name mapping is assumed from this
    # list -- verify against the time_period helper).
    labels = ['Late Night', 'Weekends', 'Midday', 'Evening', 'Rush Hour']
    if len(x) == len(labels):
        plt.xticks(x, labels)
    else:
        # A frame that does not contain all five periods cannot be labelled
        # positionally; show the raw period codes instead of failing or
        # mislabelling.
        plt.xticks(x)

    plt.xlim(0, 6)
    plt.ylim(0, 25)
    plt.bar(x, y, width=0.25, align='center', color='DodgerBlue')
    return plt
def entries_time_period_bar(turnstile_weather):
    """Plot total turnstile entries per time period as a bar chart.

    The frame is first passed through ``time_period``, then
    ``ENTRIESn_hourly`` is summed per ``time_period`` value with pandasql
    and the totals (in millions) are drawn on a new matplotlib figure.

    Args:
        turnstile_weather: pandas DataFrame with an ``ENTRIESn_hourly``
            column. The ``time_period`` column is presumably added by the
            ``time_period`` helper -- TODO confirm.

    Returns:
        The ``plt`` (matplotlib.pyplot) module, with the new figure current.
    """
    # Add Time Period
    turnstile_weather = time_period(turnstile_weather)

    # ORDER BY makes the row order explicit: the period labels below are
    # attached to the bars by position, so relying on the engine's
    # incidental GROUP BY ordering would be fragile.
    q = """
    select time_period as Time_Period, sum(ENTRIESn_hourly) as ENTRIESn_hourly
    from turnstile_weather
    group by time_period
    order by time_period
    """

    # Execute the SQL command against the pandas frame. Only the one table
    # the query names is exposed, rather than every local variable.
    entries_time_period = pandasql.sqldf(
        q, {'turnstile_weather': turnstile_weather})

    plt.figure()
    plt.title('Turnstile Entries by Time Period')
    plt.ylabel('Turnstile Entries (in millions)')
    plt.xlabel('Time Period')

    # Float divisor so the scaling can never fall back to integer division.
    y = entries_time_period['ENTRIESn_hourly'] / 1000000.0
    x = entries_time_period['Time_Period']

    # Labels are matched to tick positions by index, in ascending
    # time_period order (the code -> name mapping is assumed from this
    # list -- verify against the time_period helper).
    labels = ['Late Night', 'Weekends', 'Midday', 'Evening', 'Rush Hour']
    if len(x) == len(labels):
        plt.xticks(x, labels)
    else:
        # A frame that does not contain all five periods cannot be labelled
        # positionally; show the raw period codes instead of failing or
        # mislabelling.
        plt.xticks(x)

    plt.xlim(0, 6)
    plt.ylim(0, 25)
    plt.bar(x, y, width=0.25, align='center', color='DodgerBlue')
    return plt
# Fit an ordinary-least-squares model of hourly turnstile entries on weather
# readings plus station-unit and time-period dummy variables, then print the
# statsmodels summary table.
import numpy as np
import pandas as pd
import pandasql
import statsmodels.api as sm

from time_period import time_period

# Load data
input_filename = "turnstile_weather_v2.csv"
df = pd.read_csv(input_filename)

# Add Time Period
df = time_period(df)

# Create one indicator column per UNIT value and per time_period value,
# then combine them side by side (joined on the shared row index).
dummy_units = pd.get_dummies(df['UNIT'], prefix='unit')
dummy_hour = pd.get_dummies(df['time_period'], prefix='time')
dummy_combined = dummy_units.join(dummy_hour)

values = df[['ENTRIESn_hourly']]  # response
features = df[['rain', 'tempi', 'wspdi']].join(dummy_combined)  # predictors
features = sm.add_constant(features)  # adds a constant (intercept) column

# NOTE(review): every dummy level is kept alongside the constant, so the
# predictors are presumably collinear; the disabled line below drops one
# unit column as a baseline -- confirm whether it should be re-enabled.
#features = features.drop('unit_R034', 1)

mod = sm.OLS(values, features)
res = mod.fit()

# Function-call form of print: same output on Python 2 for a single argument,
# and valid syntax on Python 3 (the bare print statement is not).
print(res.summary())