def future_data_processing(df, df_housing=None, df_cens=None, df_unemp=None):
    """Process the exogenous features for the future prediction months so
    they align with the model's expected format.

    Parameters
    ----------
    df : pd.DataFrame
        Raw future exogenous data.
    df_housing, df_cens, df_unemp : pd.DataFrame, optional
        Supporting datasets. When omitted they fall back to the
        module-level ``df_median_housing_price``, ``df_census`` and
        ``df_unemployment`` globals, preserving the original call
        signature; passing them explicitly is preferred.

    Returns
    -------
    pd.DataFrame
        Transformed data, summed per 'Month_Year'.
    """
    # Backward-compatible fallback to the module globals the original
    # implementation read implicitly.
    if df_housing is None:
        df_housing = df_median_housing_price
    if df_cens is None:
        df_cens = df_census
    if df_unemp is None:
        df_unemp = df_unemployment
    future_df = transform_merge_data(df, df_housing, df_cens, df_unemp)
    future_transform_df = arima_by_zip_data_transform(future_df)
    # Collapse zip-level rows into a single aggregate row per month.
    future_transform_df = future_transform_df.groupby(
        'Month_Year').sum().reset_index()
    return future_transform_df
def run_random_forest_forecast(df_eviction, df_median_housing_price,
                               df_census, df_unemployment, df_future_data,
                               months_ahead=3):
    """Prepare the merged input data and run the random-forest forecast.

    Merges the eviction, housing-price, census and unemployment
    dataframes, then forwards the result to ``random_forest_forecast``
    for the requested horizon. See the called functions for detailed
    docstrings.
    """
    merged = transform_merge_data(df_eviction, df_median_housing_price,
                                  df_census, df_unemployment)
    return random_forest_forecast(merged, months_ahead, df_future_data)
def run_top_down_forecast(df_eviction, df_median_housing_price, df_census,
                          df_unemployment, df_future_data, months_ahead=3):
    """Run each step of the top-down forecasting model in order.

    Parameters are all unprocessed dataframes; ``months_ahead`` is the
    forecast horizon in months.

    Returns
    -------
    pd.DataFrame
        The top-down prediction dataframe.
    """
    eviction_median_housing = transform_merge_data(
        df_eviction, df_median_housing_price, df_census, df_unemployment)
    future_df = future_data_processing(df_future_data)
    # arimax_by_month_forecast also returns y_hat, which is not used by
    # this pipeline step — discard it explicitly instead of binding an
    # unused local.
    predictions_by_month, _ = arimax_by_month_forecast(
        eviction_median_housing, months_ahead, future_df)
    top_down_prediction_df = top_down_forecast(
        eviction_median_housing, predictions_by_month, months_ahead)
    return top_down_prediction_df
def predict_evictions(df_eviction, df_median_housing_price, df_census,
                      df_unemployment, df_future_data, months_ahead=3,
                      plot_by_zip=False):
    """Run the training and forecast models, merge their results, and
    predict off the previous predictions made by the training models.

    ``plot_by_zip=True`` additionally plots results per zip code.
    Returns the final combined prediction dataframe.
    """
    # Forecast models.
    top_down_df = run_top_down_forecast(
        df_eviction, df_median_housing_price, df_census,
        df_unemployment, df_future_data, months_ahead)
    rf_forecast_df = run_random_forest_forecast(
        df_eviction, df_median_housing_price, df_census,
        df_unemployment, df_future_data, months_ahead)

    # Training models (results should eventually be pickled).
    merged_input = transform_merge_data(
        df_eviction, df_median_housing_price, df_census, df_unemployment)
    rf_training_df, _importance = model_random_forest(merged_input, 10, 'auto')
    top_down_training_df = top_down_estimation_by_zip(merged_input)

    # Combine forecasts and training outputs, then fit a linear
    # regression over the individual model predictions.
    merged_predictions = merge_all_models(top_down_df, rf_forecast_df)
    merged_training = merge_training_data(top_down_training_df, rf_training_df)
    final_df = linear_regression_combination(merged_predictions,
                                             merged_training)

    if plot_by_zip:
        plot_by_zips(merged_training, final_df, zip_code=True)
    return final_df
def run_all_models(df_eviction, df_median_housing_price, df_census,
                   df_unemployment):
    """Train every model on the historical data and combine their
    predictions via linear regression.

    Each zip code maps to a list of hand-tuned ARIMAX parameters:
    (p, d, q) order, seasonal order, and a trailing integer whose
    meaning is defined by ``arimax_by_zip`` — confirm there.
    """
    merged_df = transform_merge_data(df_eviction, df_median_housing_price,
                                     df_census, df_unemployment)

    zip_param_dictionary = {
        '94102': [(2, 1, 0), (1, 0, 0, 6), 4],
        '94103': [(0, 1, 1), (1, 0, 0, 6), 3],
        '94105': [(0, 0, 0), (0, 0, 0, 0), 4],
        '94107': [(3, 0, 0), (1, 0, 0, 8), 4],
        '94108': [(0, 1, 1), (1, 0, 0, 6), 4],
        '94109': [(0, 1, 1), (1, 0, 0, 9), 4],
        '94110': [(7, 1, 1), (2, 0, 0, 7), 4],
        '94111': [(0, 0, 0), (1, 0, 0, 6), 4],
        '94112': [(2, 1, 1), (1, 0, 0, 12), 4],
        '94114': [(1, 1, 1), (1, 0, 0, 7), 4],
        '94115': [(1, 1, 1), (2, 0, 0, 3), 4],
        '94116': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94117': [(2, 1, 1), (2, 0, 0, 7), 4],
        '94118': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94121': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94122': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94123': [(0, 1, 1), (1, 0, 0, 12), 4],
        '94124': [(0, 1, 1), (4, 0, 0, 3), 4],
        '94127': [(1, 0, 0), (1, 0, 0, 3), 4],
        '94131': [(1, 0, 0), (1, 0, 0, 3), 4],
        '94132': [(0, 1, 1), (1, 0, 0, 6), 3],
        '94133': [(3, 1, 1), (2, 0, 0, 6), 4],
        '94134': [(3, 1, 1), (2, 0, 0, 6), 4],
        '94158': [(0, 1, 0), (1, 0, 0, 6), 4],
        'Unknown_ZIP': [(4, 1, 1), (0, 0, 0, 0), 4],
    }

    # Individual models.
    top_down_df = top_down_estimation_by_zip(merged_df)
    arimax_df, rmse = arimax_by_zip(merged_df, zip_param_dictionary)
    rf_df, importance_dict = model_random_forest(merged_df, 10, 'auto')

    # Linear-regression combination of all three models.
    merged_predictions = linear_regression_combination(
        arimax_df, top_down_df, rf_df)
    return merged_predictions
def future_data_processing(df):
    """Align the future exogenous variables with the format of the past
    eviction data via the shared merge/transform step.

    NOTE(review): relies on the module-level ``df_median_housing_price``,
    ``df_census`` and ``df_unemployment`` dataframes being defined —
    confirm against the importing module.
    """
    return transform_merge_data(df, df_median_housing_price,
                                df_census, df_unemployment)
    zips - list of zips to append back onto the data after it is fit
    months_list - list of months to append back onto the data after it is fit

    Output:
    predictions_df - dataframe with predictions of eviction notices,
    by zip, for each of the future months.
    """
    # NOTE(review): this chunk begins mid-function — the enclosing `def`
    # (and the X_train/y_train/X_test/zips/months_list bindings it
    # documents above) starts before the visible source; do not edit the
    # fragment's logic without the full definition.
    rfr = RandomForestRegressor(n_estimators=100, max_features='auto')
    rfr.fit(X_train, y_train)
    y_hat = rfr.predict(X_test).tolist()
    # Re-attach the zip and month identifiers alongside the predictions.
    predictions_df = pd.DataFrame(data={'predicted_evictions':y_hat,\
        'zip_code': zips, 'month_year':months_list})
    predictions_df['month_year'] = pd.to_datetime(predictions_df['month_year'])
    return predictions_df


def future_data_processing(df):
    '''Basic processing/transformation of future exogenous variables to
    align them with the format of the past eviction data.'''
    # NOTE(review): reads the module-level df_median_housing_price,
    # df_census and df_unemployment dataframes — confirm they are defined
    # at import time.
    future_df = transform_merge_data(df, df_median_housing_price,
                                     df_census, df_unemployment)
    return future_df


if __name__ == '__main__':
    # Build the merged training frame and run a 3-month random-forest
    # forecast from it.
    eviction_median_housing = transform_merge_data(df_eviction,
                                                   df_median_housing_price,
                                                   df_census,
                                                   df_unemployment)
    random_forest_forecast_df = random_forest_forecast(eviction_median_housing,
                                                       3, df_future_data)
    #eviction = run_random_forest_forecast(df_eviction,df_median_housing_price, df_census, df_unemployment,df_future_data,months_ahead=3)