def future_data_processing(df, df_housing=None, df_cens=None, df_unemp=None):
    """Process the exogenous features for the future prediction months so
    they align with the model's expected format.

    Parameters
    ----------
    df : pd.DataFrame
        Raw future exogenous data.
    df_housing, df_cens, df_unemp : pd.DataFrame, optional
        Supporting datasets. When omitted they fall back to the
        module-level ``df_median_housing_price``, ``df_census`` and
        ``df_unemployment`` globals, preserving the original call
        signature; passing them explicitly is preferred.

    Returns
    -------
    pd.DataFrame
        Transformed data, summed per 'Month_Year'.
    """
    # Backward-compatible fallback to the module globals the original
    # implementation read implicitly.
    if df_housing is None:
        df_housing = df_median_housing_price
    if df_cens is None:
        df_cens = df_census
    if df_unemp is None:
        df_unemp = df_unemployment
    future_df = transform_merge_data(df, df_housing, df_cens, df_unemp)
    future_transform_df = arima_by_zip_data_transform(future_df)
    # Collapse zip-level rows into a single aggregate row per month.
    future_transform_df = future_transform_df.groupby(
        'Month_Year').sum().reset_index()
    return future_transform_df
def run_random_forest_forecast(df_eviction, df_median_housing_price,
                               df_census, df_unemployment, df_future_data,
                               months_ahead=3):
    """Prepare the merged input data and run the random-forest forecast.

    Merges the eviction, housing-price, census and unemployment
    dataframes, then forwards the result to ``random_forest_forecast``
    for the requested horizon. See the called functions for detailed
    docstrings.
    """
    merged = transform_merge_data(df_eviction, df_median_housing_price,
                                  df_census, df_unemployment)
    return random_forest_forecast(merged, months_ahead, df_future_data)
def run_top_down_forecast(df_eviction, df_median_housing_price, df_census,
                          df_unemployment, df_future_data, months_ahead=3):
    """Run each step of the top-down forecasting model in order.

    Parameters are all unprocessed dataframes; ``months_ahead`` is the
    forecast horizon in months.

    Returns
    -------
    pd.DataFrame
        The top-down prediction dataframe.
    """
    eviction_median_housing = transform_merge_data(
        df_eviction, df_median_housing_price, df_census, df_unemployment)
    future_df = future_data_processing(df_future_data)
    # arimax_by_month_forecast also returns y_hat, which is not used by
    # this pipeline step — discard it explicitly instead of binding an
    # unused local.
    predictions_by_month, _ = arimax_by_month_forecast(
        eviction_median_housing, months_ahead, future_df)
    top_down_prediction_df = top_down_forecast(
        eviction_median_housing, predictions_by_month, months_ahead)
    return top_down_prediction_df
def predict_evictions(df_eviction, df_median_housing_price, df_census,
                      df_unemployment, df_future_data, months_ahead=3,
                      plot_by_zip=False):
    """Run the training and forecast models, merge their results, and
    predict off the previous predictions made by the training models.

    ``plot_by_zip=True`` additionally plots results per zip code.
    Returns the final combined prediction dataframe.
    """
    # Forecast models.
    top_down_df = run_top_down_forecast(
        df_eviction, df_median_housing_price, df_census,
        df_unemployment, df_future_data, months_ahead)
    rf_forecast_df = run_random_forest_forecast(
        df_eviction, df_median_housing_price, df_census,
        df_unemployment, df_future_data, months_ahead)

    # Training models (results should eventually be pickled).
    merged_input = transform_merge_data(
        df_eviction, df_median_housing_price, df_census, df_unemployment)
    rf_training_df, _importance = model_random_forest(merged_input, 10, 'auto')
    top_down_training_df = top_down_estimation_by_zip(merged_input)

    # Combine forecasts and training outputs, then fit a linear
    # regression over the individual model predictions.
    merged_predictions = merge_all_models(top_down_df, rf_forecast_df)
    merged_training = merge_training_data(top_down_training_df, rf_training_df)
    final_df = linear_regression_combination(merged_predictions,
                                             merged_training)

    if plot_by_zip:
        plot_by_zips(merged_training, final_df, zip_code=True)
    return final_df
def run_all_models(df_eviction, df_median_housing_price, df_census,
                   df_unemployment):
    """Train every model on the historical data and combine their
    predictions via linear regression.

    Each zip code maps to a list of hand-tuned ARIMAX parameters:
    (p, d, q) order, seasonal order, and a trailing integer whose
    meaning is defined by ``arimax_by_zip`` — confirm there.
    """
    merged_df = transform_merge_data(df_eviction, df_median_housing_price,
                                     df_census, df_unemployment)

    zip_param_dictionary = {
        '94102': [(2, 1, 0), (1, 0, 0, 6), 4],
        '94103': [(0, 1, 1), (1, 0, 0, 6), 3],
        '94105': [(0, 0, 0), (0, 0, 0, 0), 4],
        '94107': [(3, 0, 0), (1, 0, 0, 8), 4],
        '94108': [(0, 1, 1), (1, 0, 0, 6), 4],
        '94109': [(0, 1, 1), (1, 0, 0, 9), 4],
        '94110': [(7, 1, 1), (2, 0, 0, 7), 4],
        '94111': [(0, 0, 0), (1, 0, 0, 6), 4],
        '94112': [(2, 1, 1), (1, 0, 0, 12), 4],
        '94114': [(1, 1, 1), (1, 0, 0, 7), 4],
        '94115': [(1, 1, 1), (2, 0, 0, 3), 4],
        '94116': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94117': [(2, 1, 1), (2, 0, 0, 7), 4],
        '94118': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94121': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94122': [(1, 1, 1), (2, 0, 0, 7), 4],
        '94123': [(0, 1, 1), (1, 0, 0, 12), 4],
        '94124': [(0, 1, 1), (4, 0, 0, 3), 4],
        '94127': [(1, 0, 0), (1, 0, 0, 3), 4],
        '94131': [(1, 0, 0), (1, 0, 0, 3), 4],
        '94132': [(0, 1, 1), (1, 0, 0, 6), 3],
        '94133': [(3, 1, 1), (2, 0, 0, 6), 4],
        '94134': [(3, 1, 1), (2, 0, 0, 6), 4],
        '94158': [(0, 1, 0), (1, 0, 0, 6), 4],
        'Unknown_ZIP': [(4, 1, 1), (0, 0, 0, 0), 4],
    }

    # Individual models.
    top_down_df = top_down_estimation_by_zip(merged_df)
    arimax_df, rmse = arimax_by_zip(merged_df, zip_param_dictionary)
    rf_df, importance_dict = model_random_forest(merged_df, 10, 'auto')

    # Linear-regression combination of all three models.
    merged_predictions = linear_regression_combination(
        arimax_df, top_down_df, rf_df)
    return merged_predictions
def future_data_processing(df):
    """Align the future exogenous variables with the format of the past
    eviction data via the shared merge/transform step.

    NOTE(review): relies on the module-level ``df_median_housing_price``,
    ``df_census`` and ``df_unemployment`` dataframes being defined —
    confirm against the importing module.
    """
    return transform_merge_data(df, df_median_housing_price,
                                df_census, df_unemployment)
    zips - list of zips to append back onto the data after it is fit
    months_list - list of months to append back onto the data after it is fit

    Output:
    predictions_df - dataframe with predictions of eviction notices,
    by zip, for each of the future months.
    """
    # NOTE(review): this chunk begins mid-function — the enclosing `def`
    # (and the X_train/y_train/X_test/zips/months_list bindings it
    # documents above) starts before the visible source; do not edit the
    # fragment's logic without the full definition.
    rfr = RandomForestRegressor(n_estimators=100, max_features='auto')
    rfr.fit(X_train, y_train)
    y_hat = rfr.predict(X_test).tolist()
    # Re-attach the zip and month identifiers alongside the predictions.
    predictions_df = pd.DataFrame(data={'predicted_evictions':y_hat,\
        'zip_code': zips, 'month_year':months_list})
    predictions_df['month_year'] = pd.to_datetime(predictions_df['month_year'])
    return predictions_df


def future_data_processing(df):
    '''Basic processing/transformation of future exogenous variables to
    align them with the format of the past eviction data.'''
    # NOTE(review): reads the module-level df_median_housing_price,
    # df_census and df_unemployment dataframes — confirm they are defined
    # at import time.
    future_df = transform_merge_data(df, df_median_housing_price,
                                     df_census, df_unemployment)
    return future_df


if __name__ == '__main__':
    # Build the merged training frame and run a 3-month random-forest
    # forecast from it.
    eviction_median_housing = transform_merge_data(df_eviction,
                                                   df_median_housing_price,
                                                   df_census,
                                                   df_unemployment)
    random_forest_forecast_df = random_forest_forecast(eviction_median_housing,
                                                       3, df_future_data)
    #eviction = run_random_forest_forecast(df_eviction,df_median_housing_price, df_census, df_unemployment,df_future_data,months_ahead=3)