def main():
    """Score the status quo baseline on the UN development goals dataset.

    Loads the dataset, applies its simple preprocessing, feeds the resulting
    inputs to ``status_quo_model`` and prints the score returned by
    ``data.evaluate`` (reported to the user as an RMSE).
    """
    data = dataset.UNDevGoalsDataset()
    # Only the model inputs are used below; the second return value is ignored.
    features, _ = data.preprocess_simple()

    # Simple baseline model.
    print('Predicting 2007 from 1972:2006')
    baseline_rmse = data.evaluate(status_quo_model(features))
    print('Status quo model RMSE:', baseline_rmse)
def main():
    """Plot every development index together with the status quo predictions.

    For each (name, group) pair produced by ``data.preprocess_for_viz`` the
    function calls ``visualize_all`` and then ``visualize_worst``, the latter
    with the status quo model's predictions.
    """
    data = dataset.UNDevGoalsDataset()
    features, _ = data.preprocess_simple()
    baseline_preds = models.status_quo_model(features)

    # NOTE: the ARIMA model is not visualised here because it currently does
    # not output a DataFrame.

    # preprocess_for_viz returns a GroupBy object where each group is a
    # development index.
    for index_name, index_group in data.preprocess_for_viz():
        visualize_all(index_name, index_group)
        visualize_worst(index_name, index_group, baseline_preds)
def main():
    """Visualise the worst ARIMA forecasts for every development index.

    Preprocesses the dataset with ``preprocess_avg_NANs`` for the default
    horizon and for ``years_ahead=5``, runs the ARIMA model
    (``order=(1, 1, 1)``, ``lookback=5``) on both, and passes the two
    prediction series to ``visualize_worst`` for each group returned by the
    visualisation preprocessing.
    """
    data = dataset.UNDevGoalsDataset()
    # The target halves of the preprocessing results are not needed here.
    X, _ = data.preprocess(pp_fn=preprocess_avg_NANs)
    X5, _ = data.preprocess(pp_fn=preprocess_avg_NANs, years_ahead=5)

    preds = data.predictions(model_name=arima, order=(1, 1, 1), lookback=5,
                             preprocessed_data=X)
    preds5 = data.predictions(model_name=arima, order=(1, 1, 1), lookback=5,
                              preprocessed_data=X5)

    preds = pd.Series(preds, index=X.index)
    # Label the five-year forecasts with the rows they were computed from.
    # Previously X.index was used, which mislabels (or fails on) the series
    # whenever the two preprocessed frames do not share the same index.
    preds5 = pd.Series(preds5, index=X5.index)

    # This call returns a GroupBy object where each group is a development
    # index.
    gb = data.preprocess(pp_fn=preprocess_for_viz)
    for name, group in gb:
        visualize_worst(name, group, preds, preds5)
def main():
    """Print RMSEs of the status quo and ARIMA models for two preprocessings.

    Each model is run on the output of ``preprocess_simple`` and of
    ``preprocess_avg_NANs``; the RMSE returned by ``data.error`` is printed
    after every run.
    """
    data = dataset.UNDevGoalsDataset()
    arima_settings = {'order': (1, 1, 1), 'lookback': 5}

    def report(label, model, features, **model_kwargs):
        """Predict with ``model`` on ``features`` and print its RMSE."""
        predicted = data.predictions(model_name=model,
                                     preprocessed_data=features,
                                     **model_kwargs)
        score = data.error(error_fn=RMSE, predictions=predicted)
        print(label, score)

    print('Predicting 2007 from 1972:2006')

    simple_inputs, _ = data.preprocess(pp_fn=preprocess_simple)
    report('Status quo model RMSE with simple preprocessing:',
           status_quo_model, simple_inputs)

    # Kept after the first report so the sequence of dataset calls matches
    # the original script.
    improved_inputs, _ = data.preprocess(pp_fn=preprocess_avg_NANs)
    report('Status quo model RMSE with better preprocessing:',
           status_quo_model, improved_inputs)

    report('ARIMA model RMSE with simple preprocessing:',
           arima, simple_inputs, **arima_settings)
    report('ARIMA model RMSE with better preprocessing:',
           arima, improved_inputs, **arima_settings)
def main():
    """Compare status quo and ARIMA RMSEs over several preprocessing variants.

    Inputs are prepared with simple preprocessing, global-average imputation
    and continent-based interpolation, each for the default horizon and for
    ``years_ahead=5``. For every model/preprocessing pair the RMSE returned
    by ``data.error`` is printed, and where both horizons are evaluated the
    mean of the two RMSEs is printed as well.
    """
    data = dataset.UNDevGoalsDataset()
    arima_settings = {'order': (1, 1, 1), 'lookback': 5}

    def report(label, model, features, **model_kwargs):
        """Predict with ``model``, print the RMSE after ``label``, return it."""
        predicted = data.predictions(model_name=model,
                                     preprocessed_data=features,
                                     **model_kwargs)
        score = data.error(error_fn=RMSE, predictions=predicted)
        print(label, score)
        return score

    def report_mean(label, first, second):
        """Print the arithmetic mean of two RMSE values after ``label``."""
        print(label, (first + second) / 2)

    # All preprocessing up front, in the same order as the original script.
    simple_1yr, _ = data.preprocess(pp_fn=preprocess_simple)
    simple_5yr, _ = data.preprocess(pp_fn=preprocess_simple, years_ahead=5)
    global_1yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs)
    global_5yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs, years_ahead=5)
    continent_1yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_interpolation)
    continent_5yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_interpolation, years_ahead=5)

    # Status quo, simple preprocessing.
    sq_simple_1 = report(
        'Status quo model RMSE with simple preprocessing (1 yr):',
        status_quo_model, simple_1yr)
    sq_simple_5 = report(
        'Status quo model RMSE with simple preprocessing (5 yr):',
        status_quo_model, simple_5yr)
    report_mean('Status quo model RMSE with simple preprocessing (avg):',
                sq_simple_1, sq_simple_5)
    print()

    # Status quo, imputed inputs (default horizon only).
    report('Status quo model RMSE with global average imputation:',
           status_quo_model, global_1yr)
    report('Status quo model RMSE with continent average imputation:',
           status_quo_model, continent_1yr)
    print()

    # Status quo, "better" preprocessing: preprocess_avg_NANs is run again
    # here, exactly as in the original sequence of calls.
    improved_1yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs)
    sq_improved_1 = report(
        'Status quo model RMSE with better preprocessing (1 yr):',
        status_quo_model, improved_1yr)
    improved_5yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs,
                                      years_ahead=5)
    sq_improved_5 = report(
        'Status quo model RMSE with better preprocessing (5 yr):',
        status_quo_model, improved_5yr)
    report_mean('Status quo model RMSE with better preprocessing (avg):',
                sq_improved_1, sq_improved_5)
    print()

    # ARIMA, simple preprocessing.
    arima_simple_1 = report(
        'ARIMA model RMSE with simple preprocessing (1 yr):',
        arima, simple_1yr, **arima_settings)
    arima_simple_5 = report(
        'ARIMA model RMSE with simple preprocessing (5 yr):',
        arima, simple_5yr, **arima_settings)
    report_mean('ARIMA model RMSE with simple preprocessing (avg):',
                arima_simple_1, arima_simple_5)
    print()

    # ARIMA, global average imputation.
    arima_global_1 = report(
        'ARIMA model RMSE with global average imputation (1 yr):',
        arima, global_1yr, **arima_settings)
    arima_global_5 = report(
        'ARIMA model RMSE with global average imputation (5 yr):',
        arima, global_5yr, **arima_settings)
    report_mean('ARIMA model RMSE with global average imputation (avg):',
                arima_global_1, arima_global_5)
    print()

    # ARIMA, continent average imputation.
    arima_continent_1 = report(
        'ARIMA model RMSE with continent average imputation (1 yr):',
        arima, continent_1yr, **arima_settings)
    arima_continent_5 = report(
        'ARIMA model RMSE with continent average imputation (5 yr):',
        arima, continent_5yr, **arima_settings)
    report_mean('ARIMA model RMSE with continent average imputation (avg):',
                arima_continent_1, arima_continent_5)
def main():
    """Print 1-year, 5-year and averaged RMSEs for every model/preprocessing.

    Covers the status quo, ARIMA and VAR models on inputs produced by simple
    preprocessing, global-average imputation, continent-based interpolation
    and continent-plus-linear interpolation. Each section prints the RMSE for
    the default horizon, for ``years_ahead=5`` and the mean of the two,
    followed by a blank line.
    """
    data = dataset.UNDevGoalsDataset()

    # NOTE: an MLP experiment based on preprocess_by_country_all_years /
    # preprocess_by_country_one_year used to be sketched here; it is disabled.

    def evaluate(label, model, features, **model_kwargs):
        """Predict with ``model``, print the RMSE after ``label``, return it."""
        predicted = data.predictions(model_name=model,
                                     preprocessed_data=features,
                                     **model_kwargs)
        score = data.error(error_fn=RMSE, predictions=predicted)
        print(label, score)
        return score

    def run_section(heading, model, inputs_1yr, inputs_5yr,
                    one_year_tag=' (1 yr):', extra_5yr=None, **shared):
        """Report both horizons and their mean RMSE for one configuration.

        ``shared`` keyword arguments go to both prediction calls;
        ``extra_5yr`` holds keyword arguments for the 5-year call only.
        ``one_year_tag`` exists because some of the printed 1-year labels are
        spelled ``(1yr)`` rather than ``(1 yr)``.
        """
        rmse_1yr = evaluate(heading + one_year_tag, model, inputs_1yr,
                            **shared)
        rmse_5yr = evaluate(heading + ' (5 yr):', model, inputs_5yr,
                            **shared, **(extra_5yr or {}))
        print(heading + ' (avg):', (rmse_1yr + rmse_5yr) / 2)
        print()

    # Get all the preprocessed data (targets are not used below).
    simple_1yr, _ = data.preprocess(pp_fn=preprocess_simple)
    simple_5yr, _ = data.preprocess(pp_fn=preprocess_simple, years_ahead=5)
    global_1yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs)
    global_5yr, _ = data.preprocess(pp_fn=preprocess_avg_NANs, years_ahead=5)
    continent_1yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_interpolation)
    continent_5yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_interpolation, years_ahead=5)
    continent_linear_1yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_and_linear_interpolation)
    continent_linear_5yr, _ = data.preprocess(
        pp_fn=preprocess_with_continent_and_linear_interpolation,
        years_ahead=5)

    five_ahead = {'forward': 5}

    # Status quo with simple preprocessing
    run_section('Status quo model RMSE with simple preprocessing',
                status_quo_model, simple_1yr, simple_5yr)

    # Status quo with global avg interpolation
    run_section('Status quo model RMSE with global average imputation',
                status_quo_model, global_1yr, global_5yr)

    # Status quo with continent average interpolation
    run_section('Status quo model RMSE with continent average imputation',
                status_quo_model, continent_1yr, continent_5yr,
                one_year_tag=' (1yr):')

    # Status quo with continent average and linear interpolation
    run_section(
        'Status quo model RMSE with continent average imputation '
        'and linear interpolation',
        status_quo_model, continent_linear_1yr, continent_linear_5yr,
        one_year_tag=' (1yr):')

    # Arima with simple preprocessing
    run_section('ARIMA model RMSE with simple preprocessing',
                arima, simple_1yr, simple_5yr, extra_5yr=five_ahead)

    # Arima with global avg interpolation
    run_section('ARIMA model RMSE with global average imputation',
                arima, global_1yr, global_5yr, extra_5yr=five_ahead)

    # VAR with global avg interpolation
    run_section('VAR model RMSE with global average imputation',
                var, global_1yr, global_5yr, extra_5yr=five_ahead,
                lookback=4)

    # Arima with continent avg interpolation
    run_section('ARIMA model RMSE with continent average imputation',
                arima, continent_1yr, continent_5yr, extra_5yr=five_ahead)

    # Arima with continent average and linear interpolation
    run_section(
        'Arima model RMSE with continent average imputation '
        'and linear interpolation',
        arima, continent_linear_1yr, continent_linear_5yr,
        one_year_tag=' (1yr):', extra_5yr=five_ahead)
def main():
    """Write one-year and five-year predictions to ``SubmissionRows.csv``.

    The first command line argument selects the submission type:

    * ``arima_global_avg`` -- ARIMA with ``lookback=4`` on inputs from
      ``preprocess_for_submission_with_global_avg_and_lin_interp``.
    * ``arima_continent_avg`` -- ARIMA with ``lookback=6`` on inputs from
      ``preprocess_for_submission_with_cont_avg_and_lin_interp``.
    * ``arima_and_var`` -- ``arima_and_var`` on the global-average inputs,
      with ``do_arima=False`` for the one-year column and ``do_arima=True``
      for the five-year column.

    The CSV has two columns, ``2008 [YR2008]`` and ``2012 [YR2012]``, indexed
    like the preprocessed submission frame.

    Raises:
        ValueError: if no submission type is given or it is not one of the
            values above. Previously one of these two cases fell through
            silently and produced no output file.
    """
    usage_error = (
        "You have to submit a valid type of submittable as a command line argument"
    )

    # Validate the argument before doing any expensive data loading.
    if len(sys.argv) < 2:
        raise ValueError(usage_error)
    submission_type = sys.argv[1]

    if submission_type == 'arima_global_avg':
        pp_fn = preprocess_for_submission_with_global_avg_and_lin_interp
        model = arima
        one_year_kwargs = {'lookback': 4}
        five_year_kwargs = {'lookback': 4, 'forward': 5}
    elif submission_type == 'arima_continent_avg':
        pp_fn = preprocess_for_submission_with_cont_avg_and_lin_interp
        model = arima
        one_year_kwargs = {'lookback': 6}
        five_year_kwargs = {'lookback': 6, 'forward': 5}
    elif submission_type == 'arima_and_var':
        pp_fn = preprocess_for_submission_with_global_avg_and_lin_interp
        model = arima_and_var
        one_year_kwargs = {'var_lookback': 6, 'forward': 1, 'do_arima': False}
        five_year_kwargs = {'arima_lookback': 4, 'forward': 5, 'do_arima': True}
    else:
        raise ValueError(usage_error)

    data = dataset.UNDevGoalsDataset()
    X_submit = data.preprocess(pp_fn=pp_fn)

    one_year_preds = data.predictions(
        model_name=model, preprocessed_data=X_submit, **one_year_kwargs)
    five_year_preds = data.predictions(
        model_name=model, preprocessed_data=X_submit, **five_year_kwargs)

    # Column names follow the submission file's year-header format.
    one_yr_output = pd.Series(
        one_year_preds, index=X_submit.index, name='2008 [YR2008]')
    five_yr_output = pd.Series(
        five_year_preds, index=X_submit.index, name='2012 [YR2012]')

    out_df = pd.concat([one_yr_output, five_yr_output], axis=1)
    out_df.to_csv('SubmissionRows.csv')