if __name__ == '__main__':
    # Script entry point: load the column configuration, stitch the yearly
    # fire/weather CSVs into one dataframe, split the fire date column, and
    # then either compute the nearby-fires features (caching the result to
    # disk) or reload the previously cached dataframe.

    # The years are hard-coded here; ./makefiles/year_list.pkl is not
    # consulted, so it is no longer opened just to be ignored.
    year_list = [2012, 2013, 2014, 2015]

    # Pickle files must be opened in binary mode: text mode fails outright
    # on Python 3 and can corrupt the stream on platforms that translate
    # newlines.
    with open('./makefiles/columns_list.pkl', 'rb') as f:
        columns_list = pickle.load(f)
    with open('./makefiles/columns_dict.pkl', 'rb') as f:
        columns_dict = pickle.load(f)

    # One CSV per year, combined into a single dataframe.
    dfs_list = [
        pd.read_csv('../data/csvs/fires_weather_' + str(year) + '.csv')
        for year in year_list
    ]
    df = combine_dfs(dfs_list)
    df = break_time_col(df, 'date_fire')

    # Settings for the nearby-fires step; the distance measure is also part
    # of the cache file name, so each distance gets its own cached frame.
    nearby_cfg = columns_dict['add_nearby_fires']
    nearby_fires_df_filepath = (
        './modeling/nearby_done_df_' + str(nearby_cfg['dist_measure']) + '.pkl'
    )

    # This process takes awhile, and I won't need to do it each time.
    # NOTE(review): nothing visible here sets 'nearby_fires_done'; it is
    # presumably added to columns_dict elsewhere once the cache exists.
    if 'nearby_fires_done' not in columns_dict:
        df = gen_nearby_fires_count(
            df,
            nearby_cfg['dist_measure'],
            nearby_cfg['time_measures'],
            nearby_cfg['rate_measures'],
        )
        with open(nearby_fires_df_filepath, 'wb') as f:
            pickle.dump(df, f)
    else:
        with open(nearby_fires_df_filepath, 'rb') as f:
            df = pickle.load(f)
import pickle
import sys

import pandas as pd

from data_manip.general_featurization import combine_dfs, grab_columns, return_all_dummies, boolean_fire
from data_manip.time_featurization import break_time_col

if __name__ == '__main__':
    # Script entry point: build the model input dataframe from the yearly
    # fire CSVs and pickle it to ./input_df.pkl.

    # Pickle files must be opened in binary mode: text mode fails outright
    # on Python 3 and can corrupt the stream on platforms that translate
    # newlines.
    with open('../makefiles/year_list.pkl', 'rb') as f:
        year_list = pickle.load(f)
    with open('../makefiles/columns_list.pkl', 'rb') as f:
        columns_list = pickle.load(f)

    # One CSV per year, combined into a single dataframe.
    dfs_list = [
        pd.read_csv('../../data/csvs/fires_' + str(year) + '.csv')
        for year in year_list
    ]
    df = combine_dfs(dfs_list)

    # Keep only the configured columns, then run the featurization helpers
    # in the same order as before: time breakdown, fire boolean, dummies.
    df = grab_columns(df, columns_list)
    df = break_time_col(df, 'date')
    df = boolean_fire(df)

    dummy_cols = ['year', 'month']
    df = return_all_dummies(df, dummy_cols)

    # Write-only binary handle; the file is truncated/created as before.
    with open('./input_df.pkl', 'wb') as f:
        pickle.dump(df, f)
with open('./makefiles/year_list.pkl') as f: year_list = [2012, 2013, 2014, 2015] with open('./makefiles/columns_list.pkl') as f: columns_list = pickle.load(f) with open('./makefiles/columns_dict.pkl') as f: columns_dict = pickle.load(f) dfs_list = [] for year in year_list: df_path = '../data/csvs/fires_weather_' + str(year) + '.csv' df = pd.read_csv(df_path) dfs_list.append(df) df = combine_dfs(dfs_list) df = break_time_col(df, 'date_fire') nearby_fires_df_filepath = './modeling/nearby_done_df_' + str(columns_dict['add_nearby_fires']['dist_measure']) + '.pkl' # This process takes awhile, and I won't need to do it each time. if 'nearby_fires_done' not in columns_dict.keys(): dist_measure = columns_dict['add_nearby_fires']['dist_measure'] time_measures = columns_dict['add_nearby_fires']['time_measures'] df = gen_nearby_fires_count(df, dist_measure, time_measures, columns_dict['add_nearby_fires']['rate_measures']) with open(nearby_fires_df_filepath, 'w') as f: pickle.dump(df, f) else: with open(nearby_fires_df_filepath) as f: df = pickle.load(f) df = grab_columns(df, columns_list) featurization_dict = {'all_dummies': return_all_dummies, 'bool_col': boolean_col, 'return_top_n': return_top_n,