def check_csvs():
    """Print the path of every per-raster CSV that has an unexpected shape.

    The CSV paths are derived from the entries returned by
    ``helper_functions.get_resampled_tif_list(dates)`` by replacing the last
    four characters of each entry (presumably a ``.tif`` extension -- confirm
    against the helper) with ``.csv``. Each file is read with pandas and its
    path is printed when the resulting frame is not exactly 64915 rows by
    2 columns.
    """
    csv_paths = [
        tif[:-4] + '.csv'
        for tif in helper_functions.get_resampled_tif_list(dates)
    ]
    import pandas as pd

    expected_shape = (64915, 2)
    for csv_path in csv_paths:
        if pd.read_csv(csv_path).shape != expected_shape:
            print(csv_path)
def rasterstats():
    """Run ``helper_functions.rasterstat`` for every raster without a CSV yet.

    The raster list comes from ``helper_functions.get_resampled_tif_list(dates)``.
    A raster is considered done when a file with the same path but a ``.csv``
    suffix (last four characters replaced) already exists; only the remaining
    rasters are processed. When nothing remains, the function returns without
    loading the areas table or starting any worker process.

    Each worker call receives the same ``areas`` array (the ``Reach_ID`` and
    ``pixels`` columns of ``data_gloric/areas_gloric_pixel.pkl``) and one
    raster path. Presumably ``rasterstat`` writes the matching CSV itself --
    confirm against the helper; its return values are discarded here.
    """
    print('Determining rasterstats')
    import pandas as pd
    from itertools import repeat

    tif_list = helper_functions.get_resampled_tif_list(dates)
    tif_list_remaining = [
        file for file in tif_list
        if not os.path.exists(file[:-4] + '.csv')
    ]
    if not tif_list_remaining:
        return

    areas = pd.read_pickle('data_gloric/areas_gloric_pixel.pkl')[
        ['Reach_ID', 'pixels']
    ].to_numpy()

    print('\tSpinning up pool')
    # Leave two cores free for the rest of the system, but never request fewer
    # than one worker: Pool raises ValueError for a process count below 1,
    # which is what ``cpu_count() - 2`` yields on one- or two-core machines.
    worker_count = max(1, mp.cpu_count() - 2)
    # starmap blocks until every task has finished, so the pool can safely be
    # torn down when the with-block exits; this releases the worker processes
    # instead of leaving them alive after the function returns.
    with mp.Pool(worker_count) as pool:
        pool.starmap(
            helper_functions.rasterstat,
            zip(repeat(areas), tif_list_remaining),
        )
def sum_csv():
    """Write per-reach rainfall totals for the whole period as CSV and shapefile.

    Reads the merged table ``data_pmm/<first>-<last>.csv``, indexes it by
    ``Reach_ID`` and sums across all remaining columns into a series named
    ``total_rain``. The totals are written to
    ``data_pmm/totals-<first>-<last>.csv`` and, joined onto the ``geometry``
    column of ``data_gloric/areas_gloric.shp``, to
    ``data_pmm/totals-<first>-<last>.shp``.

    ``<first>`` and ``<last>`` are characters 45 up to (not including) 69 of
    the first and last CSV path derived from
    ``helper_functions.get_resampled_tif_list(dates)`` -- presumably a
    timestamp embedded in the file name; confirm against the helper.

    Nothing is done when the totals CSV already exists.
    """
    # Imported locally so this function does not depend on a module-level
    # pandas import being present.
    import pandas as pd

    print('Sum csv')
    csvs = [
        file[:-4] + '.csv'
        for file in helper_functions.get_resampled_tif_list(dates)
    ]
    # Build the shared "<first>-<last>" label once instead of re-slicing the
    # paths for every file name.
    period = csvs[0][45:69] + '-' + csvs[-1][45:69]
    totals_stem = 'data_pmm/totals-' + period
    if os.path.exists(totals_stem + '.csv'):
        return

    data = pd.read_csv('data_pmm/' + period + '.csv')
    data = data.set_index('Reach_ID').sum(axis='columns').rename('total_rain')
    data.to_csv(totals_stem + '.csv', header=True)

    # Keep Reach_ID as a column (drop=False) while also using it as the join
    # key, then restrict to the geometry before attaching the totals.
    areas = (
        gp.read_file('data_gloric/areas_gloric.shp')
        .set_index('Reach_ID', drop=False)
        .loc[:, ['geometry']]
        .join(data)
        .reset_index()
    )
    areas.to_file(totals_stem + '.shp')
def merge_csvs():
    """Join all per-raster CSVs into one wide table, saved as CSV and pickle.

    The CSV paths are derived from
    ``helper_functions.get_resampled_tif_list(dates)`` by replacing the last
    four characters of each entry with ``.csv``. Every CSV is indexed by
    ``Reach_ID`` and its ``rain`` column is renamed to characters 45 up to
    (not including) 69 of its own path (presumably a timestamp in the file
    name -- confirm against the helper). The frames are joined one by one
    onto the first, so the first CSV's index determines the rows.

    Outputs, both named ``data_pmm/<first>-<last>``:
      * ``.csv`` -- the merged table;
      * ``.pkl`` -- the merged table joined with the ``area_sk`` column of
        ``data_gloric/areas_gloric_no_geo.pkl``, with the index reset.

    Nothing is done when the merged CSV already exists.
    """
    print('Merging csv\'s')
    csv_paths = [
        tif[:-4] + '.csv'
        for tif in helper_functions.get_resampled_tif_list(dates)
    ]
    first_path, later_paths = csv_paths[0], csv_paths[1:]
    out_stem = 'data_pmm/' + first_path[45:69] + '-' + csv_paths[-1][45:69]
    if os.path.exists(out_stem + '.csv'):
        return

    import pandas as pd

    def load(path):
        # One column per raster, labelled by the slice of its own path.
        frame = pd.read_csv(path, delimiter=',')
        frame = frame.set_index('Reach_ID')
        return frame.rename(columns={'rain': path[45:69]})

    merged = load(first_path)
    for path in later_paths:
        merged = merged.join(load(path))
    merged.to_csv(out_stem + '.csv')

    reach_areas = pd.read_pickle('data_gloric/areas_gloric_no_geo.pkl')
    reach_areas = reach_areas.set_index('Reach_ID').area_sk
    merged.join(reach_areas).reset_index().to_pickle(out_stem + '.pkl')