# For every variable: load the 3-d feature arrays, run vu.pred_domain one time
# step at a time, pass each prediction through a 4-km -> 0.25-degree -> 4-km
# interpolation round trip, and save both gridded results with the references.
# NOTE(review): the original header says "loop over variables and seasons";
# only the loop over variables is visible in this excerpt.
VARS = ['TMEAN']

for VAR in VARS:
    # import 3d (time, lat, lon) features
    feature_file = JRA_dir + 'JRA_{}_features_US_2015_2020.hdf'.format(VAR)
    with h5py.File(feature_file, 'r') as hdf_io:
        PRISM_T = hdf_io['{}_4km'.format(VAR)][...]
        REGRID_T = hdf_io['{}_REGRID'.format(VAR)][...]

    shape_3d = REGRID_T.shape
    # one output frame per time step, on the 4-km and on the 0.25-degree grid
    RESULT_CLEAN = np.zeros(shape_3d)
    RESULT_025 = np.zeros((shape_3d[0],) + lon_025.shape)

    for n, regrid_frame in enumerate(REGRID_T):
        print('\t{}'.format(n))
        # model input: the regridded field of this time step plus elevation
        model_input = (regrid_frame, etopo_regrid)
        pred_4km = vu.pred_domain(model_input, land_mask, CGAN, param,
                                  method='norm_std')
        # 4-km prediction -> 0.25-degree grid
        pred_025 = du.interp2d_wraper(lon_4km, lat_4km, pred_4km,
                                      lon_025, lat_025,
                                      method=interp_method)
        # ... and back to the 4-km grid
        pred_back = du.interp2d_wraper(lon_025, lat_025, pred_025,
                                       lon_4km, lat_4km,
                                       method=interp_method)
        RESULT_025[n, ...] = pred_025
        RESULT_CLEAN[n, ...] = pred_back

    # blank out every grid point flagged by land_mask, for all time steps
    RESULT_CLEAN[:, land_mask] = np.nan

    # NOTE: the round-tripped result is stored under the '<VAR>_REGRID' label
    label_save = [
        'lon_4km', 'lat_4km',
        '{}_4km'.format(VAR), '{}_REGRID'.format(VAR), '{}_025'.format(VAR),
        'etopo_4km', 'etopo_regrid',
    ]
    tuple_save = (
        lon_4km, lat_4km,
        PRISM_T, RESULT_CLEAN, RESULT_025,
        etopo_4km, etopo_regrid,
    )
    du.save_hdf5(tuple_save, label_save, out_dir=JRA_dir,
                 filename='JRA_US_{}_clean_2015_2020.hdf'.format(VAR))
# NOTE: the statements down to the STD_sinefit assignment use the grid indices
# (i, j); the loop headers that define them lie above this excerpt.
temp_grid_point = diff_T[ind_train, i, j]

# sort the training values of this grid point into per-month rows
for k, train_date in enumerate(train_list):
    mon_ind = train_date.month - 1
    month_sep[mon_ind, count[mon_ind]] = temp_grid_point[k]
    count[mon_ind] += 1

# fit an annual sine to the NaN-aware monthly standard deviations and
# evaluate the fitted curve at every time in t_all
monthly_std = np.nanstd(month_sep, axis=1)
A, p, c = pu.fit_sin_annual(t_midmon, monthly_std)
STD_sinefit[:, i, j] = pu.sinfunc_annual(t_all, A, p, c)

## interpolate fitted sine to 4-km (faster than interpolate first, and then do the fit)
full_shape_4km = (N_days, ) + shape_4km
STD_4km = np.zeros(full_shape_4km)
MEAN_4km = np.zeros(full_shape_4km)
for i in range(N_days):
    # here i indexes days, no longer a grid row
    STD_4km[i, ...] = du.interp2d_wraper(lon_025, lat_025,
                                         STD_sinefit[i, ...],
                                         lon_4km, lat_4km,
                                         method=interp_method)
    MEAN_4km[i, ...] = du.interp2d_wraper(lon_025, lat_025,
                                          MEAN_sinefit[i, ...],
                                          lon_4km, lat_4km,
                                          method=interp_method)

## apply to 4km and REGRID
# The original note mentions an optional "+1" in the denominator for numerical
# stability; it is NOT applied here, the fields are divided by STD_4km as is.
PRISM_T = (PRISM_T - MEAN_4km) / STD_4km
REGRID_T = (REGRID_T - MEAN_4km) / STD_4km

dict_save['{}_STD'.format(VAR)] = STD_4km
lon_025 = hdf_io['lon_025'][...] lat_025 = hdf_io['lat_025'][...] jra_gamma = hdf_io['jra_gamma'][...] with h5py.File(JRA_dir + 'JRA_TMEAN_features_2015_2020.hdf', 'r') as hdf_io: TMEAN_REGRID = hdf_io['TMEAN_REGRID'][...] with nc.Dataset(BACKUP_dir + 'ETOPO1_Ice_g_gmt4.grd') as nc_obj: etopo_x = nc_obj.variables['x'][2000:] # subsetting north america etopo_y = nc_obj.variables['y'][6000:] etopo_z = nc_obj.variables['z'][6000:, 2000:] etopo_lon, etopo_lat = np.meshgrid(etopo_x, etopo_y) etopo_025 = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z, lon_025, lat_025, method=interp_method) etopo_regrid = du.interp2d_wraper(lon_025, lat_025, etopo_025, lon_4km, lat_4km, method=interp_method) print('Lapse rate correction') date_ref = 365 + 365 date_list = [ datetime(2018, 1, 1, 0) + timedelta(days=x) for x in range(date_ref) ] gamma_mon = [
np.arange(latlim[0], latlim[1], dy)) print('lon_4km.shape:{}; lon_025.shape:{}'.format(lon_4km.shape, lon_025.shape)) # ETOPO interp print('Process ETOPO') with nc.Dataset(BACKUP_dir + 'ETOPO1_Ice_g_gmt4.grd') as nc_obj: etopo_x = nc_obj.variables['x'][2000:7000] # subsetting north america etopo_y = nc_obj.variables['y'][6000:] etopo_z = nc_obj.variables['z'][6000:, 2000:7000] etopo_lon, etopo_lat = np.meshgrid(etopo_x, etopo_y) # coarse-graining ETOPO1 etopo_4km = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z, lon_4km, lat_4km, method=interp_method) etopo_025 = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z, lon_025, lat_025, method=interp_method) etopo_regrid = du.interp2d_wraper(lon_025, lat_025, etopo_025, lon_4km, lat_4km, method=interp_method) # =========================== #
# NOTE: the file-reading loop and the ncep_var[i, ...] assignment use a day
# index i whose loop header lies above this excerpt.
for step in range(4):
    # four consecutive single-time files per day index i
    with nc.Dataset(filenames[var][4 * i + step], 'r') as nc_io:
        # single-time files
        raw_field = nc_io.variables[nc_keys[var]][0, ...]
        temp_var[step, ...] = np.flipud(raw_field)  # flipud on y-axis

# aggregate from 6-hr to daily
ncep_var[i, ...] = f(temp_var, axis=0)

print('BC domain interpolation')
for i in range(days):
    if not i % 200:
        # progress message every 200 days
        print('\tday index: {}'.format(i))
    field_4km = du.interp2d_wraper(lon_ncep, lat_ncep, ncep_var[i, ...],
                                   lon_4km, lat_4km, method=interp_method)
    # land mask applied
    field_4km[land_mask] = np.nan
    var_4km[i, ...] = field_4km

print('Feature engineering')
if var in ('TMAX', 'TMIN', 'TMEAN'):
    print('\tK to C')
    # rebinding (not in-place) on purpose, as in the original
    var_4km, ncep_var = var_4km - 273.15, ncep_var - 273.15

print('Merging climatology fields')
for i in range(days):
    # pick the climatology field of the calendar month of day i
    mon_id = date_list[i].month - 1
    clim_4km[i, ...] = CLIM[var][mon_id, ...]
with nc.Dataset(name, 'r') as nc_io: T2 = nc_io.variables[nc_keys[VAR]][...] # (time, lat, lon) T2_fold = T2.reshape((L_day, 4) + grid_shape) TMEAN_temp = f(T2_fold, axis=1) jra_var[count:count + L_day, ...] = TMEAN_temp count += L_day print('Interpolation') for i in range(days_ref): if i % 200 == 0: print('\tday index: {}'.format(i)) temp_interp = du.interp2d_wraper(lon_025, lat_025, jra_var[i, ...], lon_4km, lat_4km, method=interp_method) # land mask applied temp_interp[land_mask] = np.nan jra_interp[i, ...] = temp_interp print('Feature engineering') if VAR in ['TMAX', 'TMIN', 'TMEAN']: print('\tK to C') jra_var = jra_var - 273.15 jra_interp = jra_interp - 273.15 data_save = (lon_4km, lat_4km, jra_interp, TMEAN_4km, etopo_regrid, land_mask) label_save = [
axis=0) STD_BASE = temp_data[()]['FIT_STD'][...] STD_BASE_leap = temp_data[()]['FIT_STD_leap'][...] STD_TMIN = np.concatenate( (STD_BASE, STD_BASE_leap, STD_BASE, STD_BASE, STD_BASE[0, ...][None, ...]), axis=0) L = len(STD_TMAX) grid_shape = REGRID_TMAX.shape FIT_TMAX_interp = np.ones(grid_shape) * 999 STD_TMAX_interp = np.ones(grid_shape) * 999 FIT_TMIN_interp = np.ones(grid_shape) * 999 STD_TMIN_interp = np.ones(grid_shape) * 999 for i in range(L): FIT_TMAX_interp[i, ...] = du.interp2d_wraper(lon_c, lat_c, FIT_TMAX[i, ...], lon, lat) STD_TMAX_interp[i, ...] = du.interp2d_wraper(lon_c, lat_c, STD_TMAX[i, ...], lon, lat) FIT_TMIN_interp[i, ...] = du.interp2d_wraper(lon_c, lat_c, FIT_TMIN[i, ...], lon, lat) STD_TMIN_interp[i, ...] = du.interp2d_wraper(lon_c, lat_c, STD_TMIN[i, ...], lon, lat) PRISM_TMAX = (PRISM_TMAX - FIT_TMAX_interp) / (STD_TMAX_interp) REGRID_TMAX = (REGRID_TMAX - FIT_TMAX_interp) / (STD_TMAX_interp) PRISM_TMIN = (PRISM_TMIN - FIT_TMIN_interp) / (STD_TMIN_interp) REGRID_TMIN = (REGRID_TMIN - FIT_TMIN_interp) / (STD_TMIN_interp) labels = [ 'PRISM_TMAX', 'PRISM_TMIN', 'FIT_TMAX', 'FIT_TMIN', 'STD_TMAX', 'STD_TMIN',
# NOTE: the first two reads use an nc_io handle that is opened above this
# excerpt.
x025 = nc_io['g4_lon_2'][...] - 360  # shift by -360: fix to [-180, 180]
y025 = nc_io['g4_lat_1'][...]

# reverse the latitude axis, then build the 2-d coordinate grids
y025 = np.flipud(y025)
lon_025, lat_025 = np.meshgrid(x025, y025)

print('Preparing ETOPO data')
# read the ETOPO1 sub-window (x from index 2000, y from index 6000)
with nc.Dataset(BACKUP_dir + 'ETOPO1_Ice_g_gmt4.grd') as nc_obj:
    etopo_vars = nc_obj.variables
    etopo_x = etopo_vars['x'][2000:]  # subsetting north america
    etopo_y = etopo_vars['y'][6000:]
    etopo_z = etopo_vars['z'][6000:, 2000:]
etopo_lon, etopo_lat = np.meshgrid(etopo_x, etopo_y)

# interp.
# ETOPO1 -> 0.25-degree grid, then 0.25-degree -> 4-km grid
etopo_025 = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z,
                               lon_025, lat_025, method=interp_method)
etopo_regrid = du.interp2d_wraper(lon_025, lat_025, etopo_025,
                                  lon_4km, lat_4km, method=interp_method)

# processing keywords
VARS = ['TMEAN']

# nc variable keys
nc_keys = {'TMEAN': 'TMP_GDS4_HTGL'}  # analysis

# filenames
land_mask_clean.ravel(), (lon_JRA, lat_JRA), method='linear') land_mask_JRA = flag_mask > 0 land_mask_JRA[:, :18] = True land_mask_JRA[:, 89:] = True ERA_raw[:, land_mask_ERA] = np.nan JRA_raw[:, land_mask_JRA] = np.nan # ERA interp grid_shape = lon_ERA.shape ERA_clean_raw = np.empty((N_pred_era, ) + grid_shape) for i in range(N_pred_era): ERA_clean_raw[i, ...] = du.interp2d_wraper(lon_clean, lat_clean, ERA_clean[i, ...], lon_ERA, lat_ERA, method=interp_method) ERA_clean_raw[:, land_mask_ERA] = np.nan # JRA interp grid_shape = lon_JRA.shape JRA_clean_raw = np.empty((N_pred_jra, ) + grid_shape) for i in range(N_pred_jra): JRA_clean_raw[i, ...] = du.interp2d_wraper(lon_clean, lat_clean, JRA_clean[i, ...], lon_JRA, lat_JRA, method=interp_method) JRA_clean_raw[:, land_mask_JRA] = np.nan
# Build a point-in-polygon mask on the 4-km grid from a shapefile, regrid
# ETOPO1 elevation to the 4-km and 0.25-degree grids, and save everything to
# 'ETOPO_regrid.hdf'.
land_shp = Reader(shpfilename)

# Read the shapefile records ONCE. The original called land_shp.records()
# again for every grid point, which re-reads the file inside a doubly nested
# loop; the list below yields the same records in the same order.
# NOTE(review): presumably `Reader` is cartopy.io.shapereader.Reader - confirm.
land_records = list(land_shp.records())

shape_4km = lon_4km.shape
# land_id holds the index of the containing record, NaN where none contains
# the grid point
land_id = np.full(shape_4km, np.nan)
for i in range(shape_4km[0]):
    for j in range(shape_4km[1]):
        temp_point = shapely.geometry.Point(lon_4km[i, j], lat_4km[i, j])
        for n, shp in enumerate(land_records):
            if shp.geometry.contains(temp_point):
                # no early exit on purpose: as before, the LAST containing
                # record determines land_id[i, j]
                land_id[i, j] = n

# True wherever some shapefile geometry contains the grid point
land_mask = ~np.isnan(land_id)

# ETOPO interp
print('Process ETOPO')
# NOTE(review): y is subset from index 2000 here; confirm this window is the
# intended one for the target domain.
with nc.Dataset(BACKUP_dir+'ETOPO1_Ice_g_gmt4.grd') as nc_obj:
    etopo_x = nc_obj.variables['x'][2000:]  # subsetting north america
    etopo_y = nc_obj.variables['y'][2000:]
    etopo_z = nc_obj.variables['z'][2000:, 2000:]
etopo_lon, etopo_lat = np.meshgrid(etopo_x, etopo_y)

# ETOPO1 -> 4-km grid directly
etopo_4km = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z,
                               lon_4km, lat_4km, method=interp_method)
# ETOPO1 -> 0.25-degree grid
etopo_025 = du.interp2d_wraper(etopo_lon, etopo_lat, etopo_z,
                               lon_025, lat_025, method=interp_method)
# 0.25-degree elevation -> 4-km grid
etopo_regrid = du.interp2d_wraper(lon_025, lat_025, etopo_025,
                                  lon_4km, lat_4km, method=interp_method)

# save hdf
tuple_save = (lon_4km, lat_4km, etopo_4km, etopo_regrid,
              lon_025, lat_025, etopo_025, land_mask)
label_save = ['lon_4km', 'lat_4km', 'etopo_4km', 'etopo_regrid',
              'lon_025', 'lat_025', 'etopo_025', 'land_mask']
du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir,
             filename='ETOPO_regrid.hdf')