def interpolate_merra_hybrid(base_dir, EPSG, REGION, tdec, X, Y,
    VERSION='v1', VARIABLE='FAC', SIGMA=1.5, FILL_VALUE=None,
    EXTRAPOLATE=False):
    """
    Interpolate a MERRA-2 Hybrid firn variable to points in space and time

    Each model field is referenced to the first time step, smoothed with a
    gaussian filter to pad the edges of the ice mask, and then interpolated
    to the input points with a regular grid interpolator. Points outside
    of the model time range are optionally estimated with ``regress_model``
    using the first or last 10 years of model data.

    Parameters
    ----------
    base_dir: directory containing the MERRA-2 Hybrid netCDF4 file
    EPSG: projection of the input coordinates (used as ``+init={EPSG}``)
    REGION: model region (lower-cased in the file name and passed to
        ``set_projection``)
    tdec: dates to interpolate, in the units of the model time variable
        (year-decimal according to the in-code notes)
    X: x-coordinates of the points in projection EPSG
    Y: y-coordinates of the points in projection EPSG
    VERSION: model version used in the file name
    VARIABLE: netCDF4 variable to interpolate
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for masked output points
        (None keeps the fill value of the netCDF4 variable)
    EXTRAPOLATE: estimate values outside of the model time range

    Returns
    -------
    interp_data: masked array of interpolated values with an additional
        ``interpolation`` attribute (0: not calculated, 1: interpolated,
        2: extrapolated backward, 3: extrapolated forward)

    Raises
    ------
    ValueError: if there is no known file for VARIABLE and VERSION
    """
    #-- set the input netCDF4 file for the variable of interest
    #-- (version 0 files are checked first as they have their own names)
    if (VARIABLE == 'FAC') and (VERSION == 'v0'):
        hybrid_file = 'gsfc_{0}_{1}.nc'.format('FAC', REGION.lower())
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc'.format(VARIABLE,
            REGION.lower())
    elif VARIABLE in ('FAC', 'cum_smb_anomaly', 'height'):
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc'.format(VERSION, REGION.lower())
    else:
        raise ValueError('Unsupported VARIABLE {0!r} for VERSION {1!r}'.format(
            VARIABLE, VERSION))

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    #-- (context manager closes the file even if a read fails)
    fd = {}
    with netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r') as fileID:
        #-- Get data from each netCDF variable and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        xg = fileID.variables['x'][:, :].copy()
        yg = fileID.variables['y'][:, :].copy()
        fd['time'] = fileID.variables['time'][:].copy()
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- time is year decimal at time step 5 days
    time_step = 5.0/365.25
    #-- number of time steps in 10 years limited to the length of the record
    #-- (prevents negative indices from wrapping for short records)
    N = min(int(10.0/time_step), nt)
    #-- time range of the model
    tmin = fd['time'].min()
    tmax = fd['time'].max()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0
    #-- extract x and y coordinate arrays from grids
    fd['x'], fd['y'] = (xg[:, 0], yg[0, :])

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj]/gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- MERRA-2 Hybrid models are rotated pole latitude and longitude
    MODEL_EPSG = set_projection(REGION)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init={0}".format(MODEL_EPSG))
    ix, iy = pyproj.transform(proj1, proj2, X, Y)

    #-- check that input points are within convex hull of smoothed model points
    points = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    #-- getdata works for both masked and unmasked coordinate arrays
    triangle = scipy.spatial.Delaunay(np.ma.getdata(points),
        qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv)
    #-- interpolation mask of invalid values
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1: interpolate, 2: backward, 3:forward)
    interp_data.interpolation = np.zeros_like(tdec, dtype=np.uint8)

    #-- find days that can be interpolated
    within = (tdec >= tmin) & (tdec <= tmax) & valid
    if np.any(within):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(within)
        #-- create an interpolator for firn height or air content
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].mask)
        #-- interpolate to points
        #-- (any non-zero interpolated mask value marks the point as invalid)
        interp_data.data[ind] = RGI(np.c_[tdec[ind], ix[ind], iy[ind]])
        interp_data.mask[ind] = MI(np.c_[tdec[ind], ix[ind], iy[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < tmin) & valid
    count = np.count_nonzero(before)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(before)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[k, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[k, :, :], kx=1, ky=1)
            #-- interpolated values and mask at each point
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec > tmax) & valid
    count = np.count_nonzero(after)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(after)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[kk, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[kk, :, :], kx=1, ky=1)
            #-- interpolated values and mask at each point
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare against None so that a fill value of 0 is respected)
    if FILL_VALUE is not None:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value
    #-- return the interpolated values
    return interp_data
def interpolate_racmo_firn(base_dir, EPSG, MODEL, tdec, X, Y,
    VARIABLE='zs', SIGMA=1.5, FILL_VALUE=None, REFERENCE=False):
    """
    Interpolate a RACMO firn model variable to points in space and time

    Each model field is smoothed with a gaussian filter to pad the edges
    of the ice mask and then interpolated to the input points with a
    regular grid interpolator in rotated pole coordinates. Points outside
    of the model time range are estimated with ``regress_model`` using
    the first or last 10 years of model data.

    Parameters
    ----------
    base_dir: working data directory containing the ``RACMO`` directory
    EPSG: projection of the input coordinates (used as ``+init={EPSG}``)
    MODEL: firn model (FGRN11, FGRN055, XANT27, ASE055 or XPEN055)
    tdec: dates to interpolate, in the units of the model time variable
        (year-decimal according to the in-code notes)
    X: x-coordinates of the points in projection EPSG
    Y: y-coordinates of the points in projection EPSG
    VARIABLE: netCDF4 variable to interpolate (``zs`` or ``FirnAir``)
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for masked output points
        (None keeps the fill value of the netCDF4 variable)
    REFERENCE: calculate the variable relative to the first time step

    Returns
    -------
    interp_data: masked array of interpolated values with an additional
        ``interpolation`` attribute (0: not calculated, 1: interpolated,
        2: extrapolated backward, 3: extrapolated forward)

    Raises
    ------
    ValueError: if MODEL is not one of the known firn models
    KeyError: if there is no file listed for VARIABLE
    """
    #-- parameters for each model
    #-- (subdirectory, file for each variable, rotated pole parameters)
    parameters = {
        'FGRN11': ('FGRN11_1960-2016',
            'FDM_zs_FGRN11_1960-2016.nc',
            'FDM_FirnAir_FGRN11_1960-2016.nc', -18.0, -37.5),
        'FGRN055': ('FGRN055_1960-2017',
            'FDM_zs_FGRN055_1960-2017_interpol.nc',
            'FDM_FirnAir_FGRN055_1960-2017_interpol.nc', -18.0, -37.5),
        'XANT27': ('XANT27_1979-2016',
            'FDM_zs_ANT27_1979-2016.nc',
            'FDM_FirnAir_ANT27_1979-2016.nc', -180.0, 10.0),
        'ASE055': ('ASE055_1979-2015',
            'FDM_zs_ASE055_1979-2015.nc',
            'FDM_FirnAir_ASE055_1979-2015.nc', 167.0, 53.0),
        'XPEN055': ('XPEN055_1979-2016',
            'FDM_zs_XPEN055_1979-2016.nc',
            'FDM_FirnAir_XPEN055_1979-2016.nc', -180.0, 30.0),
    }
    #-- set parameters based on input model
    if MODEL not in parameters:
        raise ValueError('Unknown RACMO firn model {0!r}'.format(MODEL))
    subdirectory, zs_file, firnair_file, rot_lat, rot_lon = parameters[MODEL]
    FIRN_FILE = {'zs': zs_file, 'FirnAir': firnair_file}
    FIRN_DIRECTORY = ['RACMO', subdirectory]
    #-- time is year decimal at time_step 10 days for all models
    time_step = 10.0 / 365.25

    #-- Open the RACMO NetCDF file for reading
    #-- (context manager closes the file even if a read fails)
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fd = {}
    with netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r') as fileID:
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
        #-- Get data from each netCDF variable and remove singleton dimensions
        fd[VARIABLE] = np.ma.asarray(
            np.squeeze(fileID.variables[VARIABLE][:].copy()))
        fd['lon'] = fileID.variables['lon'][:, :].copy()
        fd['lat'] = fileID.variables['lat'][:, :].copy()
        fd['time'] = fileID.variables['time'][:].copy()
    #-- verify mask object for interpolating data
    #-- (mask every value of the complete array equal to the fill value)
    fd[VARIABLE].mask = np.ma.getmaskarray(fd[VARIABLE]) | \
        (fd[VARIABLE].data == fv)
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])
    #-- number of time steps in 10 years limited to the length of the record
    #-- (prevents negative indices from wrapping for short records)
    N = min(int(10.0 / time_step), nt)
    #-- time range of the model
    tmin = fd['time'].min()
    tmax = fd['time'].max()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((ny, nx))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, ny, nx), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((ny, nx))
        if REFERENCE:
            #-- reference to first firn field
            temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        else:
            temp1[i, j] = fd[VARIABLE][t, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE][t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE][t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- rotated pole longitude and latitude of input model (model coordinates)
    xg, yg = rotate_coordinates(fd['lon'], fd['lat'], rot_lon, rot_lat)
    #-- recreate arrays to fix small floating point errors
    #-- (ensure that arrays are monotonically increasing)
    fd['x'] = np.linspace(np.mean(xg[:, 0]), np.mean(xg[:, -1]), nx)
    fd['y'] = np.linspace(np.mean(yg[0, :]), np.mean(yg[-1, :]), ny)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- RACMO models are rotated pole latitude and longitude
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(4326))
    ilon, ilat = pyproj.transform(proj1, proj2, X, Y)
    #-- calculate rotated pole coordinates of input coordinates
    ix, iy = rotate_coordinates(ilon, ilat, rot_lon, rot_lat)

    #-- check that input points are within convex hull of smoothed model points
    points = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    #-- getdata works for both masked and unmasked coordinate arrays
    triangle = scipy.spatial.Delaunay(np.ma.getdata(points),
        qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of firn variable (height or firn air content)
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    within = (tdec >= tmin) & (tdec <= tmax) & valid
    if np.any(within):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(within)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs[VARIABLE].mask)
        #-- interpolate to points
        #-- (any non-zero interpolated mask value marks the point as invalid)
        interp_data.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp_data.mask[ind] = MI(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < tmin) & valid
    count = np.count_nonzero(before)
    if (count > 0):
        #-- indices of dates before firn model
        ind, = np.nonzero(before)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate firn elevation or air content
            #-- (fields are transposed as the splines are built over x then y)
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[k, :, :].T, kx=1, ky=1)
            #-- interpolated values and mask at each point
            FIRN[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, FIRN[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec > tmax) & valid
    count = np.count_nonzero(after)
    if (count > 0):
        #-- indices of dates after firn model
        ind, = np.nonzero(after)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate firn elevation or air content
            #-- (fields are transposed as the splines are built over x then y)
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[kk, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[kk, :, :].T, kx=1, ky=1)
            #-- interpolated values and mask at each point
            FIRN[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, FIRN[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare against None so that a fill value of 0 is respected)
    if FILL_VALUE is not None:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value
    #-- return the interpolated values
    return interp_data
def _racmo_idw_weights(dist, POWER):
    """
    Calculate normalized inverse distance weights for each query point

    Parameters
    ----------
    dist: array of distances with shape (points, neighbors)
    POWER: power used in the inverse distance weighting

    Returns
    -------
    w: array of weights with the shape of dist that sum to 1 along axis 1
    """
    #-- zero distances produce infinite values that are handled below
    with np.errstate(divide='ignore'):
        power_inverse_distance = dist**(-POWER)
    #-- for points coincident with model points: only use the exact matches
    #-- (prevents invalid weights from dividing infinite values)
    exact = np.isinf(power_inverse_distance)
    rows = np.any(exact, axis=1)
    if np.any(rows):
        power_inverse_distance[rows, :] = exact[rows, :].astype(float)
    #-- normalize the weights for each point
    s = np.sum(power_inverse_distance, axis=1)
    return power_inverse_distance / s[:, None]


def extrapolate_racmo_downscaled(base_dir, EPSG, VERSION, tdec, X, Y,
    VARIABLE='SMB', SEARCH='BallTree', NN=10, POWER=2.0, FILL_VALUE=None):
    """
    Extrapolate downscaled RACMO products to points in space and time

    Values are spatially extrapolated from the NN closest valid model
    points using inverse distance weighting and linearly interpolated in
    time between model maps. Dates outside of the model time range are
    estimated with ``regress_model`` using the first or last 120 time
    steps (10 years according to the in-code notes).

    Parameters
    ----------
    base_dir: working data directory containing the ``RACMO`` directory
    EPSG: projection of the input coordinates
    VERSION: downscaled version (``1.0``, ``2.0`` or ``3.0``)
    tdec: dates to extrapolate, in the units of the model TIME variable
        (decimal years according to the in-code notes)
    X: x-coordinates of the points in projection EPSG
    Y: y-coordinates of the points in projection EPSG
    VARIABLE: product to extrapolate
        (SMB, PRECIP, RUNOFF, SNOWMELT or REFREEZE)
    SEARCH: nearest-neighbor search algorithm (BallTree, else KDTree)
    NN: number of nearest-neighbor points to use
    POWER: power used in the inverse distance weighting
    FILL_VALUE: replacement fill value for masked output points
        (None keeps the default fill value of the masked array)

    Returns
    -------
    extrap_data: masked array of extrapolated values with an additional
        ``interpolation`` attribute (0: not calculated, 1: interpolated
        in time, 2: extrapolated backward, 3: extrapolated forward)

    Raises
    ------
    ValueError: if VERSION is not one of the known versions
    KeyError: if VARIABLE is not one of the known products
    """
    #-- Full Directory Setup
    DIRECTORY = 'SMB1km_v{0}'.format(VERSION)

    #-- netcdf variable names
    input_products = {
        'SMB': 'SMB_rec',
        'PRECIP': 'precip',
        'RUNOFF': 'runoff',
        'SNOWMELT': 'snowmelt',
        'REFREEZE': 'refreeze',
    }
    var = input_products[VARIABLE]
    #-- version 1 was in separate files for each year
    if (VERSION == '1.0'):
        RACMO_MODEL = ['XGRN11', '2.3']
        VARNAME = var
        SUBDIRECTORY = '{0}_v{1}'.format(VARNAME, VERSION)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY, SUBDIRECTORY)
    elif (VERSION == '2.0'):
        RACMO_MODEL = ['XGRN11', '2.3p2']
        VARNAME = var if VARIABLE in ('SMB', 'PRECIP') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    elif (VERSION == '3.0'):
        RACMO_MODEL = ['FGRN055', '2.3p2']
        VARNAME = var if (VARIABLE == 'SMB') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    else:
        raise ValueError('Unknown downscaled RACMO version {0!r}'.format(
            VERSION))
    #-- input cumulative netCDF4 file
    args = (RACMO_MODEL[0], RACMO_MODEL[1], VERSION, VARIABLE)
    input_file = '{0}_RACMO{1}_DS1km_v{2}_{3}_cumul.nc'.format(*args)

    #-- Open the RACMO NetCDF file for reading
    #-- (kept open as maps are read on demand and closed even on errors)
    with netCDF4.Dataset(os.path.join(input_dir, input_file), 'r') as fileID:
        #-- number of time steps of RACMO data
        nt = fileID[VARNAME].shape[0]
        #-- Get data from each netCDF variable
        d = {}
        #-- latitude and longitude arrays at center of each cell
        d['LON'] = fileID.variables['LON'][:, :].copy()
        d['LAT'] = fileID.variables['LAT'][:, :].copy()
        #-- extract time (decimal years)
        d['TIME'] = fileID.variables['TIME'][:].copy()
        #-- time range of the model
        tmin = d['TIME'].min()
        tmax = d['TIME'].max()
        #-- mask object for interpolating data
        d['MASK'] = np.array(fileID.variables['MASK'][:], dtype=bool)
        i, j = np.nonzero(d['MASK'])

        #-- convert RACMO latitude and longitude to input coordinates (EPSG)
        crs1 = pyproj.CRS.from_string(EPSG)
        crs2 = pyproj.CRS.from_string("epsg:{0:d}".format(4326))
        transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
        direction = pyproj.enums.TransformDirection.INVERSE
        #-- convert projection from model coordinates
        xg, yg = transformer.transform(d['LON'], d['LAT'],
            direction=direction)

        #-- construct search tree from original points
        #-- can use either BallTree or KDTree algorithms
        xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
        tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

        #-- output extrapolated arrays of variable
        npts = len(tdec)
        extrap_data = np.ma.zeros((npts), dtype=float)
        extrap_data.data[:] = extrap_data.fill_value
        extrap_data.mask = np.zeros((npts), dtype=bool)
        #-- type designating algorithm used
        #-- (1:interpolate, 2:backward, 3:forward)
        extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

        #-- number of time steps for regression models
        #-- limited to the length of the record
        #-- (prevents negative indices from wrapping for short records)
        N = min(120, nt)

        #-- find days that can be interpolated in time
        #-- the final model date is handled with the forward extrapolation
        #-- as there is no later map to interpolate towards
        within = (tdec >= tmin) & (tdec < tmax)
        if np.any(within):
            #-- indices of dates for interpolated days
            ind, = np.nonzero(within)
            #-- reduce x, y and t coordinates
            xind, yind, tind = (X[ind], Y[ind], tdec[ind])
            #-- determine which subset of time to read from the netCDF4 file
            f = scipy.interpolate.interp1d(d['TIME'], np.arange(nt),
                kind='linear', fill_value=(0, nt - 1), bounds_error=False)
            date_indice = f(tind).astype(int)
            #-- for each unique RACMO date
            #-- linearly interpolate in time between two RACMO maps
            #-- then inverse distance weighting to extrapolate in space
            for k in np.unique(date_indice):
                kk, = np.nonzero(date_indice == k)
                #-- query the search tree to find the NN closest points
                xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
                dist, indices = tree.query(xy2, k=NN, return_distance=True)
                #-- normalized weights for the inverse distance weighting
                w = _racmo_idw_weights(dist, POWER)
                #-- RACMO variables for times before and after tdec
                var1 = fileID.variables[VARNAME][k, i, j].copy()
                var2 = fileID.variables[VARNAME][k + 1, i, j].copy()
                #-- linearly interpolate to date
                dt = (tind[kk] - d['TIME'][k]) / \
                    (d['TIME'][k + 1] - d['TIME'][k])
                #-- spatially extrapolate using inverse distance weighting
                extrap_data[kk] = (1.0 - dt)*np.sum(w*var1[indices], axis=1) + \
                    dt*np.sum(w*var2[indices], axis=1)
            #-- set interpolation type (1: interpolated in time)
            extrap_data.interpolation[ind] = 1

        #-- check if needing to extrapolate backwards in time
        before = (tdec < tmin)
        count = np.count_nonzero(before)
        if (count > 0):
            #-- indices of dates before RACMO
            ind, = np.nonzero(before)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights for the inverse distance weighting
            w = _racmo_idw_weights(dist, POWER)
            #-- calculate a regression model for calculating values
            #-- read first 10 years of data to create regression model
            #-- spatially interpolate variables to coordinates
            VAR = np.zeros((count, N))
            T = np.zeros((N))
            #-- create interpolated time series for regression model
            for k in range(N):
                #-- time at k
                T[k] = d['TIME'][k]
                #-- spatially extrapolate variables
                var1 = fileID.variables[VARNAME][k, i, j].copy()
                VAR[:, k] = np.sum(w * var1[indices], axis=1)
            #-- calculate regression model
            for n, v in enumerate(ind):
                extrap_data[v] = regress_model(T, VAR[n, :], tdec[v],
                    ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                    RELATIVE=T[0])
            #-- set interpolation type (2: extrapolated backwards in time)
            extrap_data.interpolation[ind] = 2

        #-- check if needing to extrapolate forward in time
        #-- (count and indices use the same condition so shapes agree)
        after = (tdec >= tmax)
        count = np.count_nonzero(after)
        if (count > 0):
            #-- indices of dates after RACMO
            ind, = np.nonzero(after)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights for the inverse distance weighting
            w = _racmo_idw_weights(dist, POWER)
            #-- calculate a regression model for calculating values
            #-- read last 10 years of data to create regression model
            #-- spatially interpolate variables to coordinates
            VAR = np.zeros((count, N))
            T = np.zeros((N))
            #-- create interpolated time series for regression model
            for k in range(N):
                kk = nt - N + k
                #-- time at kk
                T[k] = d['TIME'][kk]
                #-- spatially extrapolate variables
                var1 = fileID.variables[VARNAME][kk, i, j].copy()
                VAR[:, k] = np.sum(w * var1[indices], axis=1)
            #-- calculate regression model
            for n, v in enumerate(ind):
                extrap_data[v] = regress_model(T, VAR[n, :], tdec[v],
                    ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                    RELATIVE=T[-1])
            #-- set interpolation type (3: extrapolated forward in time)
            extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(extrap_data.data == extrap_data.fill_value)
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare against None so that a fill value of 0 is respected)
    if FILL_VALUE is not None:
        extrap_data.data[extrap_data.mask] = FILL_VALUE
        extrap_data.fill_value = FILL_VALUE
    #-- return the extrapolated values
    return extrap_data
def interpolate_merra_hybrid(base_dir, EPSG, REGION, tdec, X, Y,
    VERSION='v1', VARIABLE='FAC', SIGMA=1.5, FILL_VALUE=None,
    EXTRAPOLATE=False, GZIP=False):
    """
    Interpolate a MERRA-2 Hybrid firn variable to points in space and time

    Each model field is referenced to the first time step that is read,
    smoothed with a gaussian filter to pad the edges of the ice mask, and
    then interpolated to the input points with a regular grid interpolator.
    Points outside of the model time range are optionally estimated with
    ``regress_model`` using the first or last 10 years of model data.

    Parameters
    ----------
    base_dir: directory containing the MERRA-2 Hybrid netCDF4 file
    EPSG: projection of the input coordinates
    REGION: model region (lower-cased in the file name and passed to
        ``set_projection``)
    tdec: dates to interpolate, in the units of the model time variable
        (year-decimal according to the in-code notes)
    X: x-coordinates of the points in projection EPSG
    Y: y-coordinates of the points in projection EPSG
    VERSION: model version used in the file name
    VARIABLE: netCDF4 variable to interpolate
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for masked output points
        (None keeps the fill value of the netCDF4 variable)
    EXTRAPOLATE: read the complete record and estimate values outside
        of the model time range (otherwise only the time steps around
        the input dates are read)
    GZIP: netCDF4 file is gzip compressed

    Returns
    -------
    interp_data: masked array of interpolated values with an additional
        ``interpolation`` attribute (0: not calculated, 1: interpolated,
        2: extrapolated backward, 3: extrapolated forward)

    Raises
    ------
    ValueError: if there is no known file for VARIABLE and VERSION
    """
    #-- suffix if compressed
    suffix = '.gz' if GZIP else ''
    #-- set the input netCDF4 file for the variable of interest
    #-- version 0 files are checked first as they have their own names
    #-- (the FAC check would otherwise be shadowed by the generic branch)
    if (VARIABLE == 'FAC') and (VERSION == 'v0'):
        args = ('FAC', REGION.lower(), suffix)
        hybrid_file = 'gsfc_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        args = (VARIABLE, REGION.lower(), suffix)
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('FAC', 'cum_smb_anomaly', 'SMB_a', 'height', 'h_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('smb', 'SMB', 'Me', 'Ra', 'Ru', 'Sn-Ev'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('Me_a', 'Ra_a', 'Ru_a', 'Sn-Ev_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args)
    else:
        raise ValueError('Unsupported VARIABLE {0!r} for VERSION {1!r}'.format(
            VARIABLE, VERSION))

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    if GZIP:
        #-- read as in-memory (diskless) netCDF4 dataset
        with gzip.open(os.path.join(base_dir, hybrid_file), 'r') as f:
            fileID = netCDF4.Dataset(uuid.uuid4().hex, memory=f.read())
    else:
        #-- read netCDF4 dataset
        fileID = netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r')

    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25
    #-- Get data from each netCDF variable
    fd = {}
    try:
        #-- if extrapolating data: read the full dataset
        #-- if simply interpolating with fill values: reduce to a subset
        if EXTRAPOLATE:
            #-- read time variables
            fd['time'] = fileID.variables['time'][:].copy()
            #-- read full dataset and remove singleton dimensions
            fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        else:
            #-- reduce grids to time period of input buffered by time steps
            tmin = np.min(tdec) - 2.0 * time_step
            tmax = np.max(tdec) + 2.0 * time_step
            #-- find indices to times
            nt, = fileID.variables['time'].shape
            f = scipy.interpolate.interp1d(fileID.variables['time'][:],
                np.arange(nt), kind='nearest', bounds_error=False,
                fill_value=(0, nt))
            imin, imax = f((tmin, tmax)).astype(int)
            #-- read reduced time variables
            fd['time'] = fileID.variables['time'][imin:imax + 1].copy()
            #-- read reduced dataset and remove singleton dimensions
            fd[VARIABLE] = np.squeeze(
                fileID.variables[VARIABLE][imin:imax + 1, :, :])
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
        #-- read the model coordinates
        x = fileID.variables['x'][:].copy()
        y = fileID.variables['y'][:].copy()
    finally:
        #-- close the NetCDF files (even if a read fails)
        fileID.close()

    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- extract x and y coordinate arrays from grids if applicable
    #-- else create meshgrids of coordinate arrays
    if (np.ndim(x) == 2):
        xg, yg = (x, y)
        fd['x'], fd['y'] = (xg[:, 0], yg[0, :])
    else:
        fd['x'], fd['y'] = (x, y)
        xg, yg = np.meshgrid(fd['x'], fd['y'], indexing='ij')
    #-- number of time steps in 10 years limited to the length of the record
    #-- (prevents negative indices from wrapping for short records)
    N = min(int(10.0 / time_step), nt)
    #-- time range of the model data that was read
    time_min = fd['time'].min()
    time_max = fd['time'].max()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    MODEL_EPSG = set_projection(REGION)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(MODEL_EPSG)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of smoothed model points
    v, triangle = find_valid_triangulation(xg[ii, jj], yg[ii, jj])
    #-- check if there is a valid triangulation
    if v:
        #-- check where points are within the complex hull of the triangulation
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of variable
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv)
    #-- interpolation mask of invalid values
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1: interpolate, 2: backward, 3:forward)
    interp_data.interpolation = np.zeros_like(tdec, dtype=np.uint8)

    #-- find days that can be interpolated
    within = (tdec >= time_min) & (tdec <= time_max) & valid
    if np.any(within):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(within)
        #-- create an interpolator for firn height or air content
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].mask)
        #-- interpolate to points
        #-- (any non-zero interpolated mask value marks the point as invalid)
        interp_data.data[ind] = RGI(np.c_[tdec[ind], ix[ind], iy[ind]])
        interp_data.mask[ind] = MI(np.c_[tdec[ind], ix[ind], iy[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_min) & valid
    count = np.count_nonzero(before)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(before)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[k, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[k, :, :], kx=1, ky=1)
            #-- interpolated values and mask at each point
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec > time_max) & valid
    count = np.count_nonzero(after)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(after)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[kk, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[kk, :, :], kx=1, ky=1)
            #-- interpolated values and mask at each point
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare against None so that a fill value of 0 is respected)
    if FILL_VALUE is not None:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value
    #-- return the interpolated values
    return interp_data
def extrapolate_mar_daily(DIRECTORY, EPSG, VERSION, tdec, X, Y,
        XNAME=None, YNAME=None, TIMENAME='TIME', VARIABLE='SMB',
        SIGMA=1.5, SEARCH='BallTree', NN=10, POWER=2.0, FILL_VALUE=None,
        EXTRAPOLATE=False):
    """
    Extrapolate cumulative daily MAR fields to a set of points and times
    using inverse distance weighting in space

    Each model field is gaussian-smoothed over the ice mask, accumulated
    through time, linearly interpolated in time between model epochs and
    combined from the NN nearest model points with inverse-distance weights.
    Dates outside of the model time span are optionally estimated with
    ``regress_model``.

    Parameters
    ----------
    DIRECTORY: directory that is listed for MAR netCDF files
    EPSG: projection of X and Y (formatted into a "+init=" proj string)
    VERSION: leading part of the MAR file names to match
    tdec: times of the points (compared against year-decimal model times)
    X, Y: coordinates of the points in the EPSG projection
    XNAME, YNAME: names of the x and y coordinate variables in the files
    TIMENAME: name of the time variable in the files
    VARIABLE: name of the model variable to read
    SIGMA: standard deviation of the gaussian smoothing kernel
    SEARCH: 'BallTree' to use a BallTree, anything else uses a KDTree
    NN: number of nearest model points used for each output point
    POWER: power of the inverse distance weighting
    FILL_VALUE: fill value of the masked arrays (numpy default if None)
    EXTRAPOLATE: estimate values for dates outside of the model time span

    Returns
    -------
    extrap: 1-d masked array of extrapolated cumulative values with an
        additional ``interpolation`` attribute
        (0: not calculated, 1: interpolated in time, 2: backward, 3: forward)

    Raises
    ------
    FileNotFoundError: if no file in DIRECTORY matches the MAR pattern
    """
    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    #-- NOTE(review): YRS is passed to str.format below but the pattern has
    #-- no {1} field, so files of every year are matched -- confirm intent
    YRS = '|'.join(['{0:4d}'.format(YR) for YR in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-(\d+)(_subset)?.nc$'.format(VERSION, YRS))
    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])
    if not input_files:
        raise FileNotFoundError(
            'No MAR files matching {0} in {1}'.format(rx.pattern, DIRECTORY))

    #-- calculate number of time steps to read
    nt = 0
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            TIME = fileID.variables[TIMENAME][:]
            try:
                nt += np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                #-- time variable was not read as a masked array
                nt += len(TIME)

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            #-- (read with TIMENAME to agree with the counting loop above)
            TIME = fileID.variables[TIMENAME][:]
            try:
                t = np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                t = len(TIME)
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + \
                    (1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp[:t, :, :].copy()
            #-- verify mask object for interpolating data
            #-- the fill-value mask and the surface-type mask are combined in
            #-- a single assignment so that neither overwrites the other
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask = \
                (fd[VARIABLE].data == fd[VARIABLE].fill_value) | \
                (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:t].astype(float)
            units = fileID.variables[TIMENAME].units
            #-- convert epoch of time to Julian days
            Y1, M1, D1, h1, m1, s1 = [
                float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
            ]
            epoch_julian = calc_julian_day(Y1, M1, D1, HOUR=h1, MINUTE=m1,
                SECOND=s1)
            #-- calculate time array in Julian days
            Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
            #-- calculate time in year-decimal
            fd['TIME'][c:c + t] = convert_calendar_decimal(Y2, M2, D2,
                HOUR=h2, MINUTE=m2, SECOND=s2)
            #-- use a gaussian filter to smooth mask
            gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'], SIGMA,
                mode='constant', cval=0)
            #-- indices of smoothed ice mask
            ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
            #-- use a gaussian filter to smooth each model field
            gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- for each time
            for tt in range(t):
                #-- replace fill values before smoothing data
                temp1 = np.zeros((ny, nx))
                i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
                temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
                #-- smooth spatial field
                temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
                    mode='constant', cval=0)
                #-- scale output smoothed field
                gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
                #-- replace valid values with original
                gs[VARIABLE].data[tt, i, j] = temp1[i, j]
                #-- set mask variables for time
                gs[VARIABLE].mask[tt, ii, jj] = False
                #-- calculate cumulative
                cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
                gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
                gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
            #-- add to counter
            c += t

    #-- convert MAR latitude and longitude to input coordinates (EPSG)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(4326))
    xg, yg = pyproj.transform(proj2, proj1, fd['LON'], fd['LAT'])

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    #-- (i, j are the valid indices of the last time step of the last file)
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of output variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=float)
    extrap.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- time span of the model
    time_min = fd['TIME'].min()
    time_max = fd['TIME'].max()

    #-- find days that can be interpolated
    in_range = (tdec >= time_min) & (tdec < time_max)
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['TIME'], np.arange(nt), kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique model date
        #-- linearly interpolate in time between two model maps
        #-- then inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            #-- positions within the reduced (in-range) arrays
            kk, = np.nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / s[:, None]
            #-- variable for times before and after tdec
            var1 = gs['CUMULATIVE'][k, i, j]
            var2 = gs['CUMULATIVE'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['TIME'][k]) / (fd['TIME'][k + 1] - fd['TIME'][k])
            #-- spatially extrapolate using inverse distance weighting
            #-- kk indexes the reduced arrays: map back to the output array
            extrap.data[ind[kk]] = (1.0 - dt) * np.sum(w * var1[indices], axis=1) + \
                dt * np.sum(w * var2[indices], axis=1)
            #-- calculated points are valid unless flagged at the end
            extrap.mask[ind[kk]] = False
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_min)
    count = np.count_nonzero(before)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(before)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[0])
        #-- calculated points are valid unless flagged at the end
        extrap.mask[ind] = False
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec >= time_max)
    count = np.count_nonzero(after)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero(after)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[-1])
        #-- calculated points are valid unless flagged at the end
        extrap.mask[ind] = False
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value) |
        np.isnan(extrap.data))
    extrap.mask[invalid] = True
    #-- return the interpolated values
    return extrap
def interpolate_racmo_daily(base_dir, EPSG, MODEL, tdec, X, Y, VARIABLE='smb',
        SIGMA=1.5, FILL_VALUE=None, EXTRAPOLATE=False):
    """
    Interpolate cumulative daily RACMO fields to a set of points and times

    Each model field is gaussian-smoothed over the ice mask, accumulated
    through time and interpolated with a regular grid interpolator in
    (time, rlat, rlon). Dates outside of the model time span are optionally
    estimated with ``regress_model``.

    Parameters
    ----------
    base_dir: working data directory containing the RACMO subdirectories
    EPSG: projection of X and Y (formatted into a "+init=" proj string)
    MODEL: RACMO model ('FGRN055' is the only model handled here)
    tdec: times of the points (compared against year-decimal model times)
    X, Y: coordinates of the points in the EPSG projection
    VARIABLE: model variable to read ('smb' or 'hgtsrf')
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for the output (file value if None)
    EXTRAPOLATE: estimate values for dates outside of the model time span

    Returns
    -------
    interp: 1-d masked array of interpolated cumulative values with an
        additional ``interpolation`` attribute
        (0: not calculated, 1: interpolated, 2: backward, 3: forward)

    Raises
    ------
    ValueError: if MODEL or VARIABLE is not handled
    FileNotFoundError: if no input file matches the model file pattern
    """
    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    #-- NOTE(review): YRS is passed to str.format below but the pattern has
    #-- no {1} field, so files of every year are matched -- confirm intent
    YRS = '|'.join(['{0:4d}'.format(YR) for YR in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = r'RACMO2.3p2_FGRN055_{0}_daily_(\d+).nc'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')
    else:
        raise ValueError('Unsupported RACMO model: {0}'.format(MODEL))
    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0
    else:
        raise ValueError('Unsupported RACMO variable: {0}'.format(VARIABLE))

    #-- create list of files to read
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])
    if not input_files:
        raise FileNotFoundError(
            'No RACMO files matching {0} in {1}'.format(rx.pattern, DIRECTORY))

    #-- calculate number of time steps to read
    nt = 0
    for FILE in input_files:
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = float(fileID.variables[VARIABLE]._FillValue)

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs['cumulative'].mask = np.zeros((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for FILE in input_files:
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(float)
            units = fileID.variables['time'].units
            #-- convert epoch of time to Julian days
            Y1, M1, D1, h1, m1, s1 = [
                float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
            ]
            epoch_julian = calc_julian_day(Y1, M1, D1, HOUR=h1, MINUTE=m1,
                SECOND=s1)
            #-- calculate time array in Julian days
            Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
            #-- calculate time in year-decimal
            fd['time'][c:c + t] = convert_calendar_decimal(Y2, M2, D2,
                HOUR=h2, MINUTE=m2, SECOND=s2)
            #-- use a gaussian filter to smooth mask
            gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
                mode='constant', cval=0)
            #-- indices of smoothed ice mask
            ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
            #-- use a gaussian filter to smooth each model field
            gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- for each time
            for tt in range(t):
                #-- replace fill values before smoothing data
                temp1 = np.zeros((ny, nx))
                i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
                temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
                #-- smooth spatial field
                temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
                    mode='constant', cval=0)
                #-- scale output smoothed field
                gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
                #-- replace valid values with original
                gs[VARIABLE][tt, i, j] = temp1[i, j]
                #-- set mask variables for time
                gs[VARIABLE].mask[tt, ii, jj] = False
                #-- calculate cumulative
                cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
                gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
                gs['cumulative'].mask[c + tt, ii, jj] = False
            #-- add to counter
            c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- RACMO models are rotated pole latitude and longitude
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj(proj4_params)
    #-- calculate rotated pole coordinates of input coordinates
    ix, iy = pyproj.transform(proj1, proj2, X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    v, triangle = find_valid_triangulation(gs['x'][ii, jj], gs['y'][ii, jj])
    #-- check where points are within the complex hull of the triangulation
    if v:
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=fv, dtype=float)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- time span of the model
    time_min = fd['time'].min()
    time_max = fd['time'].max()

    #-- find days that can be interpolated
    in_range = (tdec >= time_min) & (tdec <= time_max) & valid
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].mask)
        #-- interpolate to points
        interp.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_min) & valid
    count = np.count_nonzero(before)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(before)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['cumulative'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['cumulative'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec > time_max) & valid
    count = np.count_nonzero(after)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero(after)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['cumulative'].data[kk, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['cumulative'].mask[kk, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value) |
        np.isnan(interp.data))
    interp.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare with None so that a fill value of 0 is honored)
    if FILL_VALUE is not None:
        interp.fill_value = FILL_VALUE
        interp.data[interp.mask] = interp.fill_value
    #-- return the interpolated values
    return interp
def racmo_integrate_firn_height(base_dir, MODEL, VARIABLE='zs', OUTPUT=True):
    """
    Integrate a RACMO firn variable over three Promice mask classes and
    extend the integrated time series two years forward in time

    The firn variable is multiplied by the interpolated grid cell area and
    summed for cells with ``Icemask_GR == 1`` and ``Promicemask`` equal to
    2, 3 and 4. The extension in time is calculated with ``regress_model``
    from the last 365 time steps of each integrated series.

    Parameters
    ----------
    base_dir: working data directory containing the RACMO subdirectories
    MODEL: RACMO firn model ('FGRN11' or 'FGRN055')
    VARIABLE: firn variable to read ('zs' or 'FirnAir')
    OUTPUT: write the combined time series to a text file in the model
        directory

    Returns
    -------
    combined_firn: integrated values for the model and extrapolated dates
        with one column for each of the three mask classes
    combined_time: model times followed by the extrapolated times

    Raises
    ------
    ValueError: if MODEL is not one of the handled models
    """
    #-- set parameters based on input model
    FIRN_FILE = {}
    if (MODEL == 'FGRN11'):
        #-- filename and directory for input FGRN11 file
        #-- NOTE(review): no 'Mask' file is listed for FGRN11, so reading
        #-- the masks below raises a KeyError for this model -- confirm
        FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN11_1960-2016']
        FIRN_OUTPUT = 'FDM_{0}_FGRN11_1960-2016_Promice.txt'
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5
    elif (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['Mask'] = 'FGRN055_Masks_5.5km.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN055_1960-2017']
        FIRN_OUTPUT = 'FDM_{0}_FGRN055_1960-2017_Promice.txt'
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5
    else:
        raise ValueError('Unsupported RACMO firn model: {0}'.format(MODEL))

    #-- Open the RACMO NetCDF file for reading
    #-- (context manager closes the file even if a read fails)
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fd = {}
    with netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r') as fileID:
        #-- Get data from each netCDF variable and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        fd['lon'] = fileID.variables['lon'][:, :].copy()
        fd['lat'] = fileID.variables['lat'][:, :].copy()
        fd['time'] = fileID.variables['time'][:].copy()
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])

    #-- Open the RACMO Mask NetCDF file for reading
    mask = {}
    with netCDF4.Dataset(os.path.join(ddir, FIRN_FILE['Mask']), 'r') as fileID:
        #-- Get data from each netCDF mask variable and remove singleton
        #-- dimensions
        for var in ['Area', 'Icemask_GR', 'Promicemask', 'Topography',
                'lon', 'lat']:
            mask[var] = np.squeeze(fileID.variables[var][:].copy())
    my, mx = np.shape(mask['Area'])

    #-- rotated pole longitude and latitude of input model (model coordinates)
    xg, yg = rotate_coordinates(fd['lon'], fd['lat'], rot_lon, rot_lat)
    xmask, ymask = rotate_coordinates(mask['lon'], mask['lat'],
        rot_lon, rot_lat)
    #-- recreate arrays to fix small floating point errors
    #-- (ensure that arrays are monotonically increasing)
    mask['x'] = np.linspace(np.mean(xmask[:, 0]), np.mean(xmask[:, -1]), mx)
    mask['y'] = np.linspace(np.mean(ymask[0, :]), np.mean(ymask[-1, :]), my)

    #-- create an interpolator for input masks
    #-- masks are on the original RACMO grid and not the firn model grid
    IMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
        mask['Icemask_GR'])
    PMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
        mask['Promicemask'])
    AMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
        mask['Area'])
    #-- interpolate masks to firn model coordinates
    grid_points = np.c_[yg.flatten(), xg.flatten()]
    Icemask_GR = IMI(grid_points)
    Promicemask = PMI(grid_points)
    #-- reshape, round to fix interpolation errors and convert to integers
    fd['Icemask_GR'] = np.round(Icemask_GR.reshape(ny, nx)).astype('i')
    fd['Promicemask'] = np.round(Promicemask.reshape(ny, nx)).astype('i')
    #-- interpolate area to firn model coordinates
    fd['Area'] = AMI(grid_points).reshape(ny, nx)
    #-- clear memory of flattened interpolation masks
    Icemask_GR = None
    Promicemask = None

    #-- output integrated arrays of firn variable (height or firn air content)
    #-- for each land classification mask in km^3
    firn_volume = np.full((nt, 3), fv, dtype=float)
    #-- extrapolate out in time two years
    tdec = np.arange(fd['time'][-1] + time_step, fd['time'][-1] + 2, time_step)
    ntx = len(tdec)
    firn_extrap = np.full((ntx, 3), fv, dtype=float)
    for m in range(3):
        #-- indices of specified mask (0==ocean, 1==ice caps outside Greenland)
        #-- masks of interest: Greenland ice sheet and peripheral glaciers (2-4)
        i, j = np.nonzero((fd[VARIABLE][0, :, :] != fv) &
            (fd['Icemask_GR'] == 1) & (fd['Promicemask'] == (m + 2)))
        #-- for each time
        for t in range(nt):
            #-- convert firn height change to km
            firn_volume[t, m] = np.sum(fd[VARIABLE][t, i, j] *
                fd['Area'][i, j] / 1e3)
        #-- calculate a regression model for calculating values
        #-- read last 365 time steps of data to create regression model
        #-- (approximately 10 years at the 10 day time step)
        N = 365
        T = np.zeros((N))
        FIRN = np.zeros((N))
        #-- reduce time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            FIRN[k] = firn_volume[kk, m]
        #-- calculate regression model
        firn_extrap[:, m] = regress_model(T, FIRN, tdec, ORDER=2,
            CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[-1])

    #-- combine into single arrays
    combined_time = np.concatenate((fd['time'], tdec), axis=0)
    combined_firn = np.concatenate((firn_volume, firn_extrap), axis=0)

    #-- print to file
    if OUTPUT:
        #-- open the file (closed automatically, also on a write error)
        output_file = os.path.join(ddir, FIRN_OUTPUT.format(VARIABLE))
        with open(output_file, 'w') as fid:
            #-- print for each time
            for n, t in enumerate(combined_time):
                args = (t, *combined_firn[n, :])
                print('{0:0.4f}{1:12.4f}{2:12.4f}{3:12.4f}'.format(*args),
                    file=fid)

    #-- return the combined integrated values
    return (combined_firn, combined_time)
def extrapolate_merra_hybrid(base_dir, EPSG, REGION, tdec, X, Y,
        VERSION='v1', VARIABLE='FAC', SEARCH='BallTree', N=10, POWER=2.0,
        SIGMA=1.5, FILL_VALUE=None, EXTRAPOLATE=False):
    """
    Extrapolate MERRA-2 Hybrid firn fields to a set of points and times
    using inverse distance weighting in space

    Each model field is referenced to the first field, gaussian-smoothed
    over the ice mask, linearly interpolated in time between model epochs
    and combined from the N nearest model points with inverse-distance
    weights. Dates outside of the model time span are optionally estimated
    with ``regress_model``.

    Parameters
    ----------
    base_dir: directory containing the MERRA-2 Hybrid netCDF file
    EPSG: projection of X and Y (formatted into a "+init=" proj string)
    REGION: region name (lower-cased in the file name and passed to
        ``set_projection``)
    tdec: times of the points (compared against the file ``time`` variable)
    X, Y: coordinates of the points in the EPSG projection
    VERSION: model version used in the file name
    VARIABLE: model variable to read
    SEARCH: 'BallTree' to use a BallTree, anything else uses a KDTree
    N: number of nearest model points used for each output point
    POWER: power of the inverse distance weighting
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for the output (file value if None)
    EXTRAPOLATE: estimate values for dates outside of the model time span

    Returns
    -------
    extrap_data: 1-d masked array of extrapolated values with an
        additional ``interpolation`` attribute
        (0: not calculated, 1: interpolated in time, 2: backward, 3: forward)

    Raises
    ------
    ValueError: if no input file is defined for VARIABLE and VERSION
    """
    #-- set the input netCDF4 file for the variable of interest
    hybrid_file = None
    if VARIABLE in ('FAC', 'cum_smb_anomaly', 'height'):
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc'.format(VERSION, REGION.lower())
    #-- equality test: `in ('FAC')` would be a substring test on a string
    if (VARIABLE == 'FAC') and (VERSION == 'v0'):
        hybrid_file = 'gsfc_{0}_{1}.nc'.format('FAC', REGION.lower())
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc'.format(
            VARIABLE, REGION.lower())
    if hybrid_file is None:
        raise ValueError('No MERRA-2 Hybrid file for {0} ({1})'.format(
            VARIABLE, VERSION))

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    #-- (context manager closes the file even if a read fails)
    fd = {}
    with netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r') as fileID:
        #-- Get data from each netCDF variable and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        xg = fileID.variables['x'][:, :].copy()
        yg = fileID.variables['y'][:, :].copy()
        fd['time'] = fileID.variables['time'][:].copy()
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25
    #-- number of time steps in 10 years used for the regression models
    #-- (kept separate from N, the number of nearest neighbors)
    NT = int(10.0 / time_step)

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0
    #-- extract x and y coordinate arrays from grids
    fd['x'], fd['y'] = (xg[:, 0], yg[0, :])

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- MERRA-2 Hybrid models are rotated pole latitude and longitude
    MODEL_EPSG = set_projection(REGION)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init={0}".format(MODEL_EPSG))
    ix, iy = pyproj.transform(proj1, proj2, X, Y)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
    extrap_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- time span of the model
    time_min = fd['time'].min()
    time_max = fd['time'].max()

    #-- find days that can be interpolated
    in_range = (tdec >= time_min) & (tdec < time_max)
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- reduce x, y and t coordinates
        #-- the search tree is built in model coordinates, so it is queried
        #-- with the transformed coordinates and not the input X and Y
        xind, yind, tind = (ix[ind], iy[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'], np.arange(nt), kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            #-- positions within the reduced (in-range) arrays
            kk, = np.nonzero(date_indice == k)
            #-- query the search tree to find the N closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=N, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / s[:, None]
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] - fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            #-- kk indexes the reduced arrays: map back to the output array
            extrap_data[ind[kk]] = (1.0 - dt) * np.sum(w * firn1[indices], axis=1) + \
                dt * np.sum(w * firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_min)
    count = np.count_nonzero(before)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(before)
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((ix[ind, None], iy[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, NT))
        T = np.zeros((NT))
        #-- create interpolated time series for calculating regression model
        for k in range(NT):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec >= time_max)
    count = np.count_nonzero(after)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(after)
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((ix[ind, None], iy[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, NT))
        T = np.zeros((NT))
        #-- create interpolated time series for calculating regression model
        for k in range(NT):
            kk = nt - NT + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forwards in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(extrap_data.data == extrap_data.fill_value)
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    #-- (compare with None so that a fill value of 0 is honored)
    if FILL_VALUE is not None:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value
    #-- return the interpolated values
    return extrap_data
def extrapolate_racmo_firn(base_dir, EPSG, MODEL, tdec, X, Y,
                           SEARCH='BallTree', NN=10, POWER=2.0, SIGMA=1.5,
                           VARIABLE='zs', FILL_VALUE=None, REFERENCE=False):
    """
    Extrapolate a RACMO firn model variable to a set of points and times.

    The model fields are gaussian-smoothed outward from the valid (non-fill)
    cells of the first time step, then sampled at the requested coordinates
    with inverse distance weighting of the NN nearest smoothed grid points.
    Dates inside the model time span are linearly interpolated between the
    two bracketing model fields; dates outside are predicted by
    ``regress_model`` fit to the first/last 365 model time steps.

    Arguments
    ---------
    base_dir: directory containing the ``RACMO/<model>`` subdirectories
    EPSG: projection string of X and Y (parsed by ``pyproj.CRS.from_string``)
    MODEL: 'FGRN11', 'FGRN055', 'XANT27', 'ASE055' or 'XPEN055'
    tdec: numpy array of dates, in the units of the file's ``time`` variable
    X, Y: numpy arrays of coordinates in the EPSG projection

    Keyword arguments
    -----------------
    SEARCH: 'BallTree' uses BallTree, any other value uses KDTree
    NN: number of nearest neighbors used in the weighting
    POWER: power of the inverse distance weighting
    SIGMA: standard deviation of the gaussian smoothing kernel (grid cells)
    VARIABLE: 'zs' or 'FirnAir'
    FILL_VALUE: replacement fill value for masked output points (if truthy)
    REFERENCE: subtract the first model field from every field

    Returns
    -------
    Masked array of length ``len(tdec)`` with an extra ``interpolation``
    attribute (1: interpolated, 2: extrapolated backward, 3: forward).
    """
    #-- set parameters based on input model
    FIRN_FILE = {}
    if (MODEL == 'FGRN11'):
        #-- filename and directory for input FGRN11 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN11_1960-2016']
    elif (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN055_1960-2017']
    elif (MODEL == 'XANT27'):
        #-- filename and directory for input XANT27 file
        FIRN_FILE['zs'] = 'FDM_zs_ANT27_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ANT27_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XANT27_1979-2016']
    elif (MODEL == 'ASE055'):
        #-- filename and directory for input ASE055 file
        FIRN_FILE['zs'] = 'FDM_zs_ASE055_1979-2015.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ASE055_1979-2015.nc'
        FIRN_DIRECTORY = ['RACMO', 'ASE055_1979-2015']
    elif (MODEL == 'XPEN055'):
        #-- filename and directory for input XPEN055 file
        FIRN_FILE['zs'] = 'FDM_zs_XPEN055_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_XPEN055_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XPEN055_1979-2016']

    #-- Open the RACMO NetCDF file for reading
    #-- (context manager closes the file even if a read fails)
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fd = {}
    with netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r') as fileID:
        #-- Get data from each netCDF variable and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        fd['lon'] = fileID.variables['lon'][:, :].copy()
        fd['lat'] = fileID.variables['lat'][:, :].copy()
        fd['time'] = fileID.variables['time'][:].copy()
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])
    #-- time bounds of the model
    tmin = fd['time'].min()
    tmax = fd['time'].max()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    #-- (1 over valid model cells of the first time step, 0 elsewhere)
    fd['mask'] = np.zeros((ny, nx))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, ny, nx), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((ny, nx))
        #-- reference to first firn field
        if REFERENCE:
            temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        else:
            temp1[i, j] = fd[VARIABLE][t, i, j].copy()
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        #-- (dividing by the smoothed mask renormalizes the kernel weights)
        gs[VARIABLE][t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE][t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string("epsg:{0:d}".format(4326))
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    #-- convert projection from model coordinates
    xg, yg = transformer.transform(fd['lon'], fd['lat'], direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of firn variable (height or firn air content)
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
    extrap_data.data[:] = extrap_data.fill_value
    extrap_data.mask = np.zeros((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    in_range = (tdec >= tmin) & (tdec < tmax)
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'], np.arange(nt),
            kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            #-- positions within the reduced (ind) arrays for this date
            kk, = np.nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / s[:, None]
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] - fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            #-- kk indexes the reduced arrays: map back to output positions
            extrap_data[ind[kk]] = (1.0 - dt) * np.sum(w * firn1[indices], axis=1) + \
                dt * np.sum(w * firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < tmin)
    if (count > 0):
        #-- indices of dates before firn model
        ind, = np.nonzero(tdec < tmin)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- calculate a regression model for calculating values
        #-- read first 365 time steps of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            #-- (smoothed field, matching the forward extrapolation)
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= tmax)
    if (count > 0):
        #-- indices of dates after firn model
        ind, = np.nonzero(tdec >= tmax)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- calculate a regression model for calculating values
        #-- read last 365 time steps of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at k
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(extrap_data.data == extrap_data.fill_value)
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value
    #-- return the interpolated values
    return extrap_data
def interpolate_racmo_downscaled(base_dir, EPSG, VERSION, tdec, X, Y,
                                 VARIABLE='SMB', FILL_VALUE=None):
    """
    Interpolate a cumulative downscaled (1km) RACMO product to a set of
    points and times.

    Points are transformed to EPSG:3413, only the grid subset spanning the
    points is read, and values are interpolated with a regular-grid
    interpolator in (time, y, x).  Dates before/after the model time span
    are predicted by ``regress_model`` fit to the first/last 120 model
    time steps.

    Arguments
    ---------
    base_dir: directory containing the ``RACMO/SMB1km_v<VERSION>`` data
    EPSG: projection string of X and Y (parsed by ``pyproj.CRS.from_string``)
    VERSION: '1.0', '2.0' or '3.0'
    tdec: numpy array of dates, in the units of the file's ``TIME`` variable
    X, Y: numpy arrays of coordinates in the EPSG projection

    Keyword arguments
    -----------------
    VARIABLE: 'SMB', 'PRECIP', 'RUNOFF', 'SNOWMELT' or 'REFREEZE'
    FILL_VALUE: replacement fill value for masked output points (if truthy)

    Returns
    -------
    Masked array of length ``len(tdec)`` with an extra ``interpolation``
    attribute (1: interpolated, 2: extrapolated backward, 3: forward).
    """
    #-- Full Directory Setup
    DIRECTORY = 'SMB1km_v{0}'.format(VERSION)

    #-- netcdf variable names
    input_products = {}
    input_products['SMB'] = 'SMB_rec'
    input_products['PRECIP'] = 'precip'
    input_products['RUNOFF'] = 'runoff'
    input_products['SNOWMELT'] = 'snowmelt'
    input_products['REFREEZE'] = 'refreeze'

    #-- version 1 was in separate files for each year
    if (VERSION == '1.0'):
        RACMO_MODEL = ['XGRN11', '2.3']
        VARNAME = input_products[VARIABLE]
        SUBDIRECTORY = '{0}_v{1}'.format(VARNAME, VERSION)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY, SUBDIRECTORY)
    elif (VERSION == '2.0'):
        RACMO_MODEL = ['XGRN11', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if VARIABLE in ('SMB', 'PRECIP') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    elif (VERSION == '3.0'):
        RACMO_MODEL = ['FGRN055', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if (VARIABLE == 'SMB') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)

    #-- input cumulative netCDF4 file
    args = (RACMO_MODEL[0], RACMO_MODEL[1], VERSION, VARIABLE)
    input_file = '{0}_RACMO{1}_DS1km_v{2}_{3}_cumul.nc'.format(*args)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string("EPSG:{0:d}".format(3413))
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    ix, iy = transformer.transform(X, Y)

    #-- Open the RACMO NetCDF file for reading
    #-- (context manager closes the file even if interpolation fails)
    with netCDF4.Dataset(os.path.join(input_dir, input_file), 'r') as fileID:
        #-- input shape of RACMO data
        nt = fileID[VARNAME].shape[0]
        #-- invalid data value
        #-- NOTE(review): assumes the variable carries a _FillValue
        #-- attribute, as the sibling readers in this file do -- confirm
        fv = float(fileID.variables[VARNAME]._FillValue)
        #-- Get data from each netCDF variable and remove singleton dimensions
        d = {}
        #-- cell origins on the bottom right
        dx = np.abs(fileID.variables['x'][1] - fileID.variables['x'][0])
        dy = np.abs(fileID.variables['y'][1] - fileID.variables['y'][0])
        #-- x and y arrays at center of each cell
        d['x'] = fileID.variables['x'][:].copy() - dx / 2.0
        d['y'] = fileID.variables['y'][:].copy() - dy / 2.0
        #-- extract time (decimal years)
        d['TIME'] = fileID.variables['TIME'][:].copy()
        tmin = d['TIME'].min()
        tmax = d['TIME'].max()

        #-- choose a subset of model variables that span the input data
        xr = [ix.min() - dx, ix.max() + dx]
        yr = [iy.min() - dy, iy.max() + dy]
        cols = np.flatnonzero((d['x'] >= xr[0]) & (d['x'] <= xr[1]))
        rows = np.flatnonzero((d['y'] >= yr[0]) & (d['y'] <= yr[1]))
        ny = rows.size
        nx = cols.size
        #-- mask object for interpolating data
        d['MASK'] = np.array(fileID.variables['MASK'][rows, cols], dtype=bool)
        d['x'] = d['x'][cols]
        d['y'] = d['y'][rows]

        #-- Check ix and iy against the bounds of d['x'] and d['y']
        valid = (ix >= d['x'].min()) & (ix <= d['x'].max()) & (
            iy >= d['y'].min()) & (iy <= d['y'].max())
        MI = scipy.interpolate.RegularGridInterpolator((d['y'], d['x']),
            d['MASK'])
        #-- check valid points against the mask:
        #-- any nonzero interpolated mask value keeps the point valid
        valid[valid] = MI(np.c_[iy[valid], ix[valid]])

        #-- output interpolated arrays of variable
        npts = len(tdec)
        interp_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
        #-- interpolation mask of invalid values
        interp_data.mask = np.ones((npts), dtype=bool)
        #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
        interp_data.interpolation = np.zeros((npts), dtype=np.uint8)

        #-- find days that can be interpolated
        in_range = (tdec >= tmin) & (tdec <= tmax) & valid
        if np.any(in_range):
            #-- indices of dates for interpolated days
            ind, = np.nonzero(in_range)
            #-- determine which subset of time to read from the netCDF4 file
            f = scipy.interpolate.interp1d(d['TIME'], np.arange(nt),
                kind='linear', fill_value=(0, nt - 1), bounds_error=False)
            date_indice = f(tdec[ind]).astype(int)
            #-- months to read
            months = np.arange(date_indice.min(),
                np.minimum(date_indice.max() + 2, d['TIME'].size))
            nm = len(months)
            #-- extract variable for months of interest
            d[VARNAME] = np.zeros((nm, ny, nx))
            for i, m in enumerate(months):
                d[VARNAME][i, :, :] = fileID.variables[VARNAME][m, rows, cols].copy()
            #-- create an interpolator for variable
            RGI = scipy.interpolate.RegularGridInterpolator(
                (d['TIME'][months], d['y'], d['x']), d[VARNAME])
            #-- interpolate to points
            interp_data.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
            #-- NOTE(review): the output mask (True = invalid) is filled from
            #-- the same MASK interpolator whose nonzero values mark points
            #-- as valid above -- the polarity looks inverted; confirm the
            #-- meaning of the file's MASK variable before relying on it
            interp_data.mask[ind] = MI(np.c_[iy[ind], ix[ind]])
            #-- set interpolation type (1: interpolated)
            interp_data.interpolation[ind] = 1

        #-- check if needing to extrapolate backwards in time
        count = np.count_nonzero((tdec < tmin) & valid)
        if (count > 0):
            #-- indices of dates before RACMO model
            ind, = np.nonzero((tdec < tmin) & valid)
            #-- calculate a regression model for calculating values
            #-- read first 120 time steps of data to create regression model
            N = 120
            #-- spatially interpolate variable to coordinates
            VAR = np.zeros((count, N))
            T = np.zeros((N))
            #-- spatially interpolate mask to coordinates
            mspl = scipy.interpolate.RectBivariateSpline(d['x'], d['y'],
                d['MASK'].T, kx=1, ky=1)
            interp_data.mask[ind] = mspl.ev(ix[ind], iy[ind])
            #-- create interpolated time series for calculating regression model
            for k in range(N):
                #-- time at k
                T[k] = d['TIME'][k]
                #-- spatially interpolate variable
                spl = scipy.interpolate.RectBivariateSpline(d['x'], d['y'],
                    fileID.variables[VARNAME][k, rows, cols].T, kx=1, ky=1)
                #-- create numpy masked array of interpolated values
                VAR[:, k] = spl.ev(ix[ind], iy[ind])
            #-- calculate regression model
            for n, v in enumerate(ind):
                interp_data.data[v] = regress_model(T, VAR[n, :], tdec[v],
                    ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                    RELATIVE=T[0])
            #-- set interpolation type (2: extrapolated backward)
            interp_data.interpolation[ind] = 2

        #-- check if needing to extrapolate forward in time
        count = np.count_nonzero((tdec > tmax) & valid)
        if (count > 0):
            #-- indices of dates after RACMO model
            ind, = np.nonzero((tdec > tmax) & valid)
            #-- calculate a regression model for calculating values
            #-- read last 120 time steps of data to create regression model
            N = 120
            #-- spatially interpolate variable to coordinates
            VAR = np.zeros((count, N))
            T = np.zeros((N))
            #-- spatially interpolate mask to coordinates
            mspl = scipy.interpolate.RectBivariateSpline(d['x'], d['y'],
                d['MASK'].T, kx=1, ky=1)
            interp_data.mask[ind] = mspl.ev(ix[ind], iy[ind])
            #-- create interpolated time series for calculating regression model
            for k in range(N):
                kk = nt - N + k
                #-- time at k
                T[k] = d['TIME'][kk]
                #-- spatially interpolate variable
                spl = scipy.interpolate.RectBivariateSpline(d['x'], d['y'],
                    fileID.variables[VARNAME][kk, rows, cols].T, kx=1, ky=1)
                #-- create numpy masked array of interpolated values
                VAR[:, k] = spl.ev(ix[ind], iy[ind])
            #-- calculate regression model
            for n, v in enumerate(ind):
                interp_data.data[v] = regress_model(T, VAR[n, :], tdec[v],
                    ORDER=2, CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                    RELATIVE=T[-1])
            #-- set interpolation type (3: extrapolated forward)
            interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value
    #-- return the interpolated values
    return interp_data
def interpolate_mar_daily(DIRECTORY, EPSG, VERSION, tdec, X, Y,
                          XNAME=None, YNAME=None, TIMENAME='TIME',
                          VARIABLE='SMB', SIGMA=1.5, FILL_VALUE=None,
                          EXTRAPOLATE=False):
    """
    Interpolate cumulative daily MAR model output to a set of points
    and times.

    Every matching file in DIRECTORY is read in sorted order; each daily
    field is gaussian-smoothed outward from the ice mask (surface type 4)
    and accumulated through time.  The cumulative fields are sampled with
    a regular-grid interpolator in (time, y, x).  When EXTRAPOLATE is set,
    dates outside the model time span are predicted by ``regress_model``
    fit to the first/last 365 model time steps.

    Arguments
    ---------
    DIRECTORY: directory containing the MAR netCDF files
    EPSG: projection string of X and Y (parsed by ``pyproj.CRS.from_string``)
    VERSION: prefix of the MAR file names (used inside a regular expression)
    tdec: numpy array of dates in year-decimal
    X, Y: numpy arrays of coordinates in the EPSG projection

    Keyword arguments
    -----------------
    XNAME, YNAME: names of the netCDF x and y coordinate variables
        (values are multiplied by 1000 to convert to meters)
    TIMENAME: name of the netCDF time variable
    VARIABLE: name of the netCDF variable to accumulate
    SIGMA: standard deviation of the gaussian smoothing kernel (grid cells)
    FILL_VALUE: fill value of the output masked array
    EXTRAPOLATE: extrapolate in time outside of the model span

    Returns
    -------
    Masked array of length ``len(tdec)`` with an extra ``interpolation``
    attribute (1: interpolated, 2: extrapolated backward, 3: forward).

    Raises
    ------
    FileNotFoundError: no file in DIRECTORY matches the MAR file pattern
    """
    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(year) for year in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    #-- NOTE(review): the template has no {1} field, so YRS is ignored and
    #-- files for every year are matched (the cumulative sum then starts at
    #-- the first file in the directory) -- confirm this is intended
    rx = re.compile(r'{0}-(.*?)-(\d+)(_subset)?.nc$'.format(VERSION, YRS))

    #-- MAR model projection: Polar Stereographic (Oblique)
    #-- Earth Radius: 6371229 m
    #-- True Latitude: 0
    #-- Center Longitude: -40
    #-- Center Latitude: 70.5
    proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
                    "+a=6371229 +no_defs")

    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])
    if not input_files:
        raise FileNotFoundError(
            'No MAR files matching {0} in {1}'.format(rx.pattern, DIRECTORY))

    #-- calculate number of time steps to read
    nt = 0
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            nt += len(fileID.variables[TIMENAME][:])

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables[TIMENAME][:])
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                #-- (sized by the time steps of this file, not the total)
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:, 0, :, :] + \
                    (1.0 - MASK) * tmp[:, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp.copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:].astype(float)
            units = fileID.variables[TIMENAME].units

        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1, M1, D1, HOUR=h1, MINUTE=m1,
            SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = convert_calendar_decimal(Y2, M2, D2,
            HOUR=h2, MINUTE=m2, SECOND=s2)

        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'], SIGMA,
            mode='constant', cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
                mode='constant', cval=0)
            #-- scale output smoothed field
            #-- (dividing by the smoothed mask renormalizes the kernel weights)
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- time bounds of the model
    tmin = fd['TIME'].min()
    tmax = fd['TIME'].max()

    #-- convert projection from input coordinates (EPSG) to model coordinates
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(proj4_params)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    points = np.concatenate((gs['x'][ii, jj, None], gs['y'][ii, jj, None]),
        axis=1)
    triangle = scipy.spatial.Delaunay(points.data,
        qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=float)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    in_range = (tdec >= tmin) & (tdec <= tmax) & valid
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].mask)
        #-- interpolate to points
        interp.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < tmin) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < tmin) & valid)
        #-- read the first 365 time steps of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > tmax) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > tmax) & valid)
        #-- read the last 365 time steps of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].data[kk, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].mask[kk, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value) |
        np.isnan(interp.data))
    interp.mask[invalid] = True
    #-- return the interpolated values
    return interp
def extrapolate_racmo_daily(base_dir, EPSG, MODEL, tdec, X, Y,
                            VARIABLE='smb', SIGMA=1.5, SEARCH='BallTree',
                            NN=10, POWER=2.0, FILL_VALUE=None,
                            EXTRAPOLATE=False):
    """
    Extrapolate cumulative daily RACMO model output to a set of points
    and times.

    Every matching file is read in sorted order; each daily field is
    gaussian-smoothed outward from the valid model cells and accumulated
    through time.  The cumulative fields are sampled at the requested
    coordinates with inverse distance weighting of the NN nearest valid
    grid points.  Dates inside the model time span are linearly
    interpolated between the two bracketing fields; when EXTRAPOLATE is
    set, dates outside are predicted by ``regress_model`` fit to the
    first/last 365 model time steps.

    Arguments
    ---------
    base_dir: directory containing ``RACMO/GL/RACMO2.3p2_FGRN055``
    EPSG: projection string of X and Y (parsed by ``pyproj.CRS.from_string``)
    MODEL: 'FGRN055' (the only model with a file pattern defined here)
    tdec: numpy array of dates in year-decimal
    X, Y: numpy arrays of coordinates in the EPSG projection

    Keyword arguments
    -----------------
    VARIABLE: 'smb' or 'hgtsrf' (the only variables with a scale factor)
    SIGMA: standard deviation of the gaussian smoothing kernel (grid cells)
    SEARCH: 'BallTree' uses BallTree, any other value uses KDTree
    NN: number of nearest neighbors used in the weighting
    POWER: power of the inverse distance weighting
    FILL_VALUE: replacement fill value for masked output points (if truthy)
    EXTRAPOLATE: extrapolate in time outside of the model span

    Returns
    -------
    Masked array of length ``len(tdec)`` with an extra ``interpolation``
    attribute (1: interpolated, 2: extrapolated backward, 3: forward).
    """
    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(year) for year in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = r'RACMO2.3p2_FGRN055_{0}_daily_(\d+).nc'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')

    #-- create list of files to read
    #-- NOTE(review): the template has no {1} field, so YRS is ignored and
    #-- files for every year are matched -- confirm this is intended
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for FILE in input_files:
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = float(fileID.variables[VARIABLE]._FillValue)

    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    #-- start fully masked: cells are unmasked below as they are filled
    gs['cumulative'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for FILE in input_files:
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(float)
            units = fileID.variables['time'].units

        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1, M1, D1, HOUR=h1, MINUTE=m1,
            SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['time'][c:c + t] = convert_calendar_decimal(Y2, M2, D2,
            HOUR=h2, MINUTE=m2, SECOND=s2)

        #-- use a gaussian filter to smooth mask
        gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
            mode='constant', cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
                mode='constant', cval=0)
            #-- scale output smoothed field
            #-- (dividing by the smoothed mask renormalizes the kernel weights)
            gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE][tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['cumulative'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- time bounds of the model
    tmin = fd['time'].min()
    tmax = fd['time'].max()

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string("EPSG:{0:d}".format(4326))
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    xg, yg = transformer.transform(fd['lon'], fd['lat'], direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    #-- (i, j are the valid cells of the last time step read above)
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=fv, dtype=float)
    extrap.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    in_range = (tdec >= tmin) & (tdec < tmax)
    if np.any(in_range):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(in_range)
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'], np.arange(nt),
            kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique racmo date
        #-- linearly interpolate in time between two racmo maps
        #-- then inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            #-- positions within the reduced (ind) arrays for this date
            kk, = np.nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / s[:, None]
            #-- variable for times before and after tdec
            var1 = gs['cumulative'][k, i, j]
            var2 = gs['cumulative'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] - fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            #-- kk indexes the reduced arrays: map back to output positions
            extrap[ind[kk]] = (1.0 - dt) * np.sum(w * var1[indices], axis=1) + \
                dt * np.sum(w * var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < tmin)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(tdec < tmin)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- read the first 365 time steps of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME, DATA[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= tmax)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after racmo model
        ind, = np.nonzero(tdec >= tmax)
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / s[:, None]
        #-- read the last 365 time steps of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME, DATA[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value) |
        np.isnan(extrap.data))
    extrap.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap.fill_value = FILL_VALUE
        extrap.data[extrap.mask] = extrap.fill_value
    #-- return the interpolated values
    return extrap
def interpolate_mar_daily(DIRECTORY, EPSG, VERSION, tdec, X, Y,
        XNAME=None, YNAME=None, TIMENAME='TIME', VARIABLE='SMB',
        SIGMA=1.5, FILL_VALUE=None, EXTRAPOLATE=False):
    """
    Interpolate cumulative daily MAR model fields to points and times

    Reads every MAR netCDF file in ``DIRECTORY`` matching ``VERSION``,
    gaussian-smooths ``VARIABLE`` over the ice mask (``SRF == 4``),
    accumulates the smoothed fields through time and interpolates the
    cumulative field to the input coordinates and dates.

    Parameters
    ----------
    DIRECTORY: directory listed to find the MAR netCDF files
    EPSG: projection string of the input coordinates (parsed by pyproj)
    VERSION: MAR version prefix used to match file names
    tdec: dates to interpolate in year-decimal
    X: x-coordinates to interpolate in the EPSG projection
    Y: y-coordinates to interpolate in the EPSG projection
    XNAME: name of the x-coordinate variable in the netCDF files
    YNAME: name of the y-coordinate variable in the netCDF files
    TIMENAME: name of the time variable in the netCDF files
    VARIABLE: name of the MAR variable to accumulate and interpolate
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: fill value of the output masked array
    EXTRAPOLATE: fit a regression model to the first or last 365
        time steps to estimate values for dates outside of the model

    Returns
    -------
    interp: masked array of interpolated cumulative values with an
        ``interpolation`` attribute denoting the algorithm used
        (1: interpolated, 2: extrapolated backward, 3: forward)

    Raises
    ------
    FileNotFoundError: if no files in DIRECTORY match the MAR pattern
    """
    #-- regular expression pattern for MAR dataset
    #-- NOTE: the pattern matches every year available for the version
    #-- the file list is not reduced to the years spanned by tdec
    rx = re.compile(r'{0}-(.*?)-(\d+)(_subset)?.nc$'.format(VERSION))

    #-- MAR model projection: Polar Stereographic (Oblique)
    #-- Earth Radius: 6371229 m
    #-- True Latitude: 0
    #-- Center Longitude: -40
    #-- Center Latitude: 70.5
    proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
                    "+a=6371229 +no_defs")

    #-- create list of files to read
    try:
        input_files = sorted(f for f in os.listdir(DIRECTORY) if rx.match(f))
    except Exception:
        print(f"failed to find files matching {VERSION} in {DIRECTORY}")
        raise
    #-- the grid dimensions are read from the files themselves
    #-- fail with a clear message if there is nothing to read
    if not input_files:
        raise FileNotFoundError(
            f"no MAR files matching {VERSION} found in {DIRECTORY}")

    #-- calculate number of time steps to read
    nt = 0
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            TIME = fileID.variables[TIMENAME][:]
            #-- count only the time steps that are not fill values
            #-- (arrays without a fill value raise an AttributeError)
            try:
                nt += np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                nt += len(TIME)

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for FILE in input_files:
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            #-- use the same time variable as when counting time steps
            TIME = fileID.variables[TIMENAME][:]
            try:
                t = np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                t = len(TIME)
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + \
                    (1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp[:t, :, :].copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask = fd[VARIABLE].data == fd[VARIABLE].fill_value
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:t].astype(float)
            date_string = fileID.variables[TIMENAME].units
        #-- extract epoch and units
        epoch, to_secs = SMBcorr.time.parse_date_string(date_string)
        #-- calculate time array in Julian days
        JD = SMBcorr.time.convert_delta_time(delta_time * to_secs,
            epoch1=epoch, epoch2=(1858, 11, 17, 0, 0, 0),
            scale=1.0 / 86400.0) + 2400000.5
        #-- convert from Julian days to calendar dates
        YY, MM, DD, hh, mm, ss = SMBcorr.time.convert_julian(JD)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = SMBcorr.time.convert_calendar_decimal(YY, MM,
            day=DD, hour=hh, minute=mm, second=ss)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'], SIGMA,
            mode='constant', cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
                mode='constant', cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(proj4_params)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of valid model points
    #-- (uses the smoothed ice mask indices of the last file read)
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    v, triangle = find_valid_triangulation(gs['x'][ii, jj], gs['y'][ii, jj])
    #-- check if there is a valid triangulation
    if v:
        #-- check where points are within the complex hull of the triangulation
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=float)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- time range of the model
    time_start = fd['TIME'].min()
    time_end = fd['TIME'].max()

    #-- find days that can be interpolated
    within = (tdec >= time_start) & (tdec <= time_end) & valid
    if np.any(within):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(within)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].mask)
        #-- interpolate to points
        coords = np.c_[tdec[ind], iy[ind], ix[ind]]
        interp.data[ind] = RGI(coords)
        interp.mask[ind] = MI(coords)
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- number of time steps used to create the regression models
    N = 365

    def regression_series(ind, start):
        #-- spatially interpolate N cumulative fields starting at index
        #-- start to the points ind for calculating a regression model
        DATA = np.zeros((len(ind), N))
        MASK = np.zeros((len(ind), N), dtype=bool)
        TIME = np.zeros((N))
        for k in range(N):
            kk = start + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].data[kk, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs['CUMULATIVE'].mask[kk, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        return (TIME, DATA, MASK)

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_start) & valid
    if np.any(before) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(before)
        #-- read the first year of data to create regression model
        TIME, DATA, MASK = regression_series(ind, 0)
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec > time_end) & valid
    if np.any(after) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero(after)
        #-- read the last year of data to create regression model
        TIME, DATA, MASK = regression_series(ind, nt - N)
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME, DATA[n, :], tdec[v],
                ORDER=2, CYCLES=[0.25, 0.5, 1.0], RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value) |
        np.isnan(interp.data))
    interp.mask[invalid] = True

    #-- return the interpolated values
    return interp
def extrapolate_merra_hybrid(base_dir, EPSG, REGION, tdec, X, Y,
        VERSION='v1', VARIABLE='FAC', SEARCH='BallTree', N=10, POWER=2.0,
        SIGMA=1.5, FILL_VALUE=None, EXTRAPOLATE=False, GZIP=False):
    """
    Spatially extrapolate MERRA-2 Hybrid firn model fields to points

    Reads a MERRA-2 Hybrid netCDF file, gaussian-smooths each field
    (referenced to the first field read) beyond the model ice mask,
    linearly interpolates in time between model fields and uses
    inverse distance weighting of the nearest model points in space.

    Parameters
    ----------
    base_dir: directory containing the MERRA-2 Hybrid netCDF files
    EPSG: projection string of the input coordinates (parsed by pyproj)
    REGION: model region (lowercased for the file name and passed
        to ``set_projection``)
    tdec: dates to interpolate in year-decimal
    X: x-coordinates to interpolate in the EPSG projection
    Y: y-coordinates to interpolate in the EPSG projection
    VERSION: MERRA-2 Hybrid model version
    VARIABLE: name of the netCDF variable to extrapolate
    SEARCH: nearest-neighbor algorithm (``'BallTree'``, else KDTree)
    N: number of nearest model points used in the weighting
    POWER: power of the inverse distance weighting
    SIGMA: standard deviation of the gaussian smoothing kernel
    FILL_VALUE: replacement fill value for the output masked array
    EXTRAPOLATE: read the full time series and fit regression models
        to estimate values for dates outside of the model
    GZIP: input netCDF file is gzip-compressed

    Returns
    -------
    extrap_data: masked array of extrapolated values with an
        ``interpolation`` attribute denoting the algorithm used
        (1: interpolated in time, 2: extrapolated backward, 3: forward)

    Raises
    ------
    ValueError: if no input file is known for VARIABLE and VERSION
    """
    #-- suffix if compressed
    suffix = '.gz' if GZIP else ''
    region = REGION.lower()
    #-- set the input netCDF4 file for the variable of interest
    #-- version v0 files are checked first: FAC is also a member of
    #-- the general firn variables and would otherwise never reach here
    if (VARIABLE == 'FAC') and (VERSION == 'v0'):
        hybrid_file = 'gsfc_{0}_{1}.nc{2}'.format('FAC', region, suffix)
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        args = (VARIABLE, region, suffix)
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('FAC', 'cum_smb_anomaly', 'SMB_a', 'height', 'h_a'):
        args = (VERSION, region, suffix)
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('smb', 'SMB', 'Me', 'Ra', 'Ru', 'Sn-Ev'):
        args = (VERSION, region, suffix)
        hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('Me_a', 'Ra_a', 'Ru_a', 'Sn-Ev_a'):
        args = (VERSION, region, suffix)
        hybrid_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args)
    else:
        raise ValueError('No MERRA-2 Hybrid file for VARIABLE {0} '
            'and VERSION {1}'.format(VARIABLE, VERSION))

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    if GZIP:
        #-- read as in-memory (diskless) netCDF4 dataset
        with gzip.open(os.path.join(base_dir, hybrid_file), 'r') as fid:
            fileID = netCDF4.Dataset(uuid.uuid4().hex, memory=fid.read())
    else:
        #-- read netCDF4 dataset
        fileID = netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r')

    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25
    #-- close the dataset even if reading one of the variables fails
    try:
        #-- if extrapolating data: read the full dataset
        #-- if simply interpolating with fill values: reduce to a subset
        if EXTRAPOLATE:
            #-- read time variables
            fd['time'] = fileID.variables['time'][:].copy()
            #-- read full dataset and remove singleton dimensions
            fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
        else:
            #-- reduce grids to time period of input buffered by time steps
            tmin = np.min(tdec) - 2.0 * time_step
            tmax = np.max(tdec) + 2.0 * time_step
            #-- find indices to times
            nt, = fileID.variables['time'].shape
            nearest = scipy.interpolate.interp1d(fileID.variables['time'][:],
                np.arange(nt), kind='nearest', bounds_error=False,
                fill_value=(0, nt))
            imin, imax = nearest((tmin, tmax)).astype(int)
            #-- read reduced time variables
            fd['time'] = fileID.variables['time'][imin:imax + 1].copy()
            #-- read reduced dataset and remove singleton dimensions
            fd[VARIABLE] = np.squeeze(
                fileID.variables[VARIABLE][imin:imax + 1, :, :])
        #-- invalid data value
        fv = float(fileID.variables[VARIABLE]._FillValue)
        #-- input shape of MERRA-2 Hybrid firn data
        nt, nx, ny = np.shape(fd[VARIABLE])
        #-- extract x and y coordinate arrays from grids if applicable
        #-- else create meshgrids of coordinate arrays
        if (np.ndim(fileID.variables['x'][:]) == 2):
            xg = fileID.variables['x'][:].copy()
            yg = fileID.variables['y'][:].copy()
            fd['x'], fd['y'] = (xg[:, 0], yg[0, :])
        else:
            fd['x'] = fileID.variables['x'][:].copy()
            fd['y'] = fileID.variables['y'][:].copy()
            xg, yg = np.meshgrid(fd['x'], fd['y'], indexing='ij')
    finally:
        #-- close the NetCDF files
        fileID.close()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- pyproj transformer for converting to input coordinates (EPSG)
    MODEL_EPSG = set_projection(REGION)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(MODEL_EPSG)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    #-- convert projection from model coordinates
    #-- transform the full 2-D grids: the 1-D coordinate axes cannot be
    #-- indexed with the (ii,jj) indices of the smoothed ice mask
    xg, yg = transformer.transform(xg, yg, direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    def idw_weights(x, y):
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((x[:, None], y[:, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        return (power_inverse_distance / s[:, None], indices)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
    extrap_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- time range of the model fields that were read
    time_start = fd['time'].min()
    time_end = fd['time'].max()
    #-- number of time steps in 10 years for creating regression models
    #-- (kept separate from the number of neighbors N)
    nreg = int(10.0 / time_step)

    #-- find days that can be interpolated
    within = (tdec >= time_start) & (tdec < time_end)
    if np.any(within):
        #-- indices of dates for interpolated days
        ind, = np.nonzero(within)
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'], np.arange(nt),
            kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            #-- indices within the reduced (ind) arrays
            kk, = np.nonzero(date_indice == k)
            #-- inverse distance weights of the N closest points
            w, indices = idw_weights(xind[kk], yind[kk])
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / \
                (fd['time'][k + 1] - fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            #-- map the reduced indices back to the full output array
            extrap_data[ind[kk]] = \
                (1.0 - dt) * np.sum(w * firn1[indices], axis=1) + \
                dt * np.sum(w * firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    before = (tdec < time_start)
    if np.any(before) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(before)
        #-- inverse distance weights of the N closest points
        w, indices = idw_weights(X[ind], Y[ind])
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((len(ind), nreg))
        T = np.zeros((nreg))
        #-- create interpolated time series for calculating regression model
        for k in range(nreg):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    after = (tdec >= time_end)
    if np.any(after) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(after)
        #-- inverse distance weights of the N closest points
        w, indices = idw_weights(X[ind], Y[ind])
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((len(ind), nreg))
        T = np.zeros((nreg))
        #-- create interpolated time series for calculating regression model
        for k in range(nreg):
            kk = nt - nreg + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(T, FIRN[n, :], tdec[v], ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forwards in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap_data.data == extrap_data.fill_value) |
        np.isnan(extrap_data.data))
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified (a fill value of 0 is allowed)
    if FILL_VALUE is not None:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value

    #-- return the interpolated values
    return extrap_data