def hist_sum(da_escvel, levels, start, end) -> np.ndarray:
    """From a DataArray of escape velocities and levels array, create an array
    for the sum of histogram bins over a series of decades."""
    bins = np.array(levels)
    hist_sum = histogram(da_escvel[start], bins=[bins]).data
    for i in range(start + 1, end):
        h = histogram(da_escvel[i], bins=[bins]).data
        hist_sum += h
    return hist_sum
def vertical_rebin(data, bin_data, bins, dz, vert_dim="st_ocean"):
    nanmask = np.isnan(data)
    # Should we also check the bin data for nans?
    full_sum = histogram(
        bin_data.where(~nanmask),
        bins=[bins],
        weights=(data * dz).where(~nanmask),
        dim=[vert_dim],
    )
    return full_sum
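# A minimal usage sketch for vertical_rebin (not from the original source): rebin a
# temperature profile from depth coordinates onto density bins, weighting each cell
# by its thickness dz. The data and bin edges below are illustrative assumptions;
# numpy, xarray, and xhistogram.xarray.histogram are assumed imported as in the
# snippets above.
import numpy as np
import xarray as xr

depth = np.arange(5, 500, 10.0)
temp = xr.DataArray(20 - 0.03 * depth, dims=["st_ocean"],
                    coords={"st_ocean": depth}, name="temp")
sigma0 = xr.DataArray(24 + 0.008 * depth, dims=["st_ocean"],
                      coords={"st_ocean": depth}, name="sigma0")
dz = xr.full_like(temp, 10.0)  # uniform 10 m cell thickness

sigma_bins = np.arange(24, 29, 0.25)
temp_in_sigma = vertical_rebin(temp, sigma0, sigma_bins, dz, vert_dim="st_ocean")
# temp_in_sigma has dimension "sigma0_bin": the dz-weighted sum of temperature
# falling in each density bin.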
def compute_values(ds, geometry, dataset, variable, years, depth):
    if dataset == 'historic':
        start_date = years[0]
        end_date = years[1]
    else:
        start_date = np.datetime64(
            datetime.strptime(f'{years[0]}-12-31', "%Y-%m-%d"))
        end_date = np.datetime64(
            datetime.strptime(f'{years[1]}-12-31', "%Y-%m-%d"))

    xmin, ymax, xmax, ymin = geometry.bounds
    ds_index = ds.where(ds['mask'].isin(0.0)).sel(depth=depth,
                                                  lon=slice(xmin, xmax),
                                                  lat=slice(ymin, ymax),
                                                  time=slice(start_date, end_date))

    # Get difference between the two dates
    diff = ds_index.loc[dict(time=end_date)] - ds_index.loc[dict(time=start_date)]

    # Get counts and bins of the histogram
    if (dataset == 'experimental') and (variable == 'stocks'):
        diff = diff[variable] / 10.
    else:
        diff = diff[variable]

    nBinds = binds(dataset, variable)
    bindsRange = ranges(dataset, variable)
    bins = np.linspace(bindsRange[0], bindsRange[1], nBinds + 1)

    h = histogram(diff, bins=[bins], dim=['lat', 'lon'], block_size=1)
    counts = h.values

    mean_diff = diff.mean(skipna=True).values

    if (dataset == 'experimental') and (variable == 'stocks'):
        mean_values = ds_index[variable].mean(['lon', 'lat']).values / 10.
    else:
        mean_values = ds_index[variable].mean(['lon', 'lat']).values

    if dataset == 'historic':
        mean_years = ds.coords.get('time').values
    else:
        mean_years = [
            int(str(x).split('-')[0])
            for x in ds_index.coords.get('time').values
        ]

    # Replace NaNs with None
    if np.isnan(mean_diff):
        mean_diff = None
    mean_values = [None if np.isnan(x) else x for x in mean_values]

    return counts, bins, mean_diff, mean_years, mean_values
def hist_mean(da_escvel: xr.DataArray, levels: np.ndarray, start: int, end: int,
              name: str = 'Esc Vel Distribution') -> xr.DataArray:
    """From a DataArray of escape velocities, levels array, start and end index,
    and name, create a DataArray for the mean of histogram bins over a series of
    decades."""
    bins = np.array(levels)
    hist_sum = histogram(da_escvel[start], bins=[bins]).data
    for i in range(start + 1, end):
        h = histogram(da_escvel[i], bins=[bins]).data
        hist_sum += h
    hist_mean = hist_sum / hist_sum.sum()
    hist_mean = xr.DataArray(hist_mean,
                             dims=['bin_edges'],
                             coords=[np.delete(bins, len(bins) - 1)]).rename(name)
    return hist_mean
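# A minimal usage sketch (not from the original source) exercising hist_sum and
# hist_mean above on a synthetic escape-velocity DataArray with a leading "decade"
# dimension. The names, sizes, and level values are illustrative assumptions;
# numpy, xarray, and xhistogram.xarray.histogram are assumed imported as above.
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
da_escvel = xr.DataArray(rng.exponential(5.0, size=(10, 50, 50)),
                         dims=['decade', 'y', 'x'], name='esc_vel')
levels = np.linspace(0, 30, 31)

counts = hist_sum(da_escvel, levels, start=0, end=10)   # raw bin counts, ndarray
dist = hist_mean(da_escvel, levels, start=0, end=10)    # normalized distribution
print(counts.sum(), float(dist.sum()))  # dist sums to 1 by construction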
def calc_E(l, dl, l_i_vals, c=None, weight=None, dims=['xh', 'yh', 'zl']):
    '''Transport of mass or tracer across contours of [l], due to processes [dl].

    l : xr.DataArray;
        Intensive variable (e.g. temp) that defines layers across which transport
        will be determined.
    dl : xr.DataArray;
        Tendency of l due to (individual or total sum of) diffusive processes,
        e.g. heat tendencies. Multiple processes are included as DataArrays
        inside a dataset.
    l_i_vals : ndarray or xr.DataArray;
        Interface values of l; transport will be across midpoints of these
        interfaces.
    c : None or scalar or xr.DataArray
        If included, evaluating transport of tracer as opposed to volume.
        A scalar could be density, to get mass rather than volume transport.
    weight : xr.DataArray;
        Distribution of weights by which to multiply [dl] to remove the spatial
        dimension of the units. E.g. heat flux in W m-2 should be multiplied by
        the area of the grid cell to recover total heating.
    dims : list of string;
        Dimensions along which to perform the histogram.'''

    import xarray as xr
    import numpy as np
    from xhistogram.xarray import histogram

    # Get the spacing of contours of l
    delta_l_vals = np.diff(l_i_vals)

    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)

    # Specify what should be integrated
    # (this is rather clunky with if statements, but is meant to avoid loading
    # unnecessary variables, such as dummy [c]'s or [weight]'s)
    if c is None:
        if weight is None:
            weights = dl
        else:
            weights = dl * weight
    else:
        if weight is None:
            weights = c * dl
        else:
            weights = c * dl * weight

    # Integrate [weights] within layers of [l] and divide by [delta_l]
    E = histogram(l.where(~nanmask),
                  bins=[l_i_vals],
                  weights=weights.where(~nanmask),
                  dim=dims,
                  block_size=1) / delta_l_vals
    return E
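# A minimal usage sketch for calc_E (not from the original source): volume transport
# across temperature contours driven by a synthetic diffusive temperature tendency.
# All names, sizes, and values below are illustrative assumptions.
import numpy as np
import xarray as xr

dims = ('zl', 'yh', 'xh')
shape = (10, 20, 30)
coords = {d: np.arange(s) for d, s in zip(dims, shape)}

temp = xr.DataArray(20 * np.random.rand(*shape), dims=dims, coords=coords, name='temp')
dtemp = xr.DataArray(1e-6 * np.random.randn(*shape), dims=dims, coords=coords)  # synthetic tendency
area = xr.DataArray(np.full(shape[1:], 1e8), dims=dims[1:],
                    coords={d: coords[d] for d in dims[1:]})  # m2 cell areas

temp_interfaces = np.arange(0, 21, 1.0)
E = calc_E(temp, dtemp, temp_interfaces, weight=area)
# E has dimension "temp_bin": the area-weighted tendency integrated within each
# temperature layer, divided by the layer spacing.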
def calc_P(p, l, l_i_vals, area, greaterthan=True):
    '''Integration of quantity [p] across the volume with [l] greater than the
    *mid-point* values between layer interfaces [l_i_vals] (i.e. the center layer
    values). This is done so that the output of calc_P aligns with that of calc_G
    and calc_E when given the same l_i_vals.

    *** IMPORTANT ***
    Limits of l_i_vals should be such that their mid-points span the full range of l
    *****************

    p : quantity to be integrated (e.g. tendencies of independent tracer [c])
    l : intensive variable, to define volume boundary
    l_i_vals : interface values of l
        (integral is done for l[i] >= 0.5*(l_i_vals[i]+l_i_vals[i+1]))
    area : 2d distribution of horizontal area (dx*dy)
    greaterthan : boolean, True if the integral should be over contours greater
        than each layer interface. False for less than.'''

    # Get the mid-points of the layers (as defined by their interfaces)
    l_l_vals = 0.5 * (l_i_vals[:-1] + l_i_vals[1:])

    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)

    # Integrate p between each layer mid-point
    P_l = histogram(l.where(~nanmask),
                    bins=[l_l_vals],
                    weights=(p * area).where(~nanmask),
                    dim=['xh', 'yh', 'zl'],
                    block_size=1)

    # Cumulatively sum each layer (concatenating zeros at the start), then take it
    # away from the total sum to reverse the order of the summation,
    # i.e. this is the cumulative sum of integrated [p] from largest to smallest [l_l].
    # At the same time, reassign the coordinates to align with the layer mid-points.
    P_l_cumsum = xr.concat([
        xr.zeros_like(P_l.isel({l.name + '_bin': 0})),
        P_l.cumsum(l.name + '_bin')
    ], dim=l.name + '_bin')

    if greaterthan:
        P = (P_l.sum(l.name + '_bin') - P_l_cumsum).assign_coords(
            {l.name + '_bin': l_l_vals})
    else:
        P = P_l_cumsum.assign_coords({l.name + '_bin': l_l_vals})
    return P
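# A minimal usage sketch for calc_P (not from the original source): integrate a
# synthetic quantity p over the volume with temperature greater than each layer
# mid-point. Names, sizes, and values are illustrative assumptions only.
import numpy as np
import xarray as xr

dims = ('zl', 'yh', 'xh')
shape = (10, 20, 30)
coords = {d: np.arange(s) for d, s in zip(dims, shape)}

temp = xr.DataArray(20 * np.random.rand(*shape), dims=dims, coords=coords, name='temp')
p = xr.DataArray(np.random.randn(*shape), dims=dims, coords=coords)
area = xr.DataArray(np.full(shape[1:], 1e8), dims=dims[1:],
                    coords={d: coords[d] for d in dims[1:]})

temp_interfaces = np.arange(-1, 22, 1.0)  # mid-points span the full range of temp
P = calc_P(p, temp, temp_interfaces, area, greaterthan=True)
# P["temp_bin"] holds the layer mid-points; each entry is the integral of p*area
# over the volume where temp exceeds that mid-point.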
def calc_volumetric_cumsum(l, p, l_l_vals, weight=None, dims=['xh', 'yh', 'zl'],
                           greaterthan=True):
    '''Integration of quantity [p] across the volume with [l] greater than the
    layer defined by contours l_l_vals.

    *** IMPORTANT ***
    Limits of l_l_vals must span the full range of l.
    *****************

    l : xr.DataArray;
        Intensive variable, to define volume boundary
    p : xr.DataArray;
        Quantity to be integrated (e.g. thickness)
    l_l_vals : ndarray or xr.DataArray
        Values of the contours for which the integration will be calculated
        *** This is currently set up such that the layer values are increasing ***
        *** I expect it can be generalized, but have not explored this yet ***
    weight : Distribution of weights by which to multiply [p] to remove the spatial
        dimension of the units. E.g. thickness in units 'm' should be multiplied
        by the area of the grid cell to recover total volume.
    greaterthan : boolean;
        True if the integral should be over contours greater than each contour.
        False for less than.'''

    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)

    # Specify what should be integrated
    if weight is None:
        weights = p
    else:
        weights = p * weight

    # Integrate p between each contour layer
    P_l = histogram(l.where(~nanmask),
                    bins=[l_l_vals],
                    weights=weights.where(~nanmask),
                    dim=dims,
                    block_size=1)

    # Cumulatively sum each layer (concatenating zeros at the start),
    # i.e. this is the cumulative sum of integrated [p] from largest to smallest [l_l]
    P_l_cumsum = xr.concat([
        xr.zeros_like(P_l.isel({l.name + '_bin': 0})),
        P_l.cumsum(l.name + '_bin')
    ], dim=l.name + '_bin')

    if greaterthan:
        # Take the cumulative sum away from the total sum to reverse the order
        # Assign coordinates to match the contours (rather than the interfaces)
        P = (P_l.sum(l.name + '_bin') - P_l_cumsum).assign_coords(
            {l.name + '_bin': l_l_vals})
    else:
        P = P_l_cumsum.assign_coords({l.name + '_bin': l_l_vals})
    return P
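# A minimal usage sketch for calc_volumetric_cumsum (not from the original source):
# a volume census, i.e. the total volume (thickness * area) of water warmer than
# each temperature contour. Names, sizes, and values are illustrative assumptions.
import numpy as np
import xarray as xr

dims = ('zl', 'yh', 'xh')
shape = (10, 20, 30)
coords = {d: np.arange(s) for d, s in zip(dims, shape)}

temp = xr.DataArray(20 * np.random.rand(*shape), dims=dims, coords=coords, name='temp')
thickness = xr.DataArray(np.full(shape, 5.0), dims=dims, coords=coords)  # m
area = xr.DataArray(np.full(shape[1:], 1e8), dims=dims[1:],
                    coords={d: coords[d] for d in dims[1:]})              # m2

temp_contours = np.arange(-1, 22, 1.0)  # must span the full range of temp
V = calc_volumetric_cumsum(temp, thickness, temp_contours, weight=area)
# V["temp_bin"] holds the contour values; each entry is the volume with temp
# greater than that contour.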
def calc_G(l, dl, l_i_vals, area, plot=False):
    '''Mass transport across contours of [l].

    l : intensive variable, to evaluate transport across
    dl : summed tendency of (vertically integrated) l due to diffusive processes;
        e.g. heat tendency if l is temperature
    l_i_vals : interface values of l
    area : 2d distribution of horizontal area (dx*dy)'''

    # Get the spacing of contours of l
    delta_l_vals = np.diff(l_i_vals)

    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)

    # Integrate [dl] within layers of [l] and divide by [delta_l]
    G = histogram(l.where(~nanmask),
                  bins=[l_i_vals],
                  weights=(dl * area).where(~nanmask),
                  dim=['xh', 'yh', 'zl'],
                  block_size=1) / delta_l_vals
    if plot:
        G.plot()
    return G
def total_rebin_layerintegral(ds, bin_data, bins, dim, area, block_size='auto',
                              verbose=False):
    """Rebin dataset [ds] in multiple dimensions, and integrate terms within the
    new layers."""
    ds = ds.copy()
    ds_rebinned = xr.Dataset()
    for var in ds.data_vars:
        if ds[var].dtype == 'float':
            if verbose:
                print(var)
            nanmask = np.isnan(ds[var])
            ds_rebinned[var] = histogram(bin_data.where(~nanmask),
                                         bins=[bins],
                                         dim=dim,
                                         weights=(ds[var] * area).where(~nanmask),
                                         block_size=block_size)
    return ds_rebinned
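# A minimal usage sketch for total_rebin_layerintegral (not from the original
# source): rebin every float variable of a small dataset into temperature layers,
# integrating (variable * area) vertically within each layer. Names and values
# are illustrative assumptions.
import numpy as np
import xarray as xr

dims = ('zl', 'yh', 'xh')
shape = (10, 20, 30)
coords = {d: np.arange(s) for d, s in zip(dims, shape)}

temp = xr.DataArray(20 * np.random.rand(*shape), dims=dims, coords=coords, name='temp')
ds = xr.Dataset({
    'heat_tend': (dims, np.random.randn(*shape)),
    'salt_tend': (dims, np.random.randn(*shape)),
}, coords=coords)
area = xr.DataArray(np.full(shape[1:], 1e8), dims=dims[1:],
                    coords={d: coords[d] for d in dims[1:]})

temp_bins = np.arange(0, 21, 1.0)
ds_layers = total_rebin_layerintegral(ds, temp, temp_bins, dim=['zl'], area=area)
# ds_layers has dims (yh, xh, temp_bin): each variable integrated within
# temperature layers at every horizontal point.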
def calc_E_old(ds, l, l_i_vals, dl, c, area, z='depth', xdim='xh', ydim='yh',
               zldim='zl', zidim='zi', binning=None):
    r'''Evaluation of the transport of tracer [c] across contours of tracer [l].

    l and c should be given as strings and both be contained in Dataset ds.
    dl corresponds to the diffusive time tendency of the tracer *content* of l,
    i.e. the tendency of rho*l*h where h is layer thickness and rho is in situ
    density, thus in units of [kg m^-3]*[l tracer unit]*[m]*[s-1]. Then

        E(l') = \frac{1}{\Delta l} \iint_{l=l'} c \, dl \, dA

    This uses the fact that the tracer content tendency is equal to the
    rho * tracer tendency integrated within a layer (which is the formulation
    for wmt in Groeskamp et al., 2019).'''

    E = xr.Dataset()

    # Calculate tracer at layer centres from interface values
    l_l_vals = 0.5 * (l_i_vals[:-1] + l_i_vals[1:])
    # Calculate delta_tracer for each layer
    delta_l_vals = np.diff(l_i_vals)
    # Place in DataArrays
    l_i = xr.DataArray(l_i_vals, dims=['l_i'], coords={'l_i': l_i_vals})
    l_l = xr.DataArray(l_l_vals, dims=['l_l'], coords={'l_l': l_l_vals})
    delta_l = xr.DataArray(delta_l_vals, dims=['l_l'], coords={'l_l': l_l_vals})

    # Calculate cdl
    cdl = ds[c] * dl

    # Bin into layers of l, and sum up across space
    if binning is None:
        work = (cdl * area).sum(dim=[xdim, ydim])
        # rename the vertical (layer) dimension to the generic layer coordinate
        work = work.rename({zldim: 'l_l'})
    elif binning == 'xhistogram':
        nanmask = np.isnan(cdl)
        work = histogram(ds[l].where(~nanmask),
                         bins=[l_i.values],
                         dim=['xh', 'yh', 'zl', 'time'],
                         weights=(cdl * area).where(~nanmask),
                         block_size=None)
        work = work.rename({l + '_bin': 'l_l'})
    elif binning == 'busecke':
        l_depth_i = vc.linear_interpolation_regrid(ds[z + '_l'],
                                                   ds[l],
                                                   l_i,
                                                   z_bounds=ds[z + '_i'],
                                                   target_value_dim='l_i',
                                                   z_bounds_dim=zidim,
                                                   z_dim=zldim)
        cdl_remapped = conservative_remap(cdl,
                                          z_bnds_source=ds[z + '_i'],
                                          z_bnds_target=l_depth_i,
                                          z_dim=zldim,
                                          z_bnd_dim=zidim,
                                          z_bnd_dim_target='regridded',
                                          mask=True)
        work = (cdl_remapped * area).sum(dim=[xdim, ydim])
        work = work.rename({'remapped': 'l_l'})
        E['l_depth_i'] = l_depth_i

    E['E'] = work / delta_l
    E['dE'] = E['E'].interp(l_l=l_i, method='linear').diff('l_i').assign_coords(
        l_i=l_l.values).drop('l_l').rename({'l_i': 'l_l'})
    return E
dsdx_slice = dsdx.isel(
    eta_rho=etaslice, xi_rho=xislice,
    s_rho=-1).sel(ocean_time=slice(str(y) + '-06-15', str(y) + '-07-25'))
dsdy_slice = dsdy.isel(
    eta_rho=etaslice, xi_rho=xislice,
    s_rho=-1).sel(ocean_time=slice(str(y) + '-06-15', str(y) + '-07-25'))

sgradbins = np.linspace(-0.001, 0.001, 100)
dsdx_slice.name = 'dsdx'
dsdy_slice.name = 'dsdy'

# We need to remove the grid attributes from the variables; otherwise saving will
# generate an annoying error because of an invalid character.
dsdx_slice.attrs = ''
dsdy_slice.attrs = ''

print('Computing histogram')
dsdx_hist = histogram(dsdx_slice, bins=[sgradbins], density=True)
dsdy_hist = histogram(dsdy_slice, bins=[sgradbins], density=True)
dsdx_hist.attrs = ''
dsdy_hist.attrs = ''

print('Saving histogram')
dsdx_hist.to_netcdf(
    '/home/dylan/Variability/histograms/dsdx/histograms_dsdx_%i.nc' % y)
dsdy_hist.to_netcdf(
    '/home/dylan/Variability/histograms/dsdy/histograms_dsdy_%i.nc' % y)
def calculate_AMOC_sigma_z(domain, ds, fn=None):
    """ calculate the AMOC in depth and density space """
    assert domain in ['ocn', 'ocn_low']
    for q in ['PD', 'VVEL', 'DXT', 'DYT', 'DXU', 'DYU', 'REGION_MASK']:
        assert q in ds

    (grid, ds_) = pop_tools.to_xgcm_grid_dataset(ds)
    ds_['DZU'] = xr_DZ_xgcm(domain=domain, grid='U')

    metrics = {
        ('X'): ['DXT', 'DXU'],  # X distances
        ('Y'): ['DYT', 'DYU'],  # Y distances
        ('Z'): ['DZU'],         # Z distances
    }
    coords = {
        'X': {'center': 'nlon_t', 'right': 'nlon_u'},
        'Y': {'center': 'nlat_t', 'right': 'nlat_u'},
        'Z': {'center': 'z_t', 'left': 'z_w_top', 'right': 'z_w_bot'}
    }
    grid = xgcm.Grid(ds_, metrics=metrics, coords=coords)

    print('merged annual datasets do not convert to U/T-lat/lons')
    if 'nlat' in ds_.VVEL.dims:
        rn = {'nlat': 'nlat_u', 'nlon': 'nlon_u'}
        ac = {'nlat_u': ds_.nlat_u, 'nlon_u': ds_.nlon_u}
        ds_['VVEL'] = ds_.VVEL.rename(rn).assign_coords(ac)
    if 'nlat' in ds_.PD.dims:
        rn = {'nlat': 'nlat_t', 'nlon': 'nlon_t'}
        ac = {'nlat_t': ds_.nlat_t, 'nlon_t': ds_.nlon_t}
        ds_['PD'] = ds_.PD.rename(rn).assign_coords(ac)

    print('interpolating density to UU point')
    ds_['PD'] = grid.interp(grid.interp(ds_['PD'], 'X'), 'Y')

    print('interpolating REGION_MASK to UU point')
    fn_MASK = f'{path_prace}/MOC/AMOC_MASK_uu_{domain}.nc'
    if os.path.exists(fn_MASK):
        AMOC_MASK_uu = xr.open_dataarray(fn_MASK)
    else:
        MASK_uu = grid.interp(grid.interp(ds_.REGION_MASK, 'Y'), 'X')
        AMOC_MASK_uu = xr.DataArray(np.in1d(
            MASK_uu, [-12, 6, 7, 8, 9, 11, 12]).reshape(MASK_uu.shape),
                                    dims=MASK_uu.dims,
                                    coords=MASK_uu.coords)
        AMOC_MASK_uu.to_netcdf(fn_MASK)

    print('AMOC(y,z);  [cm^3/s] -> [Sv]')
    AMOC_yz = (grid.integrate(
        grid.cumint(ds_.VVEL.where(AMOC_MASK_uu), 'Z', boundary='fill'), 'X') /
               1e12)
    # AMOC_yz = (ds_.VVEL*ds_.DZU*ds_.DXU).where(AMOC_MASK_uu).sum('nlon_u').cumsum('z_t')/1e12
    AMOC_yz = AMOC_yz.rename({'z_w_top': 'z_t'}).assign_coords({'z_t': ds.z_t})
    AMOC_yz.name = 'AMOC(y,z)'

    print('AMOC(sigma_0,z);  [cm^3/s] -> [Sv]')
    if int(ds_.PD.isel(z_t=0).mean().values) == 0:
        PD, PDbins = ds_.PD * 1000, np.arange(-10, 7, .05)
    if int(ds_.PD.isel(z_t=0).mean().values) == 1:
        PD, PDbins = (ds_.PD - 1) * 1000, np.arange(5, 33, .05)

    print('histogram')
    weights = ds_.VVEL.where(AMOC_MASK_uu) * ds_.DZU * ds_.DXU / 1e12
    AMOC_sz = histogram(PD, bins=[PDbins], dim=['z_t'],
                        weights=weights).sum('nlon_u', skipna=True).cumsum('PD_bin').T
    AMOC_sz.name = 'AMOC(y,PD)'

    # output to file
    if fn is not None:
        xr.merge([AMOC_yz, AMOC_sz]).to_netcdf(fn)
    return AMOC_yz, AMOC_sz
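# A minimal sketch (not from the original source) of the density-space binning step
# used above: bin meridional transport into potential-density classes with a
# weighted histogram over depth, then cumulatively sum over the density bins.
# All names, sizes, and values are illustrative assumptions.
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

dims = ('z_t', 'nlat_u', 'nlon_u')
shape = (15, 8, 12)
coords = {d: np.arange(s) for d, s in zip(dims, shape)}

PD = xr.DataArray(26 + 2 * np.random.rand(*shape), dims=dims, coords=coords, name='PD')
transport = xr.DataArray(np.random.randn(*shape), dims=dims, coords=coords)  # e.g. VVEL*DZU*DXU

PDbins = np.arange(25, 29, 0.1)
MOC_s = histogram(PD, bins=[PDbins], dim=['z_t'],
                  weights=transport).sum('nlon_u').cumsum('PD_bin')
# MOC_s(nlat_u, PD_bin): zonally summed transport accumulated over density classes.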
def x_hist(data, bins):
    data_arr = xr.DataArray(data, dims=["a", "b"], name="aaa")
    res = xh.histogram(data_arr, bins=bins, dim=["b"])
    return res.data
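# A minimal usage sketch for x_hist (not from the original source): histogram each
# row of a 2-D array along its second dimension, returning one histogram per row.
# Assumes x_hist and its imports (xr, xh) from the snippet above are in scope.
import numpy as np

data = np.random.randn(4, 1000)      # 4 rows, 1000 samples each
bins = [np.linspace(-4, 4, 21)]      # one bin array per input DataArray

counts = x_hist(data, bins)
print(counts.shape)                  # (4, 20): 20 bins for each of the 4 rows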
xislice = slice(270, 405)

# Compute vertical relative vorticity
rv = xroms.relative_vorticity(ds.u, ds.v, ds.u.attrs['grid'])

# Interpolate to the rho points.
rv = grid.interp(rv, 'Z')
rv = rv.isel(eta_v=etaslice, xi_u=xislice, s_rho=-1)
# Note we're going to need to select the second-to-last vertical value since it's
# the w-points

fx = grid.interp(ds.f, 'X', boundary='extend')
fxy = grid.interp(fx, 'Y', boundary='extend')
f = fxy.isel(eta_v=etaslice, xi_u=xislice)

RVn = rv / f
zetabins = np.linspace(-2.5, 2.5, 100)
RVn.name = 'relative_vorticity_n'
RVn_slice = RVn.sel(ocean_time=slice(str(y) + '-06-15', str(y) + '-07-25'))

# We need to remove the grid attributes from the variables; otherwise saving will
# generate an annoying error because of an invalid character.
RVn_slice.attrs = ''

print('Computing histogram')
zetaf_hist = histogram(RVn_slice, bins=[zetabins], density=True)

print('Saving histogram')
zetaf_hist.to_netcdf(
    '/home/dylan/Variability/histograms/rho/histograms_zetaf_%i.nc' % y)
q_u_total = salt_u * ds.u * dA_u
q_u_box = q_u_total.isel(xi_u=xislice, eta_rho=etaslice)
salt_u_box = salt_u.isel(xi_u=xislice, eta_rho=etaslice)

q_u_right = q_u_box.isel(xi_u=-1)
q_u_right.name = 'q_right'
salt_u_right = salt_u_box.isel(xi_u=-1)
salt_u_right.name = 'salt_right'

q_u_left = q_u_box.isel(xi_u=0)
q_u_left.name = 'q_left'
salt_u_left = salt_u_box.isel(xi_u=0)
salt_u_left.name = 'salt_left'

qlefthist = histogram(salt_u_left,
                      bins=[salt_bins],
                      weights=q_u_left,
                      dim=['s_rho', 'eta_rho'])

qleft = qlefthist.sum(axis=0)
Qleft = qlefthist.sum(axis=0) * dsalt
Qleftin = qlefthist.where(qleft > 0).sum(axis=1) * dsalt
Qleftout = qlefthist.where(qleft < 0).sum(axis=1) * dsalt

qleft.to_netcdf('qleft_%02d.nc' % m)
print('qleft_%02d.nc DONE!' % m, flush=True)
Qleft.to_netcdf('Qleft_%02d.nc' % m)
Qleftin.to_netcdf('Qleftin_%02d.nc' % m)
Qleftout.to_netcdf('Qleftout_%02d.nc' % m)
print('Qleftout_%02d.nc' % m, flush=True)

qrighthist = histogram(salt_u_right, bins=[salt_bins],
    .dropna('npart')
trajY = cm.interp_to_coords(trajT, ctr.loc[rng], Zeq.loc[rng], interpDim='Z') \
    .transpose('npart', 'time') \
    .reset_coords(drop=True)

diffY = trajY.diff('time')**2 / 300

# remove nan according to trajY
diffY, xpos = xr.align(diffY, xpos)
diffY, zpos = xr.align(diffY, zpos)

hsum = histogram(zpos, xpos, dim=['npart'], weights=np.abs(diffY),
                 bins=[binZ, binX])
hcnt = histogram(zpos, xpos, dim=['npart'], bins=[binZ, binX])

histSum.append(hsum)
histCnt.append(hcnt)

# hstSum = xr.merge(histSum)
# hstCnt = xr.merge(histCnt)

#%% write files
for data in histSum:
    for tim in data:
        tt = int(tim.time.values / 300)
        print(tt)