def eddy_decomp(var, nt, lon1, lon2, taxis=0):
    """Decompose variable into mean and eddy fields.

    Returns an xray.Dataset with three components of `var`:
    <name>_AVG (rolling time mean, zonally averaged over lon1-lon2),
    <name>_ST (stationary eddy: time mean minus its zonal mean), and
    <name>_TR (transient eddy: departure from the rolling time mean).
    """
    lon_dim = atm.get_coord(var, 'lon', 'name')
    time_label = 'Time mean (%d-%s rolling)' % (nt, var.dims[taxis])
    edge_labels = atm.latlon_labels([lon1, lon2], 'lon', deg_symbol=False)
    zonal_label = 'zonal mean (' + '-'.join(edge_labels) + ')'
    name, attrs, coords, dims = atm.meta(var)

    # Rolling time mean, then average over the selected longitude band
    var_tbar = atm.rolling_mean(var, nt, axis=taxis, center=True)
    var_zbar = atm.subset(var_tbar, {lon_dim : (lon1, lon2)})
    var_zbar = var_zbar.mean(dim=lon_dim)
    var_zbar.attrs = attrs

    key_avg, key_st, key_tr = name + '_AVG', name + '_ST', name + '_TR'
    comp = xray.Dataset()
    comp[key_avg] = var_zbar
    comp[key_avg].attrs['component'] = time_label + ', ' + zonal_label
    comp[key_st] = var_tbar - var_zbar
    comp[key_st].attrs = attrs
    comp[key_st].attrs['component'] = 'Stationary eddy'
    comp[key_tr] = var - var_tbar
    comp[key_tr].attrs = attrs
    comp[key_tr].attrs['component'] = 'Transient eddy'
    return comp
def composite(data, compdays, return_avg=True, daynm='Dayrel'):
    """Return composite data fields for selected days.

    Parameters
    ----------
    data : xray.DataArray
        Daily data to composite.
    compdays : dict of arrays or lists
        Lists of days to include in each composite.
    return_avg : bool, optional
        If True, return the mean of the selected days, otherwise
        return the extracted individual days for each composite.
    daynm : str, optional
        Name of day dimension in data.

    Returns
    -------
    comp : dict of xray.DataArrays
        Composite data fields for each key in compdays.keys().
    """
    _, attrs, _, _ = atm.meta(data)
    comp = collections.OrderedDict()
    for key, days in compdays.items():
        # Extract the requested days for this composite
        selected = atm.subset(data, {daynm : (days, None)})
        if return_avg:
            selected = selected.mean(dim=daynm)
            selected.attrs = attrs
            selected.attrs[daynm] = days
        comp[key] = selected
    return comp
def rolling(data, nroll):
    """Return data smoothed with an nroll-point centered rolling mean.

    For 2-D input the first axis is treated as years and each row is
    smoothed independently along the last axis; 1-D input is smoothed
    directly.

    NOTE(review): uses the legacy `pd.rolling_mean` API, which was
    removed in newer pandas versions — this code assumes an old pandas.
    """
    _, _, coords, _ = atm.meta(data)
    shape = data.shape
    smoothed = np.zeros(shape)
    if len(shape) > 1:
        # Smooth each year's timeseries separately
        for iyr in range(shape[0]):
            smoothed[iyr] = pd.rolling_mean(data.values[iyr], nroll,
                                            center=True)
    else:
        smoothed = pd.rolling_mean(data.values, nroll, center=True)
    return xray.DataArray(smoothed, coords=coords)
def daily_rel2onset(data, d_onset, npre, npost):
    """Return subset of daily data aligned relative to onset day.

    Parameters
    ----------
    data : xray.DataArray
        Daily data.
    d_onset : ndarray
        Array of onset date (day of year) for each year.
    npre, npost : int
        Number of days before and after onset to extract.

    Returns
    -------
    data_out : xray.DataArray
        Subset of N days of daily data for each year, where
        N = npre + npost + 1 and the day dimension is
        dayrel = day - d_onset.
    """
    name, attrs, coords, dimnames = atm.meta(data)
    yearnm = atm.get_coord(data, 'year', 'name')
    daynm = atm.get_coord(data, 'day', 'name')
    years = atm.makelist(atm.get_coord(data, 'year'))

    # Accept either a DataArray or a scalar/list of onset days
    if isinstance(d_onset, xray.DataArray):
        d_onset = d_onset.values
    else:
        d_onset = atm.makelist(d_onset)

    relnm = daynm + 'rel'
    data_out = None
    for i, yr in enumerate(years):
        onset_day = d_onset[i]
        sub = atm.subset(data, {yearnm : (yr, None),
                                daynm : (onset_day - npre,
                                         onset_day + npost)})
        # Re-index days relative to this year's onset
        sub = sub.rename({daynm : relnm})
        sub[relnm] = sub[relnm] - onset_day
        sub[relnm].attrs['long_name'] = 'Day of year relative to onset day'
        if data_out is None:
            data_out = sub
        else:
            data_out = xray.concat([data_out, sub], dim=yearnm)
    data_out.attrs['d_onset'] = d_onset
    return data_out
def applymask(data, mask):
    """Return data with points selected by mask replaced by NaN."""
    _, _, coords, _ = atm.meta(data)
    # Broadcast the mask up to the full shape of data
    mask_full = atm.biggify(mask, data, tile=True)
    masked_vals = np.ma.masked_array(data, mask_full).filled(np.nan)
    return xray.DataArray(masked_vals, coords=coords)
def onset_HOWI(uq_int, vq_int, npts=50, nroll=7, days_pre=range(138, 145),
               days_post=range(159, 166), yearnm='year', daynm='day',
               maxbreak=7):
    """Return monsoon Hydrologic Onset/Withdrawal Index.

    Parameters
    ----------
    uq_int, vq_int : xray.DataArrays
        Vertically integrated moisture fluxes.
    npts : int, optional
        Number of points to use to define HOWI index.
    nroll : int, optional
        Number of days for rolling mean.
    days_pre, days_post : list of ints, optional
        Default values correspond to May 18-24 and June 8-14 (numbered
        as non-leap year).
    yearnm, daynm : str, optional
        Name of year and day dimensions in DataArray
    maxbreak:
        Maximum number of days with index <=0 to consider a break in
        monsoon season rather than end of monsoon season.

    Returns
    -------
    howi : xray.Dataset
        HOWI daily timeseries for each year and monsoon onset and
        retreat days for each year.

    Reference
    ---------
    J. Fasullo and P. J. Webster, 2003: A hydrological definition of
    Indian monsoon onset and withdrawal. J. Climate, 16, 3200-3211.

    Notes
    -----
    In some years the HOWI index can give a bogus onset or bogus retreat
    when the index briefly goes above or below 0 for a few days. To deal
    with these cases, I'm defining the monsoon season as the longest set
    of consecutive days with HOWI that is positive or has been negative
    for no more than `maxbreak` number of days (monsoon break).
    """
    _, _, coords, _ = atm.meta(uq_int)
    latnm = atm.get_coord(uq_int, 'lat', 'name')
    lonnm = atm.get_coord(uq_int, 'lon', 'name')

    # Assemble fluxes and their magnitude into one working Dataset
    ds = xray.Dataset()
    ds['uq'] = uq_int
    ds['vq'] = vq_int
    ds['vimt'] = np.sqrt(ds['uq']**2 + ds['vq']**2)

    # Climatological moisture fluxes
    dsbar = ds.mean(dim=yearnm)
    ds['uq_bar'], ds['vq_bar'] = dsbar['uq'], dsbar['vq']
    ds['vimt_bar'] = np.sqrt(ds['uq_bar']**2 + ds['vq_bar']**2)

    # Pre- and post- monsoon climatology composites
    dspre = atm.subset(dsbar, {daynm : (days_pre, None)}).mean(dim=daynm)
    dspost = atm.subset(dsbar, {daynm : (days_post, None)}).mean(dim=daynm)
    dsdiff = dspost - dspre
    ds['uq_bar_pre'], ds['vq_bar_pre'] = dspre['uq'], dspre['vq']
    ds['uq_bar_post'], ds['vq_bar_post'] = dspost['uq'], dspost['vq']
    ds['uq_bar_diff'], ds['vq_bar_diff'] = dsdiff['uq'], dsdiff['vq']

    # Magnitude of vector difference
    vimt_bar_diff = np.sqrt(dsdiff['uq']**2 + dsdiff['vq']**2)
    ds['vimt_bar_diff'] = vimt_bar_diff

    # Top N difference vectors
    def top_n(data, n):
        """Return a mask with the highest n values in 2D array."""
        vals = data.copy()
        mask = np.ones(vals.shape, dtype=bool)
        for k in range(n):
            # Mark the current maximum, then exclude it from later passes
            i, j = np.unravel_index(np.nanargmax(vals), vals.shape)
            mask[i, j] = False
            vals[i, j] = np.nan
        return mask

    # Mask to extract top N points (False at the selected points)
    mask = top_n(vimt_bar_diff, npts)
    ds['mask'] = xray.DataArray(mask, coords={latnm: coords[latnm],
                                              lonnm: coords[lonnm]})

    # Apply mask to DataArrays
    def applymask(data, mask):
        # NaN-out everything except the selected top-N points
        _, _, coords, _ = atm.meta(data)
        maskbig = atm.biggify(mask, data, tile=True)
        vals = np.ma.masked_array(data, maskbig).filled(np.nan)
        data_out = xray.DataArray(vals, coords=coords)
        return data_out

    ds['vimt_bar_masked'] = applymask(ds['vimt_bar'], mask)
    ds['vimt_bar_diff_masked'] = applymask(vimt_bar_diff, mask)
    ds['uq_masked'] = applymask(ds['uq'], mask)
    ds['vq_masked'] = applymask(ds['vq'], mask)
    ds['vimt_masked'] = np.sqrt(ds['uq_masked']**2 + ds['vq_masked']**2)

    # Timeseries data averaged over selected N points
    ds['howi_clim_raw'] = ds['vimt_bar_masked'].mean(dim=latnm).mean(dim=lonnm)
    ds['howi_raw'] = ds['vimt_masked'].mean(dim=latnm).mean(dim=lonnm)

    # Normalize to [-1, 1] using the climatological min/max
    howi_min = ds['howi_clim_raw'].min().values
    howi_max = ds['howi_clim_raw'].max().values

    def applynorm(data):
        return 2 * (data - howi_min) / (howi_max - howi_min) - 1

    ds['howi_norm'] = applynorm(ds['howi_raw'])
    ds['howi_clim_norm'] = applynorm(ds['howi_clim_raw'])

    # Apply n-day rolling mean
    # NOTE(review): pd.rolling_mean is a legacy pandas API (removed in
    # newer pandas) — this assumes an old pandas version.
    def rolling(data, nroll):
        center = True
        _, _, coords, _ = atm.meta(data)
        dims = data.shape
        vals = np.zeros(dims)
        if len(dims) > 1:
            # Smooth each year (first axis) independently
            nyears = dims[0]
            for y in range(nyears):
                vals[y] = pd.rolling_mean(data.values[y], nroll,
                                          center=center)
        else:
            vals = pd.rolling_mean(data.values, nroll, center=center)
        data_out = xray.DataArray(vals, coords=coords)
        return data_out

    ds['howi_norm_roll'] = rolling(ds['howi_norm'], nroll)
    ds['howi_clim_norm_roll'] = rolling(ds['howi_clim_norm'], nroll)

    # Index timeseries dataset
    howi = xray.Dataset()
    howi['tseries'] = ds['howi_norm_roll']
    howi['tseries_clim'] = ds['howi_clim_norm_roll']

    # Find zero crossings for onset and withdrawal indices
    nyears = len(howi[yearnm])
    onset = np.zeros(nyears, dtype=int)
    retreat = np.zeros(nyears, dtype=int)
    for y in range(nyears):
        # List of days with positive HOWI index
        pos = howi[daynm].values[howi['tseries'][y].values > 0]

        # In case of extra zero crossings, find the longest set of days
        # with positive index
        splitpos = atm.splitdays(pos)
        lengths = np.array([len(v) for v in splitpos])
        imonsoon = lengths.argmax()
        monsoon = splitpos[imonsoon]

        # In case there is a break in the monsoon season, check the
        # sets of days before and after and add to monsoon season
        # if applicable
        if imonsoon > 0:
            predays = splitpos[imonsoon - 1]
            if monsoon.min() - predays.max() <= maxbreak:
                # Fill the gap so the season is one contiguous run
                predays = np.arange(predays.min(), monsoon.min())
                monsoon = np.concatenate([predays, monsoon])
        if imonsoon < len(splitpos) - 1:
            postdays = splitpos[imonsoon + 1]
            if postdays.min() - monsoon.max() <= maxbreak:
                postdays = np.arange(monsoon.max() + 1, postdays.max() + 1)
                monsoon = np.concatenate([monsoon, postdays])

        # Onset and retreat days
        onset[y] = monsoon[0]
        retreat[y] = monsoon[-1] + 1

    howi['onset'] = xray.DataArray(onset, coords={yearnm : howi[yearnm]})
    howi['retreat'] = xray.DataArray(retreat, coords={yearnm : howi[yearnm]})
    howi.attrs = {'npts' : npts, 'nroll' : nroll, 'maxbreak' : maxbreak,
                  'days_pre' : days_pre, 'days_post' : days_post}

    return howi, ds
tseries[onset_nm] = index['tseries'] # ENSO enso = utils.get_enso_indices(years) enso = xray.DataArray(enso[enso_nm]).rename({'Year' : 'year'}) # ---------------------------------------------------------------------- # Climatology index_clim = index.mean(dim='year') tseries_clim = tseries.mean(dim='year') enso_clim = enso.mean(dim='year').values # Tile the climatology to each year for plot_tseries_together vals = atm.biggify(tseries_clim[onset_nm], index['tseries'].values, tile=True) _, _, coords, dims = atm.meta(index['tseries']) ts_clim = xray.DataArray(vals, name=tseries_clim[onset_nm].name, coords=coords, dims=dims) tseries[onset_nm + '_clim'] = ts_clim # ---------------------------------------------------------------------- # Timeseries relative to onset, shifted to 0 at onset day npre, npost = 0, 200 tseries_rel = xray.Dataset() for key in tseries.data_vars: tseries_rel[key] = daily_rel2onset(tseries[key], onset, npre, npost, yearnm='year', daynm='day') if key.startswith('CHP') or key.endswith('ACC'): tseries_rel[key] = tseries_rel[key] - tseries_rel[key][:, 0]
# See testing/testing-indices-onset_TT.py for details. # Select vertical pressure level to use, or None to use 200-600mb # vertical mean plev = None # Read daily data from each year if plev is None: T = atm.combine_daily_years('Tbar', ttfiles, years, yearname='year') else: T = atm.combine_daily_years('T', ttfiles, years, yearname='year', subset_dict={'plev' : (plev, plev)}) # Remove extra dimension (vertical) pdim = atm.get_coord(T, 'plev', 'dim') pname = atm.get_coord(T, 'plev', 'name') name, attrs, coords, dims = atm.meta(T) dims = list(dims) dims.pop(pdim) coords = atm.odict_delete(coords, pname) T = xray.DataArray(np.squeeze(T.values), dims=dims, coords=coords, name=name, attrs=attrs) # Calculate index north=(5, 30, 40, 100) south=(-15, 5, 40, 100) index['TT'] = indices.onset_TT(T, north=north, south=south) # Some weirdness going on in 1991, for now just set to NaN for nm in ['ttn', 'tts', 'tseries']: vals = index['TT'][nm].values vals = np.ma.masked_array(vals, abs(vals) > 1e30).filled(np.nan)
def calc_fluxes(year, month,
                var_ids=['u', 'q', 'T', 'theta', 'theta_e', 'hgt'],
                concat_dim='TIME', scratchdir=None, keepscratch=False,
                verbose=True):
    """Return the monthly mean of MERRA daily fluxes.

    Reads MERRA daily data from OpenDAP urls, computes fluxes, and
    returns the monthly mean of the daily variable and its zonal and
    meridional fluxes.

    Parameters
    ----------
    year, month : int
        Numeric year and month (1-12).
    var_ids : list of str, optional
        IDs of variables to include.
    concat_dim : str, optional
        Name of dimension for concatenation.
    scratchdir : str, optional
        Directory path to store temporary files while processing data.
        If omitted, the current working directory is used.
    keepscratch : bool, optional
        If True, scratch files are kept in scratchdir. Otherwise they
        are deleted.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.Dataset
        Mean of daily data and the mean of the daily zonal fluxes
        (u * var) and meridional fluxes (v * var), for each variable
        in var_ids.
    """
    # Translate requested IDs to dataset variable names; u and v are
    # always needed to form the fluxes
    nms = [get_varname(nm) for nm in atm.makelist(var_ids)]
    u_nm, v_nm = get_varname('u'), get_varname('v')
    nms.extend([u_nm, v_nm])
    # theta/theta_e are derived, so pull in the raw fields they require
    if 'theta' in nms:
        nms.append(get_varname('T'))
    if 'theta_e' in nms:
        nms.extend([get_varname('T'), get_varname('q')])
    nms = set(nms)

    days = range(1, atm.days_this_month(year, month) + 1)

    def scratchfile(nm, k, year, month, day):
        # Temporary per-level, per-day netCDF filename
        filestr = '%s_level%d_%d%02d%02d.nc' % (nm, k, year, month, day)
        if scratchdir is not None:
            filestr = scratchdir + '/' + filestr
        return filestr

    # Read metadata from one file to get pressure-level array
    dataset = 'p_daily'
    url = url_list(dataset, return_dict=False)[0]
    with xray.open_dataset(url) as ds:
        pname = atm.get_coord(ds, 'plev', 'name')
        plev = atm.get_coord(ds, 'plev')
        # Pressure levels in Pa for theta/theta_e calcs
        p_units = atm.pres_units(ds[pname].units)
        pres = atm.pres_convert(plev, p_units, 'Pa')

    # Get daily data (raw and calculate extended variables)
    def get_data(nms, pres, year, month, day, concat_dim, subset_dict,
                 verbose):
        # Lists of raw and extended variables
        ids = list(nms)
        ext = []
        for var in ['theta', 'theta_e']:
            if var in ids:
                ext.append(var)
                ids.remove(var)

        # Read raw data and calculate extended variables
        data = read_daily(ids, year, month, day, concat_dim=concat_dim,
                          subset_dict=subset_dict, verbose=verbose)
        if 'theta' in ext:
            print_if('Computing potential temperature', verbose)
            T = data[get_varname('T')]
            data['theta'] = atm.potential_temp(T, pres)
        if 'theta_e' in ext:
            print_if('Computing equivalent potential temperature', verbose)
            T = data[get_varname('T')]
            q = data[get_varname('q')]
            data['theta_e'] = atm.equiv_potential_temp(T, pres, q)
        return data

    # Iterate over vertical levels
    for k, p in enumerate(plev):
        subset_dict = {pname : (p, p)}
        print_if('Pressure-level %.1f' % p, verbose)

        files = []
        for day in days:
            # Read data for this level and day
            ds = get_data(nms, pres[k], year, month, day, concat_dim,
                          subset_dict, verbose)

            # Compute fluxes
            print_if('Computing fluxes', verbose)
            u = ds[get_varname('u')]
            v = ds[get_varname('v')]
            for nm in var_ids:
                var = ds[get_varname(nm)]
                varname, attrs, _, _ = atm.meta(var)
                u_var = u * var
                v_var = v * var
                # Name and unit metadata for the flux products
                u_var.name = get_varname(u_nm) + '*' + var.name
                units = var.attrs['units'] + ' * ' + u.attrs['units']
                u_var.attrs['units'] = units
                v_var.name = get_varname(v_nm) + '*' + var.name
                v_var.attrs['units'] = units
                ds[u_var.name] = u_var
                ds[v_var.name] = v_var

            # Save to temporary scratch file
            filenm = scratchfile('fluxes', k, year, month, day)
            files.append(filenm)
            print_if('Saving to scratch file ' + filenm, verbose)
            ds.to_netcdf(filenm)

        # Concatenate daily scratch files
        ds = atm.load_concat(files)
        if not keepscratch:
            for f in files:
                os.remove(f)

        # Compute monthly means, stacking levels along the pressure dim
        print_if('Computing monthly means', verbose)
        if k == 0:
            data = ds.mean(dim=concat_dim)
        else:
            data = xray.concat([data, ds.mean(dim=concat_dim)], dim=pname)

    # Restore per-variable attrs lost in the means/concat
    for var in data.data_vars:
        data[var].attrs = ds[var].attrs

    return data
# ---------------------------------------------------------------------- # Data and calcs # Onset index with xray.open_dataset(indfile) as index: index.load() index = index.sel(year=years) d0 = index[ind_nm].values # Precip data if pcp_nm == 'cmap': pcp = precipdat.read_cmap(pcpfiles, yearmin=min(years), yearmax=max(years)) # Interpolate to daily resolution name, attrs, coords, dimnames = atm.meta(pcp) days = np.arange(3, 364) interp_func = scipy.interpolate.interp1d(pcp['day'], pcp, axis=1) vals = interp_func(days) coords['day'] = xray.DataArray(days, coords={'day' : days}) pcp = xray.DataArray(vals, dims=dimnames, coords=coords, name=name, attrs=attrs) else: pcp = atm.combine_daily_years(None, pcpfiles, years, yearname='year', subset_dict=subset_dict) # Wrap from following year to get extended daily range daymin = min(d0) - npre daymax = max(d0) + npost pcp = utils.wrapyear_all(pcp, daymin=daymin, daymax=daymax)