def wrapyear_all(data, daymin, daymax):
    """Wrap daily data to an extended day range for every year in `data`.

    Each year is processed separately: the year itself and, when present
    in the data, the years immediately before and after are handed to
    `wrapyear`; the result is given a leading year dimension, reindexed
    onto the days ``daymin..daymax`` (inclusive), and the per-year results
    are concatenated along the year dimension.

    Parameters
    ----------
    data : xray.DataArray
        Daily data with year and day coordinates (looked up through
        `atm.get_coord`).
    daymin, daymax : int
        First and last day of the output day range.

    Returns
    -------
    data_out : xray.DataArray
        Wrapped data for all years.
    """
    def year_slice(yr, available):
        # None tells `wrapyear` that this neighbouring year is missing.
        if yr not in available:
            return None
        return atm.subset(data, {'year' : (yr, yr)})

    daynm = atm.get_coord(data, 'day', 'name')
    day_values = np.arange(daymin, daymax + 1)
    days = xray.DataArray(day_values, name=daynm, coords={daynm : day_values})
    years = atm.get_coord(data, 'year')
    yearnm = atm.get_coord(data, 'year', 'name')

    for y, year in enumerate(years):
        var = year_slice(year, years)
        var_prev = year_slice(year - 1, years)
        var_next = year_slice(year + 1, years)

        wrapped = wrapyear(var, var_prev, var_next, daymin, daymax, year)
        wrapped = atm.expand_dims(wrapped, 'year', year, axis=0)
        wrapped = wrapped.reindex_like(days)

        # The first year seeds the output; later years are appended to it.
        data_out = (wrapped if y == 0
                    else xray.concat([data_out, wrapped], dim=yearnm))
    return data_out
def concat_plevs(datadir, year, varnm, plevs, pdim, version):
    """Read `varnm` at each level in `plevs` and join along 'Height'.

    For every level, the file named by
    ``datafile(datadir, varnm, plev, year, version)`` is opened, the
    variable is loaded into memory, and a 'Height' dimension holding the
    level value is inserted at axis 1.  The per-level arrays are then
    concatenated along 'Height' in the order given by `plevs`.

    NOTE(review): `pdim` is accepted but never used; the position of the
    new dimension is fixed at axis 1.
    """
    pname = 'Height'
    for i, plev in enumerate(plevs):
        filenm = datafile(datadir, varnm, plev, year, version)
        print('Reading ' + filenm)
        with xray.open_dataset(filenm) as ds:
            # Load before the file is closed, then add the level dimension.
            level_var = ds[varnm].load()
            level_var = atm.expand_dims(level_var, pname, plev, axis=1)
        var = (level_var if i == 0
               else xray.concat([var, level_var], dim=pname))
    return var
def get_data_rel(varid, plev, years, datafiles, data, onset, npre, npost):
    """Return daily data aligned relative to onset/withdrawal day.

    The day range requested from `get_daily_data` spans
    ``min(onset) - npre`` to ``max(onset) + npost``.  When a single year
    of a 'basic' variable is requested and that range runs past the start
    or end of the year, the neighbouring year's file (its name obtained by
    substituting the year in the first data file name) is added if it
    exists on disk, and the extra year is dropped again after reading.

    Variables whose `var_type` is not 'basic' are returned without the
    final alignment step.
    """
    years = atm.makelist(years)
    onset = atm.makelist(onset)
    datafiles = atm.makelist(datafiles)
    daymin, daymax = min(onset) - npre, max(onset) + npost

    # For a single year, add extra year before/after, if necessary
    wrap_single = False
    years_in = years
    if len(years) == 1 and var_type(varid) == 'basic':
        year, filenm = years[0], datafiles[0]

        if daymin < 1:
            wrap_single = True
            file_pre = filenm.replace(str(year), str(year - 1))
            if os.path.isfile(file_pre):
                years_in = [year - 1] + years_in
                datafiles = [file_pre] + datafiles

        days_in_year = len(atm.season_days('ANN', year))
        if daymax > days_in_year:
            wrap_single = True
            file_post = filenm.replace(str(year), str(year + 1))
            if os.path.isfile(file_post):
                years_in = years_in + [year + 1]
                datafiles = datafiles + [file_post]

    var = get_daily_data(varid, plev, years_in, datafiles, data,
                         daymin=daymin, daymax=daymax)

    # Get rid of extra years
    if wrap_single:
        only_year = years[0]
        var = atm.subset(var, {'year' : (only_year, only_year)})

    # Make sure year dimension is included for single year
    if len(years) == 1 and 'year' not in var.dims:
        var = atm.expand_dims(var, 'year', years[0], axis=0)

    # Calc variables are already aligned, so only basic ones go through
    # the onset alignment below.
    if var_type(varid) != 'basic':
        return var

    print('Aligning data relative to onset day')
    return daily_rel2onset(var, onset, npre, npost)
with xray.open_dataset(datafile) as ds: data = utils.wrapyear(ds, ds_pre, ds_post, daymin, daymax, year=year) data.attrs = ds.attrs return data for plev in plevs: for y, year in enumerate(years): datafile = datafiles[plev][y] d_onset, d_retreat = onset[y], retreat[y] d0 = int(index[ind_nm][y].values) ds_rel = xray.Dataset() ds = get_data(datafile, year, d0, npre, npost) ds_rel.attrs = ds.attrs for nm in ds.data_vars: var = atm.expand_dims(ds[nm], 'year', year) ds_rel[nm] = utils.daily_rel2onset(var, d_onset, npre, npost) ds_rel.attrs['d_onset'] = d_onset ds_rel.attrs['d_retreat'] = d_retreat savefile = savefiles[plev][y] print('Saving to ' + savefile) ds_rel.to_netcdf(savefile) # ---------------------------------------------------------------------- # Compute climatologies and save yearstr = '%d-%d' % (years.min(), years.max()) for plev in plevs: relfiles = savefiles[plev] savefile = savestr % plev + '_' + yearstr + '.nc'
def onset_changepoint_merged(precip_acc, order=1, yearnm='year', daynm='day'):
    """Return monsoon onset/retreat based on changepoint in precip.

    Uses piecewise least-squares fit of data to detect changepoints.
    I've modified the original method by doing a 3-piecewise fit of the
    entire year, rather than a 2-piecewise fit of an onset range and
    retreat range.

    Parameters
    ----------
    precip_acc : xray.DataArray
        Accumulated precipitation.
    order : int, optional
        Order of polynomial to fit.
    yearnm, daynm : str, optional
        Name of year and day dimensions in precip_acc.

    Returns
    -------
    chp : xray.Dataset
        Variables 'tseries' (the input), 'onset' and 'retreat' (one value
        per year) and 'tseries_fit' (piecewise polynomial fit), plus the
        attribute 'order'.  The rss values are computed internally but
        are not stored in the output.

    Raises
    ------
    ValueError
        If any fitted polynomial coefficient is NaN.

    Reference
    ---------
    Cook, B. I., & Buckley, B. M. (2009). Objective determination of
    monsoon season onset, withdrawal, and length. Journal of Geophysical
    Research, 114(D23), D23109. doi:10.1029/2009JD012795
    """

    def split(x, n1, n2):
        # Three consecutive pieces: [0, n1), [n1, n2), [n2, end).
        return x[:n1], x[n1:n2], x[n2:]

    def piecewise_polyfit(x, y, n1, n2, order):
        # Mask NaNs so that the masked polyfit ignores them.
        y = np.ma.masked_array(y, np.isnan(y))
        x1, x2, x3 = split(x, n1, n2)
        y1, y2, y3 = split(y, n1, n2)
        p1 = np.ma.polyfit(x1, y1, order)
        p2 = np.ma.polyfit(x2, y2, order)
        p3 = np.ma.polyfit(x3, y3, order)
        if np.isnan(p1).any() or np.isnan(p2).any() or np.isnan(p3).any():
            raise ValueError('NaN for polyfit coeffs. Check data.')
        ypred1 = np.polyval(p1, x1)
        ypred2 = np.polyval(p2, x2)
        ypred3 = np.polyval(p3, x3)
        ypred = np.concatenate([ypred1, ypred2, ypred3])
        rss = np.sum((y - ypred)**2)
        return ypred, rss

    def find_changepoint(x, y, order):
        # Exhaustive search over all admissible (n1, n2) breakpoint pairs.
        rss = {}
        for n1 in range(2, len(x) - 4):
            print(n1)
            for n2 in range(n1 + 2, len(x) - 2):
                _, rssval = piecewise_polyfit(x, y, n1, n2, order)
                rss[(n1, n2)] = rssval
        # Materialise the keys as a list: on Python 3, dict.keys() is a
        # view that cannot be indexed with the position from nanargmin.
        keys = list(rss.keys())
        rssvec = np.nan * np.ones(len(keys))
        for i, key in enumerate(keys):
            rssvec[i] = rss[key]
        i0 = np.nanargmin(rssvec)
        n1, n2 = keys[i0]
        x1, x2 = x[n1], x[n2]
        ypred, _ = piecewise_polyfit(x, y, n1, n2, order)
        return x1, x2, ypred, rss

    # Input without a year dimension gets a dummy one (year value -1).
    if yearnm not in precip_acc.dims:
        precip_acc = atm.expand_dims(precip_acc, yearnm, -1, axis=0)
    years = precip_acc[yearnm].values
    days = precip_acc[daynm].values

    chp = xray.Dataset()
    chp['tseries'] = precip_acc

    onset = np.nan * np.ones(years.shape)
    retreat = np.nan * np.ones(years.shape)
    pred = np.nan * np.ones(precip_acc.shape)
    for y, year in enumerate(years):
        print(year)
        results = find_changepoint(days, precip_acc[y], order)
        onset[y], retreat[y], pred[y, :], _ = results

    chp['onset'] = xray.DataArray(onset, dims=[yearnm],
                                  coords={yearnm : years})
    chp['retreat'] = xray.DataArray(retreat, dims=[yearnm],
                                    coords={yearnm : years})
    chp['tseries_fit'] = xray.DataArray(pred, dims=[yearnm, daynm],
                                        coords={yearnm : years,
                                                daynm : days})
    chp.attrs['order'] = order

    return chp
def onset_changepoint(precip_acc, onset_range=(1, 250),
                      retreat_range=(201, 366), order=1, yearnm='year',
                      daynm='day'):
    """Return monsoon onset/retreat based on changepoint in precip.

    Uses piecewise least-squares fit of data to detect changepoints.

    Parameters
    ----------
    precip_acc : xray.DataArray
        Accumulated precipitation.
    onset_range, retreat_range : 2-tuple of ints, optional
        Range of days to use when calculating onset / retreat.
    order : int, optional
        Order of polynomial to fit.
    yearnm, daynm : str, optional
        Name of year and day dimensions in precip_acc.

    Returns
    -------
    chp : xray.Dataset
        Onset/retreat days, daily timeseries, piecewise polynomial fits,
        and rss values.  The range, fit order and day ranges used are
        stored as attributes.

    Raises
    ------
    ValueError
        If any fitted polynomial coefficient is NaN.

    Reference
    ---------
    Cook, B. I., & Buckley, B. M. (2009). Objective determination of
    monsoon season onset, withdrawal, and length. Journal of Geophysical
    Research, 114(D23), D23109. doi:10.1029/2009JD012795
    """

    def split(x, n):
        return x[:n], x[n:]

    def piecewise_polyfit(x, y, n, order=1):
        # Mask NaNs so that the masked polyfit ignores them.
        y = np.ma.masked_array(y, np.isnan(y))
        x1, x2 = split(x, n)
        y1, y2 = split(y, n)
        p1 = np.ma.polyfit(x1, y1, order)
        p2 = np.ma.polyfit(x2, y2, order)
        if np.isnan(p1).any() or np.isnan(p2).any():
            raise ValueError('NaN for polyfit coeffs. Check data.')
        ypred1 = np.polyval(p1, x1)
        ypred2 = np.polyval(p2, x2)
        ypred = np.concatenate([ypred1, ypred2])
        rss = np.sum((y - ypred)**2)
        return ypred, rss

    def find_changepoint(x, y, order=1):
        # rss stays NaN at the end points, where no split is attempted.
        rss = np.nan * x
        for n in range(2, len(x) - 2):
            _, rssval = piecewise_polyfit(x, y, n, order)
            rss[n] = rssval
        n0 = np.nanargmin(rss)
        x0 = x[n0]
        # Refit at the selected breakpoint with the SAME order used for
        # the search, so the returned fit matches the chosen changepoint.
        ypred, _ = piecewise_polyfit(x, y, n0, order)
        return x0, ypred, rss

    # Input without a year dimension gets a dummy one (year value -1).
    if yearnm not in precip_acc.dims:
        precip_acc = atm.expand_dims(precip_acc, yearnm, -1, axis=0)
    years = precip_acc[yearnm].values

    chp = xray.Dataset()
    chp['tseries'] = precip_acc

    for key, drange in zip(['onset', 'retreat'],
                           [onset_range, retreat_range]):
        print('Calculating ' + key)
        print(drange)
        dmin, dmax = drange
        precip_sub = atm.subset(precip_acc, {daynm : (dmin, dmax)})
        dsub = precip_sub[daynm].values

        d_cp = np.nan * np.ones(years.shape)
        pred = np.nan * np.ones(precip_sub.shape)
        rss = np.nan * np.ones(precip_sub.shape)
        for y, year in enumerate(years):
            # Cut out any NaNs from day range
            pcp_yr = precip_sub[y]
            ind = np.where(np.isfinite(pcp_yr))[0]
            islice = slice(ind.min(), ind.max() + 1)
            pcp_yr = pcp_yr[islice]
            days_yr = pcp_yr[daynm].values
            print('%d (%d-%d)' % (year, min(days_yr), max(days_yr)))
            results = find_changepoint(days_yr, pcp_yr, order)
            d_cp[y], pred[y, islice], rss[y, islice] = results

        chp[key] = xray.DataArray(d_cp, dims=[yearnm],
                                  coords={yearnm : years})
        chp['tseries_fit_' + key] = xray.DataArray(
            pred, dims=[yearnm, daynm],
            coords={yearnm : years, daynm : dsub})
        chp['rss_' + key] = xray.DataArray(
            rss, dims=[yearnm, daynm],
            coords={yearnm : years, daynm : dsub})

    chp.attrs['order'] = order
    chp.attrs['onset_range'] = onset_range
    chp.attrs['retreat_range'] = retreat_range

    return chp
def get_daily_data(varid, plev, years, datafiles, data, daymin=1, daymax=366,
                   yearnm='year'):
    """Return daily data (basic variable or calculated variable).

    Data is read from datafiles if varid is a basic variable.
    If varid is a calculated variable (e.g. potential temperature),
    the base variables for calculation are provided in the dict data.

    Parameters
    ----------
    varid : str
        Variable identifier.  `var_type(varid)` decides whether it is
        computed ('calc') or read from file.
    plev : int, float, str or None
        Level.  Numeric values are converted from hPa to Pa for the
        calculations; 'LML' uses data['PS'] as pressure when available.
    years, datafiles : scalar or list
        Years and the matching data files (only used for basic variables).
    data : dict
        Base variables for calculated variables, keyed by variable name
        followed by the level (e.g. 'T' + str(plev)).
    daymin, daymax : int, optional
        Day range to extract.  Ranges beyond 1-366 are wrapped across
        years with `wrapyear_all`.
    yearnm : str, optional
        Name of the year dimension.

    Returns
    -------
    var : xray.DataArray
        Daily data; 'precip', 'PRECTOT' and 'EVAP' are converted to
        mm/day.

    Raises
    ------
    ValueError
        If varid is a 'calc' variable with no known formula.
    """
    years = atm.makelist(years)
    datafiles = atm.makelist(datafiles)

    if isinstance(plev, (int, float)):
        pres = atm.pres_convert(plev, 'hPa', 'Pa')
    elif plev == 'LML' and 'PS' in data:
        pres = data['PS']
    else:
        pres = None

    def get_var(data, varnm, plev=None):
        # Keys in `data` are the variable name followed by the level;
        # at 'LML' the humidity variable is stored as 'Q' rather than 'QV'.
        if plev is None:
            plev = ''
        elif plev == 'LML' and varnm == 'QV':
            varnm = 'Q'
        return data[varnm + str(plev)]

    if var_type(varid) == 'calc':
        print('Computing ' + varid)
        if varid == 'THETA':
            var = atm.potential_temp(get_var(data, 'T', plev), pres)
        elif varid == 'THETA_E':
            var = atm.equiv_potential_temp(get_var(data, 'T', plev), pres,
                                           get_var(data, 'QV', plev))
        elif varid == 'DSE':
            var = atm.dry_static_energy(get_var(data, 'T', plev),
                                        get_var(data, 'H', plev))
        elif varid == 'MSE':
            var = atm.moist_static_energy(get_var(data, 'T', plev),
                                          get_var(data, 'H', plev),
                                          get_var(data, 'QV', plev))
        elif varid == 'VFLXMSE':
            Lv = atm.constants.Lv.values
            var = data['VFLXCPT'] + data['VFLXPHI'] + data['VFLXQV'] * Lv
            var.attrs['units'] = data['VFLXCPT'].attrs['units']
            var.attrs['long_name'] = 'Vertically integrated MSE meridional flux'
        else:
            # Fail with a clear message instead of an unbound `var` below.
            raise ValueError('Unsupported calculated variable: ' + str(varid))
    else:
        # Files may store the variable under its name plus the level.
        with xray.open_dataset(datafiles[0]) as ds:
            if varid not in ds.data_vars:
                varid = varid + str(plev)
        var = atm.combine_daily_years(varid, datafiles, years,
                                      yearname=yearnm,
                                      subset_dict={'day' : (daymin, daymax)})
    var = atm.squeeze(var)

    # Make sure year dimension is included for single year
    if len(years) == 1 and yearnm not in var.dims:
        var = atm.expand_dims(var, yearnm, years[0], axis=0)

    # Wrap years for extended day ranges
    if daymin < 1 or daymax > 366:
        var = wrapyear_all(var, daymin, daymax)

    # Convert precip and evap to mm/day
    if varid in ['precip', 'PRECTOT', 'EVAP']:
        var = atm.precip_convert(var, var.attrs['units'], 'mm/day')

    return var