def get_HM_data(self, filepath, dim='latitude'):
    self.filepath = filepath
    self.dim = dim
    if self.seldates is not None:
        self.kwrgs_load['seldates'] = self.seldates_ext
        self.ds_seldates = functions_pp.import_ds_timemeanbins(self.filepath,
                                                               **self.kwrgs_load)
        ds_name = self.ds_seldates.name
        if self.rollingmeanwindow is not None:
            # apply rolling mean
            self.ds = self.ds_seldates.rolling(time=self.rollingmeanwindow).mean()
        else:
            self.ds = self.ds_seldates
        # calculate std based on seldates
        self.std = self.ds.sel(time=self.seldates).std(dim='time')
        if self.t_test:
            self.ds_all = self.ds.sel(time=self.seldates)
        # now that we have the std over seldates, select the dates for the HM
        self.ds = self.ds.sel(time=np.concatenate(self.event_lagged))
    else:
        self.kwrgs_load['seldates'] = np.concatenate(self.event_lagged)
        self.ds = functions_pp.import_ds_timemeanbins(self.filepath,
                                                      **self.kwrgs_load)
        ds_name = self.ds.name
    if self.name is None:
        self.name = ds_name
    if 'units' in list(self.ds.attrs.keys()):
        self.units = self.ds.attrs['units']
    if self.standardize:
        self.units = 'std [-]'
        self.ds = self.ds / self.std
    if self.event_dates is not None:
        # stack the lagged events: the time axis becomes the lag axis
        self.xarray = self.ds.copy().rename({'time': 'lag'})
        self.xarray = self.xarray.assign_coords(lag=np.concatenate(self.lag_axes))
    else:
        self.xarray = self.ds
    if self.zoomdim is not None:
        xarray_w = self.xarray.sel(latitude=slice(self.zoomdim[0],
                                                  self.zoomdim[1]))
        xarray_w = functions_pp.area_weighted(xarray_w)
    else:
        xarray_w = functions_pp.area_weighted(self.xarray)
    xarray_meandim = xarray_w.mean(dim=dim)
    self.xr_HM = xarray_meandim.groupby('lag').mean()
    if self.t_test:
        full = (self.ds_all / self.std).mean(dim=dim)
        self.xr_mask = self.xr_HM.astype(bool).copy()
        pvals = np.zeros_like(self.xr_mask.values, dtype=float)
        for i, lag in enumerate(self.xr_mask.lag.values):
            sample = xarray_meandim.sel(lag=lag)
            T, p, mask = Welchs_t_test(sample, full, equal_var=False)
            pvals[i] = p
        self.xr_mask.values = pvals
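#%% Illustrative sketch (not this module's Welchs_t_test): with
# equal_var=False, scipy.stats.ttest_ind performs Welch's unequal-variance
# t-test along an axis, which is the test applied per lag above. All names
# prefixed with an underscore are hypothetical stand-ins.
# from scipy import stats
# _sample = np.random.randn(30, 144)   # e.g. (events, longitude) at one lag
# _full = np.random.randn(500, 144)    # full climatology, same spatial axis
# _T, _p = stats.ttest_ind(_sample, _full, axis=0, equal_var=False)
# _mask = _p <= 0.05                   # significant gridcells at alpha = 0.05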
def calc_spatcov(full_timeserie, pattern, area_wght=True):
    #%%
    mask = np.ma.make_mask(~np.isnan(pattern.values))
    n_time = full_timeserie.time.size
    n_space = pattern.size
    if area_wght:
        pattern = functions_pp.area_weighted(pattern)
    # select only gridcells where there is not a nan
    full_ts = np.nan_to_num(np.reshape(full_timeserie.values, (n_time, n_space)))
    pattern = np.nan_to_num(np.reshape(pattern.values, (n_space)))
    mask_pattern = np.reshape(mask, (n_space))
    full_ts = full_ts[:, mask_pattern]
    pattern = pattern[mask_pattern]
    spatcov = np.zeros((n_time))
    for t in range(n_time):
        # Corr(X,Y) = cov(X,Y) / ( std(X)*std(Y) )
        # cov(X,Y) = E( (x_i - mu_x) * (y_i - mu_y) )
        M = np.stack((full_ts[t], pattern))
        spatcov[t] = np.cov(M)[0, 1]
        # for correlation instead, divide by
        # np.sqrt(np.cov(M)[0, 0]) * np.sqrt(np.cov(M)[1, 1])
    dates_test = full_timeserie.time
    # wrap the covariance timeseries in an xarray DataArray
    spatcov = xr.DataArray(spatcov, coords=[dates_test.values], dims=['time'])
    #%%
    return spatcov
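#%% Usage sketch for calc_spatcov (illustrative, synthetic data): compare a
# random 10-step field on a 4x5 grid against its own time mean as the
# "pattern"; area_wght=False skips the area weighting for this toy grid.
# _time = pd.date_range('2000-01-01', periods=10)
# _field = xr.DataArray(np.random.randn(10, 4, 5), coords={'time': _time},
#                       dims=['time', 'latitude', 'longitude'])
# _pattern = _field.mean(dim='time')
# _spatcov = calc_spatcov(_field, _pattern, area_wght=False)
# print(_spatcov.values)  # one spatial-covariance value per timestep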
def ENSO_34(file_path, ex, df_splits=None):
    #%%
    # file_path = '/Users/semvijverberg/surfdrive/Data_era5/input_raw/sst_1979-2018_1_12_daily_2.5deg.nc'
    '''
    See http://www.cgd.ucar.edu/staff/cdeser/docs/deser.sstvariability.annrevmarsci10.pdf
    '''
    if df_splits is None:
        RV = ex[ex['RV_name']]
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex)
        seldates = None
    else:
        seldates = df_splits.loc[0].index
    kwrgs_pp = {'selbox': {'la_min': -5,  # select domain in degrees east
                           'la_max': 5,
                           'lo_min': -170,
                           'lo_max': -120},
                'seldates': seldates}
    ds = core_pp.import_ds_lazy(file_path, **kwrgs_pp)
    to_freq = ex['tfreq']
    if to_freq != 1:
        ds, dates = functions_pp.time_mean_bins(ds, ex, to_freq=to_freq,
                                                seldays='all')
        ds['time'] = dates
    dates = pd.to_datetime(ds.time.values)
    splits = df_splits.index.levels[0]
    list_splits = []
    for s in splits:
        progress = 100 * (s + 1) / splits.size
        print(f"\rProgress ENSO traintest set {progress}%", end="")
        # note: the Nino3.4 series itself is identical for every traintest split
        data = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))
        list_splits.append(pd.DataFrame(data=data.values, index=dates,
                                        columns=['0_900_ENSO34']))
    df_ENSO = pd.concat(list_splits, axis=0, keys=splits)
    #%%
    return df_ENSO
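#%% Quick check (illustrative): the selbox above in degrees west, 170W-120W,
# equals 190-240 degrees east, i.e. the (lon_min, lon_max, lat_min, lat_max)
# tuple used by the newer ENSO_34 variant below.
# print([lon % 360 for lon in (-170, -120)])  # -> [190, 240]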
def PNA_z500(filepath_z):
    '''
    From Liu et al. 2015: Recent contrasting winter temperature changes over
    North America linked to enhanced positive Pacific-North American pattern.
    https://onlinelibrary.wiley.com/doi/abs/10.1002/2015GL065656

    PNA = z1 - z2 + z3 - z4

    z1 = Z (15-25N, 180-140W)
    z2 = Z (40-50N, 180-140W)
    z3 = Z (45-60N, 125-105W)
    z4 = Z (25-35N, 90-70W)

    Parameters
    ----------
    filepath_z : str
        filepath to a z500 (geopotential height) NetCDF4 file.

    Returns
    -------
    PNA : pd.DataFrame
    '''
    load = core_pp.import_ds_lazy
    progressBar(1, 4)
    z1 = functions_pp.area_weighted(load(filepath_z,
                                         **{'selbox': (180, 220, 15, 25)}))
    progressBar(2, 4)
    z2 = functions_pp.area_weighted(load(filepath_z,
                                         **{'selbox': (180, 220, 40, 50)}))
    z3 = functions_pp.area_weighted(load(filepath_z,
                                         **{'selbox': (235, 255, 45, 60)}))
    progressBar(3, 4)
    z4 = functions_pp.area_weighted(load(filepath_z,
                                         **{'selbox': (270, 290, 25, 35)}))
    progressBar(4, 4)
    PNA = z1.mean(dim=('latitude', 'longitude')) \
          - z2.mean(dim=('latitude', 'longitude')) \
          + z3.mean(dim=('latitude', 'longitude')) \
          - z4.mean(dim=('latitude', 'longitude'))
    return PNA.to_dataframe(name='PNA')
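#%% Usage sketch for PNA_z500 (illustrative; the filepath is hypothetical and
# should point to a z500 geopotential height NetCDF readable by
# core_pp.import_ds_lazy).
# df_PNA = PNA_z500('/path/to/z500_1979-2018_daily_2.5deg.nc')
# df_PNA.plot()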
def ENSO_34(filepath, df_splits=None, get_ENSO_states: bool = True):
    #%%
    # filepath = '/Users/semvijverberg/surfdrive/Data_era5/input_raw/sst_1979-2018_1_12_daily_2.5deg.nc'
    '''
    See http://www.cgd.ucar.edu/staff/cdeser/docs/deser.sstvariability.annrevmarsci10.pdf

    selbox has format (lon_min, lon_max, lat_min, lat_max)
    '''
    kwrgs_pp = {'selbox': (190, 240, -5, 5),  # Nino3.4 domain in degrees east
                'format_lon': 'only_east',
                'seldates': None}
    ds = core_pp.import_ds_lazy(filepath, **kwrgs_pp)
    dates = pd.to_datetime(ds.time.values)
    data = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))
    df_ENSO = pd.DataFrame(data=data.values, index=dates, columns=['ENSO34'])
    if df_splits is not None:
        splits = df_splits.index.levels[0]
        df_ENSO = pd.concat([df_ENSO] * splits.size, axis=0, keys=splits)
    if get_ENSO_states:
        '''
        From Anderson 2017 - Life cycles of agriculturally relevant ENSO
        teleconnections in North and South America.
        http://doi.wiley.com/10.1002/joc.4916

        mean boreal wintertime (October, November, December) SST anomaly
        amplitude in the Nino 3.4 region exceeded 1 of 2 standard deviation
        (the thresholding below uses 1 standard deviation of the 3-month
        running mean).
        '''
        if hasattr(df_ENSO.index, 'levels'):
            df_ENSO_s = df_ENSO.loc[0]
        else:
            df_ENSO_s = df_ENSO
        dates = df_ENSO_s.index
        df_3monthmean = df_ENSO_s.rolling(3, center=True, min_periods=1).mean()
        std_ENSO = df_3monthmean.std()
        OND, groups = core_pp.get_subdates(dates,
                                           start_end_date=('10-01', '12-31'),
                                           returngroups=True)
        OND_ENSO = df_3monthmean.loc[OND].groupby(groups).mean()
        nino_yrs = OND_ENSO[OND_ENSO > df_3monthmean.mean()
                            + std_ENSO].dropna().index
        nina_yrs = OND_ENSO[OND_ENSO < df_3monthmean.mean()
                            - std_ENSO].dropna().index
        neutral = [y for y in OND_ENSO.index
                   if y not in core_pp.flatten([nina_yrs, nino_yrs])]
        # label each year with its ENSO state: -1 (Nina), 0 (neutral), 1 (Nino)
        states = {}
        for d in dates:
            if d.year in nina_yrs:
                states[d.year] = -1
            if d.year in neutral:
                states[d.year] = 0
            if d.year in nino_yrs:
                states[d.year] = 1
        # label the life cycle relative to event years:
        # {s}-1 (year before), {s}0 (event year), {s}+1 (year after)
        cycle_list = []
        for s, v in [('EN', 1), ('LN', -1)]:
            ENSO_cycle = {d.year: 0 for d in dates}
            for year in np.unique(dates.year):
                if states[year] == v:
                    ENSO_cycle[year] = f'{s}0'
                    if year - 1 in dates.year and states[year - 1] != v:
                        ENSO_cycle[year - 1] = f'{s}-1'
                    if year + 1 in dates.year and states[year + 1] != v:
                        ENSO_cycle[year + 1] = f'{s}+1'
            cycle_list.append(ENSO_cycle)
        time_index = pd.to_datetime([f'{y}-01-01' for y in states.keys()])
        df_state = pd.concat([pd.Series(states),
                              pd.Series(cycle_list[0]),
                              pd.Series(cycle_list[1])],
                             axis=1, keys=['state', 'EN_cycle', 'LN_cycle'])
        df_state.index = time_index
        if hasattr(df_ENSO.index, 'levels'):
            # copy to other traintest splits
            df_state = pd.concat([df_state] * splits.size, keys=splits)
        composites = np.zeros(3, dtype=object)
        for i, yrs in enumerate([nina_yrs, neutral, nino_yrs]):
            composite = [d for d in dates if d.year in yrs]
            composites[i] = ds.sel(time=composite).mean(dim='time')
        composites = xr.concat(composites, dim='state')
        composites['state'] = ['Nina', 'Neutral', 'Nino']
        plot_maps.plot_corr_maps(composites, row_dim='state', hspace=0.5)
        out = df_ENSO, [np.array(nina_yrs), np.array(neutral),
                        np.array(nino_yrs)], df_state
    else:
        out = df_ENSO
    #%%
    return out
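#%% Usage sketch (illustrative; the filepath is hypothetical). With
# get_ENSO_states=True, ENSO_34 returns the Nino3.4 timeseries, the
# [nina, neutral, nino] arrays of years, and the yearly state/cycle DataFrame.
# df_ENSO, enso_yrs, df_state = ENSO_34('/path/to/sst_daily_2.5deg.nc',
#                                       get_ENSO_states=True)
# nina_yrs, neutral_yrs, nino_yrs = enso_yrs
# print(df_state['EN_cycle'].value_counts())  # counts of EN-1 / EN0 / EN+1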
#%% Soy bean USDA
raw_filename = '/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy.nc'
selbox = [250, 290, 28, 50]
ds = core_pp.import_ds_lazy(raw_filename, var='variable',
                            selbox=selbox).rename({'z': 'time'})
ds.name = 'Soy_Yield'
ds['time'] = pd.to_datetime([f'{y+1949}-01-01' for y in ds.time.values])
ds.attrs['dataset'] = 'USDA'
ds.attrs['planting_months'] = 'May/June'
ds.attrs['harvest_months'] = 'October'
# note: area-weighting a yield field weights by gridcell area, not harvested area
ts = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))
cl.store_netcdf(ts,
                filepath='/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy_spatial_mean_ts.nc')

#%% Maize yield USDA
raw_filename = os.path.join('/Users/semvijverberg/surfdrive/VU_Amsterdam/GDHY_MIRCA2000_Soy/USDA/usda_maize.nc')
ds = core_pp.import_ds_lazy(raw_filename)['variable'].rename({'z': 'time'})
ds.name = 'Maize_Yield'
ds['time'] = pd.to_datetime([f'{y+1949}-01-01' for y in ds.time.values])
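#%% Illustrative follow-up: depending on how cl.store_netcdf writes the file,
# the stored spatial-mean timeseries can be re-loaded with xarray via
# open_dataarray or open_dataset (path as stored above).
# ts = xr.open_dataarray('/Users/semvijverberg/Dropbox/VIDI_Coumou/'
#                        'Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/'
#                        'usda_soy_spatial_mean_ts.nc')
# ts.plot()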