Example #1
    # relies on numpy (np), the project-internal functions_pp module and a
    # Welchs_t_test helper defined elsewhere in the project
    def get_HM_data(self, filepath, dim='latitude'):
        self.filepath = filepath
        self.dim = dim
        if self.seldates is not None:
            # load over the extended date window (seldates_ext)
            self.kwrgs_load['seldates'] = self.seldates_ext
            self.ds_seldates = functions_pp.import_ds_timemeanbins(self.filepath, **self.kwrgs_load)
            ds_name = self.ds_seldates.name

            if self.rollingmeanwindow is not None:
                # apply a rolling mean over the time dimension
                self.ds = self.ds_seldates.rolling(time=self.rollingmeanwindow).mean()
            else:
                self.ds = self.ds_seldates
            # calculate the standard deviation over the selected dates
            self.std = self.ds.sel(time=self.seldates).std(dim='time')
            if self.t_test:
                # keep the full sample over seldates for the t-test below
                self.ds_all = self.ds.sel(time=self.seldates)
            # now that the std over seldates is known, select the dates for the HM
            self.ds = self.ds.sel(time=np.concatenate(self.event_lagged))
        else:
            self.kwrgs_load['seldates'] = np.concatenate(self.event_lagged)
            self.ds = functions_pp.import_ds_timemeanbins(self.filepath, **self.kwrgs_load)
            ds_name = self.ds.name

        if self.name is None:
            self.name = ds_name

        if 'units' in self.ds.attrs:
            self.units = self.ds.attrs['units']

        if self.standardize:
            self.units = 'std [-]'
            self.ds = self.ds / self.std

        if self.event_dates is not None:
            self.xarray = self.ds.copy().rename({'time':'lag'})
            self.xarray = self.xarray.assign_coords(lag=np.concatenate(self.lag_axes))
        else:
            self.xarray = self.ds

        if self.zoomdim is not None:
            xarray_w = self.xarray.sel(latitude=slice(self.zoomdim[0],
                                                      self.zoomdim[1]))
            xarray_w = functions_pp.area_weighted(xarray_w)
        else:
            xarray_w = functions_pp.area_weighted(self.xarray)
        xarray_meandim = xarray_w.mean(dim=dim)
        self.xr_HM = xarray_meandim.groupby('lag').mean()
        if self.t_test:
            # standardized climatological sample to test each lag against
            full = (self.ds_all / self.std).mean(dim=dim)
            self.xr_mask = self.xr_HM.astype(bool).copy()
            pvals = np.zeros_like(self.xr_mask.values, dtype=float)
            for i, lag in enumerate(self.xr_mask.lag.values):
                sample = xarray_meandim.sel(lag=lag)
                # Welch's t-test (unequal variances); only the p-values are stored
                T, p, mask = Welchs_t_test(sample, full, equal_var=False)
                pvals[i] = p
            self.xr_mask.values = pvals
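
The significance test above relies on the project-specific Welchs_t_test helper. Below is a minimal sketch of the same per-lag test, assuming scipy.stats.ttest_ind as a stand-in (the actual helper's return signature may differ):

# Minimal sketch of the per-lag Welch's t-test, with scipy.stats.ttest_ind
# as an assumed stand-in for the project-specific Welchs_t_test helper.
from scipy import stats

def welch_pvalues_per_lag(samples_per_lag, full, alpha=0.05):
    """samples_per_lag: dict mapping lag -> 1-D sample array.
    full: 1-D array holding the full (climatological) sample."""
    pvals = {}
    for lag, sample in samples_per_lag.items():
        # Welch's t-test: independent samples, unequal variances
        _, p = stats.ttest_ind(sample, full, equal_var=False)
        pvals[lag] = p
    # boolean significance mask, analogous to self.xr_mask above
    mask = {lag: p < alpha for lag, p in pvals.items()}
    return pvals, mask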
Example #2
import numpy as np
import xarray as xr

import functions_pp  # project-internal helper module


def calc_spatcov(full_timeserie, pattern, area_wght=True):
    #%%
    # mask of grid cells where the pattern is defined (not NaN)
    mask = ~np.isnan(pattern.values)
    n_time = full_timeserie.time.size
    n_space = pattern.size

    if area_wght:
        pattern = functions_pp.area_weighted(pattern)
    # select only grid cells that are not NaN
    full_ts = np.nan_to_num(
        np.reshape(full_timeserie.values, (n_time, n_space)))
    pattern = np.nan_to_num(np.reshape(pattern.values, (n_space)))

    mask_pattern = np.reshape(mask, (n_space))
    full_ts = full_ts[:, mask_pattern]
    pattern = pattern[mask_pattern]

    spatcov = np.zeros(n_time)
    for t in range(n_time):
        # cov(X, Y) = E[(x_i - mu_x) * (y_i - mu_y)]
        # (divide by std(X) * std(Y) to obtain the spatial correlation instead)
        M = np.stack((full_ts[t], pattern))
        spatcov[t] = np.cov(M)[0, 1]

    dates_test = full_timeserie.time
    # cov xarray
    spatcov = xr.DataArray(spatcov, coords=[dates_test.values], dims=['time'])
    #%%
    return spatcov
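
A minimal usage sketch with synthetic data; the grid, dates and area_wght=False (which sidesteps the project-internal area weighting) are assumptions:

# Hypothetical usage of calc_spatcov on synthetic data.
import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range('2000-01-01', periods=10)
field = xr.DataArray(np.random.randn(10, 4, 5),
                     coords=[times, np.arange(4.), np.arange(5.)],
                     dims=['time', 'latitude', 'longitude'])
pattern = field.mean(dim='time')
pattern[0, 0] = np.nan  # NaN grid cells are masked out of the covariance
spatcov = calc_spatcov(field, pattern, area_wght=False)
print(spatcov.values)

For long time series, the per-time-step np.cov loop could also be replaced by a single vectorized covariance over the flattened spatial axis.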
Example #3
import pandas as pd

import core_pp  # project-internal helper module
import functions_pp  # project-internal helper module


def ENSO_34(file_path, ex, df_splits=None):
    #%%
    #    file_path = '/Users/semvijverberg/surfdrive/Data_era5/input_raw/sst_1979-2018_1_12_daily_2.5deg.nc'
    '''
    Compute the Nino 3.4 index: the area-weighted mean SST over
    5S-5N, 170W-120W.
    See http://www.cgd.ucar.edu/staff/cdeser/docs/deser.sstvariability.annrevmarsci10.pdf
    '''
    if df_splits is None:
        RV = ex[ex['RV_name']]
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex)
        seldates = None
    else:
        seldates = df_splits.loc[0].index

    kwrgs_pp = {
        'selbox': {
            'la_min': -5,  # select domain in degrees east
            'la_max': 5,
            'lo_min': -170,
            'lo_max': -120
        },
        'seldates': seldates
    }

    ds = core_pp.import_ds_lazy(file_path, **kwrgs_pp)

    to_freq = ex['tfreq']
    if to_freq != 1:
        ds, dates = functions_pp.time_mean_bins(ds,
                                                ex,
                                                to_freq=to_freq,
                                                seldays='all')
        ds['time'] = dates

    dates = pd.to_datetime(ds.time.values)
    splits = df_splits.index.levels[0]

    # the index itself is identical across train-test splits;
    # compute it once and copy it into each split
    data = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))

    list_splits = []
    for s in splits:
        progress = 100 * (s + 1) / splits.size
        print(f"\rProgress ENSO traintest set {progress:.0f}%", end="")
        list_splits.append(
            pd.DataFrame(data=data.values,
                         index=dates,
                         columns=['0_900_ENSO34']))

    df_ENSO = pd.concat(list_splits, axis=0, keys=splits)
    #%%
    return df_ENSO
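
A hypothetical call; the file path and the minimal ex settings dict are placeholders, and df_splits is assumed to come from functions_pp.rand_traintest_years:

# Hypothetical usage; path, ex and df_splits are placeholders/assumptions.
ex = {'tfreq': 1}  # daily data: skip the time-mean binning step
df_ENSO = ENSO_34('sst_1979-2018_daily_2.5deg.nc', ex, df_splits=df_splits)
print(df_ENSO.loc[0].head())  # rows are indexed by (split, time)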
Example #4
# core_pp, functions_pp and progressBar are project-internal helpers
def PNA_z500(filepath_z):
    '''
    From Liu et al. 2015: Recent contrasting winter temperature changes over
    North America linked to enhanced positive Pacific-North American pattern.

    https://onlinelibrary.wiley.com/doi/abs/10.1002/2015GL065656

    PNA = z1 - z2 + z3 - z4
    z1 = Z (15 - 25N, 180 - 140W)
    z2 = Z (40 - 50N, 180 - 140W)
    z3 = Z (45 - 60N, 125 - 105W)
    z4 = Z (25 - 35N, 90 - 70W)

    Parameters
    ----------
    filepath_z : str
        Path to the z500 (geopotential height) NetCDF4 file.

    Returns
    -------
    pd.DataFrame
        The PNA index time series.
    '''
    load = core_pp.import_ds_lazy
    progressBar(1, 4)
    z1 = functions_pp.area_weighted(load(filepath_z, selbox=(180, 220, 15, 25)))
    progressBar(2, 4)
    z2 = functions_pp.area_weighted(load(filepath_z, selbox=(180, 220, 40, 50)))
    z3 = functions_pp.area_weighted(load(filepath_z, selbox=(235, 255, 45, 60)))
    progressBar(3, 4)
    z4 = functions_pp.area_weighted(load(filepath_z, selbox=(270, 290, 25, 35)))
    progressBar(4, 4)

    def box_mean(da):
        # area-mean geopotential height of one box
        return da.mean(dim=('latitude', 'longitude'))

    PNA = box_mean(z1) - box_mean(z2) + box_mean(z3) - box_mean(z4)
    return PNA.to_dataframe(name='PNA')
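
A hypothetical call with a placeholder file path:

# Hypothetical usage; the z500 path is a placeholder.
df_pna = PNA_z500('z500_1979-2018_daily_2.5deg.nc')
print(df_pna['PNA'].describe())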
Example #5
import numpy as np
import pandas as pd
import xarray as xr

import core_pp  # project-internal helper module
import functions_pp  # project-internal helper module
import plot_maps  # project-internal helper module


def ENSO_34(filepath, df_splits=None, get_ENSO_states: bool = True):
    #%%
    #    file_path = '/Users/semvijverberg/surfdrive/Data_era5/input_raw/sst_1979-2018_1_12_daily_2.5deg.nc'
    '''
    Compute the Nino 3.4 index (mean SST over 5S-5N, 170W-120W) and,
    optionally, classify El Nino / La Nina / neutral years.
    See http://www.cgd.ucar.edu/staff/cdeser/docs/deser.sstvariability.annrevmarsci10.pdf
    selbox has the format (lon_min, lon_max, lat_min, lat_max)
    '''

    kwrgs_pp = {
        'selbox': (190, 240, -5, 5),
        'format_lon': 'only_east',
        'seldates': None
    }

    ds = core_pp.import_ds_lazy(filepath, **kwrgs_pp)
    dates = pd.to_datetime(ds.time.values)
    data = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))
    df_ENSO = pd.DataFrame(data=data.values, index=dates, columns=['ENSO34'])
    if df_splits is not None:
        splits = df_splits.index.levels[0]
        df_ENSO = pd.concat([df_ENSO] * splits.size, axis=0, keys=splits)

    if get_ENSO_states:
        '''
        From Anderson 2017 - Life cycles of agriculturally relevant ENSO
        teleconnections in North and South America.
        http://doi.wiley.com/10.1002/joc.4916
        Nino (Nina) years: the mean boreal wintertime (October, November,
        December) SST anomaly in the Nino 3.4 region exceeded plus (minus)
        one standard deviation.
        '''
        if hasattr(df_ENSO.index, 'levels'):
            df_ENSO_s = df_ENSO.loc[0]
        else:
            df_ENSO_s = df_ENSO
        dates = df_ENSO_s.index
        df_3monthmean = df_ENSO_s.rolling(3, center=True, min_periods=1).mean()
        std_ENSO = df_3monthmean.std()
        OND, groups = core_pp.get_subdates(dates,
                                           start_end_date=('10-01', '12-31'),
                                           returngroups=True)
        OND_ENSO = df_3monthmean.loc[OND].groupby(groups).mean()
        # years whose OND mean exceeds +/- one std of the smoothed index
        nino_yrs = OND_ENSO[OND_ENSO > df_3monthmean.mean() +
                            std_ENSO].dropna().index
        nina_yrs = OND_ENSO[OND_ENSO < df_3monthmean.mean() -
                            std_ENSO].dropna().index
        neutral = [
            y for y in OND_ENSO.index
            if y not in core_pp.flatten([nina_yrs, nino_yrs])
        ]
        states = {}
        for d in dates:
            if d.year in nina_yrs:
                states[d.year] = -1
            elif d.year in neutral:
                states[d.year] = 0
            elif d.year in nino_yrs:
                states[d.year] = 1

        cycle_list = []
        for s, v in [('EN', 1), ('LN', -1)]:
            # label event years as e.g. 'EN0', the preceding year 'EN-1'
            # and the following year 'EN+1'
            ENSO_cycle = {d.year: 0 for d in dates}
            for year in np.unique(dates.year):
                if states[year] == v:
                    ENSO_cycle[year] = f'{s}0'
                    if year - 1 in dates.year and states[year - 1] != v:
                        ENSO_cycle[year - 1] = f'{s}-1'
                    if year + 1 in dates.year and states[year + 1] != v:
                        ENSO_cycle[year + 1] = f'{s}+1'
            cycle_list.append(ENSO_cycle)

        time_index = pd.to_datetime([f'{y}-01-01' for y in states.keys()])
        df_state = pd.concat([
            pd.Series(states),
            pd.Series(cycle_list[0]),
            pd.Series(cycle_list[1])
        ],
                             axis=1,
                             keys=['state', 'EN_cycle', 'LN_cycle'])
        df_state.index = time_index

        if hasattr(df_ENSO.index, 'levels'):  # copy to other traintest splits
            df_state = pd.concat([df_state] * splits.size, keys=splits)

        # composite mean SST fields for La Nina, neutral and El Nino years
        composites = []
        for yrs in [nina_yrs, neutral, nino_yrs]:
            dates_state = [d for d in dates if d.year in yrs]
            composites.append(ds.sel(time=dates_state).mean(dim='time'))
        composites = xr.concat(composites, dim='state')
        composites['state'] = ['Nina', 'Neutral', 'Nino']

        plot_maps.plot_corr_maps(composites, row_dim='state', hspace=0.5)
        out = df_ENSO, [
            np.array(nina_yrs),
            np.array(neutral),
            np.array(nino_yrs)
        ], df_state
    else:
        out = df_ENSO
    #%%
    return out
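
A hypothetical call with a placeholder file path; with get_ENSO_states=True the function returns the index, the three year groups and the state DataFrame:

# Hypothetical usage; the SST path is a placeholder.
df_ENSO, (nina_yrs, neutral_yrs, nino_yrs), df_state = ENSO_34(
    'sst_1979-2018_monthly_2.5deg.nc', get_ENSO_states=True)
print(df_state[['state', 'EN_cycle', 'LN_cycle']].head())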
Example #6
#%% Soybean yield USDA
# pandas (pd) is required; core_pp, functions_pp and cl (netcdf storage)
# are project-internal helper modules
import pandas as pd

raw_filename = '/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy.nc'

selbox = [250, 290, 28, 50]
ds = core_pp.import_ds_lazy(raw_filename, var='variable',
                            selbox=selbox).rename({'z': 'time'})
ds.name = 'Soy_Yield'

ds['time'] = pd.to_datetime([f'{y+1949}-01-01' for y in ds.time.values])
ds.attrs['dataset'] = 'USDA'
ds.attrs['planting_months'] = 'May/June'
ds.attrs['harvest_months'] = 'October'

ts = functions_pp.area_weighted(ds).mean(
    dim=('latitude', 'longitude'))  # legacy: an area-weighted mean is questionable for yield data
cl.store_netcdf(
    ts,
    filepath='/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy_spatial_mean_ts.nc')
#%% Maize yield USDA

raw_filename = '/Users/semvijverberg/surfdrive/VU_Amsterdam/GDHY_MIRCA2000_Soy/USDA/usda_maize.nc'

ds = core_pp.import_ds_lazy(raw_filename)['variable'].rename({'z': 'time'})
ds.name = 'Maize_Yield'

ds['time'] = pd.to_datetime([f'{y+1949}-01-01' for y in ds.time.values])