Example #1
def prep_wwv_proxy():
    """
    Make a wwv proxy index that uses the K-index from Bunge and Clarke (2014)
    for the time period between 1955 and 1979
    """

    reader_wwv = data_reader(startdate='1980-01', enddate='2018-12')
    wwv = reader_wwv.read_csv('wwv')

    reader_kindex = data_reader(startdate='1955-01', enddate='1979-12')
    kindex = reader_kindex.read_csv('kindex') * 10e12

    # Series.append was removed in pandas 2.0; pd.concat is the equivalent
    wwv_proxy = pd.concat([kindex, wwv])
    wwv_proxy.to_csv(join(processeddir, 'wwv_proxy.csv'), header=True)
Example #2
def calc_warm_pool_edge():
    """
    calculate the warm pool edge
    """
    reader = data_reader(startdate='1948-01',
                         enddate='2020-10',
                         lon_min=120,
                         lon_max=290)  # enddate: was 2018-12
    sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='')

    sst_eq = sst.loc[dict(latitude=0)]
    warm_pool_edge = np.zeros(sst_eq.shape[0])
    indices = np.zeros(sst_eq.shape[0])

    # TODO: not very efficient
    for i in range(sst_eq.shape[0]):
        # easternmost grid point that is still warmer than 28 degC
        index = np.argwhere(sst_eq[i].values > 28.).max()
        indices[i] = index

        # local zonal SST gradient at that grid point
        slope = sst_eq[i, index] - sst_eq[i, index - 1]

        # linear estimate of the fractional grid position of the 28 degC isotherm
        intercept28C = (sst_eq[i, index] - 28.) * slope + index

        # convert the grid position to km (2.5-degree grid, 111.321 km per degree)
        warm_pool_edge[i] = intercept28C * 2.5 * 111.321

    df = pd.DataFrame(data=warm_pool_edge,
                      index=sst.time.values,
                      columns=['total'])
    df.index.name = 'time'

    df.to_csv(join(processeddir, 'wp_edge.csv'))
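
# A vectorised alternative to the TODO loop above -- a sketch only, assuming
# sst_eq can be passed as a 2-D (time, longitude) numpy array on the same
# 2.5-degree grid; this helper is not part of the original module:
import numpy as np  # assumed available, as in the surrounding examples

def calc_warm_pool_edge_vectorized(sst_eq_values):
    above = sst_eq_values > 28.  # (time, lon) mask of warm grid points
    # easternmost grid point per time step that is still warmer than 28 degC
    index = above.shape[1] - 1 - np.argmax(above[:, ::-1], axis=1)
    rows = np.arange(sst_eq_values.shape[0])
    slope = sst_eq_values[rows, index] - sst_eq_values[rows, index - 1]
    # same linear estimate of the 28 degC position as in the loop above
    intercept28C = (sst_eq_values[rows, index] - 28.) * slope + index
    return intercept28C * 2.5 * 111.321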
Example #3
def evaluation_nll(model_name,
                   mean_name='mean',
                   std_name='std',
                   filename=None,
                   start='1963-01',
                   end='2017-12'):
    """
    Evaluate the model using the negative log-likelihood skill for the full time series.
    """
    reader = data_reader(startdate=start, enddate=end)

    # scores for the full timeseries
    nll = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name,
                                         lead_times[i],
                                         filename=filename)
        mean = pred_all[mean_name]
        std = pred_all[std_name]

        # calculate all seasons scores
        nll[i] = nll_gaussian(obs, mean, std)

    return nll
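
# For reference, a minimal sketch of the mean Gaussian negative log-likelihood
# that nll_gaussian presumably computes (an assumption, not necessarily
# ninolearn's actual implementation):
import numpy as np  # assumed available, as in the surrounding examples

def nll_gaussian_sketch(obs, mean, std):
    obs, mean, std = np.asarray(obs), np.asarray(mean), np.asarray(std)
    # 0.5*log(2*pi*sigma^2) + (y - mu)^2 / (2*sigma^2), averaged over time
    return np.mean(0.5 * np.log(2 * np.pi * std**2)
                   + (obs - mean)**2 / (2 * std**2))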
Example #4
def evaluation_correlation(model_name,
                           variable_name='mean',
                           start='1963-01',
                           end='2017-12'):
    """
    Evaluate the model using the correlation skill for the full time series.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
    time and the corresponding p values.
    """
    reader = data_reader(startdate=start, enddate=end)

    # scores for the full timeseries
    r = np.zeros(n_lead)
    p = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        # calculate all seasons scores
        r[i], p[i] = pearsonr(obs, pred)
    return r, p
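
# n_lead and lead_times are module-level constants; judging from the
# docstring they are presumably defined along these lines (an assumption):
# lead_times = [0, 3, 6, 9, 12, 15]
# n_lead = len(lead_times)
#
# Usage sketch with a hypothetical model name:
# r, p = evaluation_correlation('dem')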
Example #5
def evaluation_srmse(model_name, variable_name='mean'):
    """
    Evaluate the model using the standardized root-mean-squared error (SRMSE)
    for the full time series. Standardized means that the RMSE of each
    season is divided by the corresponding standard deviation of the ONI in
    that season (the standard deviation has a seasonal cycle). Then, these
    seasonal SRMSE values are averaged to obtain the SRMSE of the full time series.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The standardized RMSE for the 0, 3, 6, 9, 12 and 15-month lead\
    time.
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # scores for the full timeseries
    srmse = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        srmse[i] = mean_srmse(obs, pred,
                              obs.index - pd.tseries.offsets.MonthBegin(1))

    return srmse
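
# mean_srmse presumably implements the seasonal standardisation described in
# the docstring; a rough sketch of the idea (an assumption, not necessarily
# the actual implementation), where `time` carries the month of each sample:
import numpy as np  # assumed available, as in the surrounding examples
import pandas as pd

def mean_srmse_sketch(obs, pred, time):
    obs, pred = np.asarray(obs), np.asarray(pred)
    months = pd.DatetimeIndex(time).month
    srmse_per_season = []
    for m in range(1, 13):
        sel = months == m
        rmse = np.sqrt(np.mean((obs[sel] - pred[sel])**2))
        # divide by the seasonal standard deviation of the observations
        srmse_per_season.append(rmse / np.std(obs[sel]))
    return np.mean(srmse_per_season)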
Example #6
def evaluation_decadal_nll(model_name,
                           mean_name='mean',
                           std_name='std',
                           filename=None):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 \
    using the negative log-likelihood.
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # decadal scores
    decadal_nll = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):

        pred_all = reader.read_forecasts(model_name, lead_times[i], filename)
        pred_mean = pred_all[mean_name]
        pred_std = pred_all[std_name]

        for j in range(n_decades - 1):

            indices = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]}-12-01')

            decadal_nll[i, j] = nll_gaussian(obs[indices], pred_mean[indices],
                                             pred_std[indices])

    return decadal_nll
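
# decades and n_decades are module-level constants; from the decade
# boundaries named in the docstring they are presumably along these lines
# (an assumption):
# decades = [1963, 1972, 1982, 1992, 2002, 2012, 2018]
# n_decades = len(decades)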
Example #7
    def load_data(self,
                  variable,
                  dataset,
                  processed='anom',
                  startyear=1949,
                  endyear=2018,
                  lon_min=120,
                  lon_max=280,
                  lat_min=-30,
                  lat_max=30):
        """
        Load data for PCA analysis from the desired postprocessed data set.

        :type variable: str
        :param variable: The variable for which the PCA will be done.

        :type dataset: str
        :param dataset: The data set that should be used for the PCA.

        :type processed: str
        :param processed: Either '', 'anom' or 'normanom'.

        :param startyear: The start year for the time series for which the PCA\
        is done.

        :param endyear: The last year for the time series for which the PCA\
        is done.

        :param lon_min,lon_max: The minimum and the maximum values of the\
        longitude grid for which the metrics shall be computed\
        (from 0 to 360 degrees east).

        :param lat_min,lat_max: The minimum and the maximum values of the latitude\
        grid for which the metrics shall be computed (from -90 to 90 degrees\
        north).

        """

        self.variable = variable
        self.dataset = dataset
        self.processed = processed

        self.startdate = pd.to_datetime(str(startyear))
        self.enddate = (pd.to_datetime(str(endyear)) +
                        pd.tseries.offsets.YearEnd(0))

        self.reader = data_reader(startdate=self.startdate,
                                  enddate=self.enddate,
                                  lon_min=lon_min,
                                  lon_max=lon_max,
                                  lat_min=lat_min,
                                  lat_max=lat_max)

        data = self.reader.read_netcdf(variable, dataset, processed)

        self.set_eof_array(data)
Example #8
    def __init__(self,
                 variable,
                 dataset,
                 processed='anom',
                 threshold=None,
                 edge_density=None,
                 startyear=1948,
                 endyear=2018,
                 window_size=12,
                 lon_min=120,
                 lon_max=260,
                 lat_min=-30,
                 lat_max=30,
                 verbose=0):

        self.variable = variable
        self.dataset = dataset
        self.processed = processed

        self.threshold = threshold
        self.edge_density = edge_density

        self.startyear = str(startyear)
        self.endyear = str(endyear)

        self.startdate = pd.to_datetime(self.startyear)
        self.enddate = pd.to_datetime(self.endyear) \
            + pd.tseries.offsets.YearEnd(0)

        self.window_size = window_size
        self.window_start = self.startdate
        self.window_end = self.window_start \
            + pd.tseries.offsets.MonthEnd(self.window_size)

        self.lon_min = lon_min
        self.lon_max = lon_max
        self.lat_min = lat_min
        self.lat_max = lat_max

        self.reader = data_reader(startdate=self.window_start,
                                  enddate=self.window_end,
                                  lon_min=self.lon_min,
                                  lon_max=self.lon_max,
                                  lat_min=self.lat_min,
                                  lat_max=self.lat_max)
        self.initalizeSeries()

        if verbose == 0:
            logger.setLevel(logging.DEBUG)
        elif verbose == 1:
            logger.setLevel(logging.INFO)
        elif verbose == 2:
            logger.setLevel(logging.WARNING)
        elif verbose == 3:
            logger.setLevel(logging.ERROR)
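
        # Equivalent lookup-table form of the verbose mapping above
        # (a sketch; it behaves the same as the if/elif chain):
        # level_map = {0: logging.DEBUG, 1: logging.INFO,
        #              2: logging.WARNING, 3: logging.ERROR}
        # if verbose in level_map:
        #     logger.setLevel(level_map[verbose])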
Example #9
def prep_wwv_proxy():
    """
    Make a wwv proxy index that uses the K-index from Bunge and Clarke (2014)
    for the time period between 1955 and 1979
    """
    print(f"Prepare WWV proxy.")
    wwv_raw = pd.read_csv(join(rawdir, 'wwv.dat'),
                          delim_whitespace=True,
                          header=4)
    wwv_end = str(wwv_raw['date'].iloc[-1])
    endyr = wwv_end[:4]
    endmth = wwv_end[4:]

    reader_wwv = data_reader(startdate='1980-01', enddate=endyr + '-' + endmth)
    wwv = reader_wwv.read_csv('wwv')

    reader_kindex = data_reader(startdate='1955-01', enddate='1979-12')
    kindex = reader_kindex.read_csv('kindex') * 10e12

    # Series.append was removed in pandas 2.0; pd.concat is the equivalent
    wwv_proxy = pd.concat([kindex, wwv])
    wwv_proxy.to_csv(join(processeddir, 'wwv_proxy.csv'), header=True)
Example #10
def pipeline_noise(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the MLR
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the
    label at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    # initialize the reader
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    np.random.seed(0)

    # load data
    oni = reader.read_csv('oni')
    wwv = reader.read_csv('wwv_proxy')
    iod = reader.read_csv('iod')

    # shift the data by 3 months in addition to the lead-time shift (due to
    # the definition of lead time), as in Barnston et al. (2012)
    shift = 3

    # make feature
    Xorg = np.stack((oni, wwv, iod), axis=1)

    # append 100 columns of white noise as additional (uninformative) features
    for i in range(100):
        random_noise = np.random.normal(size=len(oni)).reshape(len(oni), 1)
        Xorg = np.concatenate((Xorg, random_noise), axis=1)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]

    # arrange label
    yorg = oni.values
    y = yorg[lead_time + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + shift:]

    if return_persistance:
        y_persistance = yorg[: - lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
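
# Usage sketch (hypothetical 3-month lead time): the slicing above aligns
# features and labels, so X and y end up with the same number of samples.
# X, y, timey, y_persistance = pipeline_noise(3, return_persistance=True)
# assert X.shape[0] == y.shape[0] == len(timey)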
Example #11
def evaluation_seasonal_correlation(model_name,
                                    variable_name='mean',
                                    background='all'):
    """
    Evaluate the model in different seasons using the correlation skill.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for different seasons and the \
    0, 3, 6, 9, 12 and 15-month lead time and the corresponding p values for\
    the respective seasons and lead times. The returned arrays have the shape \
    (lead time, season). The season corresponding to the array entry [:,0]\
    is DJF and to [:,1] is JFM (and so on).
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # seasonal scores
    seasonal_r = np.zeros((n_lead, 12))
    seasonal_p = np.zeros((n_lead, 12))

    # ONI observation
    oni = reader.read_csv('oni')

    if background == "el-nino-like":
        obs = oni[(oni.index.year >= 1982) & (oni.index.year <= 2001)]
    elif background == "la-nina-like":
        obs = oni[(oni.index.year < 1982) | (oni.index.year > 2001)]
    elif background == "barnston_2019":
        obs = oni[(oni.index.year >= 1982) | (oni.index.year > 2015)]
    elif background == "all":
        obs = oni
    obs_time = obs.index

    for i in range(n_lead):

        pred_all = reader.read_forecasts(
            model_name, lead_times[i]).loc[{'target_season': obs_time}]
        pred = pred_all[variable_name]

        seasonal_r[i, :], seasonal_p[i, :] = seasonal_correlation(
            obs, pred, obs_time - pd.tseries.offsets.MonthBegin(1))

    return seasonal_r, seasonal_p
Example #12
def evaluation_decadal_correlation(model_name,
                                   variable_name='mean',
                                   start='1963-01',
                                   end='2017-12'):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 using the correlation skill.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
    time and the corresponding p values for the respective decades. The\
    returned arrays have the shape (lead time, decades).
    """
    reader = data_reader(startdate=start, enddate=end)

    # decadal scores
    decadal_r = np.zeros((n_lead, n_decades - 1))
    decadal_p = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        for j in range(n_decades - 1):
            indices = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]-1}-12-01')
            decadal_r[i, j], decadal_p[i, j] = pearsonr(obs[indices].values,
                                                        pred[indices].values)

    return decadal_r, decadal_p
Example #13
def evaluation_decadal_srmse(model_name, variable_name='mean', decadal=None):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017 \
    using the standardized RMSE.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The SRMSE for the 0, 3, 6, 9, 12 and 15-month lead\
    time and the respective decades. The returned array has the shape (lead time, \
    decades).
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # decadal scores
    decadal_srmse = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):

        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        for j in range(n_decades - 1):

            indices = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]}-12-01')

            decadal_srmse[i, j] = mean_srmse(
                obs[indices], pred[indices],
                obs.index[indices] - pd.tseries.offsets.MonthBegin(1))

    return decadal_srmse
Example #14
def pipeline_small(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the
    label at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')

    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # network metrics
    network_ssh = reader.read_statistic('network_metrics',
                                        variable='zos',
                                        dataset='ORAS4',
                                        processed="anom")
    c2_ssh = network_ssh['fraction_clusters_size_2']
    H_ssh = network_ssh['corrected_hamming_distance']

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')

    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca',
                                    variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    # time lag
    time_lag = 2

    # shift such that lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack(
        (
            oni,
            oni.index.month,
            wwv,
            #iod,
            #taux_WP_mean,
            #c2_ssh,
            H_ssh,
            pca_dec),
        axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set nans to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, max_lag=time_lag)

    # arrange label
    yorg = oni.values
    y = yorg[lead_time + time_lag + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + time_lag + shift:]

    if return_persistance:
        y_persistance = yorg[time_lag:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
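
# include_time_lag presumably appends lagged copies of each feature column and
# drops the first max_lag rows, whose lags are undefined -- a sketch of the
# idea (an assumption, but consistent with the slicing above, where y and
# timey also skip time_lag extra rows):
import numpy as np  # assumed available, as in the surrounding examples

def include_time_lag_sketch(X, max_lag=0):
    # stack X(t), X(t-1), ..., X(t-max_lag) side by side
    lagged = [X[max_lag - lag: X.shape[0] - lag] for lag in range(max_lag + 1)]
    return np.concatenate(lagged, axis=1)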
Example #15
import numpy as np  # needed for np.cos below
from keras import backend as K  # assumed import for K.clear_session() below
from sklearn.preprocessing import StandardScaler

from ninolearn.IO.read_processed import data_reader
from ninolearn.plot.evaluation import plot_correlation
from ninolearn.plot.prediction import plot_prediction
from ninolearn.learn.evaluation.skillMeasures import rmse
from ninolearn.learn.models.dem import DEM
from ninolearn.utils import print_header, include_time_lag, basin_means
from ninolearn.pathes import modeldir

K.clear_session()

#%% =============================================================================
# read data
# =============================================================================
reader = data_reader(startdate='1960-01', enddate='2017-12')

# NINO3.4 Index
nino34 = reader.read_csv('oni')
#%%
# Other indices
iod = reader.read_csv('iod')
wwv = reader.read_csv('wwv_proxy')

# seasonal cycle
sc = np.cos(np.arange(len(nino34)) / 12 * 2 * np.pi)

# SSH network metrics
network_ssh = reader.read_statistic('network_metrics',
                                    variable='zos',
                                    dataset='ORAS4',
Example #16
    1991,
    1994,
    2002,
    2004,
    2006,
    2009  #, 2015
])

lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])

lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

year = 2008
reader = data_reader(startdate=f'{year}-01',
                     enddate=f'{year}-12',
                     lon_min=30,
                     lon_max=300)

oni = reader.read_csv('oni')

spring = np.array([month in [3, 4, 5] for month in oni.index.month])
summer = np.array([month in [6, 7, 8] for month in oni.index.month])
autumn = np.array([month in [9, 10, 11] for month in oni.index.month])

winter = np.array([month in [11, 12] for month in oni.index.month])
winter_p1 = np.array([month in [1, 2] for month in oni.index.month])

index = winter

# =============================================================================
# Read data
Example #17
def read_ssta(index,
              data_dir,
              get_mask=False,
              stack_lon_lat=True,
              resolution=2.5,
              dataset="ERSSTv5",
              fill_nan=0,
              start_date='1871-01',
              end_date='2019-12',
              lon_min=190,
              lon_max=240,
              lat_min=-5,
              lat_max=5,
              reader=None):
    """

    :param index: choose target index (e.g. ONI, Nino3.4, ICEN)
    :param start_date:
    :param end_date:
    :param lon_min:
    :param lon_max:
    :param lat_min:
    :param lat_max:
    :param reader: If a data_reader is passed, {start,end}_date and {lat, lon}_{min, max} will be ignored.
    :return:
    """
    if index in ["Nino3.4", "ONI"]:
        k = 5 if index == "Nino3.4" else 3
    elif index == "ICEN":
        k = 3
    elif index[-3:] == "mon":
        k = int(index[-4])  # e.g. '1mon' -> k = 1
    else:
        raise ValueError("Unknown index")

    if reader is None:
        reader = data_reader(data_dir=data_dir,
                             startdate=start_date,
                             enddate=end_date,
                             lon_min=lon_min,
                             lon_max=lon_max,
                             lat_min=lat_min,
                             lat_max=lat_max)
        check_chosen_coordinates(index,
                                 lon_min=lon_min,
                                 lon_max=lon_max,
                                 lat_min=lat_min,
                                 lat_max=lat_max)

    resolution_suffix = f"{resolution}x{resolution}"
    ssta = reader.read_netcdf('sst',
                              dataset=dataset,
                              processed='anom',
                              suffix=resolution_suffix)
    # single-month SSTAs --> rolling mean over k-month SSTAs
    ssta = ssta.rolling(time=k).mean()[k - 1:]

    if stack_lon_lat:
        lats, lons = ssta.get_index('lat'), ssta.get_index('lon')
        ssta = ssta.stack(cord=['lat', 'lon'])
        ssta.attrs["Lons"] = lons
        ssta.attrs["Lats"] = lats
    if fill_nan is not None:
        if fill_nan == "trim":
            ssta_old_index = ssta.get_index('cord')
            ssta = ssta.dropna(dim='cord')
            print(
                f"Dropped {len(ssta_old_index) - len(ssta.get_index('cord'))} nodes."
            )
            # print("Dropped coordinates:", set(ssta_old_index).difference(set(ssta.get_index("cord"))))
            # print(flattened_ssta.loc["1970-01", (0, 290)]) --> will raise error
        else:
            ssta = ssta.fillna(fill_nan)

    if get_mask:
        index_mask, train_mask = get_index_mask(
            ssta,
            index=index,
            flattened_too=True,
            is_data_flattened=stack_lon_lat)
        train_mask = np.array(train_mask)
        return ssta, train_mask
    return ssta
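
# Usage sketch with a hypothetical data directory; the coordinate defaults
# select the Nino3.4 box (190-240 degrees east, 5S-5N):
# ssta = read_ssta("ONI", data_dir="data/processed",
#                  start_date='1980-01', end_date='2018-12')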
Example #18
    1987,
    1991,
    1994,
    2002,
    2004,
    2006,
    2009  #, 2015
])

lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])

lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

reader = data_reader(startdate='1974-06',
                     enddate='2017-12',
                     lon_min=30,
                     lon_max=300)

oni = reader.read_csv('oni')

spring = np.array([month in [1, 2, 3] for month in oni.index.month])
summer = np.array([month in [6, 7, 8] for month in oni.index.month])
autumn = np.array([month in [12] for month in oni.index.month])

winter = np.array([month in [12] for month in oni.index.month])
winter_p1 = np.array([month in [1, 2] for month in oni.index.month])
Example #19
    1991,
    1994,
    2002,
    2004,
    2006,
    2009  #, 2015
])

lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])

lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

year = 2008
reader = data_reader(startdate='1980-01',
                     enddate='2010-12',
                     lon_min=30,
                     lon_max=300)

# =============================================================================
# Read data
# =============================================================================
taux = reader.read_netcdf('taux', dataset='NCEP', processed='')
taux = taux.sortby('lat', ascending=False)
tauy = reader.read_netcdf('tauy', dataset='NCEP', processed='')
tauy = tauy.sortby('lat', ascending=False)

sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='')
sst = sst.sortby('lat', ascending=False)
#ssh = reader.read_netcdf('sshg', dataset='GODAS', processed='anom')
#ssh = ssh.sortby('lat', ascending=False)
#olr =  - reader.read_netcdf('olr', dataset='NCAR', processed='anom')
Example #20
import matplotlib.pyplot as plt
from ninolearn.IO.read_processed import data_reader
from ninolearn.private import plotdir

from os.path import join

reader = data_reader(startdate='1950-02', enddate='2018-12', lon_min=30)
oni = reader.read_csv('oni')
iod = reader.read_csv('iod')
iod = iod.rolling(window=3, center=False).mean()

plt.close("all")
plt.subplots(figsize=(8, 3))
plt.fill_between(oni.index, oni, 0, label="ONI")
plt.plot(iod, 'k', label="DMI")
plt.legend(loc=2)

plt.xlim(iod.index[0], iod.index[-1])
plt.ylim(-2.5, 2.5)
plt.xlabel("Time [Year]")
plt.ylabel('Index Value')
plt.tight_layout()

plt.savefig(join(plotdir, 'iod_oni.pdf'))
Example #21
def pipeline(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the
    label at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """      
    reader = data_reader(startdate='1960-01', enddate=endyr+'-'+endmth)

    # indices
    oni = reader.read_csv('oni')
    dmi = reader.read_csv('dmi')
    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    cos = np.cos(np.arange(len(oni))/12*2*np.pi)

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')

    taux_WP = taux.loc[dict(lat=slice(2.5,-2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # include lagged values (n_lags lags, spaced step months apart) as predictors
    n_lags = 3
    step = 3

    # shift such that lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni,
                                 wwv,
                                 dmi,
                                 cos,
                                 taux_WP_mean
                                 ), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set nans to 0.
    Xorg = np.nan_to_num(Xorg)
    np.save(join(infodir, 'Xorg'), Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, n_lags=n_lags, step=step)

    # arrange label
    yorg = oni.values
    y = yorg[lead_time + n_lags*step + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + n_lags*step + shift:]

    if return_persistance:
        y_persistance = yorg[n_lags*step: - lead_time - shift]
        return X, y, timey, y_persistance

    else:
        return X, y, timey
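
# Usage sketch (hypothetical 6-month lead): assuming include_time_lag drops
# the first n_lags*step rows, X, y and timey stay aligned sample by sample.
# X, y, timey, y_persistance = pipeline(6, return_persistance=True)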
Example #22
from ninolearn.IO.read_processed import data_reader

import matplotlib.pyplot as plt
plt.close("all")
reader = data_reader(startdate='1978-01', enddate='2018-12', lon_min=30)

wp_edge = reader.read_csv('wp_edge', processed='total')
wp_edge_dec = wp_edge.rolling(24, center=False).mean()

olr = reader.read_netcdf('olr', dataset='NCAR', processed='anom')
olr = olr.sortby('lat', ascending=False)
olr_cp = olr.loc[dict(lat=slice(2.5, -2.5),
                      lon=slice(160, 180))].mean(dim='lat',
                                                 skipna=True).mean(dim='lon',
                                                                   skipna=True)
olr_cp_dec = olr_cp.rolling(time=24, center=False).mean()

time = wp_edge.index

fig, ax1 = plt.subplots()
ax1.plot(time, -olr_cp_dec, 'r', label='-OLR (central Pacific)')

ax2 = ax1.twinx()
ax2.plot(time, wp_edge_dec, 'k')
Example #23
def pipeline_small(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the
    label at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')

    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    sc = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca',
                                    variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    # time lag
    time_lag = 3

    # shift such that lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni, sc, wwv, pca_dec), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set nans to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, max_lag=time_lag)

    # arrange label
    yorg = oni.values
    y = yorg[lead_time + time_lag + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + time_lag + shift:]

    if return_persistance:
        y_persistance = yorg[time_lag:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
Example #24
from ninolearn.preprocess.pca import pca
from ninolearn.IO.read_processed import data_reader
import matplotlib.pyplot as plt

plt.close("all")

# =============================================================================
# Decadal PCAs
# =============================================================================

reader = data_reader(startdate='1955-02', enddate='2018-12', lon_min=120, lon_max=300)
hca = reader.read_netcdf('hca', dataset='NODC', processed='anom')

hca_decadel = hca.rolling(time=60, center=False).mean()
hca_decadel.attrs = hca.attrs.copy()
hca_decadel.name = f'dec_{hca.name}'

pca_hca_decadel = pca(n_components=6)

pca_hca_decadel.set_eof_array(hca_decadel)
pca_hca_decadel.compute_pca()
pca_hca_decadel.plot_eof()
pca_hca_decadel.save(extension='.csv', filename='dec_hca_NODC_anom')

reader = data_reader(startdate='1955-01', enddate='2018-12', lon_min=120, lon_max=300)
sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='anom')

sst_decadel = sst.rolling(time=60, center=False).mean()
sst_decadel.attrs = sst.attrs.copy()
sst_decadel.name = f'dec_{sst.name}'
Example #25
from ninolearn.IO.read_processed import data_reader
from ninolearn.utils import basin_means

from ninolearn.utils import pearson_lag
from ninolearn.private import plotdir

import matplotlib.pyplot as plt  # needed for plt.close() below
import numpy as np
from os.path import join


plt.close("all")

start = '1963'
end = '2017'

reader = data_reader(startdate=f'{start}-01', enddate=f'{end}-12', lon_min=30)
oni = reader.read_csv('oni')
wwv = reader.read_csv('wwv_proxy')
taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
taux_WP_mean, taux_CP_mean, taux_EP_mean = basin_means(taux, lat1=7.5, lat2=-7.5)
iod = reader.read_csv('iod')

network = reader.read_statistic('network_metrics', variable='zos',
                                dataset='ORAS4', processed="anom")
c2 = network['fraction_clusters_size_2']
H = network['corrected_hamming_distance']


max_lag = 19
lead_time_arr = np.arange(-3, max_lag-2)
Example #26
def pipeline(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the
    label at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')

    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # network metrics
    network_ssh = reader.read_statistic('network_metrics',
                                        variable='zos',
                                        dataset='ORAS4',
                                        processed="anom")
    H_ssh = network_ssh['corrected_hamming_distance']

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')

    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # time lag
    n_lags = 3
    step = 3

    # shift such that lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni,
                                 wwv,
                                 iod,
                                 cos,
                                 taux_WP_mean,
                                 H_ssh), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set nans to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, n_lags=n_lags, step=step)

    # arrange label
    yorg = oni.values
    y = yorg[lead_time + n_lags * step + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + n_lags * step + shift:]

    if return_persistance:
        y_persistance = yorg[n_lags * step:-lead_time - shift]
        return X, y, timey, y_persistance

    else:
        return X, y, timey
Example #27
    def data_gen():
        k = 0
        kmax = len(label)
        while k < kmax:

            yield true.loc[time[k]], pred.loc[time[k]], time[k]
            k += 1

    ani = animation.FuncAnimation(fig, update, data_gen, interval=100)
    return ani


#%% =============================================================================
# Data
# =============================================================================
reader = data_reader(startdate='1959-11', enddate='2017-12')
sst = reader.read_netcdf('sst', dataset='ERSSTv5',
                         processed='anom').rolling(time=3).mean()[2:]
oni = reader.read_csv('oni')[2:]

# select
feature = sst.copy(deep=True)
label = sst.copy(deep=True)

# preprocess data
feature_unscaled = feature.values.reshape(feature.shape[0], -1)
label_unscaled = label.values.reshape(label.shape[0], -1)

scaler_f = StandardScaler()
Xorg = scaler_f.fit_transform(feature_unscaled)
Example #28
    stdp2 = stdp2.where(stdp2_cls != i, other=thresholds[i] + 0.125)
    mode = mode.where(mode_cls != i, other=thresholds[i] + 0.125)

ds_save = xr.Dataset({
    'median': median,
    'stdm1': stdm1,
    'stdp1': stdp1,
    'stdm2': stdm2,
    'stdp2': stdp2
})

ds_save.to_netcdf(join(processeddir, f'{model_name}_prob_forecasts.nc'))

start = '1963-01'
end = '2017-12'
reader = data_reader(startdate=start, enddate=end)
oni = reader.read_csv('oni')


#%% =============================================================================
# Plot Hindcasts
# =============================================================================
def plot_timeseries(lead, ax):
    ax.axhline(0, c='grey', linestyle='--')
    ax.plot(oni, 'k', lw=2)
    ax.set_xlim(oni.index[0], oni.index[-1])
    ax.fill_between(data.target_season.values,
                    stdm2.loc[{
                        'lead': lead
                    }],
                    stdp2.loc[{
Example #29
from mpl_toolkits.basemap import Basemap
from sklearn import linear_model
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from ninolearn.IO.read_processed import data_reader
from os.path import join
from ninolearn.private import plotdir
# =============================================================================
# Read
# =============================================================================
reader = data_reader(startdate='1980-01',
                     enddate='2018-11',
                     lon_min=100,
                     lon_max=300)
iod = reader.read_csv('iod')
nino = reader.read_csv('nino3M')

taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
taux = taux.sortby('lat', ascending=False)

sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='anom')
sst = sst.sortby('lat', ascending=False)

olr = -reader.read_netcdf('olr', dataset='NCAR', processed='anom')
olr = olr.sortby('lat', ascending=False)

# =============================================================================
# Regression analysis
# =============================================================================