def pipeline_small(lead_time, return_persistance=False):
    """Prepare the data on which the Deep Ensemble is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistance as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
        time), the target season "timey" and, if selected, the label at
        observation time "y_persistance". Hence, the output comes as:
        X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # climate-network metrics computed on sea-surface height
    network_ssh = reader.read_statistic('network_metrics', variable='zos',
                                        dataset='ORAS4', processed="anom")
    c2_ssh = network_ssh['fraction_clusters_size_2']
    H_ssh = network_ssh['corrected_hamming_distance']

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca', variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    time_lag = 2  # number of lagged feature copies
    shift = 3     # aligns the label with the definition of lead time

    # active predictors; iod, taux_WP_mean and c2_ssh were read above but
    # are currently excluded from the feature stack
    feature_unscaled = np.stack(
        (oni, oni.index.month, wwv, H_ssh, pca_dec),
        axis=1)

    # standardize every column, then replace NaNs by 0
    scaler = StandardScaler()
    Xorg = np.nan_to_num(scaler.fit_transform(feature_unscaled))

    # features: drop the tail without labels, then append the lagged copies
    X = include_time_lag(Xorg[:-lead_time - shift, :], max_lag=time_lag)

    # label and its time axis, shifted to the lead time
    offset = lead_time + time_lag + shift
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey
    y_persistance = yorg[time_lag:-lead_time - shift]
    return X, y, timey, y_persistance
def pipeline(lead_time, return_persistance=False):
    """Prepare the data on which the Deep Ensemble is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistance as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
        time), the target season "timey" and, if selected, the label at
        observation time "y_persistance". Hence, the output comes as:
        X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic as a seasonal-cycle predictor
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # climate-network metric computed on sea-surface height
    network_ssh = reader.read_statistic('network_metrics', variable='zos',
                                        dataset='ORAS4', processed="anom")
    H_ssh = network_ssh['corrected_hamming_distance']

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    n_lags = 3  # number of lagged feature copies
    step = 3    # months between consecutive lags
    shift = 3   # aligns the label with the definition of lead time
    lag_span = n_lags * step

    feature_unscaled = np.stack(
        (oni, wwv, iod, cos, taux_WP_mean, H_ssh),
        axis=1)

    # standardize every column, then replace NaNs by 0
    scaler = StandardScaler()
    Xorg = np.nan_to_num(scaler.fit_transform(feature_unscaled))

    # features: drop the tail without labels, then append the lagged copies
    X = include_time_lag(Xorg[:-lead_time - shift, :],
                         n_lags=n_lags, step=step)

    # label and its time axis, shifted to the lead time
    yorg = oni.values
    y = yorg[lead_time + lag_span + shift:]
    timey = oni.index[lead_time + lag_span + shift:]

    if not return_persistance:
        return X, y, timey
    y_persistance = yorg[lag_span:-lead_time - shift]
    return X, y, timey, y_persistance
c2_ssh, pca_dec), axis=1)  # closes a feature stack begun in an earlier chunk
#feature_unscaled = np.concatenate((feature_unscaled, sst_equator),
#                                  axis=1)

# Standardize each feature column; NaNs are zeroed out after scaling.
scaler = StandardScaler()
Xorg = scaler.fit_transform(feature_unscaled)
Xorg = np.nan_to_num(Xorg)

# Feature arrays: X covers the labelled period, futureX the most recent
# observations used for the out-of-sample forecast.
X = Xorg[:-lead_time - shift, :]
futureX = Xorg[-lead_time - shift - time_lag:, :]
X = include_time_lag(X, max_lag=time_lag)
futureX = include_time_lag(futureX, max_lag=time_lag)

# Label (NINO3.4 index) shifted to lead time, plus its time axis.
yorg = nino34.values
y = yorg[lead_time + time_lag + shift:]
timey = nino34.index[lead_time + time_lag + shift:]

# Monthly time axis for the forecast window beyond the observed record.
futuretime = pd.date_range(start='2019-01-01',
                           end=pd.to_datetime('2019-01-01')
                           + pd.tseries.offsets.MonthEnd(lead_time + shift),
                           freq='MS')

# Hold out 2012-2017 for testing; train on everything else.
test_indeces = (timey >= '2012-01-01') & (timey <= '2017-12-01')
train_indeces = np.invert(test_indeces)
taux_WP_mean, c2_ssh, H_ssh, pca_dec  # closes a feature stack begun in an earlier chunk
), axis=1)

# scale each feature
scalerX = StandardScaler()
Xorg = scalerX.fit_transform(feature_unscaled)

# set nans to 0.
Xorg = np.nan_to_num(Xorg)

# arange the feature array: drop the unlabelled tail, add lagged copies
X = Xorg[:-lead_time-shift,:]
X = include_time_lag(X, max_lag=time_lag)

# arange label
yorg = oni.values
y = yorg[lead_time + time_lag + shift:]

# get the time axis of the label
timey = oni.index[lead_time + time_lag + shift:]

# hold out 2001-2011 for testing; train on the rest
test_indeces = (timey>=f'2001-01-01') & (timey<=f'2011-12-01')
train_indeces = np.invert(test_indeces)
trainX, trainy = X[train_indeces,:], y[train_indeces]
testX, testy = X[test_indeces,:], y[test_indeces]

# fit a Deep Ensemble model on the training period
model = DEM(layers=32, l1_hidden=0.001, verbose=1)
model.fit(trainX, trainy)
def pipeline_small(lead_time, return_persistance=False):
    """Prepare the data on which the Deep Ensemble is trained.

    Small feature set: ONI, seasonal cycle, WWV proxy and the decadal PCA.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistance as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
        time), the target season "timey" and, if selected, the label at
        observation time "y_persistance". Hence, the output comes as:
        X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic as a seasonal-cycle predictor
    sc = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca', variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    time_lag = 3  # number of lagged feature copies
    shift = 3     # aligns the label with the definition of lead time
    offset = lead_time + time_lag + shift

    feature_unscaled = np.stack((oni, sc, wwv, pca_dec), axis=1)

    # standardize every column, then replace NaNs by 0
    scaler = StandardScaler()
    Xorg = np.nan_to_num(scaler.fit_transform(feature_unscaled))

    # features: drop the tail without labels, then append the lagged copies
    X = include_time_lag(Xorg[:-lead_time - shift, :], max_lag=time_lag)

    # label and its time axis, shifted to the lead time
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey
    y_persistance = yorg[time_lag:-lead_time - shift]
    return X, y, timey, y_persistance
def pipeline(lead_time, return_persistance=False):
    """Prepare the data on which the Deep Ensemble is trained.

    :type lead_time: int
    :param lead_time: The lead time in month.

    :type return_persistance: boolean
    :param return_persistance: Return the persistance as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
        time), the target season "timey" and, if selected, the label at
        observation time "y_persistance". Hence, the output comes as:
        X, y, timey, y_persistance.
    """
    # endyr/endmth are module-level settings for the end of the record
    reader = data_reader(startdate='1960-01', enddate=endyr+'-'+endmth)

    # climate indices
    oni = reader.read_csv('oni')
    dmi = reader.read_csv('dmi')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic as a seasonal-cycle predictor
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # include values from 3 and 6 months previously as predictor variables
    n_lags = 3
    step = 3
    shift = 3  # aligns the label with the definition of lead time
    lag_span = n_lags * step

    feature_unscaled = np.stack(
        (oni, wwv, dmi, cos, taux_WP_mean),
        axis=1)

    # standardize every column, then replace NaNs by 0
    scaler = StandardScaler()
    Xorg = np.nan_to_num(scaler.fit_transform(feature_unscaled))

    # persist the scaled feature matrix for the prediction script
    np.save(join(infodir,'Xorg'), Xorg)

    # features: drop the tail without labels, then append the lagged copies
    X = include_time_lag(Xorg[:-lead_time - shift, :],
                         n_lags=n_lags, step=step)

    # label and its time axis, shifted to the lead time
    yorg = oni.values
    y = yorg[lead_time + lag_span + shift:]
    timey = oni.index[lead_time + lag_span + shift:]

    if not return_persistance:
        return X, y, timey
    y_persistance = yorg[lag_span:-lead_time - shift]
    return X, y, timey, y_persistance
from ninolearn.pathes import modeldir, infodir, preddir
from ninolearn.learn.models.dem import DEM
from ninolearn.learn.fit import decades
from s0_start import start_pred_y, start_pred_m

# =============================================================================
# Getting feature vector
# =============================================================================
# Load the scaled feature matrix saved by the training pipeline.
Xorg = np.load(join(infodir,'Xorg.npy'))

# include values of 3 and 6 months previously
n_lags = 3
step = 3
X = include_time_lag(Xorg, n_lags = n_lags, step=step)
X = X[-1:,:] # now use only the latest observation to produce forecast

# =============================================================================
# For each lead time, load ensemble of models and make prediction
# =============================================================================
lead_times = np.load(join(infodir,'lead_times.npy'))
predictions = np.zeros((2,len(lead_times))) # first row: mean, second row: std

print_header("Making predictions")
for i in np.arange(len(lead_times)):
    print("Lead time "+str(lead_times[i])+" months")
    # NOTE(review): this constructor call is truncated here — it continues
    # beyond the visible source.
    dem = DEM(layers=1, neurons = 32, dropout=0.05, noise_in=0.0, noise_sigma=0.,