def processInputCrossSection(self, request: TaskRequest, node: OpNode,
                             inputDset: EDASDataset) -> EDASDataset:
    nModes = int(node.getParm("modes", 16))
    # bool("false") is True, so parse the flag string explicitly
    center = str(node.getParm("center", "false")).lower() == "true"
    merged_input_data, info = self.get_input_array(inputDset)
    shapes = info['shapes']
    slicers = info['slicers']
    solver = Eof(merged_input_data, center=center)
    results = []
    for iMode, eofs_result in enumerate(solver.eofs(neofs=nModes)):
        for iVar, eofs_data in enumerate(self.getResults(eofs_result, slicers, shapes)):
            input_var = inputDset.inputs[iVar]  # renamed to avoid shadowing builtin input()
            results.append(EDASArray("-".join(["eof", str(iMode), input_var.name]),
                                     input_var.domId, eofs_data))
    pcs_result = solver.pcs(npcs=nModes)
    pcs = EDASArray("pcs[" + inputDset.id + "]",
                    inputDset.inputs[0].domId,
                    EDASArray.cleanupCoords(pcs_result, {"mode": "m", "pc": "m"}).transpose())
    results.append(pcs)
    fracs = solver.varianceFraction(neigs=nModes)
    pves = [str(round(float(frac * 100.), 1)) + '%' for frac in fracs]
    for result in results:
        result["pves"] = str(pves)
    return EDASDataset.init(self.renameResults(results, node), inputDset.attrs)
def get_eofs(x):
    import numpy as np
    import xarray
    from eofs.xarray import Eof
    from matplotlib import pyplot as plt
    from eddof import get_eddof

    # square-root of cosine of latitude weights
    coslat = np.cos(np.deg2rad(x.lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    # effective degrees of freedom, averaged over the grid
    DF = np.empty(np.shape(x[0, :, :]))
    for i in range(0, len(x.lat)):
        for j in range(0, len(x.lon)):
            DF[i, j] = get_eddof(x[:, i, j].values)
    edof = np.mean(DF)
    print(edof)

    solver = Eof(x, weights=wgts, ddof=edof)  # fix: 'wgst' was a typo
    var = solver.varianceFraction()

    plt.figure(1)
    plt.bar(np.arange(0, len(var), 1), var * 100)
    plt.show()
    plt.close()

    n = int(input('How many PCs to extract: '))  # input() returns a string
    eof = solver.eofs(neofs=n, eofscaling=2)
    pc = solver.pcs(npcs=n, pcscaling=1)
    vf = var[:n]
    EOFs = [eof, pc, vf]
    return EOFs
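# `eddof` is not a published package, so the import above will fail unless a
# local module provides it. A minimal stand-in, assuming the common lag-1
# autocorrelation estimate of the effective sample size,
# Ne = N * (1 - r1) / (1 + r1), is sketched below; the name and behaviour are
# assumptions, not the original implementation.
import numpy as np

def get_eddof(ts):
    """Effective degrees of freedom of a 1-D time series (hypothetical sketch)."""
    ts = ts[~np.isnan(ts)]
    n = ts.size
    if n < 3:
        return float(n)
    a = ts - ts.mean()
    r1 = np.corrcoef(a[:-1], a[1:])[0, 1]  # lag-1 autocorrelation
    r1 = min(max(r1, 0.0), 0.999)          # clamp to keep the ratio finite
    return n * (1.0 - r1) / (1.0 + r1)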
def PDO(dat1, dat2):
    solver = Eof(dat1)
    pc1 = solver.pcs(npcs=1, pcscaling=1)
    sigs = solver.eigenvalues()
    eofs = solver.eofs()
    eof1 = eofs[0]

    # normalise eigenvalues
    sigs1 = sigs / sigs.sum()

    # not used
    eof1 = solver.eofsAsCorrelation(neofs=1)
    # pcs = solver.pcs()

    # filter the first PC
    Nt = dat1.time.size
    fp = fft.fft(pc1[:, 0])
    x = fft.fftfreq(Nt, 1 / 12.)  # cycles per year

    # lowpass filter at 0.1 cycles per year
    i = abs(x) <= .1
    fp_fil = fp * i
    #plt.plot(x, abs(fp))
    #plt.plot(x, abs(fp_fil))
    pfil = fft.ifft(fp_fil)
    pc1_fil = pc1[:, 0] * 0 + np.real(pfil)
    #plt.plot(pc1_fil)
    #print(pc1)
    print(pc1_fil)
    tmp = np.imag(pfil)
    print(tmp.max())  # fix: max is a method, so call it

    # correlate with the SST field
    astd, bstd, abstd, bhat = regresst(pc1_fil, dat2)
    r = abstd / (astd * bstd)
    slope = r * bstd
    return sigs1, eof1, slope, pc1, pc1_fil, x, fp_fil
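# `regresst` is not defined in this snippet. A hypothetical helper that is
# consistent with how its outputs are used above (r = abstd / (astd * bstd)
# as a correlation map and slope = r * bstd as a regression map) could look
# like this; it assumes xarray inputs sharing a 'time' dimension.
def regresst(a, b):
    """Regress index a(time) onto field b(time, ...) (hypothetical sketch)."""
    a_anom = a - a.mean('time')
    b_anom = b - b.mean('time')
    astd = a_anom.std('time')                # std of the index
    bstd = b_anom.std('time')                # std of the field at each point
    abstd = (a_anom * b_anom).mean('time')   # covariance of index and field
    bhat = abstd / astd**2                   # least-squares slope of b on a
    return astd, bstd, abstd, bhat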
def test_eof_values(shape, n_modes, weight, wrap):
    """Test values relative to the Eof package"""
    data = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim

    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180)**0.5
    elif weight == "random":
        weights = data.isel(time=0).copy()
    # guard the "none" case, where weights has no .compute()
    weight = weights.compute() if weights is not None else None

    res = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=n_modes,
        norm_PCs=False,
    )

    Eof_solver = Eof(data, weights=weights, center=False)
    ver_pcs = Eof_solver.pcs(pcscaling=0, npcs=n_modes)
    ver_eofs = Eof_solver.eofs(eofscaling=0, neofs=n_modes)
    ver_EV = Eof_solver.varianceFraction(neigs=n_modes)

    npt.assert_allclose(abs(res["pc"]), abs(ver_pcs))
    npt.assert_allclose(abs(res["eof"]), abs(ver_eofs))
    npt.assert_allclose(res["explained_var"], ver_EV)
def Decompose(self):
    '''perform EOF decomposition'''
    solver = Eof(self.rawdata)
    var_frac = solver.varianceFraction()
    cumvar = np.cumsum(var_frac.values)
    # np.where returns 0-based mode indices, so add 1 to turn the first index
    # reaching the target proportion of variance into a PC count
    self.npcs = np.where(cumvar >= self.prop_variance)[0].min() + 1
    self.pc = solver.pcs(npcs=self.npcs)  # time series of the PCs
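# A quick sanity check of the selection rule above, using a toy
# variance-fraction vector and prop_variance = 0.9 (numbers are illustrative
# only):
import numpy as np

var_frac = np.array([0.5, 0.3, 0.15, 0.05])
cumvar = np.cumsum(var_frac)                     # [0.5, 0.8, 0.95, 1.0]
npcs = int(np.where(cumvar >= 0.9)[0].min()) + 1
print(npcs)                                      # 3 PCs are needed to reach 90%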
def EOF_SST_analysis(xa, weights, neofs=1, npcs=1, fn=None):
    """ Empirical Orthogonal Function analysis of an SST(t, x, y) field """
    assert type(xa) == xr.core.dataarray.DataArray
    assert type(weights) == xr.core.dataarray.DataArray
    assert 'time' in xa.dims
    assert np.shape(xa[0, :, :]) == np.shape(weights)

    # anomalies by removing the time mean
    xa = xa - xa.mean(dim='time')

    # Retrieve the leading EOF, expressed as the covariance between the leading
    # PC time series and the input xa anomalies at each grid point.
    solver = Eof(xa, weights=weights)
    eofs = solver.eofsAsCovariance(neofs=neofs)
    pcs = solver.pcs(npcs=npcs, pcscaling=1)
    if fn is not None:
        xr.merge([eofs, pcs]).to_netcdf(fn)
    return eofs, pcs
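# A minimal usage sketch for EOF_SST_analysis above, on synthetic data; the
# grid, the random field, and the sqrt(cos(latitude)) weight construction are
# assumptions for illustration only.
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
lat = np.arange(-60., 61., 10.)
lon = np.arange(0., 360., 20.)
sst = xr.DataArray(rng.standard_normal((120, lat.size, lon.size)),
                   dims=('time', 'lat', 'lon'),
                   coords={'time': np.arange(120), 'lat': lat, 'lon': lon})
weights = xr.DataArray(np.sqrt(np.cos(np.deg2rad(lat)))[:, None] * np.ones(lon.size),
                       dims=('lat', 'lon'), coords={'lat': lat, 'lon': lon})
eofs, pcs = EOF_SST_analysis(sst, weights, neofs=2, npcs=2)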
def EOF_SST_analysis(self, xa, weights, n=1, fn=None):
    """ Empirical Orthogonal Function analysis of an SST(t, x, y) field; from `SST.py` """
    assert type(xa) == xr.core.dataarray.DataArray
    assert type(weights) == xr.core.dataarray.DataArray
    assert 'time' in xa.dims
    assert np.shape(xa[0, :, :]) == np.shape(weights)

    # anomalies by removing the time mean
    xa = xa - xa.mean(dim='time')

    # Retrieve the leading EOF, expressed as the covariance between the leading
    # PC time series and the input xa anomalies at each grid point.
    solver = Eof(xa, weights=weights)
    eofs = solver.eofsAsCovariance(neofs=n)
    pcs = solver.pcs(npcs=n, pcscaling=1)
    eigs = solver.eigenvalues(neigs=n)
    varF = solver.varianceFraction(neigs=n)
    ds = xr.merge([eofs, pcs, eigs, varF])
    if fn is not None:
        ds.to_netcdf(fn)
    return ds
def plot_pca_analysis(ds, fig_output_path, title=''):
    print(title)
    print('done load')
    nbpcs = 3

    solver = Eof(ds.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(5, 2, figsize=(20, 20))
    fig.suptitle(title, fontsize=12)

    pcas.plot.line(ax=axes[0, 0], x='time')
    pcas.resample(time='M').mean('time').plot.line(ax=axes[1, 0], x='time')
    axes[1, 0].set_title('Monthly mean')
    pcas.resample(time='Y').mean('time').plot.line(ax=axes[2, 0], x='time')
    axes[2, 0].set_title('Annual mean')
    pcas.groupby('time.month').mean('time').plot.line(ax=axes[3, 0], x='month')
    axes[3, 0].set_title('By Month')
    pcas.groupby('time.hour').mean('time').plot.line(ax=axes[4, 0], x='hour')
    axes[4, 0].set_title('By Hour')

    for pc in range(nbpcs):
        # eofs.isel(mode=pc).plot(ax=axes[pc, 1])
        eofs.to_dataframe().unstack().T.loc[:, pc].plot.bar(ax=axes[pc, 1])

    solver.varianceFraction().isel(mode=slice(0, nbpcs)).plot(ax=axes[3, 1])

    plt.tight_layout()
    fig.suptitle(title)
    plt.savefig(fig_output_path + title + '.pdf', bbox_inches='tight')
def eofunc_pcs(data,
               npcs=1,
               time_dim=0,
               pcscaling=0,
               weights=None,
               center=True,
               ddof=1,
               meta=False):
    """
    Computes the principal components (time projection) in the empirical
    orthogonal function analysis.

    Note: `eofunc_pcs` allows one to perform the analysis that was previously
    done via the NCL function `eofunc_ts`. However, there are a few changes to
    the NCL flow: (1) only `np.nan` is supported as the missing value,
    (2) EOFs are computed only from the covariance matrix and there is no
    support for computation from the correlation matrix, (3) the percentage of
    non-missing points that must exist at any single point is no longer an
    input.

    This implementation uses the `eofs` package
    (https://anaconda.org/conda-forge/eofs), which is built upon the following
    study: Dawson, Andrew, "eofs: A library for EOF analysis of meteorological,
    oceanographic, and climate data," Journal of Open Research Software,
    vol. 4, no. 1, 2016. Further information about this package can be found
    at: https://ajdawson.github.io/eofs/latest/index.html#

    This implementation provides a few conveniences on top of the `eofs`
    package that are described below in the Parameters section.

    Parameters
    ----------
    data : :class:`xarray.DataArray` or :class:`numpy.ndarray` or :class:`list`
        Should contain numbers or `np.nan` for missing value representation.
        It must be at least a 2-dimensional array.

        When the input data is of type `xarray.DataArray`, the `eofs.xarray`
        interface assumes the left-most dimension (i.e. `dim_0`) is the `time`
        dimension. In this case, that dimension should have the name "time".

        When the input data is of type `numpy.ndarray` or `list`, this function
        still assumes the leftmost dimension to be the number of observations
        or the `time` dimension; however, in this case, the user may indicate
        otherwise. If the input does not have its leftmost dimension as `time`
        or number of observations, the user should pass `time_dim=x` to define
        which dimension must be treated as time or number of observations.

    npcs :
        A scalar integer that specifies the number of principal components
        (i.e. eigenvalues and eigenvectors) to be returned. This is usually
        less than or equal to the minimum number of observations or number of
        variables.

    time_dim :
        An integer defining the time dimension if it is not the leftmost
        dimension. When the input data is of type `xarray.DataArray`, this is
        ignored (the `xarray.DataArray` is assumed to have its leftmost
        dimension named exactly 'time'). It must be between ``0`` and
        ``data.ndim - 1``, or ``-1`` indicating the last dimension.
        Defaults to 0.

        Note: The `time_dim` argument allows one to perform the EOF analysis
        that was previously done via the NCL function `eofunc_ts_n`.

    pcscaling :
        (From the `eofs` package): Sets the scaling of the retrieved PCs. The
        following values are accepted:

        - 0 : Un-scaled PCs (default).
        - 1 : PCs are divided by the square-root of their eigenvalues.
        - 2 : PCs are multiplied by the square-root of their eigenvalues.

    weights :
        (From the `eofs` package): An array of weights whose shape is
        compatible with that of the input array `dataset`. The weights can have
        the same shape as `dataset` or a shape compatible with an array
        broadcast (i.e., the shape of the weights can match the rightmost parts
        of the shape of the input array `dataset`). If the input array
        `dataset` does not require weighting then the value None may be used.
        Defaults to None (no weighting).

    center :
        (From the `eofs` package): If True, the mean along the first axis of
        `dataset` (the time-mean) will be removed prior to analysis. If False,
        the mean along the first axis will not be removed. Defaults to True
        (mean is removed). The covariance interpretation relies on the input
        data being anomaly data with a time-mean of 0. Therefore this option
        should usually be set to True. Setting this option to True has the
        useful side effect of propagating missing values along the time
        dimension, ensuring that a solution can be found even if missing
        values occur in different locations at different times.

    ddof :
        (From the `eofs` package): 'Delta degrees of freedom'. The divisor used
        to normalize the covariance matrix is N - ddof, where N is the number
        of samples. Defaults to 1.

    meta :
        If set to True and the input array is an Xarray, the metadata from the
        input array will be copied to the output array. Defaults to False.

    Returns
    -------
    pcs : :class:`xarray.DataArray`
        Principal component time series, with dimensions ``(pc, time)``.
    """
    data, solver = _generate_eofs_solver(data,
                                         time_dim=time_dim,
                                         weights=weights,
                                         center=center,
                                         ddof=ddof)

    # Checking number of EOFs
    if npcs <= 0:
        raise ValueError(
            "ERROR eofunc_pcs: num_pcs must be a positive non-zero integer value."
        )

    pcs = solver.pcs(npcs=npcs, pcscaling=pcscaling)
    pcs = pcs.transpose()

    # Populate attributes for output
    attrs = {}
    if meta:
        attrs = data.attrs

    dims = ["pc", "time"]
    if meta:
        coords = {"time": data.coords[data.dims[time_dim]]}
    else:
        coords = {}

    return xr.DataArray(pcs, attrs=attrs, dims=dims, coords=coords)
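# A minimal usage sketch for eofunc_pcs (the data are an assumption for
# illustration): with a plain numpy input, the leftmost dimension is treated
# as time by default.
import numpy as np

rng = np.random.default_rng(42)
field = rng.standard_normal((24, 10, 20))  # 24 time steps on a 10 x 20 grid
pcs = eofunc_pcs(field, npcs=3)            # DataArray with dims ("pc", "time")
print(pcs.shape)                           # (3, 24)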
def main(args):
    # environmental constants
    if platform.system() == 'Windows':
        in_dir = '../examples/'
        out_dir = '../regressors/'
        reg_dir = '../regressors/'  # ${in_dir}'regresory_2013/'
        nc_gen = True
        pdf_gen = False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)

    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)

    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev = ds.coords[lev_name] / 100.
        ds[lev_name] = lev
    else:
        lev = ds.coords[lev_name]

    n = ds.coords['time'].shape[0]

    # it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except Exception:
        nlon = 1

    # zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]

    # equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev, 10, 50),
                lat_name: fce.coord_Between(lat, -10, 10)}
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)

    # period selection
    times = pd.date_range(str(s_year) + '-01-01', str(e_year) + '-12-31',
                          name='time', freq='M')
    zm_u_sel = zm_u.sel(time=times, method='ffill')  # nearest

    # remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    # additional constants
    npca = 30
    norm = 2   # 5
    norms = 3  # 5
    what_sp = ''  # which solar proxy?

    print("regressors' opening")
    global reg  # , reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm,
                                                     'no_qbo', i_year, s_year,
                                                     e_year, reg_dir)
    nr = reg.shape[1]

    # extraction of other variability by MLR
    stacked = anomalies.stack(allpoints=[lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times

    # EOF analysis
    solver = Eof(resids.T, weights=None)
    #coslat = np.cos(np.deg2rad(lat)).clip(0., 1.)
    #wgts = np.sqrt(coslat)[np.newaxis, ...]

    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance, ' % based on ', i, ' components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)
    subdir = './'

    if pdf_gen:
        fig = plt.figure(figsize=(11, 8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca) + ' PCAs cover ' +
                      str(np.round(total_variance * 100, 2)) +
                      '% of total variance')
        for i in range(npca):  # fix: xrange is Python 2 only
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))
        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc='best')
        plt.savefig(reg_dir + 'qbo_' + what_re + '_pcas.pdf',
                    bbox_inches='tight')
        plt.close(fig)

    if nc_gen:
        # save to netcdf
        for i in range(npca):
            pcs_ds = pcs[:, i].to_dataset(name='index')
            pcs_ds.to_netcdf(reg_dir + r'qbo_' + what_re + '_pc' +
                             str(i + 1) + pripona_nc)
    If *True*, the mean along the first axis of *dataset* (the time-mean)
    will be removed prior to analysis. If *False*, the mean along the first
    axis will not be removed. Defaults to *True* (mean is removed). The
    covariance interpretation relies on the input data being anomaly data
    with a time-mean of 0. Therefore this option should usually be set to
    *True*. Setting this option to *True* has the useful side effect of
    propagating missing values along the time dimension, ensuring that a
    solution can be found even if missing values occur in different
    locations at different times.
    '''
    lambdas = solver.eigenvalues()
    vf = solver.varianceFraction()
    Nerror = solver.northTest(vfscaled=True)
    pcs = solver.pcs()  # (time, mode)
    eofs = solver.eofsAsCovariance()
    '''
    plt.figure()
    plt.subplot(3, 2, 1)
    pcs[:, 0].plot()  # color='b', linewidth=2
    ax = plt.gca()
    ax.axhline(0, color='k')
    ax.set_xlabel('Year')
    ax.set_ylabel('PC1 amplitude')
    plt.grid()

    plt.subplot(3, 2, 2)
    pcs[:, 1].plot()
    ax = plt.gca()
    ax.axhline(0, color='k')
    ax.set_xlabel('Year')
def eof_orca_latlon_box(run, var, modes, lon_bnds, lat_bnds, pathfile, plot,
                        time, eoftype):

    if (var == 'temp'):
        key = 'votemper'
        key1 = "votemper"
    elif (var == 'sal'):
        key = 'vosaline'
        key1 = "vosaline"
    elif (var == 'MLD'):
        key = 'somxl010'
        key1 = "somxl010"

    # read data
    ds = xr.open_dataset(pathfile)
    #ds["time_counter"] = ds['time_counter'] + (np.datetime64('0002-01-01') - np.datetime64('0001-01-01'))
    if time == 'comparison':
        ds = ds.sel(time_counter=slice('1958-01-01', '2006-12-31'))

    # cut box for EOF at surface
    if var == 'MLD':
        data = ds[key].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                           lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,:,:], ds.lon, ds.lat, lon_bnds, lat_bnds)
    else:
        data = ds[key][:, 0, :, :].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                                       lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,0,:,:], ds.lon, ds.lat, lon_bnds, lat_bnds)
    data = data.to_dataset()

    # detrend data
    data[key1] = (['time_counter', 'lat', 'lon'],
                  signal.detrend(data[key].fillna(0), axis=0, type='linear'))
    #data = data.where(data != 0)

    # remove seasonal cycle and drop unnecessary coordinates
    if 'time_centered' in list(data.coords):
        data = deseason_month(data).drop('month').drop('time_centered')  # somehow pca doesn't work otherwise
    else:
        data = deseason_month(data).drop('month')  # somehow pca doesn't work otherwise

    # set 0 values back to nan
    data = data.where(data != 0)

    # EOF analysis
    # Square-root of cosine of latitude weights are applied before the computation of EOFs.
    coslat = np.cos(np.deg2rad(data['lat'].values))
    coslat, _ = np.meshgrid(coslat, np.arange(0, len(data['lon'])))
    wgts = np.sqrt(coslat)
    solver = Eof(data[key], weights=wgts.transpose())
    pcs = solver.pcs(npcs=modes, pcscaling=1)
    if eoftype == 'correlation':
        eof = solver.eofsAsCorrelation(neofs=modes)
    elif eoftype == 'covariance':
        eof = solver.eofsAsCovariance(neofs=modes)
    else:
        eof = solver.eofs(neofs=modes)
    varfr = solver.varianceFraction(neigs=4)
    print(varfr)

    # ----------- Plotting --------------------
    plt.close("all")
    if plot == 1:
        for i in np.arange(0, modes):
            fig = plt.figure(figsize=(8, 2))
            ax1 = fig.add_axes([0.1, 0.1, 0.3, 0.9],
                               projection=ccrs.PlateCarree())  # main axes
            ax1.set_extent((lon_bnds[0], lon_bnds[1], lat_bnds[0], lat_bnds[1]))
            # discrete colormap
            cmap = plt.get_cmap('RdYlBu', len(np.arange(10, 30)) - 1)  # inferno similar to cmo thermal
            eof[i, :, :].plot(ax=ax1,
                              cbar_kwargs={'label': 'Correlation'},
                              transform=ccrs.PlateCarree(),
                              x='lon', y='lat',
                              add_colorbar=True, cmap=cmap)
            gl = map_stuff(ax1)
            gl.xlocator = mticker.FixedLocator([100, 110, 120])
            gl.ylocator = mticker.FixedLocator(np.arange(-35, -10, 5))
            plt.text(116, -24, str(np.round(varfr[i].values, decimals=2)),
                     horizontalalignment='center', verticalalignment='center',
                     transform=ccrs.PlateCarree(), fontsize=8)

            ax2 = fig.add_axes([0.5, 0.1, 0.55, 0.9])  # main axes
            plt.plot(pcs.time_counter, pcs[:, i].values, linewidth=0.1,
                     color='k')
            anomaly(ax2, pcs.time_counter.values, pcs.values[:, i], [0, 0])
            ax2.set_xlim([pcs.time_counter[0].values,
                          pcs.time_counter[-1].values])

            plt.savefig(pathplots + 'eof_as' + eoftype + '_mode' + str(i) +
                        '_' + time + '_' + run + '_' + var + '.png',
                        dpi=300, bbox_inches='tight', pad_inches=0.1)
            plt.show()
    # ----------------------------------------------

    return pcs, eof, varfr
         str(int(solver_along.varianceFraction()[0].values * 100)) +
         '% of variance')
    plot(squeeze(solver_along.eofs()[1, :]), -dat.depth[::skipnum],
         label='mode 2: ' +
         str(int(solver_along.varianceFraction()[1].values * 100)) +
         '% of variance')
    gca().set_yticklabels('')
    xlabel('across-stream velocity [m/s]')
    xlim([-1, 1])
    legend(loc=(0.2, 1.01))
    suptitle('CF' + str(ii + 1))
    savefig('../figures/VertModes/eof_cf' + str(ii + 1) + '.png')

    figure(figsize=(12, 4))
    solver_across.pcs().sel(mode=0).plot(label='along-stream')
    solver_along.pcs().sel(mode=0).plot(label='across-stream')
    legend()
    title('CF' + str(ii + 1) + ' first mode time series')
    savefig('../figures/VertModes/eof_1804/eof_cf' + str(ii + 1) +
            '_tseries.png')

    # figure(figsize=(12, 4))
    # solver.pcs().sel(mode=1).plot()
    # title('CF' + str(ii + 1) + ' second mode time series')

############################################################################
## Fit exponential to mean density and see what the solution looks like!
############################################################################
        valid = ~np.isnan(sst_obs[:, la, lo])
        if (valid.any() == True):
            sst_obs[:, la, lo] = signal.detrend(sst_obs[:, la, lo], axis=0,
                                                type='linear')
        elif (valid.all() == False):
            sst_obs[:, la, lo] = np.nan
'''

# EOF for the model
coslat_mdl = np.cos(np.deg2rad(sst_mdl.coords['lat'].values))
wgts_mdl = np.sqrt(coslat_mdl)[..., np.newaxis]
solver_mdl = Eof(sst_mdl, weights=wgts_mdl, center=True)
lambdas_mdl = solver_mdl.eigenvalues()
vf_mdl = solver_mdl.varianceFraction()
Nerror_mdl = solver_mdl.northTest(vfscaled=True)
pcs_mdl = solver_mdl.pcs()    # (time, mode)
eofs_mdl = solver_mdl.eofs()

# EOF for the observations
coslat_obs = np.cos(np.deg2rad(sst_obs.coords['lat'].values))
wgts_obs = np.sqrt(coslat_obs)[..., np.newaxis]
solver_obs = Eof(sst_obs, weights=wgts_obs, center=True)
lambdas_obs = solver_obs.eigenvalues()
vf_obs = solver_obs.varianceFraction()
Nerror_obs = solver_obs.northTest(vfscaled=True)
pcs_obs = solver_obs.pcs()    # (time, mode)
eofs_obs = solver_obs.eofs()

## plotting
    ds_spring_sib2 = ds_spring_sib2.sel(spring=idx)
    ds_spring = ds_spring.sel(spring=idx)

    plt.scatter(ds_spring_sib2.min('time').to_dataframe(),
                ds_spring.min('time').to_dataframe())

    #######################
    # PCA Analysis
    #######################
    nbpcs = 3

    solver_sib2 = Eof(ds_spring_sib2.dropna(dim="time", how="all"))
    pcas_sib2 = solver_sib2.pcs(npcs=nbpcs, pcscaling=1)
    eofs_sib2 = solver_sib2.eofs(neofs=nbpcs, eofscaling=1)

    solver = Eof(ds_spring.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(3, 4, figsize=(20, 20))

    pcas.to_dataframe().unstack().plot(ax=axes[0, 0])
    pcas_sib2.to_dataframe().unstack().plot(ax=axes[0, 1])

    df_eofs = eofs.to_dataframe().unstack().T
    df_eofs_sib2 = eofs_sib2.to_dataframe().unstack().T
    df_eofs.index = df_eofs.index.levels[1]
    df_eofs_sib2.index = df_eofs_sib2.index.levels[1]
(2) Compare to see differences
'''

start5 = time.time()
os.chdir("/home/ubuntu")

lon = 180
lat = 90
dim = lon * lat
months = 24
data = np.resize(x1, [dim, months])

solver = Eof(xr.DataArray(anomalies.data, dims=['time', 'lat', 'lon']))
pcs = solver.pcs(npcs=3, pcscaling=1)
eofs = solver.eofs(neofs=5, eofscaling=1)
variance_fractions = solver.varianceFraction()
variance_fractions = solver.varianceFraction(neigs=3)
print(variance_fractions)

# note: eofs[0] and eofs[1] are EOF spatial patterns, although the output
# files are named after the PCs
myFile1 = open('PC1.csv', 'w')
with myFile1:
    writer = csv.writer(myFile1)
    writer.writerows(eofs[0, :, :].data)

myFile2 = open('PC2.csv', 'w')
with myFile2:
    writer = csv.writer(myFile2)
    writer.writerows(eofs[1, :, :].data)
import xarray as xr
from eofs.xarray import Eof
import datetime
import os

# Read preprocessed data.
DATA_FILE = "/LFASGI/sandroal/data_sets/GIMMS/ppdata_ndvi.nc"
DS = xr.open_dataset(DATA_FILE)

# Create an EOF solver to do the EOF analysis. Memory intensive operation.
solver = Eof(DS.ndvi)

# Retrieve EOFs, principal component time series, fraction of explained
# variance, and eigenvalues as xarray DataArray objects for all modes.
EOFs = solver.eofs()
PCs = solver.pcs()
FRACs = solver.varianceFraction()
EIGs = solver.eigenvalues()

# Attributes for xarray DataSet objects.
attrs = {}
attrs["Description"] = "Empirical orthogonal functions to NDVI (GIMMS) " + \
                       "in its original temporal and spatial resolutions"
attrs["Build"] = "By Alex Araujo"
attrs["Date"] = datetime.datetime.now().strftime("%B %d, %Y; %Hh:%Mmin:%Ss")
attrs["Source"] = os.path.abspath(__file__)

# Set these attributes to results. Must transform from xarray DataArray to
# DataSets before exporting results as netcdf files.
DAs = [EOFs, PCs, FRACs, EIGs]
names = ["eofs", "pcs", "fracs", "eigs"]
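# The snippet stops before the files are actually written. A minimal
# completion, assuming each DataArray goes to its own netCDF file with the
# shared attributes (the output file names are hypothetical):
for da, name in zip(DAs, names):
    ds_out = da.to_dataset(name=name)
    ds_out.attrs = attrs
    ds_out.to_netcdf("ndvi_eof_" + name + ".nc")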
# Read SST anomalies using the xarray module. The file contains November-March
# averages of SST anomaly in the central and northern Pacific.
filename = example_data_path('sst_ndjfm_anom.nc')
sst = xr.open_dataset(filename)['sst']

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(sst.coords['latitude'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = eof1[0].plot.contourf(ax=ax, levels=clevs, cmap=plt.cm.RdBu_r,
                             add_colorbar=False, transform=ccrs.PlateCarree())
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
ax.set_title('EOF1 expressed as correlation', fontsize=16)

# Plot the leading PC time series.
plt.figure()
pc1[:, 0].plot(color='b', linewidth=2)
ax = plt.gca()
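# The snippet is truncated here; the published eofs example finishes the PC
# panel roughly along these lines (a sketch, not the verbatim original):
ax.axhline(0, color='k')
ax.set_ylim(-3, 3)
ax.set_xlabel('Year')
ax.set_ylabel('Normalized Units')
ax.set_title('PC1 Time Series', fontsize=16)
plt.show()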
sst = sst[:, :-1, :-1]
tendsst = tendsst[:, :-1, :-1]
tot_area = np.sum(areas)
weights = areas / tot_area

# Calculate EOFs for SST, Qs and Qo. Should weight by area?
solver = Eof(sst, weights=weights)
sst_eof = solver.eofs(neofs=3, eofscaling=2)
sst_eof_varfracs = solver.varianceFraction()

solver = Eof(Qr, weights=weights)
Qo_eof = solver.eofs(neofs=3, eofscaling=2)
Qo_pc = solver.pcs(npcs=3, pcscaling=2)
Qo_eof_varfracs = solver.varianceFraction()
Qo_rec = solver.reconstructedField(5)
Qo_rec_var = Qo_rec.var(dim='time')

# get projection (pseudo-PCs) associated with Qo EOFs
# Qo_eof_projsst = solver.projectField(sst, neofs=3)
# Qo_eof_projQo = solver.projectField(Qr, neofs=3)
# Qo_eof_projsst_pcs = Qo_eof_projsst[:, 0]
# Qo_eof_projQo_pcs = Qo_eof_projQo[:, 0]
# standardize the pseudo-PCs