def Decompose(self):
    '''Perform EOF decomposition.'''
    solver = Eof(self.rawdata)
    var_frac = solver.varianceFraction()
    cumvar = np.cumsum(var_frac.values)
    # index of the first mode at which the cumulative variance reaches the
    # target proportion; +1 converts the 0-based index into a mode count
    self.npcs = np.where(cumvar >= self.prop_variance)[0].min() + 1
    self.pc = solver.pcs(npcs=self.npcs)  # time series of the PCs
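# A minimal, self-contained sketch of the same cumulative-variance mode
# selection outside the class. The random field, the 0.90 target, and all
# variable names below are illustrative assumptions, not part of the original.
import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof

rng = np.random.default_rng(0)
field = xr.DataArray(
    rng.standard_normal((120, 10, 20)),
    dims=['time', 'lat', 'lon'],
    coords={'time': pd.date_range('2000-01', periods=120, freq='MS')})

solver = Eof(field)
cumvar = np.cumsum(solver.varianceFraction().values)
prop_variance = 0.90                                  # assumed target
npcs = int(np.where(cumvar >= prop_variance)[0].min()) + 1
pcs = solver.pcs(npcs=npcs)                           # (time, mode)
print(f'{npcs} modes reach {cumvar[npcs - 1]:.1%} of the variance')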
def test_eof_values(shape, n_modes, weight, wrap):
    """Test values relative to the eofs package."""
    data = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim
    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180) ** 0.5
    elif weight == "random":
        weights = data.isel(time=0).copy()
    # guard against calling .compute() on None when no weighting is requested
    weight = weights.compute() if weights is not None else None

    res = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=n_modes,
        norm_PCs=False,
    )

    Eof_solver = Eof(data, weights=weights, center=False)
    ver_pcs = Eof_solver.pcs(pcscaling=0, npcs=n_modes)
    ver_eofs = Eof_solver.eofs(eofscaling=0, neofs=n_modes)
    ver_EV = Eof_solver.varianceFraction(neigs=n_modes)

    # EOF/PC signs are arbitrary, so compare absolute values
    npt.assert_allclose(abs(res["pc"]), abs(ver_pcs))
    npt.assert_allclose(abs(res["eof"]), abs(ver_eofs))
    npt.assert_allclose(res["explained_var"], ver_EV)
def processInputCrossSection(self, request: TaskRequest, node: OpNode,
                             inputDset: EDASDataset) -> EDASDataset:
    nModes = int(node.getParm("modes", 16))
    # bool("false") is True, so parse the string value explicitly
    center = str(node.getParm("center", "false")).lower() == "true"
    merged_input_data, info = self.get_input_array(inputDset)
    shapes = info['shapes']
    slicers = info['slicers']
    solver = Eof(merged_input_data, center=center)
    results = []
    for iMode, eofs_result in enumerate(solver.eofs(neofs=nModes)):
        for iVar, eofs_data in enumerate(self.getResults(eofs_result, slicers, shapes)):
            inp = inputDset.inputs[iVar]  # avoid shadowing the builtin 'input'
            results.append(EDASArray("-".join(["eof", str(iMode), inp.name]),
                                     inp.domId, eofs_data))
    pcs_result = solver.pcs(npcs=nModes)
    pcs = EDASArray("pcs[" + inputDset.id + "]", inputDset.inputs[0].domId,
                    EDASArray.cleanupCoords(pcs_result, {"mode": "m", "pc": "m"}).transpose())
    results.append(pcs)
    fracs = solver.varianceFraction(neigs=nModes)
    pves = [str(round(float(frac * 100.), 1)) + '%' for frac in fracs]
    for result in results:
        result["pves"] = str(pves)
    return EDASDataset.init(self.renameResults(results, node), inputDset.attrs)
def get_eofs(x):
    import numpy as np
    import xarray
    from eofs.xarray import Eof
    from matplotlib import pyplot as plt
    from eddof import get_eddof

    # square-root-of-cos(lat) weights
    coslat = np.cos(np.deg2rad(x.lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    # effective degrees of freedom at each grid point, averaged over the grid
    DF = np.empty(np.shape(x[0, :, :]))
    for i in range(0, len(x.lat)):
        for j in range(0, len(x.lon)):
            DF[i, j] = get_eddof(x[:, i, j].values)
    edof = np.mean(DF)
    print(edof)

    solver = Eof(x, weights=wgts, ddof=edof)
    var = solver.varianceFraction()
    plt.figure(1)
    plt.bar(np.arange(0, len(var), 1), var * 100)
    plt.show()
    plt.close()

    n = int(input('How many PCs to extract: '))  # input() returns a string
    eof = solver.eofs(neofs=n, eofscaling=2)
    pc = solver.pcs(npcs=n, pcscaling=1)
    vf = var[:n]
    EOFs = [eof, pc, vf]
    return EOFs
def plot_pca_analysis(ds, fig_output_path, title=''):
    print(title)
    # var = "T"
    print('done load')
    nbpcs = 3

    solver = Eof(ds.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(5, 2, figsize=(20, 20))
    fig.suptitle(title, fontsize=12)

    pcas.plot.line(ax=axes[0, 0], x='time')
    pcas.resample(time='M').mean('time').plot.line(ax=axes[1, 0], x='time')
    axes[1, 0].set_title('Monthly mean')
    pcas.resample(time='Y').mean('time').plot.line(ax=axes[2, 0], x='time')
    axes[2, 0].set_title('Annual mean')
    pcas.groupby('time.month').mean('time').plot.line(ax=axes[3, 0], x='month')
    axes[3, 0].set_title('By Month')
    pcas.groupby('time.hour').mean('time').plot.line(ax=axes[4, 0], x='hour')
    axes[4, 0].set_title('By Hour')

    for pc in range(nbpcs):
        # eofs.isel(mode=pc).plot(ax=axes[pc, 1])
        eofs.to_dataframe().unstack().T.loc[:, pc].plot.bar(ax=axes[pc, 1])

    solver.varianceFraction().isel(mode=slice(0, nbpcs)).plot(ax=axes[3, 1])

    plt.tight_layout()
    plt.savefig(fig_output_path + title + '.pdf', bbox_inches='tight')
def EOF_SST_analysis(self, xa, weights, n=1, fn=None):
    """Empirical Orthogonal Function analysis of the SST(t,x,y) field; from `SST.py`."""
    assert type(xa) == xr.core.dataarray.DataArray
    assert type(weights) == xr.core.dataarray.DataArray
    assert 'time' in xa.dims
    assert np.shape(xa[0, :, :]) == np.shape(weights)

    # anomalies by removing the time mean
    xa = xa - xa.mean(dim='time')

    # Retrieve the leading EOF, expressed as the covariance between the leading
    # PC time series and the input xa anomalies at each grid point.
    solver = Eof(xa, weights=weights)
    eofs = solver.eofsAsCovariance(neofs=n)
    pcs = solver.pcs(npcs=n, pcscaling=1)
    eigs = solver.eigenvalues(neigs=n)
    varF = solver.varianceFraction(neigs=n)

    ds = xr.merge([eofs, pcs, eigs, varF])
    if fn is not None:
        ds.to_netcdf(fn)
    return ds
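# A minimal standalone sketch of the same workflow outside the class, with an
# illustrative random SST field and sqrt(cos(lat)) weights. All data, names,
# and the output file below are assumptions for demonstration only.
import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof

rng = np.random.default_rng(1)
sst = xr.DataArray(
    rng.standard_normal((240, 30, 60)),
    dims=['time', 'lat', 'lon'],
    coords={'time': pd.date_range('2000-01', periods=240, freq='MS'),
            'lat': np.linspace(-60, 60, 30)})
# broadcast the 1-D latitude weights to the full (lat, lon) grid
weights = np.sqrt(np.cos(np.deg2rad(sst.lat))) * xr.ones_like(sst.isel(time=0))

anomalies = sst - sst.mean(dim='time')
solver = Eof(anomalies, weights=weights)
ds = xr.merge([solver.eofsAsCovariance(neofs=2),
               solver.pcs(npcs=2, pcscaling=1),
               solver.eigenvalues(neigs=2),
               solver.varianceFraction(neigs=2)])
ds.to_netcdf('sst_eof.nc')  # hypothetical output path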
def reof(stack: xr.DataArray, variance_threshold: float = 0.727, n_modes: int = 4) -> xr.Dataset:
    """Perform rotated empirical orthogonal function (REOF) analysis on a spatial timeseries

    args:
        stack (xr.DataArray): DataArray of spatial temporal values with coord order of (t,y,x)
        variance_threshold (float, optional): fallback threshold used to select the number
            of eof modes. Only used if n_modes is less than 1. default = 0.727
        n_modes (int, optional): number of eof modes to use. default = 4

    returns:
        xr.Dataset: rotated eof dataset with spatial modes, temporal modes,
            and mean values as variables
    """
    # extract out some dimension shape information
    shape3d = stack.shape
    spatial_shape = shape3d[1:]
    shape2d = (shape3d[0], np.prod(spatial_shape))

    # flatten the data from [t,y,x] to [t,space]
    da_flat = xr.DataArray(
        stack.values.reshape(shape2d),
        coords=[stack.time, np.arange(shape2d[1])],
        dims=['time', 'space']
    )
    # logger.debug(da_flat)

    # find the temporal mean for each pixel
    center = da_flat.mean(dim='time')
    centered = da_flat - center

    # get an eof solver object
    # explicitly set center to False since the data are already centered
    solver = Eof(centered, center=False)

    # check if the n_modes keyword is set to a realistic value;
    # if not, get n_modes based on the variance explained
    if n_modes < 1:
        n_modes = int((solver.varianceFraction().cumsum() < variance_threshold).sum())

    # calculate the spatial eof values
    eof_components = solver.eofs(neofs=n_modes).transpose()

    # get the indices where the eof is valid data
    non_masked_idx = np.where(np.logical_not(np.isnan(eof_components[:, 0])))[0]

    # create a "blank" array to set rotated values to
    rotated = eof_components.values.copy()

    # # waiting for release of sklearn version >= 0.24
    # # until then use a placeholder function to do the rotation
    # fa = FactorAnalysis(n_components=n_modes, rotation="varimax")
    # rotated[non_masked_idx, :] = fa.fit_transform(eof_components[non_masked_idx, :])

    # apply varimax rotation to eof components
    # placeholder function until sklearn version >= 0.24
    rotated[non_masked_idx, :] = _ortho_rotation(eof_components[non_masked_idx, :])

    # project the original time series data onto the rotated eofs
    projected_pcs = np.dot(centered[:, non_masked_idx], rotated[non_masked_idx, :])

    # reshape the rotated eofs to a 3d array of [y,x,mode]
    spatial_rotated = rotated.reshape(spatial_shape + (n_modes,))

    # structure the spatial and temporal reof components in a Dataset
    reof_ds = xr.Dataset(
        {
            "spatial_modes": (["lat", "lon", "mode"], spatial_rotated),
            "temporal_modes": (["time", "mode"], projected_pcs),
            "center": (["lat", "lon"], center.values.reshape(spatial_shape))
        },
        coords={
            "lon": (["lon"], stack.lon),
            "lat": (["lat"], stack.lat),
            "time": stack.time,
            "mode": np.arange(n_modes) + 1
        }
    )
    return reof_ds
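# The function above calls an _ortho_rotation placeholder. Below is a minimal
# varimax sketch of such a helper, following the standard iterative-SVD
# formulation; the signature and defaults are assumptions, not the project's
# actual helper.
import numpy as np

def _ortho_rotation(components, tol=1e-6, max_iter=100):
    """Varimax-rotate a (n_points, n_modes) loading matrix."""
    components = np.asarray(components)  # accept DataArray or ndarray
    n, k = components.shape
    rotation = np.eye(k)
    last_objective = 0.0
    for _ in range(max_iter):
        rotated = components @ rotation
        # gradient of the varimax criterion
        tmp = rotated * ((rotated ** 2).sum(axis=0) / n)
        u, s, vt = np.linalg.svd(components.T @ (rotated ** 3 - tmp))
        rotation = u @ vt
        objective = s.sum()
        # stop once the criterion no longer improves beyond the tolerance
        if last_objective != 0 and objective < last_objective * (1 + tol):
            break
        last_objective = objective
    return components @ rotation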
for ii in range(3, 8):
    if ii < 4:
        skipnum = 10
    else:
        skipnum = 50
    solver_across = Eof(dat['across track velocity'][:, ::skipnum, :].T[:, :, ii])
    solver_along = Eof(dat['along track velocity'][:, ::skipnum, :].T[:, :, ii])

    figure(figsize=(10, 5))
    subplot(121)
    plot(squeeze(solver_across.eofs(neofs=1)), -dat.depth[::skipnum],
         label='mode 1: ' + str(int(solver_across.varianceFraction()[0].values * 100)) + '% of variance')
    plot(squeeze(solver_across.eofs()[1, :]), -dat.depth[::skipnum],
         label='mode 2: ' + str(int(solver_across.varianceFraction()[1].values * 100)) + '% of variance')
    ylabel('depth [m]')
    xlabel('along-stream velocity [m/s]')
    xlim([-1, 1])
    legend(loc=(0.2, 1.01))

    subplot(122)
    plot(squeeze(solver_along.eofs(neofs=1)), -dat.depth[::skipnum],
         label='mode 1: ' + str(int(solver_along.varianceFraction()[0].values * 100)) + '% of variance')
def main(args):
    # environmental constants
    if platform.system() == 'Windows':
        in_dir = '../examples/'
        out_dir = '../regressors/'
        reg_dir = '../regressors/'  # ${in_dir}'regresory_2013/'
        nc_gen = True
        pdf_gen = False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        # out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)

    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)

    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev = ds.coords[lev_name] / 100.
        ds[lev_name] = lev
    else:
        lev = ds.coords[lev_name]

    n = ds.coords['time'].shape[0]

    # it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except:
        nlon = 1

    # zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]

    # equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev, 10, 50),
                lat_name: fce.coord_Between(lat, -10, 10)}
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)

    # period selection
    times = pd.date_range(str(s_year) + '-01-01', str(e_year) + '-12-31',
                          name='time', freq='M')
    zm_u_sel = zm_u.sel(time=times, method='ffill')  # nearest

    # remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    # additional constants
    npca = 30
    norm = 2  # 5
    norms = 3  # 5
    what_sp = ''  # which solar proxy?

    print("opening regressors")
    global reg  # , reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm,
                                                     'no_qbo', i_year, s_year,
                                                     e_year, reg_dir)
    nr = reg.shape[1]

    # extraction of other variability by MLR
    stacked = anomalies.stack(allpoints=[lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times

    # EOF analysis
    solver = Eof(resids.T, weights=None)
    # coslat = np.cos(np.deg2rad(lat)).clip(0., 1.)
    # wgts = np.sqrt(coslat)[np.newaxis, ...]
    # choose the smallest number of components explaining 95% of the variance
    for i in range(1, npca):
        var_eofs = solver.varianceFraction(neigs=i)
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance, ' % based on ', i, ' components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)
    subdir = './'

    if pdf_gen:
        fig = plt.figure(figsize=(11, 8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca) + ' PCAs cover ' +
                      str(np.round(total_variance * 100, 2)) +
                      '% of total variance')
        for i in range(npca):  # xrange is Python 2 only
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))
        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc='best')
        plt.savefig(reg_dir + 'qbo_' + what_re + '_pcas.pdf', bbox_inches='tight')
        plt.close(fig)

    if nc_gen:
        # save each PC to netcdf; 'pripona_nc' (file suffix) is defined elsewhere
        for i in range(npca):
            pcs_ds = pcs[:, i].to_dataset(name='index')
            pcs_ds.to_netcdf(reg_dir + r'qbo_' + what_re + '_pc' + str(i + 1) + pripona_nc)
print(chlorophyll.dims)
surfchl = chlorophyll[:, 14, :, :]
print(xr.DataArray(surfchl).shape)
chl_mean = surfchl.mean(dim='time')
print(xr.DataArray(chl_mean).shape)
anomaly = surfchl - chl_mean
print(xr.DataArray(anomaly).shape)

solver = Eof(xr.DataArray(anomaly))
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

plt.pcolormesh(lon, lat, eof1[0], cmap=plt.cm.RdBu_r)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('EOF1 expressed as Correlation')
cbar = plt.colorbar()
cbar.set_label('Correlation Coefficient', rotation=270)
plt.show()

plt.plot(timearray, pc1[:, 0])
plt.xlabel('Year')
plt.ylabel('Normalized Units')
plt.title('PC1 Time Series')
plt.show()

vF1 = solver.varianceFraction(neigs=6)
percentarray = vF1 * 100
array1 = [1, 2, 3, 4, 5, 6]
plt.bar(array1, percentarray)
plt.title('Scree Plot')
plt.xlabel('Mode')
plt.ylabel('Percent of Variance Explained')
plt.show()
# --- read netcdf file
dset = xr.open_dataset('asstdt_pacific.nc')

# --- select djf months
sst = dset['sst'].sel(time=np.in1d(dset['time.month'], [1, 2, 12]))

# --- square-root of cosine of latitude weights
coslat = np.cos(np.deg2rad(sst.coords['lat'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]

# --- eof solver
solver = Eof(sst, weights=wgts)

# --- eof results
eofs = solver.eofsAsCorrelation(neofs=2)
pcs = solver.pcs(npcs=2, pcscaling=1)
variance_fractions = solver.varianceFraction()
north_test = solver.northTest(vfscaled=True)

# --- spatial patterns
fig, ax = plot.subplots(axwidth=5, nrows=2, tight=True,
                        proj='pcarree', proj_kw={'lon_0': 180})

# --- format options
ax.format(land=False, coast=True, innerborders=True, borders=True,
          large='15px', labels=False, latlim=(31, -31), lonlim=(119, 291),
          geogridlinewidth=0, abcloc='ul')

# a) first EOF mode
map1 = ax[0].contourf(dset['lon'], dset['lat'], eofs[0, :, :],
                      levels=np.arange(-0.5, 0.6, 0.1), cmap='Div',
                      extend='both')
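# The north_test values above are computed but never applied. Below is a
# minimal sketch of the North et al. (1982) rule of thumb using the variables
# from the snippet: a mode is treated as separable from its neighbor when the
# gap between their variance fractions exceeds the mode's sampling error.
for k in range(3):
    gap = float(variance_fractions[k] - variance_fractions[k + 1])
    err = float(north_test[k])
    print(f"mode {k + 1}: gap = {gap:.4f}, error = {err:.4f}, "
          f"separable = {gap > err}")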
# center=True removes the time mean before analysis; center=False leaves it in
'''
If *True*, the mean along the first axis of *dataset* (the time-mean) will be
removed prior to analysis. If *False*, the mean along the first axis will not
be removed. Defaults to *True* (mean is removed).

The covariance interpretation relies on the input data being anomaly data
with a time-mean of 0. Therefore this option should usually be set to *True*.
Setting this option to *True* has the useful side effect of propagating
missing values along the time dimension, ensuring that a solution can be
found even if missing values occur in different locations at different times.
'''
lambdas = solver.eigenvalues()
vf = solver.varianceFraction()
Nerror = solver.northTest(vfscaled=True)
pcs = solver.pcs()  # (time, mode)
eofs = solver.eofsAsCovariance()

'''
plt.figure()
plt.subplot(3, 2, 1)
pcs[:, 0].plot()  # color='b', linewidth=2
ax = plt.gca()
ax.axhline(0, color='k')
ax.set_xlabel('Year')
ax.set_ylabel('PC1 amplitude')
plt.grid()

plt.subplot(3, 2, 2)
pcs[:, 1].plot()
ax = plt.gca()
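# A small sketch illustrating the quoted center behavior: center=True on raw
# data matches center=False on precomputed anomalies. The random field below
# is an illustrative assumption.
import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof

rng = np.random.default_rng(2)
raw = xr.DataArray(
    rng.standard_normal((60, 8, 12)) + 5.0,  # nonzero time mean
    dims=['time', 'lat', 'lon'],
    coords={'time': pd.date_range('2000-01', periods=60, freq='MS')})
anom = raw - raw.mean(dim='time')

vf_centered = Eof(raw, center=True).varianceFraction()
vf_anomaly = Eof(anom, center=False).varianceFraction()
# internal centering is equivalent to removing the time mean beforehand
np.testing.assert_allclose(vf_centered.values, vf_anomaly.values, rtol=1e-6)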
psl = psl.sel(time=slice(start, end))
psl_obs = xr.open_dataset('processed_data/remap-woa09_psl_Amon_ERA-Int.nc')['psl']  # units Pa
psl_obs = psl_obs.sel(time=slice(start, end))

psl_sof20s = psl.sel(lat=slice(-90, -20))
psl_sof20s = psl_sof20s - psl_sof20s.mean(dim='time')
coslat = np.cos(np.deg2rad(psl_sof20s.coords['lat'].values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(psl_sof20s, weights=wgts)
sh_eof = solver.eofsAsCorrelation(neofs=1)
var_s = solver.varianceFraction(neigs=1)

psl_sof20s_obs = psl_obs.sel(lat=slice(-90, -20))
psl_sof20s_obs = psl_sof20s_obs - psl_sof20s_obs.mean(dim='time')
# the same weights are reused; the file names suggest the obs were remapped
# to the same grid as the model
solver_obs = Eof(psl_sof20s_obs, weights=wgts)
sh_eof_obs = solver_obs.eofsAsCorrelation(neofs=1)
var_s_obs = solver_obs.varianceFraction(neigs=1)

import iris
import iris.coord_categorisation

cube = iris.load_cube(
    'processed_data/remap-woa09_psl_Amon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc'
from eofs.xarray import Eof
import datetime
import os

# Read preprocessed data.
DATA_FILE = "/LFASGI/sandroal/data_sets/GIMMS/ppdata_ndvi.nc"
DS = xr.open_dataset(DATA_FILE)

# Create an EOF solver to do the EOF analysis. Memory intensive operation.
solver = Eof(DS.ndvi)

# Retrieve EOFs, principal component time series, fraction of explained
# variance, and eigenvalues as xarray DataArray objects for all modes.
EOFs = solver.eofs()
PCs = solver.pcs()
FRACs = solver.varianceFraction()
EIGs = solver.eigenvalues()

# Attributes for xarray DataSet objects.
attrs = {}
attrs["Description"] = "Empirical orthogonal functions to NDVI (GIMMS) " + \
                       "in its original temporal and spatial resolutions"
attrs["Build"] = "By Alex Araujo"
attrs["Date"] = datetime.datetime.now().strftime("%B %d, %Y; %Hh:%Mmin:%Ss")
attrs["Source"] = os.path.abspath(__file__)

# Set these attributes to results. Must transform from xarray DataArray to
# DataSets before exporting results as netcdf files.
DAs = [EOFs, PCs, FRACs, EIGs]
names = ["eofs", "pcs", "fracs", "eigs"]
files = ["ppdata_ndvi_eofs_eofs.nc",
areas = r**2 * coslats * dlats * dlons
Q_s = Q_s[:, :-1, :-1]
Qr = Qr[:, :-1, :-1]
sst = sst[:, :-1, :-1]
tendsst = tendsst[:, :-1, :-1]
tot_area = np.sum(areas)
weights = areas / tot_area

# Calculate EOFs for SST, Qs and Qo. Should weight by area?
solver = Eof(sst, weights=weights)
sst_eof = solver.eofs(neofs=3, eofscaling=2)
sst_eof_varfracs = solver.varianceFraction()

solver = Eof(Qr, weights=weights)
Qo_eof = solver.eofs(neofs=3, eofscaling=2)
Qo_pc = solver.pcs(npcs=3, pcscaling=2)
Qo_eof_varfracs = solver.varianceFraction()
Qo_rec = solver.reconstructedField(5)
Qo_rec_var = Qo_rec.var(dim='time')

# get projection (pseudo-PCs) associated with Qo EOFs
# Qo_eof_projsst = solver.projectField(sst, neofs=3)
# Qo_eof_projQo = solver.projectField(Qr, neofs=3)
start5 = time.time()
os.chdir("/home/ubuntu")

lon = 180
lat = 90
dim = lon * lat
months = 24
data = np.resize(x1, [dim, months])

solver = Eof(xr.DataArray(anomalies.data, dims=['time', 'lat', 'lon']))
pcs = solver.pcs(npcs=3, pcscaling=1)
eofs = solver.eofs(neofs=5, eofscaling=1)
variance_fractions = solver.varianceFraction(neigs=3)
print(variance_fractions)

myFile1 = open('PC1.csv', 'w')
with myFile1:
    writer = csv.writer(myFile1)
    writer.writerows(eofs[0, :, :].data)

myFile2 = open('PC2.csv', 'w')
with myFile2:
    writer = csv.writer(myFile2)
    writer.writerows(eofs[1, :, :].data)

myFile3 = open('PC3.csv', 'w')
with myFile3:
    writer = csv.writer(myFile3)
    writer.writerows(eofs[2, :, :].data)
def eof_orca_latlon_box(run, var, modes, lon_bnds, lat_bnds, pathfile, plot,
                        time, eoftype):

    if var == 'temp':
        key = 'votemper'
        key1 = "votemper"
    elif var == 'sal':
        key = 'vosaline'
        key1 = "vosaline"
    elif var == 'MLD':
        key = 'somxl010'
        key1 = "somxl010"

    # read data
    ds = xr.open_dataset(pathfile)
    # ds["time_counter"] = ds['time_counter'] + (np.datetime64('0002-01-01') - np.datetime64('0001-01-01'))
    if time == 'comparison':
        ds = ds.sel(time_counter=slice('1958-01-01', '2006-12-31'))

    # cut box for EOF at surface
    if var == 'MLD':
        data = ds[key].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                           lat=slice(lat_bnds[0], lat_bnds[1]))
    else:
        data = ds[key][:, 0, :, :].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                                       lat=slice(lat_bnds[0], lat_bnds[1]))
    data = data.to_dataset()

    # detrend data
    data[key1] = (['time_counter', 'lat', 'lon'],
                  signal.detrend(data[key].fillna(0), axis=0, type='linear'))

    # remove seasonal cycle and drop unnecessary coordinates
    if 'time_centered' in list(data.coords):
        data = deseason_month(data).drop('month').drop('time_centered')  # somehow pca doesn't work otherwise
    else:
        data = deseason_month(data).drop('month')  # somehow pca doesn't work otherwise

    # set 0 values back to nan
    data = data.where(data != 0)

    # EOF analysis
    # Square-root of cosine of latitude weights are applied before the computation of EOFs.
    coslat = np.cos(np.deg2rad(data['lat'].values))
    coslat, _ = np.meshgrid(coslat, np.arange(0, len(data['lon'])))
    wgts = np.sqrt(coslat)
    solver = Eof(data[key], weights=wgts.transpose())
    pcs = solver.pcs(npcs=modes, pcscaling=1)
    if eoftype == 'correlation':
        eof = solver.eofsAsCorrelation(neofs=modes)
    elif eoftype == 'covariance':
        eof = solver.eofsAsCovariance(neofs=modes)
    else:
        eof = solver.eofs(neofs=modes)
    varfr = solver.varianceFraction(neigs=4)
    print(varfr)

    # ----------- Plotting --------------------
    plt.close("all")
    if plot == 1:
        for i in np.arange(0, modes):
            fig = plt.figure(figsize=(8, 2))
            ax1 = fig.add_axes([0.1, 0.1, 0.3, 0.9],
                               projection=ccrs.PlateCarree())  # main axes
            ax1.set_extent((lon_bnds[0], lon_bnds[1], lat_bnds[0], lat_bnds[1]))
            # discrete colormap
            cmap = plt.get_cmap('RdYlBu', len(np.arange(10, 30)) - 1)  # inferno similar to cmo thermal
            eof[i, :, :].plot(ax=ax1, cbar_kwargs={'label': 'Correlation'},
                              transform=ccrs.PlateCarree(), x='lon', y='lat',
                              add_colorbar=True, cmap=cmap)
            gl = map_stuff(ax1)
            gl.xlocator = mticker.FixedLocator([100, 110, 120])
            gl.ylocator = mticker.FixedLocator(np.arange(-35, -10, 5))
            plt.text(116, -24, str(np.round(varfr[i].values, decimals=2)),
                     horizontalalignment='center', verticalalignment='center',
                     transform=ccrs.PlateCarree(), fontsize=8)

            ax2 = fig.add_axes([0.5, 0.1, 0.55, 0.9])  # main axes
            plt.plot(pcs.time_counter, pcs[:, i].values, linewidth=0.1, color='k')
            anomaly(ax2, pcs.time_counter.values, pcs.values[:, i], [0, 0])
            ax2.set_xlim([pcs.time_counter[0].values, pcs.time_counter[-1].values])

            plt.savefig(pathplots + 'eof_as' + eoftype + '_mode' + str(i) +
                        '_' + time + '_' + run + '_' + var + '.png',
                        dpi=300, bbox_inches='tight', pad_inches=0.1)
            plt.show()
    # ----------------------------------------------
    return pcs, eof, varfr
for la in range(0, len(lat_obs)):
    for lo in range(0, len(lon_obs)):
        valid = ~np.isnan(sst_obs[:, la, lo])
        if valid.any():
            sst_obs[:, la, lo] = signal.detrend(sst_obs[:, la, lo], axis=0,
                                                type='linear')
        elif not valid.all():
            sst_obs[:, la, lo] = np.nan
'''

# EOF for model
coslat_mdl = np.cos(np.deg2rad(sst_mdl.coords['lat'].values))
wgts_mdl = np.sqrt(coslat_mdl)[..., np.newaxis]
solver_mdl = Eof(sst_mdl, weights=wgts_mdl, center=True)
lambdas_mdl = solver_mdl.eigenvalues()
vf_mdl = solver_mdl.varianceFraction()
Nerror_mdl = solver_mdl.northTest(vfscaled=True)
pcs_mdl = solver_mdl.pcs()  # (time, mode)
eofs_mdl = solver_mdl.eofs()

# EOF for obs
coslat_obs = np.cos(np.deg2rad(sst_obs.coords['lat'].values))
wgts_obs = np.sqrt(coslat_obs)[..., np.newaxis]
solver_obs = Eof(sst_obs, weights=wgts_obs, center=True)
lambdas_obs = solver_obs.eigenvalues()
vf_obs = solver_obs.varianceFraction()
Nerror_obs = solver_obs.northTest(vfscaled=True)
pcs_obs = solver_obs.pcs()  # (time, mode)
eofs_obs = solver_obs.eofs()
def calcEOF(xrdata, data_var, w, wei=True):
    """
    input:
        xrdata: xarray Dataset
        data_var: string. Variable name to use on EOF.
        w: string. Coordinate name used for the weights, e.g. 'latitude'.
        wei: bool. If True, apply sqrt(cos(lat)) weighting.

    use as:
        solver, eof1, var1 = calcEOF(xrdata, 'data_var', 'latitude')
    """
    xrdata = xrdata - xrdata[data_var].mean(dim="time")

    # Try to select data by level, lat and time, falling back to whichever
    # coordinate names are actually present.
    try:
        xrdata = xrdata.sel(level=1000, latitude=slice(90, 20),
                            time=slice("1979-01-01", "2000-12-31"))
        print('Data selection OK on first try. Level, lat and time slice done.')
    except ValueError:
        try:
            print('ValueError: trying next')
            xrdata = xrdata.sel(level=1000, lat=slice(90, 20),
                                time=slice("1979-01-01", "2000-12-31"))
            print('Data selection OK on second try. Level, lat and time slice done.')
        except ValueError:
            try:
                print('ValueError: trying next')
                xrdata = xrdata.sel(latitude=slice(90, 20),
                                    time=slice("1979-01-01", "2000-12-31"))
                print('Data selection OK on third try. No level cut.')
            except ValueError:
                try:
                    print('ValueError: trying next')
                    xrdata = xrdata.sel(time=slice("1979-01-01", "2000-12-31"))
                    print('Data selection OK on fourth try. Only time slice.')
                except:
                    raise TypeError('Data out of limits')

    xrdata = (xrdata.groupby('time.month') -
              xrdata[data_var].groupby('time.month').mean())

    # To ensure equal area weighting for the covariance matrix,
    # the gridded data is weighted by the square root of the cosine of
    # latitude. - NOAA
    if wei == True:
        coslat = np.cos(np.deg2rad(xrdata.coords[w].values)).clip(0., 1.)
        # [..., np.newaxis] appends a trailing axis so the 1-D latitude
        # weights broadcast across the longitude dimension
        wgts = np.sqrt(coslat)[..., np.newaxis]
        # The EOF analysis is handled by a solver class, and the EOF solution
        # is computed when the solver class is created. Method calls are then
        # used to retrieve the quantities of interest from the solver class.
        # center=False would skip removing the mean from the data:
        # solver = Eof(m_anomalie.hgt, weights=wgts, center=False)
        """
        solver.eofsAsCovariance returns the EOFs expressed as the covariance
        between each PC and the input data at each point in space; they are
        not the raw EOFs. They tell you how each point in space varies with
        the given mode. The eofs method provides the raw EOFs (eigenvectors
        of the covariance matrix), of which the PCs are the coefficients.
        "The covariance matrix is used for the EOF analysis." - NOAA
        """
        solver = Eof(xrdata[data_var], weights=wgts)
    else:
        solver = Eof(xrdata[data_var])
        # solver = Eof(s_anomalie.hgt, weights=wgts, center=False)

    # Retrieve the leading EOF, expressed as the covariance between the
    # leading PC time series and the input anomalies at each grid point.
    eof1 = solver.eofsAsCovariance(pcscaling=1)
    var1 = solver.varianceFraction().sel(mode=0)

    return solver, eof1, var1
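# A short usage sketch for calcEOF, assuming a monthly geopotential-height
# dataset with 'hgt', 'latitude', 'level', and 'time' coordinates; the file
# name is illustrative, not a real path.
import xarray as xr

ds = xr.open_dataset('hgt_monthly.nc')  # hypothetical input file

solver, eof1, var1 = calcEOF(ds, 'hgt', 'latitude', wei=True)
print(f"Leading mode explains {float(var1) * 100:.1f}% of the variance")
eof1.sel(mode=0).plot()  # spatial pattern of the leading mode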
def main():
    ens = sys.argv[1]
    sYear = sys.argv[2]
    eYear = sys.argv[3]
    if int(sYear) < 1920:
        raise ValueError("Starting year must be 1920 or later.")
    if int(eYear) > 2100:
        raise ValueError("End year must be 2100 or earlier.")
    print("Computing NPGO for ensemble number " + ens + "...")
    filepath = ('/glade/scratch/rbrady/EBUS_BGC_Variability/' +
                'global_residuals/SST/remapped/remapped.SST.' + ens +
                '.192001-210012.nc')
    ds = xr.open_dataset(filepath)
    print("Global residuals loaded...")
    ds = ds['SST'].squeeze()
    # Make time dimension readable through xarray.
    ds['time'] = pd.date_range('1920-01', '2101-01', freq='M')
    # Reduce to time period of interest.
    ds = ds.sel(time=slice(sYear + '-01', eYear + '-12'))
    # Slice down to Northeast Pacific domain.
    ds = ds.sel(lat=slice(25, 62), lon=slice(180, 250))
    # Take annual JFM means.
    month = ds['time.month']
    JFM = (month <= 3)
    ds_winter = ds.where(JFM).resample(time='A').mean('time')
    # Compute EOF
    coslat = np.cos(np.deg2rad(ds_winter.lat.values))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(ds_winter, weights=wgts, center=False)
    print("NPGO computed.")
    eof = solver.eofsAsCorrelation(neofs=2)
    variance = solver.varianceFraction(neigs=2)
    # Reconstruct the monthly index of SSTa by projecting
    # the monthly data onto the annual PC time series.
    pseudo_pc = solver.projectField(ds, neofs=2, eofscaling=1)
    # Set up as dataset.
    ds = eof.to_dataset()
    ds['pc'] = pseudo_pc
    ds['variance_fraction'] = variance
    ds = ds.rename({'eofs': 'eof'})
    ds = ds.sel(mode=1)
    # Invert if needed so the bullseye carries the proper sign.
    if ds.sel(lat=45.5, lon=210).eof >= 0:
        ds['eof'] = ds['eof'] * -1
        ds['pc'] = ds['pc'] * -1
    # Change some attributes for the variables.
    ds['eof'].attrs['long_name'] = 'Correlation between PC and JFM SSTa'
    ds['pc'].attrs['long_name'] = 'Principal component for NPGO'
    # Add a description of methods for clarity.
    ds.attrs['description'] = ('Second mode of JFM SSTa variability over '
                               '25-62N and 180-110W.')
    ds.attrs['anomalies'] = ('Anomalies were computed by removing the '
                             'ensemble mean at each grid cell.')
    ds.attrs['weighting'] = (
        'The native grid was regridded to a standard 1deg x 1deg (180x360) grid. ' +
        'Weighting was computed via the sqrt of the cosine of latitude.')
    print("Saving to netCDF...")
    ds.to_netcdf('/glade/p/work/rbrady/NPGO/NPGO.' + ens + '.' +
                 str(sYear) + '-' + str(eYear) + '.nc')