def four():
    """Fit a NIPAphase model on SST principal components computed per basin.

    The Atlantic, Indian and Pacific regions are processed in that order.
    For each one the covariance of the region data is eigendecomposed and
    the leading components whose cumulative (rounded) variance fraction is
    below 0.90 are kept.  The principal components of all three basins are
    stacked, subset with ``phaseind['neutpos']`` and correlated with the
    equally subset precipitation; only components with ``|r| > 0.33`` are
    passed to ``NIPAphase``.

    Prints the retained variance fractions per basin, the model
    correlation and the per-component correlations, then shows a scatter
    plot of ``model.clim_data`` against ``model.hindcast``.

    Returns:
        None.
    """
    # This splits by phase after the EOFs; PCs are done on individual basins.
    from numpy import cov, argsort, real, cumsum, round, \
        corrcoef, vstack, array
    from numpy.linalg import eig
    from atmos_ocean_data import weightsst

    sst, slp, mei, phaseind = get_climate_data()
    lcrb_prcp = get_lcrb_prcp()
    phase = 'neutpos'

    # NOTE (original author): this way the ocean regions are obtained and
    # then PCA is done on each.  Should write another way where PCs get
    # computed separately and then all put into the NIPA module.
    atl = get_region(weightsst(sst), region = 'Atlantic')
    pac = get_region(weightsst(sst), region = 'Pacific')
    ind = get_region(weightsst(sst), region = 'Indian')

    pcs = []
    for data in [atl, ind, pac]:
        cvr = cov(data.T)
        eigval, eigvec = eig(cvr)
        # eig() returns eigenvalues in no particular order, so the
        # eigenvector columns must be reordered together with the
        # eigenvalues; otherwise eigvec[:, :ncomps] is not the leading set.
        order = argsort(eigval)[::-1]
        eigval = real(eigval[order])
        eigvec = eigvec[:, order]
        lam = round(eigval / sum(eigval), 2)
        ncomps = (cumsum(lam) < .90).sum()
        eofs = real(eigvec[:, :ncomps])
        pcs.append(eofs.T.dot(data.T).squeeze())
        # Single-argument print() is valid in Python 2 and 3 and emits the
        # same text as the former two-item print statement.
        print('# components is %i\n%s' % (ncomps, lam[:ncomps]))

    data = vstack(pcs)
    data = data.T[phaseind[phase]]  # phase subsetting happens here
    prcp = lcrb_prcp[phaseind[phase]]

    # Correlation of every stacked PC (one column each) with precipitation.
    c = array([corrcoef(prcp, data[:, i])[0, 1]
               for i in range(data.shape[1])])
    idx = abs(c) > 0.33

    model = NIPAphase(lcrb_prcp, data.T[idx], mei, phaseind[phase])
    model.crossvalpcr()
    print(model.correlation)
    print(c)
    plt.scatter(model.clim_data, model.hindcast)
    plt.show()
    return
def one():
    """Fit a NIPAphase model on PCs of a combined multi-region SST field.

    Combine as many or few regions as you want by editing the ``hstack``
    call; as written the Pacific and Indian regions are joined column-wise.
    The joined data and the precipitation are subset with
    ``phaseind['neutpos']``, the covariance of the data is eigendecomposed
    and the leading components whose cumulative (rounded) variance fraction
    is below 0.90 are kept.  Components with ``|r| > 0.3`` against the
    precipitation are passed to ``NIPAphase``.

    Prints the data shape, the model correlation and the per-component
    correlations, then shows a scatter plot of ``model.clim_data`` against
    ``model.hindcast``.

    Returns:
        None.
    """
    from numpy import cov, argsort, real, cumsum, round, corrcoef, zeros, hstack
    from numpy.linalg import eig
    from atmos_ocean_data import weightsst

    sst, slp, mei, phaseind = get_climate_data()
    lcrb_prcp = get_lcrb_prcp()
    phase = 'neutpos'

    # NOTE (original author): this way the ocean regions are obtained and
    # then PCA is done on all.  Should write another way where PCs get
    # computed separately and then all put into the NIPA module.
    data_a = get_region(weightsst(sst), region = 'Atlantic')  # loaded but not combined below
    data_b = get_region(weightsst(sst), region = 'Pacific')
    data_c = get_region(weightsst(sst), region = 'Indian')
    #data_c = get_region(weightsst(sst), region = 'All')

    data = hstack((data_b, data_c))
    data = data[phaseind[phase]]
    prcp = lcrb_prcp[phaseind[phase]]
    print(data.shape)

    cvr = cov(data.T)
    eigval, eigvec = eig(cvr)
    # eig() returns eigenvalues in no particular order, so the eigenvector
    # columns must be reordered together with the eigenvalues; otherwise
    # eigvec[:, :ncomps] is not the leading set.
    order = argsort(eigval)[::-1]
    eigval = real(eigval[order])
    eigvec = eigvec[:, order]
    lam = round(eigval / sum(eigval), 2)
    ncomps = (cumsum(lam) < .90).sum()
    eofs = real(eigvec[:, :ncomps])

    # Keep pcs two-dimensional (component x time).  Squeezing collapsed the
    # single-component case to 1-D, which broke both pcs[i] and the boolean
    # row selection below.
    pcs = eofs.T.dot(data.T)

    c = zeros((ncomps))
    for i in range(ncomps):
        c[i] = corrcoef(prcp, pcs[i])[0, 1]

    model = NIPAphase(lcrb_prcp, pcs[abs(c) > 0.3], mei, phaseind[phase])
    model.crossvalpcr()
    print(model.correlation)
    print(c)
    plt.scatter(model.clim_data, model.hindcast)
    plt.show()
    return
def all_sst():
    """Unfinished stub: prepare the full weighted SST field; returns ``[]``.

    The previous body assigned ``sst = weightsst(sst)``, which made ``sst``
    a local name that was read before assignment, so every call raised
    ``UnboundLocalError``.  The SST object is now loaded through
    ``get_climate_data()`` and the weighted field is bound to its own name.

    Returns:
        list: always an empty list; the intermediate values are not used yet.
    """
    sst, slp, mei, phaseind = get_climate_data()
    weighted = weightsst(sst)
    data = weighted.data
    nt, nlat, nlon = data.shape  # unpacking requires a 3-D field
    data = get_region(weighted)
    # NOTE(review): presumably flags grid cells that are finite at the first
    # time step; currently unused -- confirm intent before building on it.
    idx = ~isnan(data)[0, :]
    return []
def individual_eofs(region = 'Pacific', phase = None):
    """Compute EOF patterns and principal components for one region.

    This function gets individual PCs/EOFs for each basin, for any phase.
    Every band returned by ``get_region`` is passed through
    ``seasonal_var`` (after subsetting with ``phaseind[phase]`` when a
    phase is given), flattened to ``(time, ny * nx)`` and joined
    column-wise.  Columns that are NaN at the first time step are excluded
    from the analysis and re-inserted as NaN in the returned patterns.
    Components are kept while the cumulative (rounded) variance fraction is
    below 0.80.

    Args:
        region: region name forwarded to ``get_region``.
        phase: key into ``phaseind`` used to subset each band, or ``None``
            to use each band as returned.

    Returns:
        tuple: ``(final, lat, lon, lam, pcs)`` where ``final`` maps
        ``'eof<n>'`` to a dict of per-band 2-D patterns, ``lat``/``lon``
        are passed through from ``get_region``, ``lam`` holds the rounded
        variance fractions of the kept components and ``pcs`` the
        (squeezed) principal components.

    Raises:
        ValueError: if ``get_region`` returns no bands.
    """
    sst, slp, mei, phaseind = get_climate_data()
    data, lat, lon = get_region(weightsst(sst), region = region)

    ssts = {}
    nlats = {}
    nlons = {}
    ngrids = {}
    blocks = []
    for band in data:
        # Build the seasonal field once, on the phase subset when requested
        # (previously the full-record result was computed and discarded).
        if phase is not None:
            band_data = data[band][phaseind[phase]]
        else:
            band_data = data[band]
        ssts[band] = seasonal_var(band_data, lat[band], lon[band])
        nt, ny, nx = ssts[band].data.shape
        blocks.append(ssts[band].data.reshape(nt, ny * nx))
        nlats[band], nlons[band], ngrids[band] = ny, nx, ny * nx

    if not blocks:
        raise ValueError('no bands returned for region %r' % (region,))

    # The row count comes from the data instead of a hard-coded 90.  The
    # zero-column float seed keeps the float promotion the old seed gave.
    eof_data = hstack([zeros((blocks[0].shape[0], 0))] + blocks)

    # Only the first time step is inspected to decide which columns to drop.
    nanidx = ~isnan(eof_data[0])
    valid = eof_data[:, nanidx]

    #_EOF analysis
    cvr = cov(valid.T)
    eigval, eigvec = eig(cvr)
    # eig() returns eigenvalues in no particular order, so the eigenvector
    # columns must be reordered together with the eigenvalues; otherwise
    # eigvec[:, :ncomps] is not the leading set.
    order = argsort(eigval)[::-1]
    eigval = real(eigval[order])
    eigvec = eigvec[:, order]
    lam = np.round(eigval / sum(eigval), 2)
    print(lam[:6])
    ncomps = (cumsum(lam) < .80).sum()
    print(region)
    print(ncomps)
    eofs = real(eigvec[:, :ncomps])
    pcs = eofs.T.dot(valid.T).squeeze()
    print(pcs.shape)

    final = {}
    for n in arange(ncomps):
        tmp = zeros((len(nanidx)))
        tmp[nanidx] = eofs[:, n]
        tmp[~nanidx] = nan
        patterns = {}
        low = 0
        for band in data:
            #_This will put the data back into corresponding boxes
            high = low + ngrids[band]
            patterns[band] = tmp[low:high].reshape(nlats[band], nlons[band])
            low = high
        final['eof' + str(n)] = patterns
    return final, lat, lon, lam[:ncomps], pcs
def crossvalpcr(self, xval = True, debug = False):
    """Leave-one-out principal component regression of clim_data on SST.

    Must set phase with bootcorr first; this method only uses the
    ``corr_grid`` attribute to select SST grid cells (those where
    ``corr_grid.mask`` is False).

    If the number of masked cells is at least
    ``len(sst.lat) * len(sst.lon) - 4`` the method sets ``correlation``,
    ``hindcast`` and ``hindcast_error`` to NaN, sets
    ``flags['noSST'] = True`` and returns.

    Otherwise, for every time step ``i`` the leading EOF of the remaining
    steps is computed, ``clim_data`` is regressed on the corresponding PC
    with ``linregress`` and the held-out step is predicted.

    Args:
        xval: when False, skip cross-validation; compute the leading PC
            from all time steps, store it in ``self.pc1`` and return it.
        debug: unused.

    Returns:
        The leading PC when ``xval`` is False, otherwise None.

    Side effects (cross-validated path):
        ``self.hindcast`` / ``self.hindcast_error``: ``pd.Series`` indexed
        by ``self.clim_data.index``; ``self.correlation``: Pearson r of
        ``clim_data`` vs. hindcast rounded to 2 decimals;
        ``self.reg_stats``: dict of arrays ``params``, ``std_errs``,
        ``t_vals`` and ``p_vals`` with one entry per fold.
    """
    import numpy as np
    from numpy import array
    from scipy.stats import pearsonr as corr
    from scipy.stats import linregress

    def _leading_eof(samples):
        # Eigenvector (as a column) of cov(samples.T) with the largest
        # eigenvalue.  eig() does not order its output, so the column is
        # picked via the sorted eigenvalue index instead of assuming that
        # column 0 is the leading one.
        eigval, eigvec = np.linalg.eig(np.cov(samples.T))
        order = np.argsort(eigval)[::-1]
        return np.real(eigvec[:, order[:1]])

    predictand = self.clim_data

    if self.corr_grid.mask.sum() >= len(self.sst.lat) * len(self.sst.lon) - 4:
        self.correlation = np.nan
        self.hindcast = np.nan
        self.hindcast_error = np.nan
        self.flags['noSST'] = True
        return

    # Deferred so that the early exit above does not need the project package.
    from atmos_ocean_data import weightsst

    self.flags['noSST'] = False
    sstidx = self.corr_grid.mask == False
    n = len(predictand)
    yhat = np.zeros(n)
    e = np.zeros(n)
    idx = np.arange(n)

    params = []
    std_errs = []
    p_vals = []
    t_vals = []

    if not xval:
        rawdata = weightsst(self.sst).data[:, sstidx]
        eof_1 = _leading_eof(rawdata)
        pc_1 = eof_1.T.dot(rawdata.T).squeeze()
        self.pc1 = pc_1
        return pc_1

    for i in idx:
        test = idx == i
        train = idx != i
        # NOTE(review): weightsst() is re-evaluated twice per fold exactly as
        # before; if it is side-effect free it could be hoisted out of the
        # loop -- confirm before changing.
        rawdata = weightsst(self.sst).data[train][:, sstidx]
        dropped_data = weightsst(self.sst).data[test][:, sstidx].squeeze()

        eof_1 = _leading_eof(rawdata)
        pc_1 = eof_1.T.dot(rawdata.T).squeeze()

        slope, intercept, r_value, p_value, std_err = linregress(pc_1, predictand[train])
        # The projection is a length-1 array; take the scalar explicitly
        # instead of relying on implicit array-to-scalar conversion.
        predictor = dropped_data.dot(eof_1).item()
        yhat[i] = slope * predictor + intercept
        # NOTE(review): predictand[i] uses whatever indexing clim_data's type
        # defines; assumes that is positional here -- confirm.
        e[i] = predictand[i] - yhat[i]
        params.append(slope)
        std_errs.append(std_err)
        p_vals.append(p_value)
        t_vals.append(slope/std_err)

    r, p = corr(predictand, yhat)

    index = self.clim_data.index
    self.hindcast = pd.Series(data = yhat, index = index)
    self.hindcast_error = pd.Series(data = e, index = index)
    self.correlation = round(r, 2)
    self.reg_stats = {'params' : array(params),
                      'std_errs' : array(std_errs),
                      't_vals' : array(t_vals),
                      'p_vals' : array(p_vals)}
    return
from os import environ as EV
from data_load import *
from utils import *
from simpleNIPApca import *
from numpy import cov, argsort, real, cumsum, round, corrcoef, ndarray, zeros, hstack
from numpy.linalg import eig
from atmos_ocean_data import weightsst

st = time.time()  # start time of the run

sst, slp, mei, phaseind = get_climate_data()
prcp = get_lcrb_prcp()

# This way the ocean regions are obtained and then PCA is done on all.
# Should write another way where PCs get computed separately and then all
# put into the NIPA module.
pac = get_region(weightsst(sst), region = 'Pacific')
atl = get_region(weightsst(sst), region = 'Atlantic')
ind = get_region(weightsst(sst), region = 'Indian')

# This block will combine and do PCA on all, then split into regions.
# bands/lats/lons alias the Pacific containers, which are extended in place
# with the Atlantic entries (keys shifted by 4) and then the Indian entries
# (keys shifted by 8).
bands, lats, lons = pac[0], pac[1], pac[2]
for _basin, _shift in ((atl, 4), (ind, 8)):
    for i in _basin[0]:
        _key = str(int(i) + _shift)
        bands[_key] = _basin[0][i]
        lats[_key] = _basin[1][i]
        lons[_key] = _basin[2][i]