Пример #1
0
def four():
    """Fit a NIPA phase model on per-basin SST principal components.

    EOFs are computed separately for the Atlantic, Indian and Pacific
    basins over the full record; the resulting PCs are stacked, subset to
    the 'neutpos' phase, screened by their correlation with precipitation
    (|r| > 0.33), and handed to ``NIPAphase`` for cross-validated
    regression.  The hindcast is shown as a scatter plot.

    Returns:
        None.  Results are printed and plotted.
    """
    from numpy import cov, argsort, real, cumsum, round, \
                    corrcoef, zeros, vstack
    from numpy.linalg import eig
    from atmos_ocean_data import weightsst

    sst, slp, mei, phaseind = get_climate_data()
    lcrb_prcp = get_lcrb_prcp()
    phase = 'neutpos'
    in_phase = phaseind[phase]

    # NOTE(review): assumes get_region returns a 2-D (time x grid) array
    # here -- TODO confirm; another function in this file unpacks three
    # values from the same call.
    atl = get_region(weightsst(sst), region = 'Atlantic')
    pac = get_region(weightsst(sst), region = 'Pacific')
    ind = get_region(weightsst(sst), region = 'Indian')

    pcs = []
    for basin in (atl, ind, pac):
        eigval, eigvec = eig(cov(basin.T))
        # eig() returns eigenpairs in no particular order, so the
        # eigenvector columns must be reordered together with the
        # eigenvalues; otherwise the leading columns are not the
        # leading EOFs.
        order = argsort(eigval)[::-1]
        eigval = real(eigval[order])
        eigvec = real(eigvec[:, order])

        # Fraction of variance per mode, rounded to two decimals before
        # the cumulative threshold is applied (as in the original).
        lam = round(eigval / sum(eigval), 2)
        ncomps = (cumsum(lam) < .90).sum()
        eofs = eigvec[:, :ncomps]

        # Rows are PCs, columns are time steps.
        pcs.append(eofs.T.dot(basin.T))
        print('# components is %i\n%s' % (ncomps, lam[:ncomps]))

    # Stack all basins' PCs, then keep only the time steps in the phase.
    data = vstack(pcs).T[in_phase]
    prcp = lcrb_prcp[in_phase]

    # Correlation of each PC (column) with in-phase precipitation.
    c = zeros(data.shape[1])
    for i in range(len(c)):
        c[i] = corrcoef(prcp, data[:, i])[0, 1]

    idx = abs(c) > 0.33
    model = NIPAphase(lcrb_prcp, data.T[idx], mei, in_phase)
    model.crossvalpcr()
    print(model.correlation)
    print(c)
    plt.scatter(model.clim_data, model.hindcast); plt.show()
    return
Пример #2
0
def one():
    """Fit a NIPA phase model on PCs of combined ocean regions.

    The Pacific and Indian regions are concatenated along the grid axis,
    subset to the 'neutpos' phase, and decomposed with a single PCA.  PCs
    whose correlation with in-phase precipitation exceeds 0.3 in absolute
    value are passed to ``NIPAphase`` for cross-validated regression.

    Combine as many or as few regions as you want by editing the
    ``hstack`` call.

    Returns:
        None.  Results are printed and plotted.
    """
    from numpy import cov, argsort, real, cumsum, round, corrcoef, zeros, hstack
    from numpy.linalg import eig
    from atmos_ocean_data import weightsst

    sst, slp, mei, phaseind = get_climate_data()
    lcrb_prcp = get_lcrb_prcp()
    phase = 'neutpos'
    in_phase = phaseind[phase]

    # NOTE(review): assumes get_region returns a 2-D (time x grid) array
    # here -- TODO confirm against its definition.
    # The Atlantic region is loaded but not currently part of the
    # combination below; the call is kept so it can be toggled in.
    data_a = get_region(weightsst(sst), region = 'Atlantic')
    data_b = get_region(weightsst(sst), region = 'Pacific')
    data_c = get_region(weightsst(sst), region = 'Indian')

    data = hstack((data_b, data_c))[in_phase]
    prcp = lcrb_prcp[in_phase]

    print(data.shape)
    eigval, eigvec = eig(cov(data.T))
    # eig() does not order its output: reorder the eigenvector columns
    # together with the eigenvalues so the first columns really are the
    # leading EOFs.
    order = argsort(eigval)[::-1]
    eigval = real(eigval[order])
    eigvec = real(eigvec[:, order])

    lam = round(eigval / sum(eigval), 2)
    ncomps = (cumsum(lam) < .90).sum()

    eofs = eigvec[:, :ncomps]
    # Kept 2-D (ncomps x time) on purpose: squeezing would turn a single
    # retained PC into a 1-D vector and break the row indexing below.
    pcs = eofs.T.dot(data.T)

    c = zeros(ncomps)
    for i in range(ncomps):
        c[i] = corrcoef(prcp, pcs[i])[0, 1]

    model = NIPAphase(lcrb_prcp, pcs[abs(c) > 0.3], mei, in_phase)
    model.crossvalpcr()
    print(model.correlation)
    print(c)
    plt.scatter(model.clim_data, model.hindcast); plt.show()
    return
Пример #3
0
def all_sst():
    """Load weighted SST and locate its non-NaN grid columns (unfinished).

    NOTE(review): this function is a stub -- it computes intermediate
    values and always returns an empty list.

    The original body began with ``sst = weightsst(sst)``, which reads the
    local ``sst`` before it is assigned and therefore always raised
    ``UnboundLocalError``.  The SST field is now loaded through
    ``get_climate_data()``, as the other functions in this file do.

    Returns:
        list: always ``[]``.
    """
    raw_sst, _slp, _mei, _phaseind = get_climate_data()
    sst = weightsst(raw_sst)

    data = sst.data

    # Unpacking doubles as a check that the field is three-dimensional.
    nt, nlat, nlon = data.shape

    # NOTE(review): assumes get_region returns a 2-D array when called
    # without a region -- TODO confirm.
    data = get_region(sst)
    idx = ~isnan(data)[0, :]
    return []
Пример #4
0
def individual_eofs(region = 'Pacific', phase = None):
    """Compute EOF patterns and PCs for one ocean basin.

    The basin is delivered by ``get_region`` as a set of bands.  Each band
    is passed through ``seasonal_var`` (optionally after subsetting its
    time axis to ``phase``), flattened to (time x grid) and concatenated
    with the others.  A PCA is run on the grid columns whose first time
    step is not NaN, and the modes whose cumulative (two-decimal rounded)
    variance fraction stays below 0.80 are retained.

    Args:
        region: basin name forwarded to ``get_region``.
        phase: key into the phase index returned by ``get_climate_data``;
            ``None`` uses the full record.

    Returns:
        tuple: ``(final, lat, lon, lam, pcs)`` where ``final`` maps
        ``'eof<n>'`` to a dict of per-band 2-D patterns (NaN where the
        grid column was excluded), ``lat``/``lon`` are passed through from
        ``get_region``, ``lam`` holds the variance fractions of the
        retained modes, and ``pcs`` the squeezed principal components.
    """
    sst, slp, mei, phaseind = get_climate_data()
    data, lat, lon = get_region(weightsst(sst), region = region)

    band_order = []
    nlats = {}; nlons = {}; ngrids = {}
    blocks = []
    for band in data:
        band_data = data[band]
        if phase is not None:
            band_data = band_data[phaseind[phase]]
        # seasonal_var is evaluated once per band (the original computed
        # it on the full record and then again on the phase subset).
        field = seasonal_var(band_data, lat[band], lon[band])
        nt, ny, nx = field.data.shape
        blocks.append(field.data.reshape(nt, ny * nx))
        nlats[band], nlons[band], ngrids[band] = ny, nx, ny * nx
        band_order.append(band)

    # Collecting the blocks first lets the row count follow the data
    # instead of a hard-coded record length.
    eof_data = hstack(blocks)

    nanidx = ~isnan(eof_data[0])
    samples = eof_data[:, nanidx]

    #_EOF analysis
    eigval, eigvec = eig(cov(samples.T))
    # eig() returns eigenpairs unordered; reorder the eigenvector columns
    # with the eigenvalues so the first columns are the leading EOFs.
    order = argsort(eigval)[::-1]
    eigval = real(eigval[order])
    eigvec = real(eigvec[:, order])

    lam = np.round(eigval / sum(eigval), 2)
    print(lam[:6])
    ncomps = (cumsum(lam) < .80).sum()
    print(region)
    print(ncomps)
    eofs = eigvec[:, :ncomps]
    pcs = eofs.T.dot(samples.T).squeeze()
    print(pcs.shape)

    final = {}
    for n in range(ncomps):
        # Scatter the EOF back onto the full flattened grid, NaN elsewhere.
        flat = zeros((len(nanidx)))
        flat[nanidx] = eofs[:, n]
        flat[~nanidx] = nan

        patterns = {}
        low = 0
        for band in band_order:
            #_This will put the data back into corresponding boxes
            high = low + ngrids[band]
            patterns[band] = flat[low:high].reshape(nlats[band], nlons[band])
            low = high
        final['eof' + str(n)] = patterns
    return final, lat, lon, lam[:ncomps], pcs
Пример #5
0
	def crossvalpcr(self, xval = True, debug = False):
		"""Principal component regression on EOF-1 of the selected SST grid.

		Uses ``self.corr_grid`` to pick grid points (its unmasked cells);
		per the original note, the phase must be set with ``bootcorr``
		before calling this method.

		If all but at most four grid cells are masked, no model is fit:
		``correlation``, ``hindcast`` and ``hindcast_error`` are set to NaN
		and ``self.flags['noSST']`` is set to True.

		Args:
			xval: when True (default), run leave-one-out cross-validation
				and store the hindcast.  When False, compute PC-1 over the
				whole record, store it in ``self.pc1`` and return it.
			debug: unused; kept for interface compatibility.

		Returns:
			PC-1 as an array when ``xval`` is False, otherwise None.

		Side effects (xval=True): sets ``self.hindcast``,
		``self.hindcast_error`` (pandas Series on ``self.clim_data.index``),
		``self.correlation`` (rounded to 2 decimals) and ``self.reg_stats``.
		"""
		import numpy as np
		import pandas as pd
		from scipy.stats import pearsonr as corr
		from scipy.stats import linregress
		from atmos_ocean_data import weightsst

		def leading_eof(samples):
			# Eigenvector of the covariance matrix with the largest
			# eigenvalue.  eig() returns eigenpairs unordered, so the
			# column has to be picked by eigenvalue, not by position.
			eigval, eigvec = np.linalg.eig(np.cov(samples.T))
			top = np.argmax(np.real(eigval))
			return np.real(eigvec[:, top])

		predictand = self.clim_data

		if self.corr_grid.mask.sum() >= len(self.sst.lat) * len(self.sst.lon) - 4:
			self.correlation = np.nan
			self.hindcast = np.nan
			self.hindcast_error = np.nan
			self.flags['noSST'] = True
			return

		self.flags['noSST'] = False
		sstidx = np.logical_not(self.corr_grid.mask)

		# weightsst is called once per invocation rather than twice per
		# fold; the boolean indexing below copies, so folds stay independent.
		weighted = weightsst(self.sst).data

		if not xval:
			rawdata = weighted[:, sstidx]
			eof_1 = leading_eof(rawdata)
			pc_1 = rawdata.dot(eof_1)
			self.pc1 = pc_1
			return pc_1

		# Work on plain values so that [i] below is positional regardless
		# of how the predictand is indexed.
		observed = np.asarray(predictand)
		n = len(observed)
		yhat = np.zeros(n)
		e = np.zeros(n)
		positions = np.arange(n)

		params = []
		std_errs = []
		p_vals = []
		t_vals = []

		for i in positions:
			train = positions != i
			rawdata = weighted[train][:, sstidx]
			dropped_data = weighted[i][sstidx]

			eof_1 = leading_eof(rawdata)
			pc_1 = rawdata.dot(eof_1)

			slope, intercept, r_value, p_value, std_err = linregress(pc_1, observed[train])
			# Project the held-out sample on EOF-1 (a scalar) and predict.
			predictor = dropped_data.dot(eof_1)
			yhat[i] = slope * predictor + intercept
			e[i] = observed[i] - yhat[i]
			params.append(slope); std_errs.append(std_err); p_vals.append(p_value)
			t_vals.append(slope/std_err)

		r, p = corr(observed, yhat)

		index = self.clim_data.index
		self.hindcast = pd.Series(data = yhat, index = index)
		self.hindcast_error = pd.Series(data = e, index = index)
		self.correlation = round(r, 2)
		self.reg_stats = {	'params' : np.array(params),
							'std_errs' : np.array(std_errs),
							't_vals' : np.array(t_vals),
							'p_vals' : np.array(p_vals)}

		return
Пример #6
0
    from os import environ as EV

    from data_load import *
    from utils import *
    from simpleNIPApca import *
    from numpy import cov, argsort, real, cumsum, round, corrcoef, ndarray, zeros, hstack
    from numpy.linalg import eig
    from atmos_ocean_data import weightsst
    st = time.time()
    sst, slp, mei, phaseind = get_climate_data()
    prcp = get_lcrb_prcp()

    ### This way the ocean regions are obtained and then PC done on all
    ### Should write another way where pcs get computed separately and then all
    #   put into the NIPA module.
    pac = get_region(weightsst(sst), region = 'Pacific')
    atl = get_region(weightsst(sst), region = 'Atlantic')
    ind = get_region(weightsst(sst), region = 'Indian')
    ### This block will combine and do PCA on all, then split into regions.
    bands = pac[0]
    lats = pac[1]
    lons = pac[2]

    for i in atl[0]:
        bands[str(int(i)+4)] = atl[0][i]
        lats[str(int(i)+4)] = atl[1][i]
        lons[str(int(i)+4)] = atl[2][i]
    for i in ind[0]:
        bands[str(int(i)+8)] = ind[0][i]
        lats[str(int(i)+8)] = ind[1][i]
        lons[str(int(i)+8)] = ind[2][i]