def main(args):
    #environmental constants  
    if platform.system() == 'Windows':
        in_dir='../examples/'
        out_dir='../regressors/'
        reg_dir='../regressors/'#${in_dir}'regresory_2013/'
        nc_gen=True
        pdf_gen=False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)


    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev =  ds.coords[lev_name]/100.
        ds[lev_name] = lev    
    else:
        lev = ds.coords[lev_name]

    
    n = ds.coords['time'].shape[0]
    #it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]
      
    #equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev,10,50), lat_name: fce.coord_Between(lat,-10,10)}    
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    #period selection
    times = pd.date_range(str(s_year)+'-01-01', str(e_year)+'-12-31', name='time', freq = 'M')
    zm_u_sel = zm_u.sel(time = times, method='ffill') #nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()
    
    #additional constants
    npca = 30
    norm=2 #5
    norms=3 #5
    what_sp = '' # what solar proxy?

    print("regressors' openning")
    global reg#, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm, 'no_qbo' , i_year, s_year, e_year, reg_dir)
    nr = reg.shape[1]
    #print(anomalies) 
    #extracting of other variability by MLR
    stacked = anomalies.stack(allpoints = [lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None) 
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]   

    for i in range(npca):		
            var_eofs = solver.varianceFraction(neigs=i)
            #print var_eofs
            if np.sum(var_eofs) > 0.95:
                    npca=i
                    total_variance = np.sum(var_eofs)
                    print(total_variance, ' % based on ', i, ' components')
                    break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11,8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca)+' PCAs cover '+str(np.round(total_variance*100, 2))+'% of total variance')
        for i in xrange(npca):
                #plotting
                pcs[:,i].plot(linewidth = 2, ax = ax1, label = 'pca '+str(i+1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc = 'best')
        plt.savefig(reg_dir+'qbo_'+what_re+'_pcas.pdf', bbox_inches='tight')
        plt.close(fig)    
                
    if nc_gen:
        #save to netcdf
        #print(pcs[:,0])
        for i in range(npca):
            pcs_ds = pcs[:,i].to_dataset(name = 'index')
            pcs_ds.to_netcdf(reg_dir+r'qbo_'+what_re+'_pc'+str(i+1)+pripona_nc)       
示例#2
0
'''
If *True*, the mean along the first axis of *dataset* (the
            time-mean) will be removed prior to analysis. If *False*,
            the mean along the first axis will not be removed. Defaults
            to *True* (mean is removed).
            The covariance interpretation relies on the input data being
            anomaly data with a time-mean of 0. Therefore this option
            should usually be set to *True*. Setting this option to
            *True* has the useful side effect of propagating missing
            values along the time dimension, ensuring that a solution
            can be found even if missing values occur in different
            locations at different times.
'''
lambdas = solver.eigenvalues()
vf = solver.varianceFraction()
Nerror = solver.northTest(vfscaled=True)
pcs = solver.pcs()  #(time, mode)
eofs = solver.eofsAsCovariance()
'''
plt.figure()
plt.subplot(3,2,1)
pcs[:, 0].plot()#color='b', linewidth=2)
ax = plt.gca()
ax.axhline(0, color='k')
ax.set_xlabel('Year')
ax.set_ylabel('PC1 amplitude')
plt.grid()
plt.subplot(3,2,2)
pcs[:, 1].plot()
ax = plt.gca()
ax.axhline(0, color='k')
示例#3
0
    for lo in range(0,len(lon_obs)):
        valid = ~np.isnan(sst_obs[:,la,lo])
        if (valid.any()==True):
            sst_obs[:,la,lo] = signal.detrend(sst_obs[:,la,lo], axis=0, \
                                               type='linear')
        elif (valid.all()==False):
            sst_obs[:,la,lo] = np.nan
'''

# EOF for model
coslat_mdl = np.cos(np.deg2rad(sst_mdl.coords['lat'].values))
wgts_mdl = np.sqrt(coslat_mdl)[..., np.newaxis]
solver_mdl = Eof(sst_mdl, weights=wgts_mdl, center=True)
lambdas_mdl=solver_mdl.eigenvalues()
vf_mdl = solver_mdl.varianceFraction()
Nerror_mdl = solver_mdl.northTest(vfscaled=True)
pcs_mdl = solver_mdl.pcs() #(time, mode)
eofs_mdl = solver_mdl.eofs()
# EOF for obs
coslat_obs = np.cos(np.deg2rad(sst_obs.coords['lat'].values))
wgts_obs = np.sqrt(coslat_obs)[..., np.newaxis]
solver_obs = Eof(sst_obs, weights=wgts_obs, center=True)
lambdas_obs=solver_obs.eigenvalues()
vf_obs = solver_obs.varianceFraction()
Nerror_obs = solver_obs.northTest(vfscaled=True)
pcs_obs = solver_obs.pcs() #(time, mode)
eofs_obs = solver_obs.eofs()



def main(args):
    #environmental constants
    if platform.system() == 'Windows':
        in_dir = '../examples/'
        out_dir = '../regressors/'
        reg_dir = '../regressors/'  #${in_dir}'regresory_2013/'
        nc_gen = True
        pdf_gen = False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)

    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev = ds.coords[lev_name] / 100.
        ds[lev_name] = lev
    else:
        lev = ds.coords[lev_name]

    n = ds.coords['time'].shape[0]
    #it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]

    #equatorial average and level selection
    sel_dict = {
        lev_name: fce.coord_Between(lev, 10, 50),
        lat_name: fce.coord_Between(lat, -10, 10)
    }
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    #period selection
    times = pd.date_range(str(s_year) + '-01-01',
                          str(e_year) + '-12-31',
                          name='time',
                          freq='M')
    zm_u_sel = zm_u.sel(time=times, method='ffill')  #nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()

    #additional constants
    npca = 30
    norm = 2  #5
    norms = 3  #5
    what_sp = ''  # what solar proxy?

    print("regressors' openning")
    global reg  #, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm,
                                                     'no_qbo', i_year, s_year,
                                                     e_year, reg_dir)
    nr = reg.shape[1]
    #print(anomalies)
    #extracting of other variability by MLR
    stacked = anomalies.stack(allpoints=[lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None)
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]

    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        #print var_eofs
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance, ' % based on ', i, ' components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11, 8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(
            str(npca) + ' PCAs cover ' +
            str(np.round(total_variance * 100, 2)) + '% of total variance')
        for i in xrange(npca):
            #plotting
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc='best')
        plt.savefig(reg_dir + 'qbo_' + what_re + '_pcas.pdf',
                    bbox_inches='tight')
        plt.close(fig)

    if nc_gen:
        #save to netcdf
        #print(pcs[:,0])
        for i in range(npca):
            pcs_ds = pcs[:, i].to_dataset(name='index')
            pcs_ds.to_netcdf(reg_dir + r'qbo_' + what_re + '_pc' + str(i + 1) +
                             pripona_nc)
示例#5
0
# --- read netcdf file
dset = xr.open_dataset('asstdt_pacific.nc')

# --- select djf months
sst = dset['sst'].sel(time=np.in1d(dset['time.month'], [1, 2, 12]))

# --- square-root of cosine of latitude weights
coslat = np.cos(np.deg2rad(sst.coords['lat'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
# --- eof solver
solver = Eof(sst, weights=wgts)
# --- eof results
eofs = solver.eofsAsCorrelation(neofs=2)
pcs = solver.pcs(npcs=2, pcscaling=1)
variance_fractions = solver.varianceFraction()
north_test = solver.northTest(vfscaled=True)


# --- spatial patterns
fig, ax = plot.subplots(axwidth=5, nrows=2, tight=True, proj='pcarree',
                        proj_kw={'lon_0': 180})
# --- format options
ax.format(land=False, coast=True, innerborders=True, borders=True,
          large='15px', labels=False,
          latlim=(31, -31), lonlim=(119, 291),
          geogridlinewidth=0,
          abcloc='ul')
# a) first EOF mode
map1 = ax[0].contourf(dset['lon'], dset['lat'], eofs[0, :, :],
                      levels=np.arange(-0.5, 0.6, 0.1), cmap='Div', extend='both')