Exemplo n.º 1
0
    def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):
        '''
        Run geographically weighted regression of daily temperature anomalies 
        for a specific month and point location. The function interpolates
        daily anomalies using GWR and then adds the anomalies to the point's
        monthly normal and returns the actual daily values.
        
        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin 
            temperatures for each month. An empty point can be initialized
            with build_empty_pt(). The point's monthly normal for mth must
            already be set since it is added to the interpolated anomalies.
        mth : int
            The month to interpolate. It selects the monthly normal variable
            (via get_norm_varname), the predictor list in
            self.mthly_predictors, and the observations that are loaded.
        nnghs : int, optional
            The number of closest neighboring stations to use for the GWR.
            If None, nnghs will be set to the optimized number of GWR neighbors
            for the point's region.
        stns_rm : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.
        
        Returns
        ----------
        interp_vals : ndarray
            A 1-D array containing the GWR interpolated actual daily values
            for the specified month. 
        '''

        # Identity test: "== None" would be evaluated elementwise if an
        # array-like were ever passed for nnghs.
        if nnghs is None:
            # Get the nnghs to use from the optimal values
            # at surrounding stations
            nnghs = self.__get_nnghs(pt, mth, stns_rm)

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=True,
                                   stns_rm=stns_rm,
                                   obs_mth=mth)

        ngh_obs = self.stn_slct.ngh_obs
        ngh_stns = self.stn_slct.ngh_stns
        ngh_wgt = self.stn_slct.ngh_wgt

        # Center the neighbor observations on each neighbor's monthly normal
        # so that the regression is run on anomalies.
        ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

        predictors = self.mthly_predictors[mth]

        # Neighbor design matrix: one column per predictor variable
        X = np.column_stack([ngh_stns[avar] for avar in predictors])

        # Predictor values at the interpolation point, in the same order
        x = np.array([pt[avar] for avar in predictors])

        # Perform a GWR for each day
        interp_anom = _gwr_series(X, x, ngh_obs_cntr, ngh_wgt)

        # Add interpolated anomalies to monthly norm to get actual values
        interp_vals = interp_anom + pt[get_norm_varname(mth)]

        return interp_vals
Exemplo n.º 2
0
    def run_xval(self, stn_id, abw_nngh):
        '''
        Leave-one-out cross validation of the kriged monthly normals at one
        station, repeated for several neighbor bandwidths.
        
        Parameters
        ----------
        stn_id : str
            Id of the station to cross validate.
        abw_nngh : ndarray
            A 1-D array with the neighbor station bandwidths to evaluate.
        
        Returns
        ----------
        err : ndarray
            An array of shape (12, N) holding the cross validation error
            (modeled - observed) for each month (rows) and each of the N
            bandwidths in abw_nngh (columns).
            
        '''
        stn = self.stn_da.stns[self.stn_da.stn_idxs[stn_id]]

        # Observed monthly normals at the station, January through December
        obs_norms = np.array(
            [stn[get_norm_varname(mth)] for mth in np.arange(1, 13)])

        err = np.zeros((12, abw_nngh.size))

        for col, bw_nngh in enumerate(abw_nngh):

            # The station itself is removed from the neighbor pool so the
            # prediction is independent of its own normals.
            modeled = self.krig.krigall(stn, bw_nngh, stns_rm=stn[STN_ID])
            err[:, col] = modeled - obs_norms

        return err
Exemplo n.º 3
0
def proc_write(fpath_stndb, elem, fpath_out, nwrkers):
    '''
    Writer process: create the output station netCDF database and fill it
    with the results received over MPI until all workers report they are done.
    
    Parameters
    ----------
    fpath_stndb : str
        File path of the input station database opened with
        StationSerialDataDb.
    elem : str
        Name of the element/variable. It is used to open the databases,
        to look up DB_VARIABLES and as the name of the output variable
        that receives the daily values.
    fpath_out : str
        File path of the output station database to create.
    nwrkers : int
        Number of worker processes. The writer finishes once it has received
        this many messages tagged TAG_STOPWORK.
    
    Returns
    ----------
    int
        Always 0, returned after the last worker has signaled completion.
    '''

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationSerialDataDb(fpath_stndb, elem)
    stn_ids = stn_da.stn_ids
    stns = stn_da.stns
    # Only used to size the progress counter below
    stn_mask = np.logical_and(np.isfinite(stns[MASK]), np.isnan(stns[BAD]))
    days = stn_da.days
    stn_da.ds.close()
    stn_da = None

    # Single-argument parenthesized print produces the same output whether
    # print is a statement or a function.
    print("WRITER: Creating output station netCDF database...")

    create_quick_db(fpath_out, stns, days, DB_VARIABLES[elem])
    stnda_out = StationSerialDataDb(fpath_out, elem, mode='r+')

    # From here on the output dataset is open; make sure it is closed again
    # on normal completion as well as on error.
    try:

        # One station variable per month for the monthly normals
        mth_names = []
        for mth in np.arange(1, 13):

            norm_var_name = get_norm_varname(mth)
            stnda_out.add_stn_variable(
                norm_var_name,
                '',
                units='C',
                dtype='f8',
                fill_value=netCDF4.default_fillvals['f8'])
            mth_names.append(norm_var_name)

        stnda_out.ds.sync()

        print("WRITER: Output station netCDF database created.")

        stat_chk = StatusCheck(np.sum(stn_mask), 50)

        while True:

            stn_id, tair_daily, tair_norms = MPI.COMM_WORLD.recv(
                source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

            if status.tag == TAG_STOPWORK:

                nwrkrs_done += 1
                if nwrkrs_done == nwrkers:
                    print("WRITER: Finished")
                    return 0
            else:

                # Column of this station in the output database
                x = np.nonzero(stn_ids == stn_id)[0][0]
                stnda_out.ds.variables[elem][:, x] = tair_daily

                for i, mth_name in enumerate(mth_names):
                    stnda_out.ds.variables[mth_name][x] = tair_norms[i]

                # Flush after every station so received results are
                # persisted as they arrive.
                stnda_out.ds.sync()

                stat_chk.increment()

    finally:
        stnda_out.ds.close()
Exemplo n.º 4
0
    def run_xval(self, stn_id, a_nnghs):
        '''
        Leave-one-out cross validation of the GWR daily anomaly interpolation
        at one station for several numbers of neighbors.
        
        Parameters
        ----------
        stn_id : str
            Id of the station to cross validate.
        a_nnghs : ndarray
            Array with the numbers of neighboring stations to evaluate.
        
        Returns
        ----------
        biasAll : ndarray
            Mean of (interpolated - observed) anomalies. Shape is
            (a_nnghs.size, 12): one row per entry of a_nnghs, one column
            per month.
        maeAll : ndarray
            Mean absolute error of the anomalies, same shape as biasAll.
        r2All : ndarray
            Squared correlation coefficient between interpolated and
            observed anomalies, same shape as biasAll.
        '''

        stn = self.stn_da.stns[self.stn_da.stn_idxs[stn_id]]
        obs = self.stn_da.load_obs(stn[STN_ID])

        out_shape = (a_nnghs.size, 12)
        biasAll = np.zeros(out_shape)
        maeAll = np.zeros(out_shape)
        r2All = np.zeros(out_shape)

        for row in range(a_nnghs.size):

            nnghs = a_nnghs[row]

            for mth in np.arange(1, 13):

                col = mth - 1
                norm_var = get_norm_varname(mth)

                # Observed anomalies for this month
                obs_anom = obs[self.stn_da.mth_idx[mth]] - stn[norm_var]

                # Interpolate with the station itself excluded from the
                # neighbors, then convert back to anomalies.
                interp_tair = self.gwr.gwr_mth(stn,
                                               mth,
                                               nnghs,
                                               stns_rm=stn[STN_ID])
                interp_anom = interp_tair - stn[norm_var]

                difs = interp_anom - obs_anom

                # Index 2 of the linregress result is the r-value
                r_value = stats.linregress(interp_anom, obs_anom)[2]

                biasAll[row, col] = np.mean(difs)
                maeAll[row, col] = np.mean(np.abs(difs))
                r2All[row, col] = r_value**2  # variance explained

        return biasAll, maeAll, r2All
Exemplo n.º 5
0
    def krigall(self, pt, nnghs, stns_rm=None):
        '''
        Interpolate the 12 monthly temperature normals to a single point
        location using moving window variogram fitting and regression kriging.
        
        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin 
            temperatures for each month. An empty point can be initialized
            with build_empty_pt()
        nnghs : int
            The number of neighboring stations to use.
        stns_rm : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.
        
        Returns
        ----------
        interp_norms : ndarray
            A 1-D array of size 12 containing 
            the interpolated monthly normals
        
        '''

        slct = self.stn_slct
        slct.set_ngh_stns(pt[LAT],
                          pt[LON],
                          nnghs,
                          load_obs=False,
                          stns_rm=stns_rm)

        to_r = ri.FloatSexpVector
        nghs = slct.ngh_stns

        # Neighbor vectors that do not depend on the month
        r_lon = to_r(nghs[LON])
        r_lat = to_r(nghs[LAT])
        r_elev = to_r(nghs[ELEV])
        r_tdi = to_r(nghs[TDI])
        r_wgt = to_r(slct.ngh_wgt)
        r_dists = to_r(slct.ngh_dists)

        interp_norms = np.zeros(12)

        for i, mth in enumerate(np.arange(1, 13)):

            lst_var = get_lst_varname(mth)

            r_lst = to_r(nghs[lst_var])
            r_tair = to_r(nghs[get_norm_varname(mth)])
            r_pt = to_r(
                (pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_var]))

            rslt = self.r_func(r_pt, r_lon, r_lat, r_elev, r_tdi, r_lst,
                               r_tair, r_wgt, r_dists)

            # Only the first element of the R result is used
            interp_norms[i] = rslt[0]

        return interp_norms
Exemplo n.º 6
0
    def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):
        '''
        Interpolate daily values for a specific month and point location by
        passing centered neighbor observations to the R function gwr_anomaly
        and adding the returned point anomalies to the point's monthly normal.
        
        Parameters
        ----------
        pt : structured array
            The point to interpolate to. Its longitude, latitude, elevation,
            topographic dissection index, land skin temperature for mth
            and monthly normal for mth are read.
        mth : int
            The month to interpolate. It selects the monthly normal and
            land skin temperature variables and the observations loaded.
        nnghs : int, optional
            The number of neighboring stations to use. If None, it is
            obtained from the optimal values at surrounding stations.
        stns_rm : ndarray or str, optional
            Station id(s) passed to the neighbor selection as stations to
            remove.
        
        Returns
        ----------
        interp_vals : ndarray
            A 1-D array with the interpolated point anomalies plus the
            point's monthly normal.
        '''

        # Identity test: "== None" would be evaluated elementwise if an
        # array-like were ever passed for nnghs.
        if nnghs is None:
            # Get the nnghs to use from the optimal values at surrounding stations
            nnghs = self._GwrTairAnom__get_nnghs(pt, mth, stns_rm)

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=True,
                                   stns_rm=stns_rm,
                                   obs_mth=mth)

        ngh_obs = self.stn_slct.ngh_obs
        ngh_stns = self.stn_slct.ngh_stns
        ngh_wgt = self.stn_slct.ngh_wgt

        # Center the neighbor observations on each neighbor's monthly normal
        ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

        lst_var = get_lst_varname(mth)

        a_pt = np.array([pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_var]])

        rslt = r.gwr_anomaly(
            robjects.FloatVector(ngh_stns[LON]),
            robjects.FloatVector(ngh_stns[LAT]),
            robjects.FloatVector(ngh_stns[ELEV]),
            robjects.FloatVector(ngh_stns[TDI]),
            robjects.FloatVector(ngh_stns[lst_var]),
            robjects.FloatVector(ngh_wgt), robjects.Matrix(ngh_obs_cntr),
            robjects.FloatVector(a_pt))

        # Only the point anomalies are needed. The fitted neighbor anomalies
        # ('fit_anom', 'fit_nrow', 'fit_ncol') in the result were previously
        # extracted and reshaped but never used, so that work is skipped.
        interp_anom = np.array(rslt.rx('pt_anom')).ravel()

        # Add interpolated anomalies to monthly norm to get actual values
        interp_vals = interp_anom + pt[get_norm_varname(mth)]

        return interp_vals
Exemplo n.º 7
0
    def interp(self, pt, stns_rm=None):
        '''
        Interpolate monthly temperature normals and daily 
        temperatures to a single point location.
        
        For each month the normal is kriged first and written into pt, and
        the daily values for that month are then interpolated with GWR.
        
        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin 
            temperatures for each month. An empty point can be initialized
            with build_empty_pt(). Note that pt is modified: its monthly
            normal fields are overwritten with the interpolated normals.
        stns_rm : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.
        
        Returns
        ----------
        tair_daily : ndarray
            A 1-D array of interpolated daily temperatures.
        tair_norms : ndarray
            A 1-D array of size 12 with the interpolated 
            monthly temperature normals.
        tair_se : ndarray
            A 1-D array of size 12 with the kriging standard
            errors for the interpolated monthly temperature
            normals.
        '''

        tair_norms = np.zeros(12)
        tair_se = np.zeros(12)
        tair_daily = np.zeros(self.ndays)

        for mth in np.arange(1, 13):

            i = mth - 1

            krig_mean, krig_var = self.krig_tair.krig(pt,
                                                      mth,
                                                      stns_rm=stns_rm)
            # The second return value of std_err_ci is not needed here
            std_err, _ = self.krig_tair.std_err_ci(krig_mean, krig_var)

            tair_norms[i] = krig_mean
            tair_se[i] = std_err

            # The normal must be stored on the point before the GWR call
            # because pt is what gets passed on to gwr_mth.
            pt[get_norm_varname(mth)] = krig_mean

            mth_daily = self.gwr_tair.gwr_mth(pt, mth, stns_rm=stns_rm)
            tair_daily[self.mth_masks[mth]] = mth_daily

        return tair_daily, tair_norms, tair_se
Exemplo n.º 8
0
def build_empty_pt():
    '''
    Build an empty point record to be filled in before interpolation.
    
    Returns
    ----------
    pt : numpy.void
        A single structured-array record in which every field is float64.
        The fields are, in order: LON, LAT, ELEV, TDI, CLIMDIV, MASK,
        tmin01-tmin12, tmax01-tmax12 and then, for months 1-12, the names
        produced by get_norm_varname, get_optim_varname, get_lst_varname
        and get_optim_anom_varname. The record is created with np.empty,
        so its values are uninitialized.
    '''

    mths = np.arange(1, 13)

    names = [LON, LAT, ELEV, TDI, CLIMDIV, MASK]

    for prefix in ("tmin", "tmax"):
        names.extend((prefix + "%02d") % mth for mth in mths)

    # Monthly fields whose names come from the shared naming helpers
    for name_func in (get_norm_varname, get_optim_varname, get_lst_varname,
                      get_optim_anom_varname):
        names.extend(name_func(mth) for mth in mths)

    pt_dtype = [(name, np.float64) for name in names]

    return np.empty(1, dtype=pt_dtype)[0]
Exemplo n.º 9
0
    def gwr_predict(self, pt, mth, nnghs=None, stns_rm=None):
        '''
        Predict the temperature normal of a specific month at a single point
        location by calling the R function held in self.r_gwr_func with the
        neighboring stations' data.
        
        Parameters
        ----------
        pt : structured array
            The point to predict at. Its longitude, latitude, elevation,
            topographic dissection index and land skin temperature for
            mth are read.
        mth : int
            The month for which to predict.
        nnghs : int, optional
            The number of neighboring stations to use. If None, it is
            obtained from the optimal values at surrounding stations.
        stns_rm : ndarray or str, optional
            Station id(s) passed to the neighbor selection as stations to
            remove.
        
        Returns
        ----------
        tair_mean : float
            First element of the R result.
        tair_var : float
            Second element of the R result.
        
        Notes
        ----------
        If the third element of the R result is non-zero, an error message
        is printed; the values are still returned.
        '''

        if nnghs is None:
            # Get the nnghs to use from the optimal values at surrounding stations
            nnghs = self.__get_nnghs(pt, mth, stns_rm)

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=False,
                                   stns_rm=stns_rm)

        to_r = ri.FloatSexpVector
        nghs = self.stn_slct.ngh_stns
        lst_var = get_lst_varname(mth)

        # Neighbor predictors, response and weights
        r_lon = to_r(nghs[LON])
        r_lat = to_r(nghs[LAT])
        r_elev = to_r(nghs[ELEV])
        r_tdi = to_r(nghs[TDI])
        r_lst = to_r(nghs[lst_var])
        r_tair = to_r(nghs[get_norm_varname(mth)])
        r_wgt = to_r(self.stn_slct.ngh_wgt)

        # Predictor values at the point itself
        r_pt = to_r((pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_var]))

        rslt = self.r_gwr_func(r_lon, r_lat, r_elev, r_tdi, r_lst, r_tair,
                               r_wgt, r_pt)

        tair_mean, tair_var, bad_interp = rslt[0], rslt[1], rslt[2]

        if bad_interp != 0:
            msg = "".join(
                ["ERROR: ",
                 str(bad_interp), " bad interp: ",
                 str(pt)])
            print(msg)

        return tair_mean, tair_var
Exemplo n.º 10
0
    def __init__(self, stn_da):
        '''        
        Parameters
        ----------
        stn_da : twx.db.StationSerialDataDb
            A StationSerialDataDb object pointing to the
            database from which observations will be loaded.
        '''

        self.stn_da = stn_da

        # Neighbor selection is restricted to stations whose BAD field is NaN
        not_bad = np.isnan(self.stn_da.stns[BAD])
        self.stn_slct = StationSelect(self.stn_da,
                                      stn_mask=not_bad,
                                      rm_zero_dist_stns=True)

        mths = np.arange(1, 13)
        self.vnames_norm = [get_norm_varname(mth) for mth in mths]
        self.vnames_lst = [get_lst_varname(mth) for mth in mths]

        # Station table as a DataFrame indexed by station id
        df_stns = pd.DataFrame(self.stn_da.stns)
        self.df_stns = df_stns
        df_stns.index = df_stns[STN_ID]

        # Calculate annual means for monthly LST and Tair normals
        df_stns['lst'] = df_stns[self.vnames_lst].mean(axis=1)
        df_stns['norm'] = df_stns[self.vnames_norm].mean(axis=1)
Exemplo n.º 11
0
    def krig(self, pt, mth, nnghs=None, vario_params=None, stns_rm=None):
        '''
        Run moving window regression kriging to interpolate a 
        temperature normal for a specific month to a single point location.
        
        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin 
            temperatures for each month. An empty point can be initialized
            with build_empty_pt()
        mth : int
            The month for which to interpolate a temperature normal
        nnghs : int, optional
            The number of neighboring stations to use. If not provided, nnghs
            will be determined from the previously determined optimal number
            at surrounding neighboring stations.
        vario_params : tuple, optional
            A tuple of size 3 (nugget, sill, range). If not provided, the
            variogram parameters will be determined from previously fit 
            variogram parameters at neighboring stations
        stns_rm : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point.
        
        Returns
        ----------
        tair_mean : float
            The interpolated temperature normal.
        tair_var : float
            The kriging prediction variance for the 
            temperature normal.
        
        Notes
        ----------
        If the third element of the R result is non-zero, an error message
        is printed; the values are still returned.
        '''

        if nnghs is None:
            # Get the nnghs to use from the optimal values at surrounding stations
            nnghs = self.__get_nnghs(pt, mth, stns_rm)

        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=False,
                                   stns_rm=stns_rm)

        # Keep this lookup after the neighbor selection above and before
        # ngh_stns is read below, matching the established call order.
        if vario_params is not None:
            nug, psill, vrange = vario_params
        else:
            nug, psill, vrange = self.__get_vario_params(pt, mth)

        to_r = ri.FloatSexpVector
        nghs = self.stn_slct.ngh_stns
        lst_var = get_lst_varname(mth)

        # Neighbor predictors and response
        r_lon = to_r(nghs[LON])
        r_lat = to_r(nghs[LAT])
        r_elev = to_r(nghs[ELEV])
        r_tdi = to_r(nghs[TDI])
        r_lst = to_r(nghs[lst_var])
        r_tair = to_r(nghs[get_norm_varname(mth)])

        # Predictor values at the point itself
        r_pt = to_r((pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[lst_var]))

        # Variogram parameters as length-1 R vectors
        r_nug = to_r([nug])
        r_psill = to_r([psill])
        r_vrange = to_r([vrange])

        r_wgt = to_r(self.stn_slct.ngh_wgt)

        rslt = self.r_krig_func(r_lon, r_lat, r_elev, r_tdi, r_lst, r_tair,
                                r_wgt, r_pt, r_nug, r_psill, r_vrange)

        tair_mean, tair_var, bad_interp = rslt[0], rslt[1], rslt[2]

        if bad_interp != 0:
            msg = "".join(
                ["ERROR: ",
                 str(bad_interp), " bad interp: ",
                 str(pt)])
            print(msg)

        return tair_mean, tair_var
Exemplo n.º 12
0
    def get_krig_params(self, pt, mth, rm_stnid=None):
        '''
        Get the moving window regression kriging variogram
        parameters for a specific point and month. Currently
        assumes an exponential variogram
                
        Parameters
        ----------
        pt : structured array
            A structured array containing the point's latitude, longitude,
            elevation, topographic dissection index, and average land skin 
            temperatures for each month. An empty point can be initialized
            with build_empty_pt()
        mth : int
            The specific month as an integer (1-12)
        rm_stnid : ndarray or str, optional
            An array of station ids or a single station id for stations that
            should not be considered neighbors for the specific point. It is
            passed as stns_rm to both neighbor selections made here.
        
        Returns
        ----------
        nug : float
            Exponential variogram nugget.
        psill : float
            Exponential variogram partial sill.
        rng : float
            Exponential variogram range.
        
        Raises
        ----------
        Exception
            If none of the initially selected neighbors has a finite
            optimal number of neighbors for the month.
        
        '''

        optim_var = get_optim_varname(mth)

        # First determine the nnghs to use based on smoothed weighted average of
        # the optimal nnghs bandwidth at each station point.
        # rm_stnid was previously accepted but ignored; forward it so the
        # excluded stations really are left out of the neighbor selection.
        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   DFLT_INIT_NNGHS,
                                   load_obs=False,
                                   stns_rm=rm_stnid)

        # Only neighbors with a finite optimal nnghs value can contribute
        indomain_mask = np.isfinite(self.stn_slct.ngh_stns[optim_var])

        domain_stns = self.stn_slct.ngh_stns[indomain_mask]

        if domain_stns.size == 0:
            raise Exception(
                "Cannot determine the optimal # of neighbors to use!")

        n_wgt = self.stn_slct.ngh_wgt[indomain_mask]

        # Builtin int instead of the np.int alias, which recent NumPy
        # releases no longer provide.
        nnghs = int(
            np.round(np.average(domain_stns[optim_var], weights=n_wgt)))

        # Now use the optimal nnghs to get the krig params for this mth
        self.stn_slct.set_ngh_stns(pt[LAT],
                                   pt[LON],
                                   nnghs,
                                   load_obs=False,
                                   stns_rm=rm_stnid)

        nghs = self.stn_slct.ngh_stns
        ngh_lon = ri.FloatSexpVector(nghs[LON])
        ngh_lat = ri.FloatSexpVector(nghs[LAT])
        ngh_elev = ri.FloatSexpVector(nghs[ELEV])
        ngh_tdi = ri.FloatSexpVector(nghs[TDI])
        ngh_lst = ri.FloatSexpVector(nghs[get_lst_varname(mth)])
        ngh_tair = ri.FloatSexpVector(nghs[get_norm_varname(mth)])
        ngh_wgt = ri.FloatSexpVector(self.stn_slct.ngh_wgt)
        ngh_dists = ri.FloatSexpVector(self.stn_slct.ngh_dists)

        rslt = self.r_func(ngh_lon, ngh_lat, ngh_elev, ngh_tdi, ngh_lst,
                           ngh_tair, ngh_wgt, ngh_dists)
        nug = rslt[0]
        psill = rslt[1]
        rng = rslt[2]

        return nug, psill, rng