Exemplo n.º 1
0
    def __init__(self, path_db, tair_var):
        '''
        Open the station database and assemble the interpolation objects.

        Parameters
        ----------
        path_db : str
            File path to a serially complete netCDF
            station database containing the stations and
            temperature variable for interpolation.
        tair_var : str
            The temperature variable for interpolation ('tmin' or 'tmax')
        '''

        # NOTE(review): the meaning and units of vcc_size are defined by
        # StationSerialDataDb, not here -- confirm before changing the value.
        vcc_size = 470560000 * 2
        self.stn_da = StationSerialDataDb(path_db, tair_var, vcc_size=vcc_size)

        # Mask is True where a station's BAD field is NaN. How StationSelect
        # applies the mask is defined elsewhere (presumably it keeps only
        # stations that were not flagged as bad -- verify).
        usable_stns = np.isnan(self.stn_da.stns[BAD])
        selector = StationSelect(self.stn_da,
                                 stn_mask=usable_stns,
                                 rm_zero_dist_stns=True)

        # Both component models share the same station selector.
        kriging = KrigTair(selector)
        gwr_anom = GwrTairAnom(selector)
        self.interp_tair = InterpTair(kriging, gwr_anom)

        self.mth_masks = self.stn_da.mth_idx
Exemplo n.º 2
0
    def __init__(self, stn_da):
        '''
        Set up station selection and a per-station DataFrame that includes
        annual means of the monthly LST and Tair normal variables.

        Parameters
        ----------
        stn_da : twx.db.StationSerialDataDb
            A StationSerialDataDb object pointing to the
            database from which observations will be loaded.
        '''

        self.stn_da = stn_da

        # Mask is True where a station's BAD field is NaN.
        not_flagged = np.isnan(stn_da.stns[BAD])
        self.stn_slct = StationSelect(stn_da,
                                      stn_mask=not_flagged,
                                      rm_zero_dist_stns=True)

        # Names of the 12 monthly normal / LST variables (months 1..12).
        months = np.arange(1, 13)
        self.vnames_norm = [get_norm_varname(a_mth) for a_mth in months]
        self.vnames_lst = [get_lst_varname(a_mth) for a_mth in months]

        # Station table indexed by station id.
        stn_frame = pd.DataFrame(stn_da.stns)
        stn_frame.index = stn_frame[STN_ID]

        # Annual value = row-wise mean across the 12 monthly columns.
        stn_frame['lst'] = stn_frame[self.vnames_lst].mean(axis=1)
        stn_frame['norm'] = stn_frame[self.vnames_norm].mean(axis=1)

        self.df_stns = stn_frame
Exemplo n.º 3
0
    def __init__(self, path_db, tair_var):
        '''
        Open the station database and build the kriging object used for
        interpolation.

        Parameters
        ----------
        path_db : str
            File path to a serially complete netCDF
            station database containing the stations and
            temperature variable for interpolation.
        tair_var : str
            The temperature variable for interpolation ('tmin' or 'tmax')
        '''

        self.stn_da = StationSerialDataDb(path_db, tair_var)

        # Mask is True where a station's BAD field is NaN. How StationSelect
        # applies the mask is defined elsewhere (presumably it keeps only
        # stations that were not flagged as bad -- verify).
        usable_stns = np.isnan(self.stn_da.stns[BAD])
        selector = StationSelect(self.stn_da,
                                 stn_mask=usable_stns,
                                 rm_zero_dist_stns=True)

        self.krig = KrigTairAll(selector)
Exemplo n.º 4
0
class XvalOutlier(object):
    '''
    Class for running a leave-one-out cross validation of simple
    geographically weighted regression models of station monthly and annual
    normals to determine if a station is an outlier and has possible
    erroneous values based on unrealistic model error.
    '''

    def __init__(self, stn_da):
        '''
        Parameters
        ----------
        stn_da : twx.db.StationSerialDataDb
            A StationSerialDataDb object pointing to the
            database from which observations will be loaded.
        '''

        self.stn_da = stn_da

        # Mask is True where a station's BAD field is NaN.
        mask_stns = np.isnan(self.stn_da.stns[BAD])
        self.stn_slct = StationSelect(self.stn_da,
                                      stn_mask=mask_stns,
                                      rm_zero_dist_stns=True)

        # Names of the 12 monthly normal / LST variables (months 1..12).
        self.vnames_norm = [get_norm_varname(mth) for mth in np.arange(1, 13)]
        self.vnames_lst = [get_lst_varname(mth) for mth in np.arange(1, 13)]

        # Station table indexed by station id so that rows can be looked up
        # with .loc[stn_id].
        self.df_stns = pd.DataFrame(self.stn_da.stns)
        self.df_stns.index = self.df_stns[STN_ID]

        # Calculate annual means for monthly LST and Tair normals
        self.df_stns['lst'] = self.df_stns[self.vnames_lst].mean(axis=1)
        self.df_stns['norm'] = self.df_stns[self.vnames_norm].mean(axis=1)

    def _xval_err(self, ls_form, resp_var, df_nghs, df_xval_stn):
        '''
        Fit one weighted least squares model on the neighboring stations and
        return the prediction error for the held-out station.

        Parameters
        ----------
        ls_form : str
            The model formula passed to sm.wls.
        resp_var : str
            Name of the response variable in ls_form; the held-out station's
            observed value is read from df_xval_stn[resp_var].
        df_nghs : pandas.DataFrame
            Rows of the neighboring stations used to fit the model.
        df_xval_stn : pandas.Series
            The row of the held-out station.

        Returns
        ----------
        err : float
            Predicted minus observed.
        '''

        ls_fit = sm.wls(ls_form,
                        data=df_nghs,
                        weights=self.stn_slct.ngh_wgt).fit()

        # NOTE(review): depending on the library version, predict() may
        # return an ndarray or a pandas Series labeled by station id -- TODO
        # confirm. np.asarray makes the "first (only) prediction" access
        # explicitly positional in either case.
        predicted = np.asarray(ls_fit.predict(df_xval_stn))[0]

        return predicted - df_xval_stn[resp_var]

    def run_xval_stn(self, stn_id, bw_nngh=100):
        '''
        Run a single leave-one-out cross validation of a geographically
        weighted regression model of a station's monthly and annual normals
        (norm~lst+elev+lon+lat).

        Parameters
        ----------
        stn_id : str
            The stn_id for which to run the cross validation
        bw_nngh : int, optional
            The number of neighbors to use for the
            geographically weighted regression. Default: 100.

        Returns
        ----------
        errs : ndarray
            Array of 13 errors (predicted minus observed): elements 0-11 are
            the errors for the monthly normals of months 1-12 and the last
            element is the error for the annual normal.
        '''

        xval_stn = self.stn_da.stns[self.stn_da.stn_idxs[stn_id]]
        df_xval_stn = self.df_stns.loc[stn_id, :]

        # Select the neighbors around the station, removing the station
        # itself (stns_rm) so that it is left out of the fit.
        self.stn_slct.set_ngh_stns(xval_stn[LAT],
                                   xval_stn[LON],
                                   bw_nngh,
                                   load_obs=False,
                                   stns_rm=stn_id)
        df_nghs = self.df_stns.loc[self.stn_slct.ngh_stns[STN_ID], :]

        errs = np.empty(13)

        # Errors for monthly normals
        for mth in range(1, 13):

            ls_form = 'norm%.2d~lst%.2d+elevation+longitude+latitude' % (mth,
                                                                         mth)
            errs[mth - 1] = self._xval_err(ls_form, 'norm%.2d' % mth,
                                           df_nghs, df_xval_stn)

        # Error for annual normal
        errs[-1] = self._xval_err('norm~lst+elevation+longitude+latitude',
                                  'norm', df_nghs, df_xval_stn)

        return errs

    def find_xval_outliers(self,
                           stn_ids=None,
                           bw_nngh=100,
                           zscore_threshold=6):
        '''
        Runs a leave-one-out cross validation of a geographically
        weighted regression model of station monthly and annual normals
        (norm~lst+elev+lon+lat) and returns those stations whose error is
        a specified # of standard deviations above/below the mean

        Parameters
        ----------
        stn_ids : list_like, optional
            The station ids for which to run the cross validation.
            If None, the cross validation will be run for all stations
            in the database
        bw_nngh : int, optional
            The number of neighbors to use for the
            geographically weighted regression. Default: 100.
        zscore_threshold : float, optional
            The zscore threshold by which a station's error should be
            considered an outlier. Default: 6.

        Returns
        ----------
        out_stnids : ndarray
            The ids of the outlier stations: those whose absolute error
            zscore exceeds zscore_threshold for at least one of the 13
            (12 monthly + 1 annual) models.
        '''

        if stn_ids is None:
            stn_ids = self.stn_da.stn_ids

        # The code below relies on ndarray attributes (.size); normalize so
        # that plain lists/tuples (and a single id) are accepted as well.
        stn_ids = np.atleast_1d(stn_ids)

        schk = StatusCheck(stn_ids.size, check_cnt=250)

        # One row per model (12 monthly + annual), one column per station.
        xval_errs = np.zeros((13, stn_ids.size))

        for i, a_id in enumerate(stn_ids):

            xval_errs[:, i] = self.run_xval_stn(a_id, bw_nngh)
            schk.increment()

        xval_errs = pd.DataFrame(xval_errs, columns=stn_ids)

        # Standardize each model's errors across the stations (row-wise mean
        # and standard deviation), then take the absolute value.
        centered = xval_errs.subtract(xval_errs.mean(axis=1), axis=0)
        zscores = centered.divide(xval_errs.std(axis=1), axis=0).abs()

        # A station is an outlier if any of its 13 zscores is over threshold.
        is_outlier = (zscores > zscore_threshold).any(axis=0)
        out_stnids = zscores.columns[is_outlier].values

        return out_stnids
Exemplo n.º 5
0
    def __init__(self,
                 stn_da_tmin,
                 stn_da_tmax,
                 aux_fpaths=None,
                 interp_orders=None,
                 norms_only=False):
        '''
        Parameters
        ----------
        stn_da_tmin : twx.db.StationSerialDataDb
            A StationSerialDataDb object pointing to the
            database from which Tmin observations should
            be loaded.
        stn_da_tmax : twx.db.StationSerialDataDb
            A StationSerialDataDb object pointing to the
            database from which Tmax observations should
            be loaded.
        aux_fpaths : optional
            If not None, passed together with interp_orders to
            PredictorGrids and the result is stored in self.pGrids.
            If None, self.pGrids is not set.
        interp_orders : optional
            Passed to PredictorGrids; only used when aux_fpaths
            is not None.
        norms_only : bool, optional
            If True, GwrTairAnomBlank is used in place of GwrTairAnom
            for both Tmin and Tmax. Default: False.
        '''
        self.stn_da_tmin = stn_da_tmin
        self.stn_da_tmax = stn_da_tmax
        self.days = stn_da_tmin.days

        # Masks for calculating monthly norms after daily Tmin/Tmax values
        # had to be adjusted due to Tmin >= Tmax
        in_norm_period = np.logical_and(self.days[YEAR] >= 1981,
                                        self.days[YEAR] <= 2010)
        self.daysNormMask = np.nonzero(in_norm_period)[0]
        norm_days = self.days[self.daysNormMask]

        norm_yrs = np.unique(norm_days[YEAR])
        self.yr_mths = get_mth_metadata(norm_yrs[0], norm_yrs[-1])

        months = np.arange(1, 13)

        # One index array per (year, month) of the 1981-2010 days, ordered
        # by year and then month.
        self.yrMthsMasks = [
            np.nonzero(
                np.logical_and(norm_days[YEAR] == a_yr,
                               norm_days[MONTH] == a_mth))[0]
            for a_yr in norm_yrs
            for a_mth in months
        ]

        # For each calendar month, the indices into yr_mths for that month.
        self.mth_masks = [
            np.nonzero(self.yr_mths[MONTH] == a_mth)[0] for a_mth in months
        ]

        # Masks are True where a station's BAD field is NaN.
        ok_tmin = np.isnan(stn_da_tmin.stns[BAD])
        ok_tmax = np.isnan(stn_da_tmax.stns[BAD])

        slct_tmin = StationSelect(stn_da_tmin, ok_tmin)
        slct_tmax = StationSelect(stn_da_tmax, ok_tmax)

        # Stations with a NaN BAD field and a finite MASK value.
        in_domain_tmin = np.logical_and(ok_tmin,
                                        np.isfinite(stn_da_tmin.stns[MASK]))
        in_domain_tmax = np.logical_and(ok_tmax,
                                        np.isfinite(stn_da_tmax.stns[MASK]))
        self.nnghparams_tmin = _get_rgn_nnghs_dict(
            stn_da_tmin.stns[in_domain_tmin])
        self.nnghparams_tmax = _get_rgn_nnghs_dict(
            stn_da_tmax.stns[in_domain_tmax])

        krig_tmin = KrigTair(slct_tmin)
        krig_tmax = KrigTair(slct_tmax)

        gwr_cls = GwrTairAnomBlank if norms_only else GwrTairAnom
        gwr_tmin = gwr_cls(slct_tmin)
        gwr_tmax = gwr_cls(slct_tmax)

        self.interp_tmin = InterpTair(krig_tmin, gwr_tmin)
        self.interp_tmax = InterpTair(krig_tmax, gwr_tmax)

        if aux_fpaths is not None:
            self.pGrids = PredictorGrids(aux_fpaths, interp_orders)

        self.a_pt = build_empty_pt()