예제 #1
0
파일: qa_temp.py 프로젝트: prayer007/topowx
def _stns_in_radius_mask(stn, stn_da, radius=NGH_RADIUS):
    '''
    Build a mask of the database stations within a radius of a station.

    Parameters
    ----------
    stn : record
        The station whose LON and LAT fields define the search center.
    stn_da : object
        A station database whose ``stns`` attribute provides the LON and
        LAT fields of every station to be tested.
    radius : number, optional
        The maximum distance (inclusive), in the units returned by
        grt_circle_dist. Defaults to NGH_RADIUS.

    Returns
    -------
    in_radius : ndarray
        A boolean mask over ``stn_da.stns`` that is True where a station's
        distance is <= radius. No station is excluded by id.
    in_radius_dists : ndarray
        The distances of the stations selected by the mask.
    '''
    stn_dists = grt_circle_dist(stn[LON], stn[LAT],
                                stn_da.stns[LON], stn_da.stns[LAT])
    in_radius = stn_dists <= radius
    in_radius_dists = stn_dists[in_radius]
    return in_radius, in_radius_dists
예제 #2
0
def find_dup_stns(stnda):
    '''
    Find duplicate stations in a netCDF4 infilled station database. Two or
    more stations are considered duplicates if they are at the exact
    same location (great-circle distance of 0). For two or more stations
    with the same location, the one with the fewest infilled values (i.e.
    the longest non-infilled period-of-record) is kept and the others are
    considered duplicates and will be returned by this function. If several
    stations tie for the fewest infilled values, only the first one in
    database order is kept.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the infilled
        database that should be searched for duplicate stations.

    Returns
    -------
    rm_stnids : ndarray
        An array of duplicate station ids
    '''

    # Ids already assigned to a duplicate group. A set keeps the membership
    # test in the main loop O(1) instead of scanning a growing list.
    dup_stnids = set()
    rm_stnids = []

    stat_chk = StatusCheck(stnda.stns.size, 1000)

    for stn in stnda.stns:

        if stn[STN_ID] not in dup_stnids:

            ngh_stns = stnda.stns[stnda.stn_ids != stn[STN_ID]]
            dists = grt_circle_dist(stn[LON], stn[LAT], ngh_stns[LON],
                                    ngh_stns[LAT])

            dup_nghs = ngh_stns[dists == 0]

            if dup_nghs.size > 0:

                dup_stnids.update(dup_nghs[STN_ID])

                grp_stn_ids = np.concatenate([
                    np.array([stn[STN_ID]]).ravel(),
                    np.array([dup_nghs[STN_ID]]).ravel()
                ])

                # Indices of the group members in database order
                stn_idxs = np.nonzero(
                    np.in1d(stnda.stn_ids, grp_stn_ids,
                            assume_unique=True))[0]

                # Take the ids in the same (database) order as the columns
                # loaded below. Using an independently sorted id array
                # would misalign ids and flag sums whenever the database
                # is not sorted by station id.
                stn_ids_load = np.asarray(stnda.stn_ids)[stn_idxs]

                # Per-station total of the infilled flags
                # (presumably nonzero marks an infilled value - confirm)
                imp_flgs = stnda.ds.variables['flag_infilled'][:, stn_idxs]
                imp_flg_sum = np.sum(imp_flgs, axis=0)

                # Keep exactly one station: the first with the minimum
                # number of infilled values. All others are duplicates,
                # including stations tied with the kept one.
                rm_mask = np.ones(stn_ids_load.size, dtype=bool)
                rm_mask[np.argmin(imp_flg_sum)] = False

                rm_stnids.extend(stn_ids_load[rm_mask])

        stat_chk.increment()

    return np.array(rm_stnids)
예제 #3
0
    def __set_pt(self, lat, lon, stns_rm=None):
        '''
        Set the current point and cache station distances relative to it.

        The cached state is only recomputed when the point or the stations
        to remove differ from those of the previous call.

        On recomputation the following attributes are set: pt_lat, pt_lon,
        pt_stns_rm, pt_mask_stns_rm, pt_stn_dists and pt_dist_sort (all
        covering every station), and pt_sort_stn_dists and pt_sort_stns
        (sorted by distance, with the removed stations filtered out).

        Parameters
        ----------
        lat : number
            The latitude of the point
        lon : number
            The longitude of the point
        stns_rm : str, unicode or ndarray, optional
            Station id(s) to exclude from the distance-sorted stations.
            A single string is wrapped in a one-element array.

        Raises
        ------
        Exception
            If stns_rm is neither None, a string, nor a numpy array.
        '''

        # `unicode` only exists on Python 2. Referencing it directly raises
        # NameError on Python 3 whenever stns_rm is not a str (including
        # the default None), so resolve the string types defensively.
        try:
            str_types = (str, unicode)  # noqa: F821
        except NameError:
            str_types = (str,)

        if isinstance(stns_rm, str_types):
            stns_rm = np.array([stns_rm])
        elif stns_rm is not None and not isinstance(stns_rm, np.ndarray):
            raise Exception(
                "stns_rm must be str, unicode, or numpy array of str/unicode")

        if self.pt_lat == lat and self.pt_lon == lon:

            if self.pt_stns_rm is None or stns_rm is None:
                same_stns_rm = self.pt_stns_rm is None and stns_rm is None
            else:
                # array_equal returns False on a shape mismatch instead of
                # raising, so no blanket exception handler is needed
                same_stns_rm = np.array_equal(self.pt_stns_rm, stns_rm)

            if same_stns_rm:
                # Same point and same removals: cached state is still valid
                return

        stn_dists = grt_circle_dist(lon, lat, self.stns[LON],
                                    self.stns[LAT])

        fnl_stns_rm = stns_rm if stns_rm is not None else np.array([])

        if self.rm_zero_dist_stns:
            # Remove any stations that are at the same location (dist == 0)
            fnl_stns_rm = np.unique(
                np.concatenate(
                    (fnl_stns_rm, self.stns[STN_ID][stn_dists == 0])))

        if fnl_stns_rm.size > 0:
            # True for the stations that are kept
            mask_rm = np.logical_not(
                np.in1d(self.stns[STN_ID], fnl_stns_rm,
                        assume_unique=True))
        else:
            # Nothing to remove (mask_all is presumably an all-True mask
            # over the stations - confirm where it is defined)
            mask_rm = self.mask_all

        self.pt_lat = lat
        self.pt_lon = lon
        self.pt_stns_rm = stns_rm
        self.pt_mask_stns_rm = mask_rm
        self.pt_stn_dists = stn_dists
        self.pt_dist_sort = np.argsort(self.pt_stn_dists)
        self.pt_sort_stn_dists = np.take(self.pt_stn_dists,
                                         self.pt_dist_sort)

        self.pt_sort_stns = np.take(self.stns, self.pt_dist_sort)

        # Reorder the keep-mask by distance, then drop the removed stations
        # from the sorted distances and sorted stations
        mask_rm = np.take(self.pt_mask_stns_rm, self.pt_dist_sort)
        mask_rm = np.nonzero(mask_rm)[0]
        self.pt_sort_stn_dists = np.take(self.pt_sort_stn_dists, mask_rm)
        self.pt_sort_stns = np.take(self.pt_sort_stns, mask_rm)
예제 #4
0
파일: reanalysis.py 프로젝트: wk1984/topowx
    def get_nngh_matrix(self, lon, lat, tair_var, utc_offset, nngh=4):
        '''
        Load a 2-D matrix of NCEP/NCAR Reanalysis data for the grid cells
        nearest to a lon, lat point and a temperature variable of interest.

        Parameters
        ----------
        lon : double
            The longitude of the point
        lat : double
            The latitude of the point
        tair_var : str
            The temperature variable for which to load corresponding
            reanalysis data
        utc_offset : int
            The UTC offset of the point's time zone
        nngh : int, optional
            The number of nearest NCEP/NCAR Reanalysis grid cells to load in
            the returned matrix

        Returns
        -------
        nnr_matrix : ndarray
            A N*P 2-D array where N is the number of days in the reanalysis
            time series and P is the number of reanalysis variables * the
            number of neighboring grid cells that were loaded. None if no
            grid cell or variable was loaded.
        '''

        # Order the grid cells from nearest to farthest and keep the
        # coordinates of the nngh closest ones
        cell_dists = grt_circle_dist(lon, lat, self.grid_lons, self.grid_lats)
        nearest_first = np.argsort(cell_dists)
        ngh_lons = self.grid_lons[nearest_first][0:nngh]
        ngh_lats = self.grid_lats[nearest_first][0:nngh]

        # Suffix selecting the reanalysis dataset for this variable/offset
        time_key = self.UTC_OFFSET_TIMES[tair_var][utc_offset]

        nnr_matrix = None

        for cell_lon, cell_lat in zip(ngh_lons, ngh_lats):

            # Position of the cell along the reanalysis lon/lat axes
            idx_lon = np.nonzero(self.nnr_lons == cell_lon)[0][0]
            idx_lat = np.nonzero(self.nnr_lats == cell_lat)[0][0]

            for nnr_var in self.nnr_vars:

                ds = self.ds_nnr[nnr_var + time_key]
                var = ds.variables[nnr_var]

                if "level" not in ds.dimensions:
                    cols = var[self.day_mask, idx_lat, idx_lon]
                else:
                    # Keep every level as its own column
                    cols = var[self.day_mask, :, idx_lat, idx_lon]

                if cols.ndim == 1:
                    # Promote a single series to a one-column matrix
                    cols = cols.reshape((cols.size, 1))

                nnr_matrix = (cols if nnr_matrix is None else
                              np.hstack((nnr_matrix, cols)))

        return nnr_matrix
예제 #5
0
def _find_nn_data(a_data, a_rast, x, y):
    '''
    Find the nearest raster cell with data around a given cell.

    Square rings of increasing radius centered on the cell are searched
    until a ring contains at least one cell whose value differs from the
    raster's no-data value. Of the data cells found in that ring, the one
    with the smallest great-circle distance to the center cell is returned.

    NOTE: the search stops at the first ring that contains data, so a cell
    in a larger ring is never considered even if it were closer by
    great-circle distance.

    Parameters
    ----------
    a_data : ndarray
        The raster values, indexed as a_data[row, col]
    a_rast : object
        The raster description. Must provide ``rows``, ``cols``, ``ndata``
        and ``get_coord(row, col)`` returning (lat, lon).
    x : int
        The column of the center cell
    y : int
        The row of the center cell

    Returns
    -------
    nval : scalar
        The value of the nearest data cell
    dist : float
        The distance to that cell as returned by grt_circle_dist

    Raises
    ------
    ValueError
        If no cell of the raster holds data.
    '''

    nrows = a_rast.rows
    ncols = a_rast.cols
    ndata = a_rast.ndata

    nn = []
    nn_vals = []

    def _collect(row, col):
        # Record the cell if it lies inside the raster and holds data.
        # Row 0 and column 0 are valid cells and must be included.
        if 0 <= row < nrows and 0 <= col < ncols:
            val = a_data[row, col]
            if val != ndata:
                nn.append((row, col))
                nn_vals.append(val)

    r = 1

    while len(nn) == 0:

        lcol = x - r
        rcol = x + r
        trow = y - r
        brow = y + r

        if lcol < 0 and trow < 0 and rcol >= ncols and brow >= nrows:
            # The ring encloses the whole raster: no larger ring can ever
            # touch a cell, so stop instead of looping forever
            raise ValueError("No data cells found in raster")

        # top ring (both top corners)
        for i in np.arange(lcol, rcol + 1):
            _collect(trow, i)

        # left ring (top-left corner already visited)
        for i in np.arange(trow + 1, brow + 1):
            _collect(i, lcol)

        # bottom ring (bottom-left corner already visited)
        for i in np.arange(rcol, lcol, -1):
            _collect(brow, i)

        # right ring (both right corners already visited)
        for i in np.arange(brow - 1, trow, -1):
            _collect(i, rcol)

        r += 1

    nn = np.array(nn)
    nn_vals = np.array(nn_vals)
    lats, lons = a_rast.get_coord(nn[:, 0], nn[:, 1])
    pt_lat, pt_lon = a_rast.get_coord(y, x)
    d = grt_circle_dist(pt_lon, pt_lat, lons, lats)
    j = np.argsort(d)[0]
    nval = nn_vals[j]
    return nval, d[j]
예제 #6
0
    def __init__(self,
                 stn_id,
                 stn_da,
                 stns_mask,
                 tair_var,
                 nnr_ds,
                 min_dist=-1,
                 max_dist=MAX_DISTANCE,
                 tair_mask=None,
                 day_mask=None,
                 add_bestngh=True):
        '''
        Load the target station's observations and those of its neighbors
        into a single data matrix (target in the first column, neighbors
        ordered by decreasing index of agreement with the target).

        Parameters
        ----------
        stn_id : str
            The station id of the target station
        stn_da : twx.db.StationDataDb
            The station database from which all target and neighboring
            station observations should be loaded
        stns_mask : ndarray
            A boolean array mask specifying which stations in the database
            can be used as neighbors. Mask size must equal the number of
            stations in the database
        tair_var : str
            The temperature variable ('tmin' or 'tmax') of focus.
        nnr_ds : twx.db.NNRNghData
            A NNRNghData object for loading reanalysis data to help supplement
            the neighboring station data.
        min_dist : int, optional
            The minimum distance (exclusive) for which to search for neighboring stations.
            Pass -1 if there should be no minimum distance
        max_dist : int, optional
            The maximum distance (inclusive) for which to search for neighboring stations.
            Defaults to MAX_DISTANCE. If no station is found within it, the
            distance is widened in steps of MAX_DISTANCE / 2 until at least
            one station is found; the final value is stored in self.max_dist.
        tair_mask : ndarray, optional
            A boolean mask specifying which observations at the target should
            artificially be set to nan. This can be used for cross-validation.
            Mask size must equal the time series length specified by the passed
            StationDataDb.
        day_mask : ndarray, optional
            A mask over the time series selecting the days to keep; only
            days where the mask is nonzero are retained for the target and
            the neighbors. If None, all days are kept.
        add_bestngh : boolean optional
            Add the best correlated neighbor to the data matrix even if the time
            series period-of-record of the neighbor is less than the
            MIN_POR_OVERLAP threshold for the entire period over which
            the target station's mean and variance is being estimated
        '''

        # Get target station metadata
        stn = stn_da.stns[stn_da.stn_ids == stn_id][0]

        # Load target station observations
        target_tair = stn_da.load_all_stn_obs_var(np.array([stn_id]),
                                                  tair_var)[0]
        target_tair = target_tair.astype(np.float64)

        if tair_mask is not None:
            target_tair[tair_mask] = np.nan

        if day_mask is None:
            # Builtin bool: the np.bool alias was removed in NumPy 1.24
            day_mask = np.ones(target_tair.size, dtype=bool)

        day_idx = np.nonzero(day_mask)[0]

        target_tair = np.take(target_tair, day_idx)

        # Number of observations threshold for entire period that is being infilled
        nthres_all = np.round(MIN_POR_OVERLAP * target_tair.size)

        # Number of observations threshold just for the target's period of record
        valid_tair_mask = np.isfinite(target_tair)
        ntair_valid = np.nonzero(valid_tair_mask)[0].size
        nthres_target_por = np.round(MIN_POR_OVERLAP * ntair_valid)

        # Make sure to not include the target station itself as a neighbor station
        stns_mask = np.logical_and(stn_da.stns[STN_ID] != stn_id, stns_mask)
        all_stns = stn_da.stns[stns_mask]

        dists = grt_circle_dist(stn[LON], stn[LAT], all_stns[LON],
                                all_stns[LAT])
        mask_dists = np.logical_and(dists <= max_dist, dists > min_dist)

        # Widen the search radius until a neighbor is found. This can only
        # succeed if some station lies beyond min_dist; without that check
        # the loop would never terminate when there are no candidates.
        has_candidates = np.any(dists > min_dist)

        while has_candidates and np.nonzero(mask_dists)[0].size == 0:
            max_dist += MAX_DISTANCE / 2.0
            mask_dists = np.logical_and(dists <= max_dist, dists > min_dist)

        ngh_stns = all_stns[mask_dists]
        ngh_dists = dists[mask_dists]

        if ngh_stns.size > 0:

            ngh_tair = stn_da.load_all_stn_obs_var(ngh_stns[STN_ID],
                                                   tair_var,
                                                   set_flagged_nan=True)[0]
            ngh_tair = ngh_tair.astype(np.float64)

            if len(ngh_tair.shape) == 1:
                # Single neighbor: promote to a one-column matrix
                ngh_tair.shape = (ngh_tair.size, 1)

            ngh_tair = np.take(ngh_tair, day_idx, axis=0)

        else:

            # No candidate neighbors at all: use an empty matrix so that the
            # target-only branch further below is taken
            ngh_tair = np.empty((target_tair.size, 0), dtype=np.float64)

        dist_sort = np.argsort(ngh_dists)
        ngh_stns = ngh_stns[dist_sort]
        ngh_dists = ngh_dists[dist_sort]
        ngh_tair = ngh_tair[:, dist_sort]

        overlap_mask_tair = np.zeros(ngh_stns.size, dtype=bool)
        ioa = np.zeros(ngh_stns.size)

        # Best index of agreement among neighbors that only meet the
        # target period-of-record threshold, and the index of that neighbor
        best_ioa = 0
        i = None

        for x in range(ngh_stns.size):

            valid_ngh_mask = np.isfinite(ngh_tair[:, x])

            nlap = np.nonzero(valid_ngh_mask)[0].size

            overlap_mask = np.logical_and(valid_tair_mask, valid_ngh_mask)

            nlap_stn = np.nonzero(overlap_mask)[0].size

            if nlap >= nthres_all and nlap_stn >= nthres_target_por:

                ioa[x] = calc_ioa_d1(target_tair[overlap_mask],
                                     ngh_tair[:, x][overlap_mask])

                overlap_mask_tair[x] = True

            elif nlap_stn >= nthres_target_por and add_bestngh:

                aioa = calc_ioa_d1(target_tair[overlap_mask],
                                   ngh_tair[:, x][overlap_mask])

                if aioa > best_ioa:

                    ioa[x] = aioa
                    overlap_mask_tair[x] = True

                    # Only one such "best" neighbor is retained at a time
                    if i is not None:
                        overlap_mask_tair[i] = False

                    i = x
                    best_ioa = aioa

        if add_bestngh and i is not None:

            # Keep the short-record best neighbor only if it has the highest
            # index of agreement of all neighbors and it is at least 0.7
            if ioa[i] != np.max(ioa) or ioa[i] < 0.7:

                overlap_mask_tair[i] = False

        ioa = ioa[overlap_mask_tair]
        ngh_dists = ngh_dists[overlap_mask_tair]
        ngh_tair = ngh_tair[:, overlap_mask_tair]

        if ioa.size > 0:

            # Order the neighbors by decreasing index of agreement
            ioa_sort = np.argsort(ioa)[::-1]
            ioa = ioa[ioa_sort]
            ngh_dists = ngh_dists[ioa_sort]
            ngh_tair = ngh_tair[:, ioa_sort]

            target_tair.shape = (target_tair.size, 1)

            # The target occupies the first column (distance 0, ioa 1)
            imp_tair_mat = np.hstack((target_tair, ngh_tair))
            ngh_dists = np.concatenate((np.zeros(1), ngh_dists))
            ioa = np.concatenate((np.ones(1), ioa))

            valid_imp_mask = np.isfinite(imp_tair_mat)

            nnghs_per_day = np.sum(valid_imp_mask, axis=1)

        else:

            # No usable neighbors: the matrix holds the target only
            target_tair.shape = (target_tair.size, 1)
            imp_tair_mat = target_tair

            valid_tair_mask.shape = (valid_tair_mask.size, 1)
            valid_imp_mask = valid_tair_mask

            ioa = np.ones(1)
            ngh_dists = np.zeros(1)

            nnghs_per_day = np.zeros(target_tair.shape[0])

        #############################################################
        self.imp_tair_mat = np.array(imp_tair_mat, dtype=np.float64)
        self.valid_imp_mask = valid_imp_mask
        self.ngh_ioa = ioa
        self.ngh_dists = ngh_dists
        self.max_dist = max_dist
        self.stn_id = stn_id
        self.stn_da = stn_da
        self.tair_var = tair_var
        self.tair_mask = tair_mask
        self.nnghs_per_day = nnghs_per_day
        self.stns_mask = stns_mask
        self.nnr_ds = nnr_ds
        self.stn = stn
        self.day_idx = day_idx
        self.day_mask = day_mask