def _stns_in_radius_mask(stn, stn_da, radius=NGH_RADIUS):
    '''
    Build a mask of the database stations within a radius of a station.

    Parameters
    ----------
    stn : structured record
        The station of interest; its LON and LAT fields are read.
    stn_da : station database object
        Object whose ``stns`` attribute holds the LON and LAT of
        every station in the database.
    radius : number, optional
        Maximum distance (inclusive), in the units returned by
        grt_circle_dist. Defaults to NGH_RADIUS.

    Returns
    -------
    in_radius : ndarray
        Boolean mask over ``stn_da.stns``, True where the distance to
        ``stn`` is less than or equal to ``radius``.
    dists_in_radius : ndarray
        The distances of the stations selected by the mask.
    '''
    stn_dists = grt_circle_dist(stn[LON], stn[LAT],
                                stn_da.stns[LON], stn_da.stns[LAT])
    in_radius = stn_dists <= radius

    return in_radius, stn_dists[in_radius]
def find_dup_stns(stnda):
    '''
    Find duplicate stations in a netCDF4 infilled station database.

    Two or more stations are considered duplicates if they are at the
    exact same location (a great-circle distance of exactly 0 between
    them). Within each group of co-located stations, the station with the
    fewest infilled values (the smallest sum of the ``flag_infilled``
    variable, i.e. the longest non-infilled period-of-record) is kept and
    the others are returned as duplicates. If several stations of a group
    tie for the fewest infilled values, all of the tied stations are kept.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the infilled
        database that should be searched for duplicate stations.

    Returns
    -------
    rm_stnids : ndarray
        An array of duplicate station ids
    '''
    # A set gives O(1) "already handled" checks inside the station loop.
    dup_stnids = set()
    rm_stnids = []

    stat_chk = StatusCheck(stnda.stns.size, 1000)

    for stn in stnda.stns:

        if stn[STN_ID] not in dup_stnids:

            ngh_stns = stnda.stns[stnda.stn_ids != stn[STN_ID]]
            dists = grt_circle_dist(stn[LON], stn[LAT],
                                    ngh_stns[LON], ngh_stns[LAT])
            dup_nghs = ngh_stns[dists == 0]

            if dup_nghs.size > 0:

                # Do not process the co-located stations again later on.
                dup_stnids.update(dup_nghs[STN_ID])

                grp_ids = np.concatenate([
                    np.array([stn[STN_ID]]).ravel(),
                    np.array([dup_nghs[STN_ID]]).ravel()
                ])

                stn_idxs = np.nonzero(
                    np.in1d(stnda.stn_ids, grp_ids, assume_unique=True))[0]

                # Take the ids back from the database using the same
                # indices that select the flag columns. This keeps the ids
                # aligned with the per-station sums below even when
                # stnda.stn_ids is not stored in sorted order.
                grp_ids = stnda.stn_ids[stn_idxs]

                imp_flgs = stnda.ds.variables['flag_infilled'][:, stn_idxs]
                imp_flg_sum = np.sum(imp_flgs, axis=0)

                # Everything with more infilled values than the best
                # station of the group is a duplicate to remove.
                rm_stnids.extend(grp_ids[imp_flg_sum != np.min(imp_flg_sum)])

        stat_chk.increment()

    return np.array(rm_stnids)
def __set_pt(self, lat, lon, stns_rm=None):
    '''
    Set the current point and cache station distances relative to it.

    The cached ``pt_*`` attributes are only rebuilt when the point or the
    set of stations to remove differs from the previous call.

    Parameters
    ----------
    lat : double
        The latitude of the point
    lon : double
        The longitude of the point
    stns_rm : str, unicode or ndarray of str/unicode, optional
        Station id(s) that should be excluded from the distance-sorted
        station arrays. If ``self.rm_zero_dist_stns`` is set, stations at
        a distance of exactly 0 from the point are excluded as well.

    Raises
    ------
    TypeError
        If stns_rm is not None, a string, or a numpy array.
    '''
    # ``unicode`` only exists on Python 2; fall back to ``str`` alone so
    # that non-string arguments do not raise a NameError on Python 3.
    try:
        str_types = (str, unicode)  # noqa: F821
    except NameError:
        str_types = (str,)

    if isinstance(stns_rm, str_types):
        stns_rm = np.array([stns_rm])
    elif stns_rm is not None and not isinstance(stns_rm, np.ndarray):
        raise TypeError(
            "stns_rm must be str, unicode, or numpy array of str/unicode")

    # Decide whether the cached state already matches this request.
    is_cached = self.pt_lat == lat and self.pt_lon == lon
    if is_cached:
        if self.pt_stns_rm is None or stns_rm is None:
            is_cached = self.pt_stns_rm is None and stns_rm is None
        else:
            # array_equal returns False on a shape mismatch instead of
            # raising, so no exception has to be swallowed here.
            is_cached = np.array_equal(self.pt_stns_rm, stns_rm)

    if is_cached:
        return

    stn_ids = self.stns[STN_ID]
    stn_dists = grt_circle_dist(lon, lat, self.stns[LON], self.stns[LAT])

    if self.rm_zero_dist_stns:
        # Remove any stations that are at the same location (dist == 0)
        zero_dist_ids = stn_ids[stn_dists == 0]
        if stns_rm is None:
            fnl_stns_rm = np.unique(zero_dist_ids)
        else:
            fnl_stns_rm = np.unique(
                np.concatenate((stns_rm, zero_dist_ids)))
    elif stns_rm is not None:
        fnl_stns_rm = stns_rm
    else:
        # Empty array of the id dtype: avoids mixing a float array with
        # string station ids.
        fnl_stns_rm = np.array([], dtype=stn_ids.dtype)

    if fnl_stns_rm.size > 0:
        # stns_rm comes from the caller and may contain repeated ids, so
        # uniqueness must not be assumed here.
        mask_rm = np.logical_not(np.in1d(stn_ids, fnl_stns_rm))
    else:
        mask_rm = self.mask_all

    self.pt_lat = lat
    self.pt_lon = lon
    self.pt_stns_rm = stns_rm
    self.pt_mask_stns_rm = mask_rm
    self.pt_stn_dists = stn_dists
    self.pt_dist_sort = np.argsort(self.pt_stn_dists)
    self.pt_sort_stn_dists = np.take(self.pt_stn_dists, self.pt_dist_sort)
    self.pt_sort_stns = np.take(self.stns, self.pt_dist_sort)

    # Drop the removed stations from the distance-sorted arrays.
    mask_rm = np.take(self.pt_mask_stns_rm, self.pt_dist_sort)
    mask_rm = np.nonzero(mask_rm)[0]
    self.pt_sort_stn_dists = np.take(self.pt_sort_stn_dists, mask_rm)
    self.pt_sort_stns = np.take(self.pt_sort_stns, mask_rm)
def get_nngh_matrix(self, lon, lat, tair_var, utc_offset, nngh=4):
    '''
    Load a 2-D matrix of NCEP/NCAR Reanalysis data for a lon, lat
    point and a temperature variable of interest.

    Parameters
    ----------
    lon : double
        The longitude of the point
    lat : double
        The latitude of the point
    tair_var : str
        The temperature variable for which to load
        corresponding reanalysis data
    utc_offset : int
        The UTC offset of the point's time zone
    nngh : int, optional
        The number of nearest NCEP/NCAR Reanalysis grid cells
        to load in the returned matrix

    Returns
    -------
    nnr_matrix : ndarray
        A N*P 2-D array where N is the number of days in the
        reanalysis time series and P is the number of reanalysis
        variables * the number of neighboring grid cells that
        were loaded. None if nothing was loaded.
    '''
    # Grid cells ordered from nearest to farthest; keep the first nngh.
    nearest_first = np.argsort(
        grt_circle_dist(lon, lat, self.grid_lons, self.grid_lats))
    ngh_lons = self.grid_lons[nearest_first][0:nngh]
    ngh_lats = self.grid_lats[nearest_first][0:nngh]

    time_suffix = self.UTC_OFFSET_TIMES[tair_var][utc_offset]

    nnr_matrix = None

    for cell_lon, cell_lat in zip(ngh_lons, ngh_lats):

        idx_lon = np.where(self.nnr_lons == cell_lon)[0][0]
        idx_lat = np.where(self.nnr_lats == cell_lat)[0][0]

        for nnr_var in self.nnr_vars:

            ds = self.ds_nnr["".join((nnr_var, time_suffix))]
            var = ds.variables[nnr_var]

            # Variables with a "level" dimension contribute one column
            # per level; the others contribute a single column.
            if "level" in ds.dimensions:
                adata = var[self.day_mask, :, idx_lat, idx_lon]
            else:
                adata = var[self.day_mask, idx_lat, idx_lon]

            if adata.ndim == 1:
                adata.shape = (adata.size, 1)

            if nnr_matrix is None:
                nnr_matrix = adata
            else:
                nnr_matrix = np.hstack((nnr_matrix, adata))

    return nnr_matrix
def _find_nn_data(a_data, a_rast, x, y):
    '''
    Find the nearest raster cell with valid data around a cell.

    Square rings of growing radius centered on (row y, column x) are
    searched until a ring holds at least one cell whose value differs
    from ``a_rast.ndata``. Of the valid cells in that first ring, the one
    with the smallest great-circle distance to the center is returned.
    The center cell itself is not examined.

    Parameters
    ----------
    a_data : ndarray
        2-D array of raster values indexed as [row, col]
    a_rast : raster object
        Object providing ``rows``, ``cols``, ``ndata`` and
        ``get_coord(row, col)`` returning (lat, lon)
    x : int
        Column index of the center cell
    y : int
        Row index of the center cell

    Returns
    -------
    nval : scalar
        The value of the nearest valid cell
    dist : float
        The great-circle distance from the center to that cell

    Raises
    ------
    ValueError
        If no ring around the center contains a valid cell.
    '''
    nrows = a_rast.rows
    ncols = a_rast.cols

    # Beyond this radius every side of the ring lies outside the raster,
    # so searching further could never find anything.
    max_r = max(x, ncols - 1 - x, y, nrows - 1 - y)

    nn = []
    nn_vals = []
    r = 1

    while not nn:

        if r > max_r:
            raise ValueError(
                "no valid data found in raster around x=%s, y=%s" % (x, y))

        lcol, rcol = x - r, x + r
        trow, brow = y - r, y + r

        # Collect the in-bounds cells of the ring: top, left, bottom,
        # right. Row 0 and column 0 are valid indices and are included.
        # Two corners are visited twice, which does not affect the
        # nearest-cell search.
        ring = []
        if 0 <= trow < nrows:
            ring.extend((trow, i) for i in np.arange(lcol, rcol + 1)
                        if 0 <= i < ncols)
        if 0 <= lcol < ncols:
            ring.extend((i, lcol) for i in np.arange(trow, brow + 1)
                        if 0 <= i < nrows)
        if 0 <= brow < nrows:
            ring.extend((brow, i) for i in np.arange(rcol, lcol, -1)
                        if 0 <= i < ncols)
        if 0 <= rcol < ncols:
            ring.extend((i, rcol) for i in np.arange(brow, trow, -1)
                        if 0 <= i < nrows)

        for row, col in ring:
            val = a_data[row, col]
            if val != a_rast.ndata:
                nn.append((row, col))
                nn_vals.append(val)

        r += 1

    nn = np.array(nn)
    nn_vals = np.array(nn_vals)

    lats, lons = a_rast.get_coord(nn[:, 0], nn[:, 1])
    pt_lat, pt_lon = a_rast.get_coord(y, x)
    d = grt_circle_dist(pt_lon, pt_lat, lons, lats)

    j = np.argmin(d)

    return nn_vals[j], d[j]
def __init__(self, stn_id, stn_da, stns_mask, tair_var, nnr_ds, min_dist=-1,
             max_dist=MAX_DISTANCE, tair_mask=None, day_mask=None,
             add_bestngh=True):
    '''
    Parameters
    ----------
    stn_id : str
        The station id of the target station
    stn_da : twx.db.StationDataDb
        The station database from which all target and neighboring
        station observations should be loaded
    stns_mask : ndarray
        A boolean array mask specifying which stations in the
        database can be used as neighbors. Mask size must equal
        the number of stations in the database
    tair_var : str
        The temperature variable ('tmin' or 'tmax') of focus.
    nnr_ds : twx.db.NNRNghData
        A NNRNghData object for loading reanalysis data to help
        supplement the neighboring station data.
    min_dist : int, optional
        The minimum distance (exclusive) for which to search for
        neighboring stations. Pass -1 if there should be no
        minimum distance
    max_dist : int, optional
        The maximum distance (inclusive) for which to search for
        neighboring stations. Defaults to MAX_DISTANCE. If no station
        is found, the distance is increased in steps of
        MAX_DISTANCE / 2 until at least one station is found.
    tair_mask : ndarray, optional
        A boolean mask specifying which observations at the target
        should artificially be set to nan. This can be used for
        cross-validation. Mask size must equal the time series length
        specified by the passed StationDataDb.
    day_mask : ndarray, optional
        A boolean mask over the time series selecting the days to
        keep. The target and neighbor time series are subset to these
        days. Defaults to keeping all days.
    add_bestngh : boolean optional
        Add the best correlated neighbor to the data matrix even if
        the time series period-of-record of the neighbor is less than
        the MIN_POR_OVERLAP threshold for the entire period over which
        the target station's mean and variance is being estimated.
        That neighbor is only kept if its index of agreement is the
        highest of all neighbors and is at least 0.7.
    '''
    # Get target station metadata
    stn = stn_da.stns[stn_da.stn_ids == stn_id][0]

    # Load target station observations
    target_tair = stn_da.load_all_stn_obs_var(np.array([stn_id]),
                                              tair_var)[0]
    target_tair = target_tair.astype(np.float64)

    if tair_mask is not None:
        target_tair[tair_mask] = np.nan

    if day_mask is None:
        # Builtin bool: the np.bool alias no longer exists in recent NumPy.
        day_mask = np.ones(target_tair.size, dtype=bool)

    day_idx = np.nonzero(day_mask)[0]
    target_tair = np.take(target_tair, day_idx)

    # Number of observations threshold for entire period
    # that is being infilled
    nthres_all = np.round(MIN_POR_OVERLAP * target_tair.size)

    # Number of observations threshold just for the target's
    # period of record
    valid_tair_mask = np.isfinite(target_tair)
    ntair_valid = np.nonzero(valid_tair_mask)[0].size
    nthres_target_por = np.round(MIN_POR_OVERLAP * ntair_valid)

    # Make sure to not include the target station itself
    # as a neighbor station
    stns_mask = np.logical_and(stn_da.stns[STN_ID] != stn_id, stns_mask)
    all_stns = stn_da.stns[stns_mask]

    dists = grt_circle_dist(stn[LON], stn[LAT],
                            all_stns[LON], all_stns[LAT])
    mask_dists = np.logical_and(dists <= max_dist, dists > min_dist)

    # Widen the search radius until at least one neighbor is found. Only
    # do so when some candidate lies beyond min_dist at a finite distance;
    # otherwise the loop could never terminate.
    reachable = np.logical_and(np.isfinite(dists), dists > min_dist)
    if reachable.any():
        while not mask_dists.any():
            max_dist += MAX_DISTANCE / 2.0
            mask_dists = np.logical_and(dists <= max_dist,
                                        dists > min_dist)

    ngh_stns = all_stns[mask_dists]
    ngh_dists = dists[mask_dists]
    ngh_ids = ngh_stns[STN_ID]

    if ngh_ids.size > 0:
        ngh_tair = stn_da.load_all_stn_obs_var(ngh_ids, tair_var,
                                               set_flagged_nan=True)[0]
        ngh_tair = ngh_tair.astype(np.float64)

        if len(ngh_tair.shape) == 1:
            ngh_tair.shape = (ngh_tair.size, 1)

        ngh_tair = np.take(ngh_tair, day_idx, axis=0)
    else:
        # No candidate neighbors at all: nothing to load.
        ngh_tair = np.empty((target_tair.size, 0), dtype=np.float64)

    # Order the neighbors from nearest to farthest
    dist_sort = np.argsort(ngh_dists)
    ngh_stns = ngh_stns[dist_sort]
    ngh_dists = ngh_dists[dist_sort]
    ngh_tair = ngh_tair[:, dist_sort]

    overlap_mask_tair = np.zeros(ngh_stns.size, dtype=bool)
    ioa = np.zeros(ngh_stns.size)

    # Track the best neighbor that only meets the target
    # period-of-record threshold (index i, agreement best_ioa)
    best_ioa = 0
    i = None

    for x in np.arange(ngh_stns.size):

        valid_ngh_mask = np.isfinite(ngh_tair[:, x])
        nlap = np.nonzero(valid_ngh_mask)[0].size

        overlap_mask = np.logical_and(valid_tair_mask, valid_ngh_mask)
        nlap_stn = np.nonzero(overlap_mask)[0].size

        if nlap >= nthres_all and nlap_stn >= nthres_target_por:

            # Neighbor meets both observation-count thresholds
            ioa[x] = calc_ioa_d1(target_tair[overlap_mask],
                                 ngh_tair[:, x][overlap_mask])
            overlap_mask_tair[x] = True

        elif nlap_stn >= nthres_target_por and add_bestngh:

            aioa = calc_ioa_d1(target_tair[overlap_mask],
                               ngh_tair[:, x][overlap_mask])

            if aioa > best_ioa:

                ioa[x] = aioa
                overlap_mask_tair[x] = True

                # Only one such neighbor is kept: drop the previous best
                if i is not None:
                    overlap_mask_tair[i] = False

                i = x
                best_ioa = aioa

    if add_bestngh and i is not None:

        # Keep the tracked neighbor only if it has the highest index of
        # agreement overall and that value is at least 0.7
        if ioa[i] != np.max(ioa) or ioa[i] < 0.7:
            overlap_mask_tair[i] = False

    ioa = ioa[overlap_mask_tair]
    ngh_dists = ngh_dists[overlap_mask_tair]
    ngh_tair = ngh_tair[:, overlap_mask_tair]

    if ioa.size > 0:

        # Order the neighbors from highest to lowest index of agreement
        ioa_sort = np.argsort(ioa)[::-1]
        ioa = ioa[ioa_sort]
        ngh_dists = ngh_dists[ioa_sort]
        ngh_tair = ngh_tair[:, ioa_sort]

        # The target station is the first column of the data matrix
        target_tair.shape = (target_tair.size, 1)
        imp_tair_mat = np.hstack((target_tair, ngh_tair))
        ngh_dists = np.concatenate((np.zeros(1), ngh_dists))
        ioa = np.concatenate((np.ones(1), ioa))

        valid_imp_mask = np.isfinite(imp_tair_mat)
        nnghs_per_day = np.sum(valid_imp_mask, axis=1)

    else:

        # No usable neighbors: the matrix holds the target station only
        target_tair.shape = (target_tair.size, 1)
        imp_tair_mat = target_tair

        valid_tair_mask.shape = (valid_tair_mask.size, 1)
        valid_imp_mask = valid_tair_mask

        ioa = np.ones(1)
        ngh_dists = np.zeros(1)
        nnghs_per_day = np.zeros(target_tair.shape[0])

    #############################################################
    self.imp_tair_mat = np.array(imp_tair_mat, dtype=np.float64)
    self.valid_imp_mask = valid_imp_mask
    self.ngh_ioa = ioa
    self.ngh_dists = ngh_dists
    self.max_dist = max_dist
    self.stn_id = stn_id
    self.stn_da = stn_da
    self.tair_var = tair_var
    self.tair_mask = tair_mask
    self.nnghs_per_day = nnghs_per_day
    self.stns_mask = stns_mask
    self.nnr_ds = nnr_ds
    self.stn = stn
    self.day_idx = day_idx
    self.day_mask = day_mask