def proc_write(twx_cfg, xval_stnids, start_ymd, end_ymd, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    if xval_stnids is None:
        xval_stnids = load_default_xval_stnids(stn_da.stn_ids)

    ttl_infills = xval_stnids.size * 2

    xval_stns = stn_da.stns[np.in1d(stn_da.stn_ids, xval_stnids, True)]

    create_quick_db(twx_cfg.fpath_xval_infill_nc, xval_stns, stn_da.days,
                    NETCDF_OUT_VARIABLES)
    ds_out = Dataset(twx_cfg.fpath_xval_infill_nc, 'r+')

    stn_idxs = {}
    for x in np.arange(xval_stnids.size):
        stn_idxs[xval_stnids[x]] = x

    stat_chk = StatusCheck(ttl_infills, 10)

    while 1:

        stn_id, tair_var, infill_tair, obs_tair = MPI.COMM_WORLD.recv(
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "WRITER: Finished"
                return 0

        else:

            infill_tair = np.ma.masked_array(infill_tair,
                                             np.isnan(infill_tair))
            obs_tair = np.ma.masked_array(obs_tair, np.isnan(obs_tair))

            i = stn_idxs[stn_id]

            difs = infill_tair - obs_tair
            bias = np.ma.mean(difs)
            mae = np.ma.mean(np.ma.abs(difs))

            print "|".join(["WRITER", stn_id, tair_var,
                            "MAE: %.2f" % (mae,), "BIAS: %.2f" % (bias,)])

            obs_tair = np.ma.filled(obs_tair, netCDF4.default_fillvals['f4'])
            ds_out.variables["obs_%s" % (tair_var,)][:, i] = obs_tair

            infill_tair = np.ma.filled(infill_tair,
                                       netCDF4.default_fillvals['f4'])
            ds_out.variables["infilled_%s" % (tair_var,)][:, i] = infill_tair

            ds_out.sync()
            stat_chk.increment()
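# Illustrative worker-side sketch (not from the original source): the writer
# above expects each worker to send a (stn_id, tair_var, infill_tair, obs_tair)
# tuple per cross-validation result, followed by a final message tagged
# TAG_STOPWORK. The names infill_xval, work_items, RANK_WRITE, and TAG_DOWORK
# are assumptions used only for illustration:
#
#     for stn_id, tair_var in work_items:
#         infill_tair, obs_tair = infill_xval(stn_id, tair_var)
#         MPI.COMM_WORLD.send((stn_id, tair_var, infill_tair, obs_tair),
#                             dest=RANK_WRITE, tag=TAG_DOWORK)
#     MPI.COMM_WORLD.send([None] * 4, dest=RANK_WRITE, tag=TAG_STOPWORK)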
def find_dup_stns(stnda):
    '''
    Find duplicate stations in a netCDF4 infilled station database. Two or
    more stations are considered duplicates if they are at the exact same
    location. For two or more stations with the same location, the one with
    the longest non-infilled period-of-record is kept and the others are
    considered duplicates and will be returned by this function.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the infilled database
        that should be searched for duplicate stations.

    Returns
    -------
    rm_stnids : ndarray
        An array of duplicate station ids
    '''

    dup_stnids = []
    rm_stnids = []

    stat_chk = StatusCheck(stnda.stns.size, 1000)

    for stn in stnda.stns:

        if stn[STN_ID] not in dup_stnids:

            ngh_stns = stnda.stns[stnda.stn_ids != stn[STN_ID]]
            dists = grt_circle_dist(stn[LON], stn[LAT],
                                    ngh_stns[LON], ngh_stns[LAT])

            dup_nghs = ngh_stns[dists == 0]

            if dup_nghs.size > 0:

                dup_stnids.extend(dup_nghs[STN_ID])

                stn_ids_load = np.sort(
                    np.concatenate([np.array([stn[STN_ID]]).ravel(),
                                    np.array([dup_nghs[STN_ID]]).ravel()]))

                stn_idxs = np.nonzero(
                    np.in1d(stnda.stn_ids, stn_ids_load, True))[0]

                imp_flgs = stnda.ds.variables['flag_infilled'][:, stn_idxs]
                imp_flg_sum = np.sum(imp_flgs, axis=0)

                # Keep the station with the fewest infilled days; all
                # other stations at this location are flagged for removal
                stn_ids_rm = stn_ids_load[imp_flg_sum != np.min(imp_flg_sum)]
                rm_stnids.extend(stn_ids_rm)

        stat_chk.increment()

    rm_stnids = np.array(rm_stnids)

    return rm_stnids
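# Illustrative usage sketch (not part of the original module; the database
# path is hypothetical):
#
#     stnda = StationSerialDataDb('/path/to/serial_tmin.nc', 'tmin')
#     rm_stnids = find_dup_stns(stnda)
#     print "Found %d duplicate stations" % rm_stnids.size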
def proc_write(fpath_stndb, elem, fpath_out, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationSerialDataDb(fpath_stndb, elem)
    stn_ids = stn_da.stn_ids
    stns = stn_da.stns
    stn_mask = np.logical_and(np.isfinite(stn_da.stns[MASK]),
                              np.isnan(stn_da.stns[BAD]))
    days = stn_da.days
    stn_da.ds.close()
    stn_da = None

    print "WRITER: Creating output station netCDF database..."

    create_quick_db(fpath_out, stns, days, DB_VARIABLES[elem])
    stnda_out = StationSerialDataDb(fpath_out, elem, mode='r+')

    mth_names = []
    for mth in np.arange(1, 13):

        norm_var_name = get_norm_varname(mth)
        stnda_out.add_stn_variable(norm_var_name, '', units='C', dtype='f8',
                                   fill_value=netCDF4.default_fillvals['f8'])
        mth_names.append(norm_var_name)

    stnda_out.ds.sync()

    print "WRITER: Output station netCDF database created."

    mths = np.arange(12)

    stat_chk = StatusCheck(np.sum(stn_mask), 50)

    while 1:

        stn_id, tair_daily, tair_norms = MPI.COMM_WORLD.recv(
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "WRITER: Finished"
                return 0

        else:

            x = np.nonzero(stn_ids == stn_id)[0][0]

            stnda_out.ds.variables[elem][:, x] = tair_daily

            for i in mths:
                stnda_out.ds.variables[mth_names[i]][x] = tair_norms[i]

            stnda_out.ds.sync()
            stat_chk.increment()
def set_optim_nstns_tair_anom(stnda, path_xval_ds):
    '''
    Set the local optimal number of stations to be used for anomaly
    interpolation for each U.S. climate division based on cross-validation
    mean absolute error.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the database for which
        the local optimal number of neighbors should be set.
    path_xval_ds : str
        Path where netCDF cross-validation MAE files from
        create_climdiv_optim_nstns_db are located
    '''

    climdiv_stns = stnda.stns[CLIMDIV]

    vars_optim = {}
    for mth in np.arange(1, 13):

        var_name_optim = get_optim_anom_varname(mth)
        long_name = ("Optimal number of neighbors to use for daily anomaly "
                     "interpolation for month %d" % mth)
        var_optim = stnda.add_stn_variable(var_name_optim, long_name, "", 'f8',
                                           fill_value=netCDF4.default_fillvals['f8'])
        vars_optim[mth] = var_optim

    divs = np.unique(climdiv_stns[np.isfinite(climdiv_stns)])

    stchk = StatusCheck(divs.size, 10)

    for clim_div in divs:

        fpath = os.path.join(path_xval_ds, "optim_nstns_%s_climdiv%d.nc" %
                             (stnda.var_name, clim_div))
        ds_climdiv = Dataset(fpath)

        mae_climdiv = ds_climdiv.variables['mae'][:]
        nnghs_climdiv = ds_climdiv.variables['min_nghs'][:]

        climdiv_mask = np.nonzero(climdiv_stns == clim_div)[0]

        for mth in np.arange(1, 13):

            # Average MAE over the division's stations for each candidate
            # neighbor count and select the count that minimizes it
            mae_climdiv_mth = mae_climdiv[mth - 1, :, :]
            mmae = np.mean(mae_climdiv_mth, axis=1)
            min_idx = np.argmin(mmae)
            vars_optim[mth][climdiv_mask] = nnghs_climdiv[min_idx]

        stchk.increment()

    stnda.ds.sync()
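# Illustrative usage sketch (paths are hypothetical): once the per-division
# cross-validation MAE files have been written by
# create_climdiv_optim_nstns_db, the optimal neighbor counts can be set with:
#
#     stnda = StationSerialDataDb('/path/to/serial_tmin.nc', 'tmin', mode='r+')
#     set_optim_nstns_tair_anom(stnda, '/path/to/xval_mae_files')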
def add_monthly_normals(stnda, start_norm_yr=1981, end_norm_yr=2010):
    '''
    Calculate and add station monthly normals to a serially-complete
    netCDF station database.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the database to which
        station monthly normals should be added.
    start_norm_yr : int, optional
        The start year for the normals.
    end_norm_yr : int, optional
        The end year for the normals.
    '''

    tagg = TairAggregate(stnda.days)
    stns = stnda.stns

    norm_vars = {}
    for mth in np.arange(1, 13):

        norm_var_name = 'norm%02d' % mth
        long_name = "%d - %d Monthly Normal" % (start_norm_yr, end_norm_yr)
        norm_var = stnda.add_stn_variable(norm_var_name, long_name, units='C',
                                          dtype='f8',
                                          fill_value=netCDF4.default_fillvals['f8'])
        norm_vars[mth] = norm_var

    dly_var = stnda.var

    chk_size = 50
    stchk = StatusCheck(np.int(np.round(stns.size / np.float(chk_size))), 10)

    for i in np.arange(0, stns.size, chk_size):

        if i + chk_size < stns.size:
            nstns = chk_size
        else:
            nstns = stns.size - i

        dly_vals = np.ma.masked_equal(dly_var[:, i:i + nstns],
                                      dly_var._FillValue)
        norm_vals = tagg.daily_to_mthly_norms(dly_vals, start_norm_yr,
                                              end_norm_yr)

        for mth in np.arange(1, 13):
            norm_vars[mth][i:i + nstns] = norm_vals[mth - 1, :]

        stnda.ds.sync()
        stchk.increment()
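# Illustrative usage sketch (path is hypothetical): add 1981-2010 monthly
# normals to a serially-complete Tmax database:
#
#     stnda = StationSerialDataDb('/path/to/serial_tmax.nc', 'tmax', mode='r+')
#     add_monthly_normals(stnda, start_norm_yr=1981, end_norm_yr=2010)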
def proc_write(fpath_stndb, elem, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationSerialDataDb(fpath_stndb, elem, mode="r+")
    mask_stns = np.logical_and(np.isfinite(stn_da.stns[MASK]),
                               np.isnan(stn_da.stns[BAD]))
    nstns = np.sum(mask_stns)

    dsvars = {}
    for mth in np.arange(1, 13):

        vname_nug = get_krigparam_varname(mth, VARIO_NUG)
        vname_psill = get_krigparam_varname(mth, VARIO_PSILL)
        vname_rng = get_krigparam_varname(mth, VARIO_RNG)

        dsvars[vname_nug] = stn_da.add_stn_variable(vname_nug, vname_nug,
                                                    "C**2", 'f8')
        dsvars[vname_psill] = stn_da.add_stn_variable(vname_psill, vname_psill,
                                                      "C**2", 'f8')
        dsvars[vname_rng] = stn_da.add_stn_variable(vname_rng, vname_rng,
                                                    "km", 'f8')

    stat_chk = StatusCheck(nstns, 250)

    while 1:

        stn_id, nug, psill, rng = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                                      tag=MPI.ANY_TAG,
                                                      status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "WRITER: Finished"
                return 0

        else:

            x = stn_da.stn_idxs[stn_id]

            for mth in np.arange(1, 13):
                dsvars[get_krigparam_varname(mth, VARIO_NUG)][x] = nug[mth - 1]
                dsvars[get_krigparam_varname(mth, VARIO_PSILL)][x] = psill[mth - 1]
                dsvars[get_krigparam_varname(mth, VARIO_RNG)][x] = rng[mth - 1]

            stn_da.ds.sync()
            stat_chk.increment()
def find_xval_outliers(self, stn_ids=None, bw_nngh=100, zscore_threshold=6):
    '''
    Runs a leave-one-out cross validation of a geographically weighted
    regression model of station monthly and annual normals
    (norm~lst+elev+lon+lat) and returns those stations whose error is a
    specified number of standard deviations above/below the mean.

    Parameters
    ----------
    stn_ids : list_like, optional
        The station ids for which to run the cross validation. If None,
        the cross validation will be run for all stations in the database.
    bw_nngh : int, optional
        The number of neighbors to use for the geographically weighted
        regression. Default: 100.
    zscore_threshold : float, optional
        The z-score threshold above which a station's error is
        considered an outlier.

    Returns
    -------
    out_stnids : ndarray
        The outlier stations
    '''

    if stn_ids is None:
        stn_ids = self.stn_da.stn_ids

    schk = StatusCheck(stn_ids.size, check_cnt=250)

    xval_errs = np.zeros((13, stn_ids.size))

    for i, a_id in enumerate(stn_ids):
        xval_errs[:, i] = self.run_xval_stn(a_id, bw_nngh)
        schk.increment()

    xval_errs = pd.DataFrame(xval_errs)
    xval_errs.columns = stn_ids

    # Absolute z-score of each station's error relative to all stations,
    # computed separately for each of the 13 rows (12 months + annual)
    zscores = (xval_errs.subtract(xval_errs.mean(axis=1), axis=0).
               divide(xval_errs.std(axis=1), axis=0).abs())

    out_stnids = zscores.columns[(zscores > zscore_threshold).any(axis=0)].values

    return out_stnids
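# Illustrative usage sketch (assumes `qa` is an instance of the class that
# defines find_xval_outliers; the threshold shown is just the default):
#
#     out_stnids = qa.find_xval_outliers(bw_nngh=100, zscore_threshold=6)
#     print "%d outlier stations found" % out_stnids.size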
def proc_write(twx_cfg, mask_stns, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0
    iter_all = IterMultiFlagUpdate()

    nstns = np.nonzero(mask_stns)[0].size
    stat_chk = StatusCheck(nstns, 10)

    while 1:

        a_iter = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                                     status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:

                print "Writer: updating QA flags in database..."
                iter_all.update_flags(twx_cfg.fpath_stndata_nc_all)

                # Recalculate period-of-record for Tmin and Tmax
                # since some stations might now fall below the min
                # por requirement after QA is run
                for elem in ['tmin', 'tmax']:

                    print ("Updating monthly observation counts for %s "
                           "from %d to %d... " %
                           (elem, ymdL(twx_cfg.obs_start_date),
                            ymdL(twx_cfg.obs_end_date)))
                    add_obs_cnt(twx_cfg.fpath_stndata_nc_all, elem,
                                twx_cfg.obs_start_date, twx_cfg.obs_end_date,
                                twx_cfg.stn_agg_chunk)

                    print ("Updating monthly observation counts for %s "
                           "from %d to %d... " %
                           (elem, ymdL(twx_cfg.interp_start_date),
                            ymdL(twx_cfg.interp_end_date)))
                    add_obs_cnt(twx_cfg.fpath_stndata_nc_all, elem,
                                twx_cfg.interp_start_date,
                                twx_cfg.interp_end_date,
                                twx_cfg.stn_agg_chunk)

                print "Writer: Finished"
                return 0

        else:

            iter_all.add_iter(a_iter)
            stat_chk.increment()
print "Retrieving DEM elevation data for %d stations..." % len( stns_elevdem) write_chk = 50 schk = StatusCheck(len(stns), check_cnt=write_chk) for i in np.arange(len(stns_elevdem), step=write_chk): stns_chk = stns_elevdem.iloc[i:(i + write_chk)].copy() for stnid in stns_chk.station_id: lon, lat = stns_chk.loc[stnid, ['longitude', 'latitude']] elevdem = locqa.get_elevation_dem(lon, lat) stns_chk.loc[stnid, 'elevation_dem'] = elevdem schk.increment() locqa.update_locqa_hdf(stns_chk, reload_locqa=False) locqa.reload_stns_locqa() stns = locqa.add_locqa_cols(stns) # Find stations that have a 200-m difference between their provided elevation # and the DEM-based elevation stns_fail = locqa.get_locqa_fail_stns(stns, elev_dif_thres=200) # Write out CSV file of failed station locations for manual investigtion print "%d stations failed location QA. Writing to %s" % ( len(stns_fail), twx_cfg.fpath_locqa_fail_csv) stns_fail['station_name'] = stns_fail.station_name.str.replace(',', ' ')
def proc_write(twx_cfg, ncdf_mode, start_ymd, end_ymd, nwrkers):

    status = MPI.Status()
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))
    days = stn_da.days
    nwrkrs_done = 0

    bcast_msg = None
    bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD)
    stnids_tmin, stnids_tmax = bcast_msg
    print "WRITER: Received broadcast msg"

    if ncdf_mode == 'r+':

        ds_tmin = Dataset(twx_cfg.fpath_stndata_nc_infill_tmin, 'r+')
        ds_tmax = Dataset(twx_cfg.fpath_stndata_nc_infill_tmax, 'r+')
        ttl_infills = stnids_tmin.size + stnids_tmax.size
        stnids_tmin = ds_tmin.variables[STN_ID][:].astype(np.str)
        stnids_tmax = ds_tmax.variables[STN_ID][:].astype(np.str)

    else:

        stns_tmin = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmin,
                                        assume_unique=True)]
        variables_tmin = [('tmin', 'f4', netCDF4.default_fillvals['f4'],
                           'minimum air temperature', 'C'),
                          ('flag_infilled', 'i1',
                           netCDF4.default_fillvals['i1'],
                           'infilled flag', ''),
                          ('tmin_infilled', 'f4',
                           netCDF4.default_fillvals['f4'],
                           'infilled minimum air temperature', 'C')]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmin, stns_tmin,
                        days, variables_tmin)
        stnda_out_tmin = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmin,
                                       mode="r+")
        stnda_out_tmin.add_stn_variable('mae', 'mean absolute error', 'C', "f8")
        stnda_out_tmin.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmin = stnda_out_tmin.ds

        stns_tmax = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmax,
                                        assume_unique=True)]
        variables_tmax = [('tmax', 'f4', netCDF4.default_fillvals['f4'],
                           'maximum air temperature', 'C'),
                          ('flag_infilled', 'i1',
                           netCDF4.default_fillvals['i1'],
                           'infilled flag', ''),
                          ('tmax_infilled', 'f4',
                           netCDF4.default_fillvals['f4'],
                           'infilled maximum air temperature', 'C')]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmax, stns_tmax,
                        days, variables_tmax)
        stnda_out_tmax = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmax,
                                       mode="r+")
        stnda_out_tmax.add_stn_variable('mae', 'mean absolute error', 'C', "f8")
        stnda_out_tmax.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmax = stnda_out_tmax.ds

        ttl_infills = stnids_tmin.size + stnids_tmax.size

    print "WRITER: Infilling a total of %d station time series " % (ttl_infills,)
    print "WRITER: Output NCDF files ready"

    stat_chk = StatusCheck(ttl_infills, 10)

    while 1:

        result = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG,
                                     status=status)
        stn_id, tair_var, tair, fill_mask, tair_infill, mae, bias = result

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "Writer: Finished"
                return 0

        else:

            if tair_var == 'tmin':
                stn_idx = np.nonzero(stnids_tmin == stn_id)[0][0]
                ds = ds_tmin
            else:
                stn_idx = np.nonzero(stnids_tmax == stn_id)[0][0]
                ds = ds_tmax

            ds.variables[tair_var][:, stn_idx] = tair
            ds.variables["".join([tair_var, "_infilled"])][:, stn_idx] = tair_infill
            ds.variables['flag_infilled'][:, stn_idx] = fill_mask
            ds.variables['bias'][stn_idx] = bias
            ds.variables[LAST_VAR_WRITTEN][stn_idx] = mae
            ds.sync()

            print "|".join(["WRITER", stn_id, tair_var, "%.4f" % (mae,),
                            "%.4f" % (bias,)])

            stat_chk.increment()
def proc_write(fpath_stndb, elem, climdivs, ngh_rng, path_out_optim, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0

    bcast_msg = None
    bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD)
    stn_ids = bcast_msg
    print "WRITER: Received broadcast msg"

    stn_da = StationSerialDataDb(fpath_stndb, elem, mode="r+")
    stn_mask = np.in1d(stn_da.stn_ids, stn_ids, True)
    stns = stn_da.stns[stn_mask]

    climdiv_ds = {}
    ttl_xval_stns = 0
    for climdiv in climdivs:

        stnids_climdiv = stns[STN_ID][stns[CLIMDIV] == climdiv]

        a_ds = create_climdiv_optim_nstns_db(path_out_optim, elem,
                                             stnids_climdiv, ngh_rng, climdiv)
        climdiv_ds[climdiv] = a_ds, stnids_climdiv

        ttl_xval_stns += stnids_climdiv.size

    print "WRITER: Output NCDF files created"

    stn_idxs = {}
    for x in np.arange(stns.size):
        stn_idxs[stns[STN_ID][x]] = x

    min_ngh_wins = ngh_rng
    ngh_idxs = {}
    for x in np.arange(min_ngh_wins.size):
        ngh_idxs[min_ngh_wins[x]] = x

    ttl_xvals = ttl_xval_stns

    stat_chk = StatusCheck(ttl_xvals, 250)

    while 1:

        stn_id, mae, bias, r2 = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                                    tag=MPI.ANY_TAG,
                                                    status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:

                print "WRITER: Setting the optim # of nghs..."
                set_optim_nstns_tair_anom(stn_da, path_out_optim)

                print "WRITER: Finished"
                return 0

        else:

            stn = stns[stn_idxs[stn_id]]
            ds, stnids_climdiv = climdiv_ds[stn[CLIMDIV]]
            dim2 = np.nonzero(stnids_climdiv == stn_id)[0][0]

            ds.variables['mae'][:, :, dim2] = mae
            ds.sync()

            stat_chk.increment()
def create_serially_complete_db(fpath_infill_db, tair_var,
                                fpath_out_serial_db):
    '''
    Create a netCDF single variable, serially-complete station database
    and insert serially-complete station observations. Based on a specific
    threshold of total missing data, a station's serially-complete time
    series will either consist of a mix of actual and infilled observations
    or entirely of infilled observations from the infill model.

    Parameters
    ----------
    fpath_infill_db : str
        The file path to the infilled station database
    tair_var : str
        The temperature variable ('tmin' or 'tmax') for the database
    fpath_out_serial_db : str
        The file path for the output serially-complete database
    '''

    ds_infill = Dataset(fpath_infill_db)
    var_time = ds_infill.variables['time']
    stns = _build_stn_struct(ds_infill)
    start, end = num2date([var_time[0], var_time[-1]], var_time.units)
    days = get_days_metadata(start, end)

    create_quick_db(fpath_out_serial_db, stns, days,
                    SERIAL_DB_VARIABLES[tair_var])
    ds_out = Dataset(fpath_out_serial_db, 'r+')

    all_infill_flags = np.ones(days.size, dtype=np.bool)
    all_infill_stns = np.zeros(stns.size, dtype=np.bool)

    stat_chk = StatusCheck(stns.size, 100)

    for x in np.arange(stns.size):

        infill_mask = ds_infill.variables['flag_infilled'][:, x].astype(np.bool)
        infill_runs = _runs_of_ones_array(infill_mask)

        if infill_runs.size > 0:
            max_infill = np.max(infill_runs)
        else:
            max_infill = 0

        if max_infill >= USE_ALL_INFILL_THRESHOLD:

            # This station has greater than USE_ALL_INFILL_THRESHOLD
            # continuous years of missing data. Use all infilled values for
            # this station to avoid discontinuities between infilled and
            # observed portions of the time series
            tair_stn = ds_infill.variables["".join([tair_var, "_infilled"])][:, x]
            flag_stn = all_infill_flags
            all_infill_stns[x] = True

        else:

            tair_stn = ds_infill.variables[tair_var][:, x]
            flag_stn = infill_mask

        ds_out.variables[tair_var][:, x] = tair_stn
        ds_out.variables['flag_infilled'][:, x] = flag_stn
        ds_out.sync()

        stat_chk.increment()

    ds_out.close()

    print ("% of stns with all infilled values: " +
           str((np.sum(all_infill_stns) /
                np.float(all_infill_stns.size)) * 100.))
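# Illustrative usage sketch (paths are hypothetical):
#
#     create_serially_complete_db('/path/to/infill_tmin.nc', 'tmin',
#                                 '/path/to/serial_tmin.nc')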
def add_stn_raster_values(stnda, var_name, long_name, units, a_rast,
                          extract_method=1, revert_nn=False,
                          force_data_value=False):
    '''
    Extract raster values for station locations and add them to a
    serially-complete netCDF station database. Uses
    mpl_toolkits.basemap.interp to extract raster values.

    Parameters
    ----------
    stnda : twx.db.StationSerialDataDb
        A StationSerialDataDb object pointing to the database to which
        station raster values should be added.
    var_name : str
        The netCDF variable name to be used for the raster values
    long_name : str
        The long netCDF variable name to be used for the raster values
    units : str
        The units of the raster values
    a_rast : RasterDataset
        The raster dataset from which to extract values
    extract_method : int, optional
        The mpl_toolkits.basemap.interp interpolation method to use for
        extraction specified as an integer (0 = nearest neighbor,
        1 = bilinear, 3 = cubic spline). Default = 1.
    revert_nn : boolean, optional
        Set to True if extract_method > 0 should revert to nearest
        neighbor if a value cannot be extracted with the specified
        extract method. Default = False.
    force_data_value : boolean, optional
        If True, for station locations that have no data raster values or
        are outside the extent of the raster, the nearest grid cell with
        a value will be used. Default = False.

    Returns
    -------
    stnids_ndata : ndarray
        An array of station ids for which raster values could not be
        extracted
    '''

    lon = stnda.stns[LON]
    lat = stnda.stns[LAT]

    newvar = stnda.add_stn_variable(var_name, long_name, units, 'f8',
                                    fill_value=a_rast.ndata)

    # Setup data and coordinates for mpl_toolkits.basemap.interp
    a = a_rast.read_as_array()
    aflip = np.flipud(a)
    aflip = aflip.astype(np.float)
    a = a.data

    yGrid, xGrid = a_rast.get_coord_grid_1d()
    yGrid = np.sort(yGrid)

    # Initialize output array
    rvals = np.zeros(len(newvar[:]))

    # Loop through stations
    schk = StatusCheck(lon.size, 5000)

    for x in np.arange(lon.size):

        try:

            rval = bm.interp(aflip, xGrid, yGrid, np.array(lon[x]),
                             np.array(lat[x]), checkbounds=True,
                             masked=False, order=extract_method)

        except ValueError:
            # ValueError means that the station point is outside the bounds
            # of the raster
            rval = np.ma.masked

        # Re-run nearest neighbor extraction with no checkbounds constraint
        # if rval is masked (i.e.--no data) and at least one of the
        # following conditions is met:
        # 1.) The station point is in bounds and the extract method is
        # nearest neighbor or revert_nn is True. Since
        # mpl_toolkits.basemap.interp uses coordinates based on cell
        # centers, stations near the very edges of the raster will produce
        # ValueErrors with a checkbounds restriction when they aren't
        # actually out-of-bounds.
        # 2.) force_data_value = True. If the point is outside the bounds
        # of the raster, the returned value will be clipped to the edge of
        # the raster.
        if (np.ma.is_masked(rval) and
            ((a_rast.is_inbounds(lon[x], lat[x]) and
              (extract_method == 0 or revert_nn)) or force_data_value)):

            rval = bm.interp(aflip, xGrid, yGrid, np.array(lon[x]),
                             np.array(lat[x]), checkbounds=False,
                             masked=False, order=0)

        # If rval is still masked (i.e.--no data) and force_data_value is
        # True, find the nearest grid cell with data to the point
        if np.ma.is_masked(rval) and force_data_value:

            row, col = a_rast.get_row_col(lon[x], lat[x], check_bounds=False)
            rval, dist = _find_nn_data(a, a_rast, col, row)

        if np.ma.is_masked(rval):
            rval = a_rast.ndata

        rvals[x] = rval
        schk.increment()

    newvar[:] = rvals
    stnda.ds.sync()

    stnids_ndata = stnda.stn_ids[rvals == a_rast.ndata]

    if force_data_value and stnids_ndata.size > 0:
        raise Exception('force_data_value turned on, but station points '
                        'still had no data values.')

    return stnids_ndata
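# Illustrative usage sketch (paths are hypothetical): extract elevation
# values for all stations with bilinear interpolation, reverting to nearest
# neighbor at raster edges:
#
#     stnda = StationSerialDataDb('/path/to/serial_tmin.nc', 'tmin', mode='r+')
#     elev_rast = RasterDataset('/path/to/dem.tif')
#     stnids_ndata = add_stn_raster_values(stnda, 'elev', 'elevation', 'm',
#                                          elev_rast, extract_method=1,
#                                          revert_nn=True)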
def proc_write(twx_cfg, start_ymd, end_ymd, nwrkers):

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd), mode="r+")

    mths = np.arange(1, 13)

    for mth in mths:
        for varname in ['tmin', 'tmax']:
            varname_mean = get_mean_varname(varname, mth)
            varname_vari = get_variance_varname(varname, mth)
            stn_da.add_stn_variable(varname_mean, varname_mean, "C", 'f8')
            stn_da.add_stn_variable(varname_vari, varname_vari, "C**2", 'f8')

    stn_da.ds.sync()

    bcast_msg = None
    bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD)
    mask_por_tmin, mask_por_tmax = bcast_msg
    stn_ids_tmin, stn_ids_tmax = (stn_da.stn_ids[mask_por_tmin],
                                  stn_da.stn_ids[mask_por_tmax])
    print "WRITER: Received broadcast msg"

    stn_ids_uniq = np.unique(np.concatenate([stn_ids_tmin, stn_ids_tmax]))

    stn_idxs = {}
    for x in np.arange(stn_da.stn_ids.size):
        if stn_da.stn_ids[x] in stn_ids_uniq:
            stn_idxs[stn_da.stn_ids[x]] = x

    ttl_infills = stn_ids_tmin.size + stn_ids_tmax.size

    stat_chk = StatusCheck(ttl_infills, 30)

    while 1:

        stn_id, tair_var, stn_mean, stn_vari = MPI.COMM_WORLD.recv(
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "WRITER: Finished"
                return 0

        else:

            stnid_dim = stn_idxs[stn_id]

            for mth in mths:

                vname_mean = get_mean_varname(tair_var, mth)
                stn_da.ds.variables[vname_mean][stnid_dim] = stn_mean[mth - 1]

                vname_vari = get_variance_varname(tair_var, mth)
                stn_da.ds.variables[vname_vari][stnid_dim] = stn_vari[mth - 1]

            stn_da.ds.sync()
            stat_chk.increment()
def create_tobs_db(fpath_tobs_file, fpath_db, stnids, min_date, max_date):
    '''
    Create a time-of-observation (tobs) netCDF4 database from a tobs file
    generated from create_tobs_file.

    Parameters
    ----------
    fpath_tobs_file : str
        The file path to the tobs file from create_tobs_file
    fpath_db : str
        The file path to which to write the tobs database.
    stnids : sequence of str
        The sorted station ids of stns whose tobs should be written
        to the database
    min_date : datetime
        The earliest observation date
    max_date : datetime
        The latest observation date
    '''

    ds = Dataset(fpath_db, 'w')

    # Set global attributes
    ds.title = "Time-of-Observation Database"
    ds.institution = "University of Montana Numerical Terradynamic Simulation Group"
    ds.history = "".join(["Created on: ",
                          datetime.datetime.strftime(datetime.date.today(),
                                                     "%Y-%m-%d")])

    days = get_days_metadata(min_date, max_date)

    print ("Creating netCDF4 Time-of-Observation Database for " +
           str(min_date) + " to " + str(max_date) + " for " +
           str(stnids.size) + " stations.")

    ds.createDimension('time', days.size)
    ds.createDimension('stn_id', stnids.size)

    times = ds.createVariable('time', 'f8', ('time',), fill_value=False)
    times.long_name = "time"
    times.units = "".join(["days since ", str(min_date.year), "-",
                           str(min_date.month), "-", str(min_date.day),
                           " 0:0:0"])
    times.standard_name = "time"
    times.calendar = "standard"
    times[:] = date2num(days[DATE], times.units)

    stations = ds.createVariable('stn_id', 'str', ('stn_id',))
    stations.long_name = "station id"
    for x in np.arange(stnids.size):
        ds.variables['stn_id'][x] = str(stnids[x])

    tobs = ds.createVariable('tobs', np.int16, ('time', 'stn_id'),
                             fill_value=-1,
                             chunksizes=(days[DATE].size, NCDF_CHK_COLS))
    tobs.long_name = "time-of-observation"
    tobs.missing_value = -1

    stnidsOrig = np.char.replace(stnids, "GHCN_", "", 1)

    fileTobs = open(fpath_tobs_file)
    aline = fileTobs.readline()

    atobs = np.ones((days.size, stnids.size)) * -1
    curYmd = days[YMD][0]
    time_idx = 0
    stn_idx = 0

    n_obs = int(subprocess.check_output(["wc", "-l",
                                         fpath_tobs_file]).split()[0])
    stchk = StatusCheck(n_obs, 1000000)

    stn_idxs = {}
    for x in np.arange(stnidsOrig.size):
        stn_idxs[stnidsOrig[x]] = x

    while aline != "":

        try:

            stn_idx = stn_idxs[aline[0:11]]

            aYmd = np.int(aline[12:20])
            if aYmd != curYmd:
                time_idx = np.nonzero(days[YMD] == aYmd)[0][0]
                curYmd = aYmd

            atobs[time_idx, stn_idx] = np.int(aline[-5:])

        except KeyError:
            # Skip observations for stations that are not in stnids
            pass

        stchk.increment()
        aline = fileTobs.readline()

    tobs[:] = atobs
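# Illustrative usage sketch (paths and dates are hypothetical; stnids must be
# sorted and use the GHCN_ prefix expected by the function):
#
#     create_tobs_db('/path/to/ghcnd_tobs.txt', '/path/to/tobs.nc', stnids,
#                    datetime.datetime(1948, 1, 1),
#                    datetime.datetime(2016, 12, 31))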