def proc_write(twx_cfg, xval_stnids, start_ymd, end_ymd, nwrkers):
    '''
    Writer process for the cross-validation infill run.

    Creates the output netCDF file for the cross-validation stations and
    then receives (stn_id, tair_var, infill_tair, obs_tair) messages from
    any source, writing the observed and infilled series for each one.

    Parameters
    ----------
    twx_cfg : object
        Configuration; fpath_stndata_nc_tair_homog and
        fpath_xval_infill_nc are read from it.
    xval_stnids : ndarray or None
        Station ids to cross-validate. If None, the ids returned by
        load_default_xval_stnids are used.
    start_ymd, end_ymd :
        Date range handed to StationDataDb.
    nwrkers : int
        Number of TAG_STOPWORK messages to wait for before finishing.

    Returns
    -------
    int
        Always 0, once all workers have reported that they are done.
    '''
    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    if xval_stnids is None:
        xval_stnids = load_default_xval_stnids(stn_da.stn_ids)

    ttl_infills = xval_stnids.size * 2

    mask_xval = np.in1d(stn_da.stn_ids, xval_stnids, True)
    xval_stns = stn_da.stns[mask_xval]

    create_quick_db(twx_cfg.fpath_xval_infill_nc, xval_stns, stn_da.days,
                    NETCDF_OUT_VARIABLES)
    ds_out = Dataset(twx_cfg.fpath_xval_infill_nc, 'r+')

    # Map each station id to its column in the output file. The file is
    # created from xval_stns, which is in station-database order, so the
    # index has to follow that order and not the order of xval_stnids.
    # NOTE(review): assumes create_quick_db keeps the row order of the
    # stations it is given -- confirm.
    stn_idxs = {stn_id: i
                for i, stn_id in enumerate(stn_da.stn_ids[mask_xval])}

    stat_chk = StatusCheck(ttl_infills, 10)

    try:
        while 1:

            stn_id, tair_var, infill_tair, obs_tair = MPI.COMM_WORLD.recv(
                source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

            if status.tag == TAG_STOPWORK:

                nwrkrs_done += 1
                if nwrkrs_done == nwrkers:
                    print("WRITER: Finished")
                    return 0

            else:

                # NaN marks a missing value; mask it so that it is left
                # out of the error statistics.
                infill_tair = np.ma.masked_array(infill_tair,
                                                 np.isnan(infill_tair))
                obs_tair = np.ma.masked_array(obs_tair, np.isnan(obs_tair))

                i = stn_idxs[stn_id]

                difs = infill_tair - obs_tair
                bias = np.ma.mean(difs)
                mae = np.ma.mean(np.ma.abs(difs))

                print("|".join(["WRITER", stn_id, tair_var,
                                "MAE: %.2f" % (mae,),
                                "BIAS: %.2f" % (bias,)]))

                obs_tair = np.ma.filled(obs_tair,
                                        netCDF4.default_fillvals['f4'])
                ds_out.variables["obs_%s" % (tair_var,)][:, i] = obs_tair

                infill_tair = np.ma.filled(infill_tair,
                                           netCDF4.default_fillvals['f4'])
                ds_out.variables["infilled_%s" % (tair_var,)][:, i] = infill_tair

                ds_out.sync()

                stat_chk.increment()
    finally:
        # Release the output file on normal completion and on error.
        ds_out.close()
def proc_coord(twx_cfg, xval_stnids, start_ymd, end_ymd, nwrkers):
    '''
    Coordinator process for the cross-validation infill run.

    Hands out one (stn_id, tair_var) work item per station and per
    temperature variable ('tmin', then 'tmax'). The first nwrkers items go
    to the workers in rank order; every later item goes to whichever
    process reports back next. Once all items are sent, every worker gets
    a TAG_STOPWORK message.
    '''
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    if xval_stnids is None:
        xval_stnids = load_default_xval_stnids(stn_da.stn_ids)

    print("COORD: Done initialization. Starting to send work.")

    comm = MPI.COMM_WORLD

    work_items = ((stn_id, tair_var)
                  for stn_id in xval_stnids
                  for tair_var in ['tmin', 'tmax'])

    for n_sent, work_item in enumerate(work_items):

        if n_sent < nwrkers:
            # Initial round: each worker gets its first item directly.
            dest = n_sent + N_NON_WRKRS
        else:
            # The received message is used as the destination rank.
            dest = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)

        comm.send(work_item, dest=dest, tag=TAG_DOWORK)

    for wrkr in range(nwrkers):
        comm.send((None, None), dest=wrkr + N_NON_WRKRS, tag=TAG_STOPWORK)

    print("COORD: done")
def proc_work(twx_cfg, start_ymd, end_ymd, min_nngh_daily, rank):
    '''
    Worker process for infilling station monthly means and variances.

    Receives the tmin/tmax station masks broadcast from RANK_COORD, then
    loops over (stn_id, tair_var) work items. Each result, or a
    fill-value placeholder if infill_mean_variance raises, is sent to
    RANK_WRITE, and the worker's rank is sent back to RANK_COORD.
    Returns 0 on TAG_STOPWORK.
    '''
    status = MPI.Status()
    comm = MPI.COMM_WORLD

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    mask_por_tmin, mask_por_tmax = comm.bcast(None, root=RANK_COORD)
    print("".join(["WORKER ", str(rank), ": Received broadcast msg"]))
    stn_masks = {'tmin': mask_por_tmin, 'tmax': mask_por_tmax}

    ds_nnr = NNRNghData(twx_cfg.path_reanalysis_namerica,
                        (start_ymd, end_ymd))

    # One boolean day mask per calendar month (1..12).
    mth_masks = []
    for mth in np.arange(1, 13):
        mth_masks.append(stn_da.days[MONTH] == mth)

    while 1:

        stn_id, tair_var = comm.recv(source=RANK_COORD, tag=MPI.ANY_TAG,
                                     status=status)

        if status.tag == TAG_STOPWORK:
            # Pass the stop signal on to the writer before exiting.
            comm.send([None] * 4, dest=RANK_WRITE, tag=TAG_STOPWORK)
            print("".join(["WORKER ", str(rank), ": Finished"]))
            return 0

        try:

            stn_mean, stn_vari = infill_mean_variance(
                stn_id, stn_da, stn_masks[tair_var], tair_var, ds_nnr,
                nnghs=min_nngh_daily, day_masks=mth_masks)

        except Exception as e:

            print("".join(["ERROR: WORKER ", str(rank),
                           ": could not infill ", tair_var, " for ",
                           stn_id, str(e)]))

            # Twelve fill values stand in for both means and variances.
            placeholder = np.ones(12) * netCDF4.default_fillvals['f8']
            stn_mean = placeholder
            stn_vari = placeholder

        comm.send((stn_id, tair_var, stn_mean, stn_vari),
                  dest=RANK_WRITE, tag=TAG_DOWORK)
        comm.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def proc_coord(twx_cfg, min_por, start_ymd, end_ymd, nwrkers):
    '''
    Coordinator process for infilling station monthly means and variances.

    Builds period-of-record masks for tmin and tmax, broadcasts them to
    all processes, and hands out one (stn_id, tair_var) work item for
    every station/variable pair selected by the masks. When all items are
    sent, every worker receives a TAG_STOPWORK message.

    Parameters
    ----------
    twx_cfg : object
        Configuration; fpath_stndata_nc_tair_homog, interp_start_date and
        interp_end_date are read from it.
    min_por : passed to build_por_mask as min_por_yrs.
    start_ymd, end_ymd :
        Date range handed to StationDataDb.
    nwrkers : int
        Number of worker processes.
    '''
    stndb = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                          (start_ymd, end_ymd))

    mask_por_tmin = build_por_mask(stndb.ds, ['tmin'],
                                   twx_cfg.interp_start_date,
                                   twx_cfg.interp_end_date,
                                   min_por_yrs=min_por)
    mask_por_tmax = build_por_mask(stndb.ds, ['tmax'],
                                   twx_cfg.interp_start_date,
                                   twx_cfg.interp_end_date,
                                   min_por_yrs=min_por)

    # Extract stn_ids that have min # of observations
    stn_ids_tmin = stndb.stn_ids[mask_por_tmin]
    stn_ids_tmax = stndb.stn_ids[mask_por_tmax]

    # The station database is not needed past this point.
    stndb.ds.close()
    del stndb

    # Send stn masks to all processes
    MPI.COMM_WORLD.bcast((mask_por_tmin, mask_por_tmax), root=RANK_COORD)

    print("COORD: Done initialization. Starting to send work.")

    # Decide once, vectorised, which variables each station needs instead
    # of scanning both id arrays for every station.
    stn_ids_all = np.unique(np.concatenate([stn_ids_tmin, stn_ids_tmax]))
    needs_tmin = np.in1d(stn_ids_all, stn_ids_tmin)
    needs_tmax = np.in1d(stn_ids_all, stn_ids_tmax)

    cnt = 0

    for stn_id, do_tmin, do_tmax in zip(stn_ids_all, needs_tmin, needs_tmax):

        tair_vars = []
        if do_tmin:
            tair_vars.append('tmin')
        if do_tmax:
            tair_vars.append('tmax')

        for tair_var in tair_vars:

            if cnt < nwrkers:
                # Initial round: each worker gets its first item directly.
                dest = cnt + N_NON_WRKRS
            else:
                # The received message is used as the destination rank.
                dest = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                           tag=MPI.ANY_TAG)

            MPI.COMM_WORLD.send((stn_id, tair_var), dest=dest,
                                tag=TAG_DOWORK)
            cnt += 1

    for w in np.arange(nwrkers):
        MPI.COMM_WORLD.send((None, None), dest=w + N_NON_WRKRS,
                            tag=TAG_STOPWORK)

    print("COORD: done")
def proc_work(twx_cfg, qa_spatial, rank):
    '''
    Worker process for temperature quality assurance.

    Receives station ids from RANK_COORD, runs either the spatial-only or
    the non-spatial QA checks on the station's TMIN/TMAX observations and
    sends the resulting update iterator to RANK_WRITE. After every item,
    successful or not, the worker's rank is sent back to RANK_COORD.

    Parameters
    ----------
    twx_cfg : object
        Configuration; fpath_stndata_nc_all is read from it.
    qa_spatial : bool
        If true run qa_temp.run_qa_spatial_only, otherwise
        qa_temp.run_qa_non_spatial.
    rank : int
        Rank of this worker; used in log output and reported to the
        coordinator.

    Returns
    -------
    int
        Always 0, after a TAG_STOPWORK message has been received.
    '''
    status = MPI.Status()

    stndb = StationDataDb(twx_cfg.fpath_stndata_nc_all)

    while 1:

        # Reset for every item so the error report below never shows the
        # id of a previous station and never hits an unbound name when
        # the receive itself fails.
        stn_id = None

        try:

            stn_id = MPI.COMM_WORLD.recv(source=RANK_COORD, tag=MPI.ANY_TAG,
                                         status=status)

            if status.tag == TAG_STOPWORK:
                # Pass the stop signal on to the writer before exiting.
                MPI.COMM_WORLD.send(None, dest=RANK_WRITE, tag=TAG_STOPWORK)
                print("".join(["Worker ", str(rank), ": Finished"]))
                return 0

            obs = stndb.load_all_stn_obs(np.array([stn_id]))
            stn = stndb.stns[stndb.stn_ids == stn_id][0]

            if qa_spatial:
                flags_tmin, flags_tmax = qa_temp.run_qa_spatial_only(
                    stn, stndb, obs[TMIN], obs[TMAX], stndb.days)
            else:
                flags_tmin, flags_tmax = qa_temp.run_qa_non_spatial(
                    obs[TMIN], obs[TMAX], stndb.days)

            a_iter = create_update_iter(stn, flags_tmin, flags_tmax,
                                        stndb.days, obs[TMIN_FLAG],
                                        obs[TMAX_FLAG])

            MPI.COMM_WORLD.send(a_iter, dest=RANK_WRITE, tag=TAG_DOWORK)

        except Exception as e:

            # Best effort: report the failure and move on. Nothing is
            # sent to the writer for this station.
            print(traceback.format_exc())
            print("".join(["Error in QA of ", str(stn_id), ":", str(e),
                           "\n"]))

        MPI.COMM_WORLD.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def proc_work(twx_cfg, xval_stnids, nyrs_train, start_ymd, end_ymd,
              params_ppca, rank):
    '''
    Worker process for the cross-validation infill run.

    Sets up one XvalInfill object per temperature variable, then loops
    over (stn_id, tair_var) work items from RANK_COORD. For each item the
    observed and infilled series are sent to RANK_WRITE (all-NaN series
    if run_xval raises), and the worker's rank is sent back to
    RANK_COORD. Returns 0 on TAG_STOPWORK.
    '''
    status = MPI.Status()
    comm = MPI.COMM_WORLD

    date_range = (start_ymd, end_ymd)
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog, date_range)
    ds_nnr = NNRNghData(twx_cfg.path_reanalysis_namerica, date_range)

    infill_params = XvalInfillParams(nnr_ds=ds_nnr, **params_ppca)

    xval_infills = {}
    for a_var in ('tmin', 'tmax'):
        xval_infills[a_var] = XvalInfill(stn_da, a_var, infill_params,
                                         xval_stnids=xval_stnids,
                                         ntrain_yrs=nyrs_train)

    print("".join(["WORKER ", str(rank), ": ready to receive work."]))

    # All-NaN series, one value per day, used when a station fails.
    nan_series = np.ones(stn_da.days.size) * np.nan

    while 1:

        stn_id, tair_var = comm.recv(source=RANK_COORD, tag=MPI.ANY_TAG,
                                     status=status)

        if status.tag == TAG_STOPWORK:
            # Pass the stop signal on to the writer before exiting.
            comm.send([None] * 4, dest=RANK_WRITE, tag=TAG_STOPWORK)
            print("".join(["WORKER ", str(rank), ": Finished"]))
            return 0

        try:

            obs_tair, infill_tair = xval_infills[tair_var].run_xval(stn_id)

        except Exception as e:

            print("".join(["ERROR: WORKER ", str(rank),
                           ": could not infill ", tair_var, " for ",
                           stn_id, str(e)]))
            infill_tair = nan_series
            obs_tair = nan_series

        comm.send((stn_id, tair_var, infill_tair, obs_tair),
                  dest=RANK_WRITE, tag=TAG_DOWORK)
        comm.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def proc_coord(twx_cfg, mask_stns, nwrkers):
    '''
    Coordinator process for temperature quality assurance.

    Sends the id of every station selected by mask_stns to a worker. The
    first nwrkers ids go to the workers in rank order; every later id
    goes to whichever process reports back next. When all ids are sent,
    every worker receives a TAG_STOPWORK message.

    Parameters
    ----------
    twx_cfg : object
        Configuration; fpath_stndata_nc_all is read from it.
    mask_stns :
        Index/mask applied to the station array of the database.
    nwrkers : int
        Number of worker processes.
    '''
    stndb = StationDataDb(twx_cfg.fpath_stndata_nc_all)
    stns = stndb.stns[mask_stns]

    comm = MPI.COMM_WORLD

    for cnt, stn in enumerate(stns):

        if cnt < nwrkers:
            # Initial round: each worker gets its first item directly.
            dest = cnt + N_NON_WRKRS
        else:
            # The received message is used as the destination rank.
            dest = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)

        comm.send(stn[STN_ID], dest=dest, tag=TAG_DOWORK)

    # The stop message carries no payload. It must not depend on the loop
    # variable above, which does not exist when no station was selected.
    for w in range(nwrkers):
        comm.send(None, dest=w + N_NON_WRKRS, tag=TAG_STOPWORK)
''' Script to run station location quality assurance for stations in netCDF database. ''' from twx.db import StationDataDb from twx.db.obs_por import build_por_mask from twx.qa.qa_location import LocQA from twx.utils import StatusCheck from twx.utils.config import TwxConfig import numpy as np import os if __name__ == '__main__': twx_cfg = TwxConfig(os.getenv('TOPOWX_INI')) stndb = StationDataDb(twx_cfg.fpath_stndata_nc_all) # To save processing time, only run location for stations with at least a 5 # year period-of-record por_mask = build_por_mask(stndb.ds, twx_cfg.obs_main_elems, twx_cfg.interp_start_date, twx_cfg.interp_end_date, min_por_yrs=5) stns = stndb.stns_df[por_mask] # Load location QA HDF file locqa = LocQA(twx_cfg.fpath_locqa_hdf, usrname_geonames=twx_cfg.username_geonames) # Add location QA data columns to stations
def proc_work(twx_cfg, start_ymd, end_ymd, params_ppca, rank):
    '''
    Worker process for daily tmin/tmax infilling.

    Receives the tmin and tmax station id lists broadcast from
    RANK_COORD, then loops over station ids. For each station, tmin
    and/or tmax are infilled depending on which list the id is in, the
    result of each is sent to RANK_WRITE, and the worker's rank is sent
    back to RANK_COORD. If anything raises while a station is being
    processed, fill-value placeholders are sent for every variable that
    was requested for it. Returns 0 on TAG_STOPWORK.
    '''
    status = MPI.Status()
    comm = MPI.COMM_WORLD

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))
    days = stn_da.days
    ndays = days.size

    # Placeholder results for stations that could not be infilled.
    fillval_f4 = netCDF4.default_fillvals['f4']
    empty_fill = np.ones(ndays, dtype=np.float32) * fillval_f4
    empty_flags = (np.ones(ndays, dtype=np.int8) *
                   netCDF4.default_fillvals['i1'])
    empty_bias = fillval_f4
    empty_mae = fillval_f4

    ds_nnr = NNRNghData(twx_cfg.path_reanalysis_namerica,
                        (start_ymd, end_ymd))

    mths = np.arange(1, 13)
    mth_masks = [stn_da.days[MONTH] == mth for mth in mths]

    vnames_mean_tmin = [get_mean_varname('tmin', mth) for mth in mths]
    vnames_vari_tmin = [get_variance_varname('tmin', mth) for mth in mths]
    vnames_mean_tmax = [get_mean_varname('tmax', mth) for mth in mths]
    vnames_vari_tmax = [get_variance_varname('tmax', mth) for mth in mths]

    stnids_tmin, stnids_tmax = comm.bcast(None, root=RANK_COORD)
    print("".join(["WORKER ", str(rank), ": Received broadcast msg"]))
    print("".join(["WORKER ", str(rank),
                   ": Minimum number of station neighbors for infilling: ",
                   str(params_ppca['min_daily_nnghs'])]))

    while 1:

        stn_id = comm.recv(source=RANK_COORD, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:
            # Pass the stop signal on to the writer before exiting.
            comm.send([None] * 7, dest=RANK_WRITE, tag=TAG_STOPWORK)
            print("".join(["WORKER ", str(rank), ": Finished"]))
            return 0

        try:

            run_infill_tmin = stn_id in stnids_tmin
            run_infill_tmax = stn_id in stnids_tmax

            if run_infill_tmin:
                (fnl_tmin, fill_mask_tmin, infill_tmin,
                 mae_tmin, bias_tmin) = infill_tair(
                    stn_id, stn_da, 'tmin', ds_nnr, vnames_mean_tmin,
                    vnames_vari_tmin, mth_masks, params_ppca)

            if run_infill_tmax:
                (fnl_tmax, fill_mask_tmax, infill_tmax,
                 mae_tmax, bias_tmax) = infill_tair(
                    stn_id, stn_da, 'tmax', ds_nnr, vnames_mean_tmax,
                    vnames_vari_tmax, mth_masks, params_ppca)

        except Exception as e:

            print("".join(["ERROR: Could not infill ", stn_id, "|", str(e)]))

            # A failure in either variable replaces the results of every
            # requested variable with placeholders.
            if run_infill_tmin:
                fnl_tmin, fill_mask_tmin, infill_tmin = (empty_fill,
                                                         empty_flags,
                                                         empty_fill)
                mae_tmin, bias_tmin = empty_mae, empty_bias

            if run_infill_tmax:
                fnl_tmax, fill_mask_tmax, infill_tmax = (empty_fill,
                                                         empty_flags,
                                                         empty_fill)
                mae_tmax, bias_tmax = empty_mae, empty_bias

        if run_infill_tmin:
            comm.send((stn_id, 'tmin', fnl_tmin, fill_mask_tmin,
                       infill_tmin, mae_tmin, bias_tmin),
                      dest=RANK_WRITE, tag=TAG_DOWORK)

        if run_infill_tmax:
            comm.send((stn_id, 'tmax', fnl_tmax, fill_mask_tmax,
                       infill_tmax, mae_tmax, bias_tmax),
                      dest=RANK_WRITE, tag=TAG_DOWORK)

        comm.send(rank, dest=RANK_COORD, tag=TAG_DOWORK)
def _incomplete_stnids(fpath_nc, stnids):
    '''
    Return the entries of stnids whose LAST_VAR_WRITTEN value in the
    netCDF file fpath_nc is masked, i.e. has not been written yet.

    If the variable comes back without a mask attribute, an empty id
    array is returned (nothing left to infill). The dataset is closed in
    every case.
    '''
    ds = Dataset(fpath_nc)
    try:
        mask_incplt = ds.variables[LAST_VAR_WRITTEN][:].mask
        return stnids[mask_incplt]
    except AttributeError:
        # no mask: infill complete
        return np.array([], dtype="<S16")
    finally:
        ds.close()


def proc_coord(twx_cfg, ncdf_mode, stnids_to_infill_tmin,
               stnids_to_infill_tmax, start_ymd, end_ymd, nwrkers):
    '''
    Coordinator process for daily tmin/tmax infilling.

    Determines the tmin and tmax station ids to infill, broadcasts both
    lists to all processes and hands out the union of the ids one at a
    time. When all ids are sent, every worker receives a TAG_STOPWORK
    message.

    Parameters
    ----------
    twx_cfg : object
        Configuration; fpath_stndata_nc_tair_homog,
        fpath_stndata_nc_infill_tmin and fpath_stndata_nc_infill_tmax are
        read from it.
    ncdf_mode : str
        'r+' means an existing run is being restarted; any other value
        infills every station that has a finite month-1 mean.
    stnids_to_infill_tmin, stnids_to_infill_tmax : ndarray or None
        Only used when ncdf_mode is 'r+'. If given, exactly these ids are
        infilled; if None, the ids still incomplete in the corresponding
        output file are used.
    start_ymd, end_ymd :
        Date range handed to StationDataDb.
    nwrkers : int
        Number of worker processes.
    '''
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd))

    # Candidate stations: those with a finite mean for month 1.
    mask_tmin = np.isfinite(stn_da.stns[get_mean_varname('tmin', 1)])
    mask_tmax = np.isfinite(stn_da.stns[get_mean_varname('tmax', 1)])

    stnids_tmin = stn_da.stn_ids[mask_tmin]
    stnids_tmax = stn_da.stn_ids[mask_tmax]

    # Check if we're restarting a run
    if ncdf_mode == 'r+':

        # If rerunning remove stn ids that have already been completed
        if stnids_to_infill_tmin is None:
            stnids_tmin = _incomplete_stnids(
                twx_cfg.fpath_stndata_nc_infill_tmin, stnids_tmin)
        else:
            stnids_tmin = stnids_to_infill_tmin

        if stnids_to_infill_tmax is None:
            stnids_tmax = _incomplete_stnids(
                twx_cfg.fpath_stndata_nc_infill_tmax, stnids_tmax)
        else:
            stnids_tmax = stnids_to_infill_tmax

    stnids_all = np.unique(np.concatenate((stnids_tmin, stnids_tmax)))

    # Send stn ids to all processes
    MPI.COMM_WORLD.bcast((stnids_tmin, stnids_tmax), root=RANK_COORD)

    print("COORD: Done initialization. Starting to send work.")

    for cnt, stn_id in enumerate(stnids_all):

        if cnt < nwrkers:
            # Initial round: each worker gets its first item directly.
            dest = cnt + N_NON_WRKRS
        else:
            # The received message is used as the destination rank.
            dest = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                       tag=MPI.ANY_TAG)

        MPI.COMM_WORLD.send(stn_id, dest=dest, tag=TAG_DOWORK)

    for w in np.arange(nwrkers):
        MPI.COMM_WORLD.send(None, dest=w + N_NON_WRKRS, tag=TAG_STOPWORK)

    print("COORD: done")
def proc_write(twx_cfg, ncdf_mode, start_ymd, end_ymd, nwrkers):
    '''
    Writer process for daily tmin/tmax infilling.

    Receives the tmin and tmax station id lists broadcast from
    RANK_COORD, opens (ncdf_mode 'r+') or creates (any other mode) the
    tmin and tmax output netCDF files and then writes every result
    received from the workers. Returns 0 once nwrkers TAG_STOPWORK
    messages have arrived.
    '''
    status = MPI.Status()
    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog, (start_ymd, end_ymd))
    days = stn_da.days
    nwrkrs_done = 0

    bcast_msg = None
    bcast_msg = MPI.COMM_WORLD.bcast(bcast_msg, root=RANK_COORD)
    stnids_tmin, stnids_tmax = bcast_msg
    print "WRITER: Received broadcast msg"

    if ncdf_mode == 'r+':

        # Restart: reuse the existing output files.
        ds_tmin = Dataset(twx_cfg.fpath_stndata_nc_infill_tmin, 'r+')
        ds_tmax = Dataset(twx_cfg.fpath_stndata_nc_infill_tmax, 'r+')
        # Count the work from the broadcast lists BEFORE they are replaced
        # below by the full id lists stored in the files, which are used
        # for locating a station's column.
        ttl_infills = stnids_tmin.size + stnids_tmax.size
        stnids_tmin = ds_tmin.variables[STN_ID][:].astype(np.str)
        stnids_tmax = ds_tmax.variables[STN_ID][:].astype(np.str)

    else:

        # Fresh run: create one output file per variable holding the final
        # series, the infilled flag and the fully infilled series, plus
        # per-station 'mae' and 'bias' variables.
        stns_tmin = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmin, assume_unique=True)]
        variables_tmin = [('tmin', 'f4', netCDF4.default_fillvals['f4'], 'minimum air temperature', 'C'),
                          ('flag_infilled', 'i1', netCDF4.default_fillvals['i1'], 'infilled flag', ''),
                          ('tmin_infilled', 'f4', netCDF4.default_fillvals['f4'], 'infilled minimum air temperature', 'C')]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmin, stns_tmin, days, variables_tmin)
        stnda_out_tmin = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmin, mode="r+")
        stnda_out_tmin.add_stn_variable('mae', 'mean absolute error', 'C', "f8")
        stnda_out_tmin.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmin = stnda_out_tmin.ds

        stns_tmax = stn_da.stns[np.in1d(stn_da.stns[STN_ID], stnids_tmax, assume_unique=True)]
        variables_tmax = [('tmax', 'f4', netCDF4.default_fillvals['f4'], 'maximum air temperature', 'C'),
                          ('flag_infilled', 'i1', netCDF4.default_fillvals['i1'], 'infilled flag', ''),
                          ('tmax_infilled', 'f4', netCDF4.default_fillvals['f4'], 'infilled maximum air temperature', 'C')]
        create_quick_db(twx_cfg.fpath_stndata_nc_infill_tmax, stns_tmax, days, variables_tmax)
        stnda_out_tmax = StationDataDb(twx_cfg.fpath_stndata_nc_infill_tmax, mode="r+")
        stnda_out_tmax.add_stn_variable('mae', 'mean absolute error', 'C', "f8")
        stnda_out_tmax.add_stn_variable('bias', 'bias', 'C', "f8")
        ds_tmax = stnda_out_tmax.ds

        # NOTE(review): the column lookup below uses the broadcast id
        # lists in this branch, which presumes the created files keep the
        # stations in that same order -- confirm.
        ttl_infills = stnids_tmin.size + stnids_tmax.size

    print "WRITER: Infilling a total of %d station time series " % (ttl_infills,)
    print "WRITER: Output NCDF files ready"

    stat_chk = StatusCheck(ttl_infills, 10)

    while 1:

        result = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        # Unpacked before the tag is checked, so a stop message must also
        # carry seven items.
        stn_id, tair_var, tair, fill_mask, tair_infill, mae, bias = result

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print "Writer: Finished"
                return 0

        else:

            # Pick the output file and the station's column in it.
            if tair_var == 'tmin':
                stn_idx = np.nonzero(stnids_tmin == stn_id)[0][0]
                ds = ds_tmin
            else:
                stn_idx = np.nonzero(stnids_tmax == stn_id)[0][0]
                ds = ds_tmax

            ds.variables[tair_var][:, stn_idx] = tair
            ds.variables["".join([tair_var, "_infilled"])][:, stn_idx] = tair_infill
            ds.variables['flag_infilled'][:, stn_idx] = fill_mask
            ds.variables['bias'][stn_idx] = bias
            # The mae value goes into LAST_VAR_WRITTEN and is written last.
            # NOTE(review): presumably LAST_VAR_WRITTEN is 'mae' and marks
            # the station as complete for restarts -- confirm.
            ds.variables[LAST_VAR_WRITTEN][stn_idx] = mae
            ds.sync()

            print "|".join(["WRITER", stn_id, tair_var, "%.4f" % (mae,), "%.4f" % (bias,)])

            stat_chk.increment()
params_ppca['frac_obs_initnpcs'] = 0.5 params_ppca['ppca_varyexplain'] = 0.99 params_ppca['chk_perf'] = True params_ppca['npcs'] = 0 params_ppca['verbose'] = False if rank == RANK_COORD: # If previous log file specified, run infilling # only for stations that were suspect. if fpath_log is not None: print ("Initializing rerun of previous infill run. " "Process %d of %d"%(rank, nsize)) stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog, (start_ymd, end_ymd)) mask_tmin = np.isfinite(stn_da.stns[get_mean_varname('tmin', 1)]) mask_tmax = np.isfinite(stn_da.stns[get_mean_varname('tmax', 1)]) stnids_tmin = stn_da.stn_ids[mask_tmin] stnids_tmax = stn_da.stn_ids[mask_tmax] stnids_bad = get_bad_infill_stnids(fpath_log) stnids_to_infill_tmin = stnids_tmin[np.in1d(stnids_tmin, stnids_bad, True)] stnids_to_infill_tmax = stnids_tmax[np.in1d(stnids_tmax, stnids_bad, True)] stn_da.ds.close() stn_da = None mask_tmin = None
create_aux_db(twx_cfg.fpath_stndata_nc_all, twx_cfg.fpath_stndata_nc_infill_tmin, twx_cfg.fpath_stndata_nc_serial_tmin, twx_cfg.fpath_stndata_nc_aux_tmin, 'tmin', twx_cfg.interp_start_date.year, twx_cfg.interp_end_date.year, twx_cfg.twx_data_version) create_aux_db(twx_cfg.fpath_stndata_nc_all, twx_cfg.fpath_stndata_nc_infill_tmax, twx_cfg.fpath_stndata_nc_serial_tmax, twx_cfg.fpath_stndata_nc_aux_tmax, 'tmax', twx_cfg.interp_start_date.year, twx_cfg.interp_end_date.year, twx_cfg.twx_data_version) # Write CSV file of PHA adjustments stns = StationDataDb(twx_cfg.fpath_stndata_nc_tair_tobs_adj).stns def build_pha_log_fpath(elem): fpath_adj_log = os.path.join(twx_cfg.path_homog_pha, elem, 'run', 'data', 'benchmark', 'world1', 'output', 'pha_adj_%s.log' % elem) return fpath_adj_log pha_adj_tmin = get_pha_adj_df(build_pha_log_fpath('tmin'), stns, 'tmin') pha_adj_tmax = get_pha_adj_df(build_pha_log_fpath('tmax'), stns, 'tmax') pha_adj = pd.concat( [pha_adj_tmin.reset_index(), pha_adj_tmax.reset_index()], ignore_index=True)
raws = obsio.HdfObsIO(twx_cfg.fpath_stndata_hdf_raws) allio = obsio.MultiObsIO([ghcnd, snotel, raws]) # Sort station inserts by station id stnids_sorted = np.sort(allio.stns.station_id.values) # Create netcdf allio.to_netcdf(twx_cfg.fpath_stndata_nc_all, stnids_sorted, twx_cfg.obs_start_date, twx_cfg.obs_end_date, chk_rw=twx_cfg.stn_write_chunk_nc, verbose=True) # Add QA Flag observation variable for each element stndb = StationDataDb(twx_cfg.fpath_stndata_nc_all, mode='r+') for elem in twx_cfg.obs_main_elems: varname = "qflag_" + elem long_name = "quality assurance flag " + elem stndb.add_obs_variable(varname, long_name, '', 'S1', fill_value='', zlib=True, chunksizes=(stndb.days.size, 1), reset=True) stndb.ds.close()
def proc_write(twx_cfg, start_ymd, end_ymd, nwrkers):
    '''
    Writer process for infilling station monthly means and variances.

    Adds one mean and one variance station variable per month and per
    temperature variable to the station database, receives the tmin/tmax
    station masks broadcast from RANK_COORD, and then stores the twelve
    monthly means and variances of every result received from the
    workers. Returns 0 once nwrkers TAG_STOPWORK messages have arrived.
    '''
    status = MPI.Status()
    comm = MPI.COMM_WORLD
    nwrkrs_done = 0

    stn_da = StationDataDb(twx_cfg.fpath_stndata_nc_tair_homog,
                           (start_ymd, end_ymd), mode="r+")

    mths = np.arange(1, 13)

    for mth in mths:
        for tair_name in ['tmin', 'tmax']:
            name_mean = get_mean_varname(tair_name, mth)
            name_vari = get_variance_varname(tair_name, mth)
            stn_da.add_stn_variable(name_mean, name_mean, "C", 'f8')
            stn_da.add_stn_variable(name_vari, name_vari, "C**2", 'f8')

    stn_da.ds.sync()

    mask_por_tmin, mask_por_tmax = comm.bcast(None, root=RANK_COORD)
    stn_ids_tmin = stn_da.stn_ids[mask_por_tmin]
    stn_ids_tmax = stn_da.stn_ids[mask_por_tmax]
    print("WRITER: Received broadcast msg")

    stn_ids_uniq = np.unique(np.concatenate([stn_ids_tmin, stn_ids_tmax]))

    # Position of every selected station along the database's station axis.
    stn_idxs = {}
    for pos, a_stn_id in enumerate(stn_da.stn_ids):
        if a_stn_id in stn_ids_uniq:
            stn_idxs[a_stn_id] = pos

    ttl_infills = stn_ids_tmin.size + stn_ids_tmax.size

    stat_chk = StatusCheck(ttl_infills, 30)

    while 1:

        stn_id, tair_var, stn_mean, stn_vari = comm.recv(
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

        if status.tag == TAG_STOPWORK:

            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print("WRITER: Finished")
                return 0
            continue

        stnid_dim = stn_idxs[stn_id]

        # stn_mean / stn_vari hold one value per month, January first.
        for mth_idx, mth in enumerate(mths):
            ds_vars = stn_da.ds.variables
            ds_vars[get_mean_varname(tair_var, mth)][stnid_dim] = stn_mean[mth_idx]
            ds_vars[get_variance_varname(tair_var, mth)][stnid_dim] = stn_vari[mth_idx]

        stn_da.ds.sync()

        stat_chk.increment()