def initial_params_from_txt(cfg, maindb):
    """Import initial calibration parameters from a text data file.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    # Delete the collection if it exists, create it if it does not.
    c_list = maindb.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
        maindb.create_collection(DBTableNames.main_parameter)
    else:
        maindb.drop_collection(DBTableNames.main_parameter)
    # Create the bulk operator.
    bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
    # Read initial parameters from the text file.
    data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
    field_names = data_items[0]
    for i, cur_data_item in enumerate(data_items):
        if i == 0:
            continue
        # Initialize one default blank parameter dict.
        data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                       ModelParamFields.unit: '', ModelParamFields.module: '',
                       ModelParamFields.value: DEFAULT_NODATA,
                       ModelParamFields.impact: DEFAULT_NODATA,
                       ModelParamFields.change: 'NC',
                       ModelParamFields.max: DEFAULT_NODATA,
                       ModelParamFields.min: DEFAULT_NODATA,
                       ModelParamFields.type: ''}
        for k, v in data_import.items():
            idx = field_names.index(k)
            if cur_data_item[idx] == '':
                if StringClass.string_match(k, ModelParamFields.change_ac):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_rc):
                    data_import[k] = 1
                elif StringClass.string_match(k, ModelParamFields.change_nc):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_vc):
                    data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when used!
            else:
                if MathClass.isnumerical(cur_data_item[idx]):
                    data_import[k] = float(cur_data_item[idx])
                else:
                    data_import[k] = cur_data_item[idx]
        bulk.insert(data_import)
    # Execute the bulk insert operations.
    bulk.execute()
    # Create an index on parameter type and name in ascending order.
    maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                      (ModelParamFields.name, ASCENDING)])
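# Illustration only: a minimal sketch of how this importer might be wired to MongoDB with
# pymongo. 'seims_cfg' stands for an already-built SEIMS preprocessing config object and the
# host/port are assumptions; MongoClient() and close() are the only pymongo calls used.
def _demo_import_model_parameters(seims_cfg):
    from pymongo import MongoClient
    client = MongoClient('127.0.0.1', 27017)  # connect to a local MongoDB server
    maindb = client[seims_cfg.spatial_db]  # the main spatial/model database
    initial_params_from_txt(seims_cfg, maindb)
    client.close()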
def variable_table(db, var_file):
    """Import the variables description table."""
    var_data_items = read_data_items_from_txt(var_file)
    var_flds = var_data_items[0]
    for i in range(1, len(var_data_items)):
        dic = {}
        for j in range(len(var_data_items[i])):
            if StringClass.string_match(var_flds[j], VariableDesc.type):
                dic[VariableDesc.type] = var_data_items[i][j]
            elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                dic[VariableDesc.unit] = var_data_items[i][j]
        # If the item already exists, update it; otherwise insert a new one.
        curfilter = {VariableDesc.type: dic[VariableDesc.type]}
        db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir):
    """Generate initial landcover parameter rasters from the landcover lookup text file."""
    lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
    field_names = lc_data_items[0]
    lu_id = -1
    for i, v in enumerate(field_names):
        if StringClass.string_match(v, 'LANDUSE_ID'):
            lu_id = i
            break
    data_items = lc_data_items[1:]
    replace_dicts = dict()
    for item in data_items:
        for i, v in enumerate(item):
            if i != lu_id:
                if field_names[i].upper() not in replace_dicts:
                    replace_dicts[field_names[i].upper()] = {float(item[lu_id]): float(v)}
                else:
                    replace_dicts[field_names[i].upper()][float(item[lu_id])] = float(v)
    # Generate GeoTIFF rasters by reclassifying the landcover raster.
    for item, v in replace_dicts.items():
        filename = dst_dir + SEP + item + '.tif'
        print(filename)
        RasterUtilClass.raster_reclassify(landcover_file, v, filename)
    return replace_dicts['LANDCOVER'].values()
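# Illustration only: the shape of the 'replace_dicts' mapping assembled above, filled with
# hypothetical landuse IDs and values. Each parameter name maps a LANDUSE_ID to the initial
# value that raster_reclassify() burns into '<PARAM>.tif'.
_example_replace_dicts = {
    'LANDCOVER': {1.0: 1.0, 8.0: 8.0, 33.0: 33.0},
    'ROOTDEPTH': {1.0: 3.5, 8.0: 1.2, 33.0: 0.9},
}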
def export_landuse_lookup_files_from_mongodb(cfg, maindb):
    """Export landuse lookup tables from MongoDB to text files."""
    lookup_dir = cfg.dirs.lookup
    property_namelist = ModelParamDataUtils.landuse_fields
    property_map = {}
    property_namelist.append('USLE_P')
    query_result = maindb['LANDUSELOOKUP'].find()
    if query_result is None:
        raise RuntimeError("The LANDUSELOOKUP collection does not exist or is empty!")
    count = 0
    for row in query_result:
        value_map = dict()
        for i, p_name in enumerate(property_namelist):
            if StringClass.string_match(p_name, "USLE_P"):
                # Currently, USLE_P is set to 1 for all landuse types.
                value_map[p_name] = 1
            else:
                if StringClass.string_match(p_name, "Manning"):
                    value_map[p_name] = row.get(p_name) * 10
                else:
                    value_map[p_name] = row.get(p_name)
        count += 1
        property_map[count] = value_map
    n = len(property_map)
    UtilClass.rmmkdir(lookup_dir)
    for property_name in property_namelist:
        f = open("%s/%s.txt" % (lookup_dir, property_name), 'w')
        f.write("%d\n" % n)
        for prop_id in property_map:
            s = "%d %f\n" % (prop_id, property_map[prop_id][property_name])
            f.write(s)
        f.close()
def sites_table(hydro_clim_db, site_file, site_type):
    """Import the HydroClimate sites table."""
    sites_loc = dict()
    site_data_items = read_data_items_from_txt(site_file)
    site_flds = site_data_items[0]
    for i in range(1, len(site_data_items)):
        dic = dict()
        for j in range(len(site_data_items[i])):
            if StringClass.string_match(site_flds[j], StationFields.id):
                dic[StationFields.id] = int(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.name):
                dic[StationFields.name] = site_data_items[i][j]
            elif StringClass.string_match(site_flds[j], StationFields.x):
                dic[StationFields.x] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.y):
                dic[StationFields.y] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.lon):
                dic[StationFields.lon] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.lat):
                dic[StationFields.lat] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.elev):
                dic[StationFields.elev] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.outlet):
                dic[StationFields.outlet] = float(site_data_items[i][j])
        dic[StationFields.type] = site_type
        curfilter = {StationFields.id: dic[StationFields.id],
                     StationFields.type: dic[StationFields.type]}
        hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)
        if dic[StationFields.id] not in sites_loc:
            sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                        dic[StationFields.name],
                                                        dic[StationFields.lat],
                                                        dic[StationFields.lon],
                                                        dic[StationFields.x],
                                                        dic[StationFields.y],
                                                        dic[StationFields.elev])
    hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                    (StationFields.type, ASCENDING)])
    return sites_loc
def __init__(self, cf):
    # 1. Directories
    self.base_dir = None
    self.clim_dir = None
    self.spatial_dir = None
    self.observe_dir = None
    self.scenario_dir = None
    self.model_dir = None
    self.txt_db_dir = None
    self.preproc_script_dir = None
    self.seims_bin = None
    self.mpi_bin = None
    self.workspace = None
    # 1.1. Directory related flags
    self.use_observed = True
    self.use_scernario = True
    # 2. MongoDB configuration and database, collection, GridFS names
    self.hostname = '127.0.0.1'  # localhost by default
    self.port = 27017
    self.climate_db = ''
    self.bmp_scenario_db = ''
    self.spatial_db = ''
    # 3. Switches for building SEIMS
    self.cluster = False
    self.storm_mode = False
    self.gen_cn = True
    self.gen_runoff_coef = True
    self.gen_crop = True
    self.gen_iuh = True
    # 4. Climate inputs
    self.hydro_climate_vars = None
    self.prec_sites = None
    self.prec_data = None
    self.Meteo_sites = None
    self.Meteo_data = None
    self.thiessen_field = 'ID'
    # 5. Spatial inputs
    self.prec_sites_thiessen = None
    self.meteo_sites_thiessen = None
    self.dem = None
    self.outlet_file = None
    self.landuse = None
    self.landcover_init_param = None
    self.soil = None
    self.soil_property = None
    self.mgt_field = None
    # 6. Optional parameters
    self.is_TauDEM = True
    self.d8acc_threshold = 0
    self.np = 4
    self.d8down_method = 's'
    self.dorm_hr = -1.
    self.temp_base = 0.
    self.imper_perc_in_urban = 0.3
    self.default_reach_depth = 5.
    self.default_landuse = -1
    self.default_soil = -1

    # 1. Directories
    if 'PATH' in cf.sections():
        self.base_dir = cf.get('PATH', 'base_data_dir')
        self.clim_dir = cf.get('PATH', 'climate_data_dir')
        self.spatial_dir = cf.get('PATH', 'spatial_data_dir')
        self.observe_dir = cf.get('PATH', 'measurement_data_dir')
        self.scenario_dir = cf.get('PATH', 'bmp_data_dir')
        self.model_dir = cf.get('PATH', 'model_dir')
        self.txt_db_dir = cf.get('PATH', 'txt_db_dir')
        self.preproc_script_dir = cf.get('PATH', 'preproc_script_dir')
        self.seims_bin = cf.get('PATH', 'cpp_program_dir')
        self.mpi_bin = cf.get('PATH', 'mpiexec_dir')
        self.workspace = cf.get('PATH', 'working_dir')
    else:
        raise ValueError("The [PATH] section MUST exist in the *.ini file.")
    if not (FileClass.is_file_exists(self.base_dir)
            and FileClass.is_file_exists(self.model_dir)
            and FileClass.is_file_exists(self.txt_db_dir)
            and FileClass.is_file_exists(self.preproc_script_dir)
            and FileClass.is_file_exists(self.seims_bin)):
        raise IOError("Please check the directories defined in [PATH]. "
                      "BASE_DIR, MODEL_DIR, TXT_DB_DIR, PREPROC_SCRIPT_DIR, "
                      "and CPP_PROGRAM_DIR are required!")
    if not FileClass.is_file_exists(self.mpi_bin):
        self.mpi_bin = None
    if not os.path.isdir(self.workspace):
        try:  # first, try to create the working directory
            os.mkdir(self.workspace)
        except OSError as exc:
            self.workspace = self.model_dir + os.sep + 'preprocess_output'
            print("WARNING: Creating WORKING_DIR failed: %s. Use the default: %s"
                  % (str(exc), self.workspace))
            if not os.path.exists(self.workspace):
                os.mkdir(self.workspace)
    self.dirs = DirNameUtils(self.workspace)
    self.logs = LogNameUtils(self.dirs.log)
    self.vecs = VectorNameUtils(self.dirs.geoshp)
    self.taudems = TauDEMFilesUtils(self.dirs.taudem)
    self.spatials = SpatialNamesUtils(self.dirs.geodata2db)
    self.modelcfgs = ModelCfgUtils(self.model_dir)
    self.paramcfgs = ModelParamDataUtils(self.preproc_script_dir + os.sep + 'database')
    if not FileClass.is_file_exists(self.clim_dir):
        print("The CLIMATE_DATA_DIR does not exist, try the default folder name 'climate'.")
        self.clim_dir = self.base_dir + os.sep + 'climate'
        if not FileClass.is_file_exists(self.clim_dir):
            raise IOError("A directory named 'climate' MUST be located in BASE_DATA_DIR!")
    if not FileClass.is_file_exists(self.spatial_dir):
        print("The SPATIAL_DATA_DIR does not exist, try the default folder name 'spatial'.")
        self.spatial_dir = self.base_dir + os.sep + 'spatial'
        if not FileClass.is_file_exists(self.spatial_dir):
            raise IOError("A directory named 'spatial' MUST be located in BASE_DATA_DIR!")
    if not FileClass.is_file_exists(self.observe_dir):
        self.observe_dir = None
        self.use_observed = False
    if not FileClass.is_file_exists(self.scenario_dir):
        self.scenario_dir = None
        self.use_scernario = False

    # 2. MongoDB related
    if 'MONGODB' in cf.sections():
        self.hostname = cf.get('MONGODB', 'hostname')
        self.port = cf.getint('MONGODB', 'port')
        self.climate_db = cf.get('MONGODB', 'climatedbname')
        self.bmp_scenario_db = cf.get('MONGODB', 'BMPScenarioDBName')
        self.spatial_db = cf.get('MONGODB', 'SpatialDBName')
    else:
        raise ValueError("The [MONGODB] section MUST exist in the *.ini file.")
    if not StringClass.is_valid_ip_addr(self.hostname):
        raise ValueError("The HOSTNAME defined in [MONGODB] is illegal!")

    # 3. Model related switches.
    # By default, the OpenMP version and daily (longterm) mode will be built.
    if 'SWITCH' in cf.sections():
        self.cluster = cf.getboolean('SWITCH', 'forCluster')
        self.storm_mode = cf.getboolean('SWITCH', 'stormMode')
        self.gen_cn = cf.getboolean('SWITCH', 'genCN')
        self.gen_runoff_coef = cf.getboolean('SWITCH', 'genRunoffCoef')
        self.gen_crop = cf.getboolean('SWITCH', 'genCrop')
    if self.storm_mode:
        self.gen_iuh = False
    self.climate_db = ModelNameUtils.standardize_climate_dbname(self.climate_db)
    self.spatial_db = ModelNameUtils.standardize_spatial_dbname(self.cluster, self.storm_mode,
                                                                self.spatial_db)

    # 4. Climate inputs
    if 'CLIMATE' in cf.sections():
        self.hydro_climate_vars = self.clim_dir + os.sep + cf.get('CLIMATE',
                                                                  'hydroclimatevarfile')
        self.prec_sites = self.clim_dir + os.sep + cf.get('CLIMATE', 'precsitefile')
        self.prec_data = self.clim_dir + os.sep + cf.get('CLIMATE', 'precdatafile')
        self.Meteo_sites = self.clim_dir + os.sep + cf.get('CLIMATE', 'meteositefile')
        self.Meteo_data = self.clim_dir + os.sep + cf.get('CLIMATE', 'meteodatafile')
        self.thiessen_field = cf.get('CLIMATE', 'thiessenidfield')
    else:
        raise ValueError("Climate input file names MUST be provided in [CLIMATE]!")

    # 5. Spatial inputs
    if 'SPATIAL' in cf.sections():
        self.prec_sites_thiessen = self.spatial_dir + os.sep + cf.get('SPATIAL',
                                                                      'precsitesthiessen')
        self.meteo_sites_thiessen = self.spatial_dir + os.sep + cf.get('SPATIAL',
                                                                       'meteositesthiessen')
        self.dem = self.spatial_dir + os.sep + cf.get('SPATIAL', 'dem')
        self.outlet_file = self.spatial_dir + os.sep + cf.get('SPATIAL', 'outlet_file')
        if not os.path.exists(self.outlet_file):
            self.outlet_file = None
        self.landuse = self.spatial_dir + os.sep + cf.get('SPATIAL', 'landusefile')
        self.landcover_init_param = self.txt_db_dir + os.sep + cf.get('SPATIAL',
                                                                      'landcoverinitfile')
        self.soil = self.spatial_dir + os.sep + cf.get('SPATIAL', 'soilseqnfile')
        self.soil_property = self.txt_db_dir + os.sep + cf.get('SPATIAL', 'soilseqntext')
        self.mgt_field = self.spatial_dir + os.sep + cf.get('SPATIAL', 'mgtfieldfile')
        if not os.path.exists(self.mgt_field) or \
                StringClass.string_match(self.mgt_field, 'none'):
            self.mgt_field = None
    else:
        raise ValueError("Spatial input file names MUST be provided in [SPATIAL]!")

    # 6. Optional parameters
    if 'OPTIONAL_PARAMETERS' in cf.sections():
        self.is_TauDEM = cf.getboolean('OPTIONAL_PARAMETERS', 'istaudemd8')
        self.d8acc_threshold = cf.getfloat('OPTIONAL_PARAMETERS', 'd8accthreshold')
        self.np = cf.getint('OPTIONAL_PARAMETERS', 'np')
        self.d8down_method = cf.get('OPTIONAL_PARAMETERS', 'd8downmethod')
        if StringClass.string_match(self.d8down_method, 'surface'):
            self.d8down_method = 's'
        elif StringClass.string_match(self.d8down_method, 'horizontal'):
            self.d8down_method = 'h'
        elif StringClass.string_match(self.d8down_method, 'pythagoras'):
            self.d8down_method = 'p'
        elif StringClass.string_match(self.d8down_method, 'vertical'):
            self.d8down_method = 'v'
        else:
            self.d8down_method = self.d8down_method.lower()
            if self.d8down_method not in ['s', 'h', 'p', 'v']:
                self.d8down_method = 'h'
        self.dorm_hr = cf.getfloat('OPTIONAL_PARAMETERS', 'dorm_hr')
        self.temp_base = cf.getfloat('OPTIONAL_PARAMETERS', 't_base')
        self.imper_perc_in_urban = cf.getfloat('OPTIONAL_PARAMETERS',
                                               'imperviouspercinurbancell')
        self.default_reach_depth = cf.getfloat('OPTIONAL_PARAMETERS', 'default_reach_depth')
        self.default_landuse = cf.getint('OPTIONAL_PARAMETERS', 'defaultlanduse')
        self.default_soil = cf.getint('OPTIONAL_PARAMETERS', 'defaultsoil')
def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS, i.e., file.in and file.out.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # Create the collections if they do not exist, otherwise drop them first.
    c_list = maindb.collection_names()
    conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
    for item in conf_tabs:
        if not StringClass.string_in_list(item, c_list):
            maindb.create_collection(item)
        else:
            maindb.drop_collection(item)
    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)

    for item in file_in_items:
        file_in_dict = dict()
        values = StringClass.split_string(StringClass.strip_string(item[0]), ['|'])
        if len(values) != 2:
            raise ValueError("One item should have exactly one tag and one value string, "
                             "separated by '|'.")
        file_in_dict[ModelCfgFields.tag] = values[0]
        file_in_dict[ModelCfgFields.value] = values[1]
        maindb[DBTableNames.main_filein].insert(file_in_dict)

    # Import the initial output settings.
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    out_data_array = file_out_items[1:]
    for item in out_data_array:
        file_out_dict = dict()
        for i, v in enumerate(out_field_array):
            if StringClass.string_match(ModelCfgFields.mod_cls, v):
                file_out_dict[ModelCfgFields.mod_cls] = item[i]
            elif StringClass.string_match(ModelCfgFields.output_id, v):
                file_out_dict[ModelCfgFields.output_id] = item[i]
            elif StringClass.string_match(ModelCfgFields.desc, v):
                file_out_dict[ModelCfgFields.desc] = item[i]
            elif StringClass.string_match(ModelCfgFields.unit, v):
                file_out_dict[ModelCfgFields.unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.type, v):
                file_out_dict[ModelCfgFields.type] = item[i]
            elif StringClass.string_match(ModelCfgFields.stime, v):
                file_out_dict[ModelCfgFields.stime] = item[i]
            elif StringClass.string_match(ModelCfgFields.etime, v):
                file_out_dict[ModelCfgFields.etime] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval, v):
                file_out_dict[ModelCfgFields.interval] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                file_out_dict[ModelCfgFields.interval_unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.filename, v):
                file_out_dict[ModelCfgFields.filename] = item[i]
            elif StringClass.string_match(ModelCfgFields.use, v):
                file_out_dict[ModelCfgFields.use] = item[i]
            elif StringClass.string_match(ModelCfgFields.subbsn, v):
                file_out_dict[ModelCfgFields.subbsn] = item[i]
        if not file_out_dict:
            raise ValueError("There is not any valid output item stored in file.out!")
        bulk.insert(file_out_dict)
    bulk.execute()

    # Import the desired outputs defined in file.out of the model directory.
    # Create the bulk operator and read the settings from the text file.
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    for i, cur_data_item in enumerate(data_items):
        data_import = dict()
        cur_filter = dict()
        if len(cur_data_item) == 7:
            data_import[ModelCfgFields.output_id] = cur_data_item[0]
            data_import[ModelCfgFields.type] = cur_data_item[1]
            data_import[ModelCfgFields.stime] = cur_data_item[2]
            data_import[ModelCfgFields.etime] = cur_data_item[3]
            data_import[ModelCfgFields.interval] = cur_data_item[4]
            data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
            data_import[ModelCfgFields.subbsn] = cur_data_item[6]
            data_import[ModelCfgFields.use] = 1
            cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
        else:
            raise RuntimeError("Items in file.out must have 7 columns, i.e., OUTPUTID,"
                               "TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.")
        bulk.find(cur_filter).update({'$set': data_import})
    # Execute the bulk update operations.
    bulk.execute()
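# Illustration only: the 'Tag|Value' layout of file.in parsed above, with hypothetical tag
# names and values. Each non-empty line yields one document holding a tag and a value field.
_example_filein_lines = [
    'MODE|Daily',
    'INTERVAL|1',
]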
def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
    """Read observed data from text files and import them to MongoDB.

    Args:
        hydro_clim_db: hydro-climate database
        obs_txts_list: txt file paths of observed data
        sites_info_txts_list: txt file paths of site information
        subbsn_file: subbasin raster file

    Returns:
        True or False
    """
    # 1. Read monitoring station information, and store variable information and station IDs.
    variable_lists = []
    site_ids = []
    for site_file in sites_info_txts_list:
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = {}
            types = []
            for j in range(len(site_data_items[i])):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(site_data_items[i][j])
                    site_ids.append(dic[StationFields.id])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = StringClass.strip_string(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.type):
                    types = StringClass.split_string(
                        StringClass.strip_string(site_data_items[i][j]), ',')
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.unit):
                    dic[StationFields.unit] = StringClass.strip_string(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(site_data_items[i][j])
            for j, cur_type in enumerate(types):
                site_dic = dict()
                site_dic[StationFields.id] = dic[StationFields.id]
                site_dic[StationFields.name] = dic[StationFields.name]
                site_dic[StationFields.type] = cur_type
                site_dic[StationFields.lat] = dic[StationFields.lat]
                site_dic[StationFields.lon] = dic[StationFields.lon]
                site_dic[StationFields.x] = dic[StationFields.x]
                site_dic[StationFields.y] = dic[StationFields.y]
                site_dic[StationFields.elev] = dic[StationFields.elev]
                site_dic[StationFields.outlet] = dic[StationFields.outlet]
                # Add the SubbasinID field by locating the site on the subbasin raster.
                matched, cur_subbsn_id = ImportObservedData.match_subbasin(subbsn_file,
                                                                           site_dic)
                if not matched:
                    break
                cur_subbsn_id_str = ''
                for tmp_id in cur_subbsn_id:
                    if tmp_id is not None:
                        cur_subbsn_id_str += str(tmp_id) + ','
                cur_subbsn_id_str = cur_subbsn_id_str[:-1]
                site_dic[StationFields.id] = cur_subbsn_id_str
                curfilter = {StationFields.id: site_dic[StationFields.id],
                             StationFields.type: site_dic[StationFields.type]}
                hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                       upsert=True)
                var_dic = dict()
                var_dic[StationFields.type] = types[j]
                var_dic[StationFields.unit] = dic[StationFields.unit]
                if var_dic not in variable_lists:
                    variable_lists.append(var_dic)
    site_ids = list(set(site_ids))

    # 2. Read measurement data and import them to MongoDB.
    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    count = 0
    for measDataFile in obs_txts_list:
        obs_data_items = read_data_items_from_txt(measDataFile)
        # If the data items are empty or only contain a header row, go to the next data file.
        if obs_data_items == [] or len(obs_data_items) == 1:
            continue
        obs_flds = obs_data_items[0]
        required_flds = [StationFields.id, DataValueFields.y, DataValueFields.m,
                         DataValueFields.d, DataValueFields.type, DataValueFields.value]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, obs_flds):
                # The data file does not meet the required format.
                raise ValueError("The %s can not meet the required format!" % measDataFile)
        for i in range(1, len(obs_data_items)):
            dic = dict()
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j in range(len(obs_data_items[i])):
                if StringClass.string_match(obs_flds[j], StationFields.id):
                    dic[StationFields.id] = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.y):
                    cur_y = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.m):
                    cur_m = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.d):
                    cur_d = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                    dic[DataValueFields.type] = obs_data_items[i][j]
                elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                    dic[DataValueFields.value] = float(obs_data_items[i][j])
            # If the current site ID is not included, go to the next data item.
            if dic[StationFields.id] not in site_ids:
                continue
            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone / 3600
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2],
                                                utc_time[3])
            curfilter = {StationFields.id: dic[StationFields.id],
                         DataValueFields.type: dic[DataValueFields.type],
                         DataValueFields.utc: dic[DataValueFields.utc]}
            bulk.find(curfilter).replace_one(dic)
            count += 1
            if count % 500 == 0:
                bulk.execute()
                bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    if count % 500 != 0:
        bulk.execute()

    # 3. Add measurement data with units converted.
    # Loop the variable list: if the unit is mg/L, rename the type with the suffix "Conc"
    # and convert the corresponding value to kg when discharge data are available;
    # if the unit is kg, do the reverse conversion.
    added_dics = []
    for curVar in variable_lists:
        cur_type = curVar[StationFields.type]
        cur_unit = curVar[StationFields.unit]
        # Find data by type.
        for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
            dic = dict()
            dic[StationFields.id] = item[StationFields.id]
            dic[DataValueFields.value] = item[DataValueFields.value]
            dic[StationFields.type] = item[StationFields.type]
            dic[DataValueFields.local_time] = item[DataValueFields.local_time]
            dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
            dic[DataValueFields.utc] = item[DataValueFields.utc]
            if cur_unit == "mg/L":
                # Update the type name with the "Conc" suffix.
                dic[StationFields.type] = cur_type + "Conc"
                curfilter = {StationFields.id: dic[StationFields.id],
                             DataValueFields.type: cur_type,
                             DataValueFields.utc: dic[DataValueFields.utc]}
                hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                          upsert=True)
                dic[StationFields.type] = cur_type
            # Find the discharge of the current day.
            cur_filter = {StationFields.type: "Q",
                          DataValueFields.utc: dic[DataValueFields.utc],
                          StationFields.id: dic[StationFields.id]}
            q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
            q = -9999.
            if q_dic is not None:
                q = q_dic[DataValueFields.value]
            else:
                continue
            if cur_unit == "mg/L":
                # Convert mg/L to kg.
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400. / 1000., 2)
            elif cur_unit == "kg":
                dic[StationFields.type] = cur_type + "Conc"
                # Convert kg to mg/L.
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] / q * 1000. / 86400., 2)
            # Add the new data item.
            added_dics.append(dic)
    # Import the converted items to MongoDB.
    for dic in added_dics:
        curfilter = {StationFields.id: dic[StationFields.id],
                     DataValueFields.type: dic[DataValueFields.type],
                     DataValueFields.utc: dic[DataValueFields.utc]}
        hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
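# Illustration only: the unit-conversion arithmetic applied above. With a concentration in
# mg/L (i.e., g/m^3) and a daily mean discharge Q in m^3/s, the daily load in kg/day is
# conc * Q * 86400 / 1000; the reverse division recovers mg/L from a kg/day load.
def _conc_to_daily_load(conc_mg_per_l, q_m3_per_s):
    """Convert concentration (mg/L) and discharge (m3/s) to a daily load (kg/day)."""
    return round(conc_mg_per_l * q_m3_per_s * 86400. / 1000., 2)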
def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
    """Import daily meteorological data to the climate database."""
    # Delete existing meteorological data.
    climdb[DBTableNames.data_values].remove({DataValueFields.type: DataType.m})
    clim_data_items = read_data_items_from_txt(data_txt_file)
    clim_flds = clim_data_items[0]
    # hydro_climate_stats is used for calculating potential heat units (PHU) and annual
    # statistics for each climate station. Format: {StationID1: ClimateStats1, ...}
    hydro_climate_stats = {}
    required_flds = [DataValueFields.y, DataValueFields.m, DataValueFields.d,
                     DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
    for fld in required_flds:
        if not StringClass.string_in_list(fld, clim_flds):
            raise ValueError("The daily meteorological data is invalid, please check!")
    # Create the bulk operator.
    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
    count = 0
    for i, cur_clim_data_item in enumerate(clim_data_items):
        if i == 0:
            continue
        dic = dict()
        cur_ssd = DEFAULT_NODATA
        cur_y = 0
        cur_m = 0
        cur_d = 0
        for j, clim_data_v in enumerate(cur_clim_data_item):
            if StringClass.string_match(clim_flds[j], DataValueFields.id):
                dic[DataValueFields.id] = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataValueFields.y):
                cur_y = int(clim_data_v)
                dic[DataValueFields.y] = cur_y
            elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                cur_m = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                cur_d = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                dic[DataType.mean_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                dic[DataType.min_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                dic[DataType.max_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.pet):
                dic[DataType.pet] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.sr):
                dic[DataType.sr] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.ws):
                dic[DataType.ws] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.rm):
                dic[DataType.rm] = float(clim_data_v) * 0.01
            elif StringClass.string_match(clim_flds[j], DataType.ssd):
                cur_ssd = float(clim_data_v)
        # Date transformation.
        dt = datetime(cur_y, cur_m, cur_d, 0, 0)
        sec = time.mktime(dt.timetuple())
        utc_time = time.gmtime(sec)
        dic[DataValueFields.local_time] = dt
        dic[DataValueFields.time_zone] = time.timezone / 3600
        dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2], utc_time[3])
        # Derive variables that are not provided.
        if DataType.mean_tmp not in dic:
            dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
        if DataType.sr not in dic:
            if cur_ssd == DEFAULT_NODATA:
                raise ValueError(DataType.sr + " or " + DataType.ssd + " must be provided!")
            else:
                if dic[DataValueFields.id] in sites_info_dict:
                    cur_lon, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                    dic[DataType.sr] = round(
                        HydroClimateUtilClass.rs(DateClass.day_of_year(dt),
                                                 float(cur_ssd), cur_lat * PI / 180.), 1)
        output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                       DataType.rm, DataType.pet, DataType.ws, DataType.sr]
        for fld in output_flds:
            cur_dic = dict()
            if fld in dic:
                cur_dic[DataValueFields.value] = dic[fld]
                cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                cur_dic[DataValueFields.type] = fld
                # Inserting or updating one item at a time is inefficient,
                # so the bulk operation interface is used instead.
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    bulk.execute()
                    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        if dic[DataValueFields.id] not in hydro_climate_stats:
            hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
        hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
    # Execute the remaining records.
    if count % 500 != 0:
        bulk.execute()
    for item, cur_climate_stats in hydro_climate_stats.items():
        cur_climate_stats.annual_stats()
    # Create index.
    climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                   (DataValueFields.type, ASCENDING),
                                                   (DataValueFields.utc, ASCENDING)])
    # Prepare the annual statistics (PHU and mean temperature) for MongoDB.
    for s_id, stats_v in hydro_climate_stats.items():
        for YYYY in stats_v.Count.keys():
            cur_dic = dict()
            cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = YYYY
            cur_dic[VariableDesc.unit] = "heat units"
            cur_dic[VariableDesc.type] = DataType.phu_tot
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.phu_tot,
                         DataValueFields.y: YYYY}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
            # Import annual mean temperature.
            cur_dic[VariableDesc.type] = DataType.mean_tmp
            cur_dic[VariableDesc.unit] = "deg C"
            cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.mean_tmp,
                         DataValueFields.y: YYYY}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
        cur_dic[DataValueFields.value] = stats_v.PHU0
        cur_dic[DataValueFields.id] = s_id
        cur_dic[DataValueFields.y] = DEFAULT_NODATA
        cur_dic[VariableDesc.unit] = "heat units"
        cur_dic[VariableDesc.type] = DataType.phu0
        curfilter = {DataValueFields.id: s_id,
                     VariableDesc.type: DataType.phu0,
                     DataValueFields.y: DEFAULT_NODATA}
        climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True)
        # Import multi-annual mean temperature.
        cur_dic[VariableDesc.type] = DataType.mean_tmp0
        cur_dic[VariableDesc.unit] = "deg C"
        cur_dic[DataValueFields.value] = stats_v.MeanTmp0
        curfilter = {DataValueFields.id: s_id,
                     VariableDesc.type: DataType.mean_tmp0,
                     DataValueFields.y: DEFAULT_NODATA}
        climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True)
def regular_data_from_txt(climdb, data_file):
    """Import regular precipitation data from a text file."""
    # Delete existing precipitation data.
    climdb[DBTableNames.data_values].remove({DataValueFields.type: DataType.p})
    clim_data_items = read_data_items_from_txt(data_file)
    clim_flds = clim_data_items[0]
    station_id = []
    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
    count = 0
    # The first three columns are year, month, and day; the remaining columns are station IDs.
    for i in range(3, len(clim_flds)):
        station_id.append(clim_flds[i])
    for i, clim_data_item in enumerate(clim_data_items):
        if i == 0:
            continue
        dic = dict()
        precipitation = []
        cur_y = 0
        cur_m = 0
        cur_d = 0
        for j, clim_data_v in enumerate(clim_data_item):
            if StringClass.string_match(clim_flds[j], DataValueFields.y):
                cur_y = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                cur_m = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                cur_d = int(clim_data_v)
            else:
                for k, cur_id in enumerate(station_id):
                    if StringClass.string_match(clim_flds[j], cur_id):
                        precipitation.append(float(clim_data_v))
        dt = datetime(cur_y, cur_m, cur_d, 0, 0)
        sec = time.mktime(dt.timetuple())
        utc_time = time.gmtime(sec)
        dic[DataValueFields.local_time] = dt
        dic[DataValueFields.time_zone] = time.timezone / 3600.
        dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2], utc_time[3])
        for j, cur_id in enumerate(station_id):
            cur_dic = dict()
            cur_dic[DataValueFields.value] = precipitation[j]
            cur_dic[DataValueFields.id] = int(cur_id)
            cur_dic[DataValueFields.type] = DataType.p
            cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
            cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
            cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
            bulk.insert(cur_dic)
            count += 1
            if count % 500 == 0:  # execute every 500 records
                bulk.execute()
                bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
    if count % 500 != 0:
        bulk.execute()
    # Create index.
    climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                   (DataValueFields.type, ASCENDING),
                                                   (DataValueFields.utc, ASCENDING)])
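# Illustration only: a hypothetical layout of the regular precipitation text file parsed
# above. The header carries the year, month, and day fields followed by one column per
# station ID, and every later row holds one date plus each station's precipitation.
_example_prec_rows = [
    ['Y', 'M', 'D', '10001', '10002'],
    ['2014', '1', '1', '0.0', '2.5'],
    ['2014', '1', '2', '12.3', '9.8'],
]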