import time
from datetime import datetime

# NOTE: the SEIMS helpers referenced below (StringClass, DBTableNames,
# ModelCfgFields, StationFields, DataValueFields, read_data_items_from_txt,
# and ImportObservedData) are assumed to be imported elsewhere in this package.


def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS, i.e., file.in and file.out.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # Create the configuration collections if they do not exist; otherwise
    # drop them so that they can be rebuilt from scratch.
    c_list = maindb.collection_names()
    conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
    for item in conf_tabs:
        if not StringClass.string_in_list(item, c_list):
            maindb.create_collection(item)
        else:
            maindb.drop_collection(item)
    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)

    # Import the Tag|Value pairs of file.in.
    for item in file_in_items:
        file_in_dict = dict()
        values = StringClass.split_string(StringClass.strip_string(item[0]), ['|'])
        if len(values) != 2:
            raise ValueError("Each item of file.in must have exactly one tag and"
                             " one value string, separated by '|'!")
        file_in_dict[ModelCfgFields.tag] = values[0]
        file_in_dict[ModelCfgFields.value] = values[1]
        maindb[DBTableNames.main_filein].insert_one(file_in_dict)

    # Begin to import the initial output settings.
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    out_data_array = file_out_items[1:]
    known_out_flds = [ModelCfgFields.mod_cls, ModelCfgFields.output_id,
                      ModelCfgFields.desc, ModelCfgFields.unit,
                      ModelCfgFields.type, ModelCfgFields.stime,
                      ModelCfgFields.etime, ModelCfgFields.interval,
                      ModelCfgFields.interval_unit, ModelCfgFields.filename,
                      ModelCfgFields.use, ModelCfgFields.subbsn]
    for item in out_data_array:
        file_out_dict = dict()
        # Copy every recognized column of the header row into the document.
        for i, v in enumerate(out_field_array):
            for fld in known_out_flds:
                if StringClass.string_match(fld, v):
                    file_out_dict[fld] = item[i]
                    break
        if not file_out_dict:
            raise ValueError('No valid output item is found in file.out!')
        bulk.insert(file_out_dict)
    bulk.execute()

    # Begin to import the user-desired outputs defined in file.out by updating
    # the initial settings imported above.
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    for cur_data_item in data_items:
        if len(cur_data_item) != 7:
            raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
        data_import = {ModelCfgFields.output_id: cur_data_item[0],
                       ModelCfgFields.type: cur_data_item[1],
                       ModelCfgFields.stime: cur_data_item[2],
                       ModelCfgFields.etime: cur_data_item[3],
                       ModelCfgFields.interval: cur_data_item[4],
                       ModelCfgFields.interval_unit: cur_data_item[5],
                       ModelCfgFields.subbsn: cur_data_item[6],
                       ModelCfgFields.use: 1}
        cur_filter = {ModelCfgFields.output_id: cur_data_item[0]}
        bulk.find(cur_filter).update({'$set': data_import})
    # Execute the import operations.
    bulk.execute()
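
# Usage sketch (illustrative only): `DemoConfig` below is a hypothetical
# stand-in that mimics just the attributes model_io_configuration() reads;
# the file paths and database name are placeholders, not SEIMS defaults.
#
#     from collections import namedtuple
#     from pymongo import MongoClient
#
#     ModelCfgs = namedtuple('ModelCfgs', ['filein', 'fileout'])
#     ParamCfgs = namedtuple('ParamCfgs', ['init_outputs_file'])
#     DemoConfig = namedtuple('DemoConfig', ['modelcfgs', 'paramcfgs'])
#     demo_cfg = DemoConfig(ModelCfgs('file.in', 'file.out'),
#                           ParamCfgs('outputs_template.txt'))
#     client = MongoClient('localhost', 27017)
#     model_io_configuration(demo_cfg, client['demo_model_main'])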
def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
    """Read observed data from txt files and import them into MongoDB.

    Args:
        hydro_clim_db: hydro-climate database
        obs_txts_list: txt file paths of observed data
        sites_info_txts_list: txt file paths of site information
        subbsn_file: subbasin raster file

    Returns:
        True or False
    """
    # 1. Read the monitoring station information, and store the variable
    #    information and station IDs.
    variable_lists = []
    site_ids = []
    for site_file in sites_info_txts_list:
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            types = []  # initialized per row in case the TYPE column is absent
            for j in range(len(site_data_items[i])):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(site_data_items[i][j])
                    site_ids.append(dic[StationFields.id])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = StringClass.strip_string(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.type):
                    # A station may observe several variable types, separated by ','.
                    types = StringClass.split_string(
                        StringClass.strip_string(site_data_items[i][j]), ',')
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.unit):
                    dic[StationFields.unit] = StringClass.strip_string(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(site_data_items[i][j])
            # Upsert one station document per observed variable type.
            for cur_type in types:
                site_dic = dict()
                site_dic[StationFields.id] = dic[StationFields.id]
                site_dic[StationFields.name] = dic[StationFields.name]
                site_dic[StationFields.type] = cur_type
                site_dic[StationFields.lat] = dic[StationFields.lat]
                site_dic[StationFields.lon] = dic[StationFields.lon]
                site_dic[StationFields.x] = dic[StationFields.x]
                site_dic[StationFields.y] = dic[StationFields.y]
                site_dic[StationFields.elev] = dic[StationFields.elev]
                site_dic[StationFields.outlet] = dic[StationFields.outlet]
                # Add the SubbasinID field by matching the station to the
                # subbasin raster.
                matched, cur_subbsn_id = ImportObservedData.match_subbasin(subbsn_file,
                                                                           site_dic)
                if not matched:
                    break
                cur_subbsn_id_str = ','.join(str(tmp_id) for tmp_id in cur_subbsn_id
                                             if tmp_id is not None)
                site_dic[StationFields.id] = cur_subbsn_id_str
                curfilter = {StationFields.id: site_dic[StationFields.id],
                             StationFields.type: site_dic[StationFields.type]}
                hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                       upsert=True)
                var_dic = {StationFields.type: cur_type,
                           StationFields.unit: dic[StationFields.unit]}
                if var_dic not in variable_lists:
                    variable_lists.append(var_dic)
    site_ids = list(set(site_ids))

    # 2. Read the measurement data and import them to MongoDB.
    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    count = 0
    for meas_data_file in obs_txts_list:
        obs_data_items = read_data_items_from_txt(meas_data_file)
        # If the data items are empty or only the header row exists, go to
        # the next data file.
        if len(obs_data_items) <= 1:
            continue
        obs_flds = obs_data_items[0]
        required_flds = [StationFields.id, DataValueFields.y, DataValueFields.m,
                         DataValueFields.d, DataValueFields.type, DataValueFields.value]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, obs_flds):
                raise ValueError('%s does not meet the required format!' % meas_data_file)
        for i in range(1, len(obs_data_items)):
            dic = dict()
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j in range(len(obs_data_items[i])):
                if StringClass.string_match(obs_flds[j], StationFields.id):
                    dic[StationFields.id] = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.y):
                    cur_y = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.m):
                    cur_m = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.d):
                    cur_d = int(obs_data_items[i][j])
                elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                    dic[DataValueFields.type] = obs_data_items[i][j]
                elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                    dic[DataValueFields.value] = float(obs_data_items[i][j])
            # If the current site ID is not a known station, go to the next
            # data item.
            if dic.get(StationFields.id) not in site_ids:
                continue
            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone // 3600
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                utc_time[2], utc_time[3])
            curfilter = {StationFields.id: dic[StationFields.id],
                         DataValueFields.type: dic[DataValueFields.type],
                         DataValueFields.utc: dic[DataValueFields.utc]}
            bulk.find(curfilter).replace_one(dic)
            count += 1
            # Execute the bulk operations every 500 records to bound memory use.
            if count % 500 == 0:
                bulk.execute()
                bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    if count % 500 != 0:
        bulk.execute()

    # 3. Add the measurement data with units converted.
    added_dics = []
    for cur_var in variable_lists:
        # If the unit is mg/L, rename the type with the suffix "Conc" and
        # convert the corresponding values to kg when discharge data are
        # available; if the unit is kg, do the reverse.
        cur_type = cur_var[StationFields.type]
        cur_unit = cur_var[StationFields.unit]
        # Find the data items by type.
        for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
            dic = dict()
            dic[StationFields.id] = item[StationFields.id]
            dic[DataValueFields.value] = item[DataValueFields.value]
            dic[StationFields.type] = item[StationFields.type]
            dic[DataValueFields.local_time] = item[DataValueFields.local_time]
            dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
            dic[DataValueFields.utc] = item[DataValueFields.utc]
            if cur_unit == 'mg/L':
                # Rename the type of the original concentration data first.
                dic[StationFields.type] = cur_type + 'Conc'
                curfilter = {StationFields.id: dic[StationFields.id],
                             DataValueFields.type: cur_type,
                             DataValueFields.utc: dic[DataValueFields.utc]}
                hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                          upsert=True)
                dic[StationFields.type] = cur_type
            # Find the discharge (Q) of the same site on the same day.
            cur_filter = {StationFields.type: 'Q',
                          DataValueFields.utc: dic[DataValueFields.utc],
                          StationFields.id: dic[StationFields.id]}
            q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
            if q_dic is None:  # no discharge data, so no conversion is possible
                continue
            q = q_dic[DataValueFields.value]
            if cur_unit == 'mg/L':
                # Convert mg/L to kg: multiply by Q, 86400 s/day, and
                # divide by 1000 L/m^3.
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400. / 1000., 2)
            elif cur_unit == 'kg':
                dic[StationFields.type] = cur_type + 'Conc'
                # Convert kg to mg/L by inverting the formula above.
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] / q * 1000. / 86400., 2)
            # Add the new data item.
            added_dics.append(dic)
    # Import the converted data items to MongoDB.
    for dic in added_dics:
        curfilter = {StationFields.id: dic[StationFields.id],
                     DataValueFields.type: dic[DataValueFields.type],
                     DataValueFields.utc: dic[DataValueFields.utc]}
        hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                  upsert=True)
    return True
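
# A worked sketch of the unit conversion in step 3 above: a concentration C
# in mg/L combined with the daily mean discharge Q (assumed to be in m^3/s,
# as is conventional) yields a daily load in kg via C * Q * 86400 / 1000
# (86400 s per day, 1000 L per m^3); the kg -> mg/L direction inverts this.
# The values below are hypothetical.
#
#     >>> conc_mg_per_l, q_m3_per_s = 2.5, 3.0
#     >>> round(conc_mg_per_l * q_m3_per_s * 86400. / 1000., 2)  # kg per day
#     648.0
#     >>> round(648.0 / q_m3_per_s * 1000. / 86400., 2)          # back to mg/L
#     2.5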