def get_time_system_from_data_file(in_file): """Get the time system from the data file. The basic format is: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME """ time_sys = 'LOCALTIME' time_zone = time.timezone // -3600 with open(in_file, 'r') as f: lines = f.readlines() for line in lines: str_line = line.strip() # for LF in LFs: # if LF in line: # str_line = line.split(LF)[0] # break if str_line[0] != '#': break if str_line.lower().find('utc') >= 0: time_sys = 'UTCTIME' time_zone = 0 break if str_line.lower().find('local') >= 0: line_list = StringClass.split_string(str_line, [',']) if len(line_list) == 2 and MathClass.isnumerical(line_list[1]): time_zone = -1 * int(line_list[1]) break return time_sys, time_zone
def get_time_system_from_data_file(in_file):
    # type: (str) -> (str, int)
    """Get the time system from the data file.

    The basic format is: #<time_system> [<time_zone>],
    e.g., #LOCALTIME 8, #LOCALTIME -2, #UTCTIME

    Returns:
        time_sys: 'UTCTIME' or 'LOCALTIME'
        time_zone(int): Positive for West time zone, and negative for East.
    """
    time_sys = 'LOCALTIME'
    time_zone = time.timezone // 3600
    with open(in_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        str_line = line.strip()
        # for LF in LFs:
        #     if LF in line:
        #         str_line = line.split(LF)[0]
        #         break
        if str_line[0] != '#':
            break
        if str_line.lower().find('utc') >= 0:
            time_sys = 'UTCTIME'
            time_zone = 0
            break
        if str_line.lower().find('local') >= 0:
            line_list = StringClass.split_string(str_line, [' ', ','])
            if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                time_zone = -1 * int(line_list[1])
            break
    return time_sys, time_zone
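# Usage sketch for the function above (hypothetical file name; assumes it is exposed as a
# static method of HydroClimateUtilClass, as it is referenced elsewhere in this code).
# A data file whose first line is '#LOCALTIME 8' would yield ('LOCALTIME', -8) under the
# documented convention (positive for West, negative for East); '#UTCTIME' yields ('UTCTIME', 0).
#
#   time_sys, time_zone = HydroClimateUtilClass.get_time_system_from_data_file('pcp_example.txt')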
def output_runtime_to_log(title, lines, logfile):
    if logfile is None:
        return
    fname = FileClass.get_core_name_without_suffix(title)
    time_dict = {'name': fname, 'readt': 0, 'writet': 0, 'computet': 0, 'totalt': 0}
    for line in lines:
        # print(line)
        line = line.lower()
        time_value = line.split(os.linesep)[0].split(':')[-1]
        if not MathClass.isnumerical(time_value):
            continue
        time_value = float(time_value)
        if line.find('read') >= 0 and line.find('time') >= 0:
            time_dict['readt'] += time_value
        elif line.find('compute') >= 0 and line.find('time') >= 0:
            time_dict['computet'] += time_value
        elif line.find('write') >= 0 and line.find('time') >= 0:
            time_dict['writet'] += time_value
        elif line.find('total') >= 0 and line.find('time') >= 0:
            time_dict['totalt'] += time_value
    TauDEM.write_time_log(logfile, time_dict)
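# Illustrative input for output_runtime_to_log. The exact wording of TauDEM's runtime report
# varies between tools and versions, so the lines below are an assumed example only:
#
#   lines = ['Read time: 0.52', 'Compute time: 3.41', 'Write time: 0.20', 'Total time: 4.13']
#   TauDEM.output_runtime_to_log('aread8', lines, 'runtime.log')
#   # -> 0.52, 3.41, 0.20, and 4.13 are accumulated into readt/computet/writet/totalt
#   #    and appended to 'runtime.log' via TauDEM.write_time_log().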
def initial_params_from_txt(cfg, maindb):
    """Import initial calibration parameters from a text data file.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    # Drop the collection if it exists; create it if not.
    c_list = maindb.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
        maindb.create_collection(DBTableNames.main_parameter)
    else:
        maindb.drop_collection(DBTableNames.main_parameter)
    # initialize bulk operator
    bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
    # read initial parameters from txt file
    data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
    field_names = data_items[0][0:]
    # print(field_names)
    for i, cur_data_item in enumerate(data_items):
        if i == 0:
            continue
        # print(cur_data_item)
        # Initialize a default blank parameter dict.
        data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                       ModelParamFields.unit: '', ModelParamFields.module: '',
                       ModelParamFields.value: DEFAULT_NODATA,
                       ModelParamFields.impact: DEFAULT_NODATA,
                       ModelParamFields.change: 'NC',
                       ModelParamFields.max: DEFAULT_NODATA,
                       ModelParamFields.min: DEFAULT_NODATA,
                       ModelParamFields.type: ''}
        for k, v in list(data_import.items()):
            idx = field_names.index(k)
            if cur_data_item[idx] == '':
                if StringClass.string_match(k, ModelParamFields.change_ac):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_rc):
                    data_import[k] = 1
                elif StringClass.string_match(k, ModelParamFields.change_nc):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_vc):
                    data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when used!
            else:
                if MathClass.isnumerical(cur_data_item[idx]):
                    data_import[k] = float(cur_data_item[idx])
                else:
                    data_import[k] = cur_data_item[idx]
        bulk.insert(data_import)
    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
    # Create an index on parameter type and name, both ascending.
    maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                      (ModelParamFields.name, ASCENDING)])
def delete_model_outputs(model_workdir, hostname, port, dbname):
    """Delete model outputs and scenario in MongoDB."""
    f_list = os.listdir(model_workdir)
    sids = list()
    for f in f_list:
        outfilename = model_workdir + os.path.sep + f
        if os.path.isdir(outfilename):
            if len(f) > 9:
                if MathClass.isnumerical(f[-9:]):
                    shutil.rmtree(outfilename)
                    sid = int(f[-9:])
                    sids.append(sid)
    if len(sids) > 0:
        delete_scenarios_by_ids(hostname, port, dbname, sids)
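# Usage sketch (hypothetical workspace and database names). Output folders whose names end
# with a nine-digit numeric scenario ID, e.g. 'OUTPUT000000123', are removed from disk and
# the matching scenario IDs are deleted from MongoDB via delete_scenarios_by_ids().
#
#   delete_model_outputs('/data/demo_model', '127.0.0.1', 27017, 'demo_model_db')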
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMPs Scenario data to MongoDB.

    Args:
        cfg: SEIMS configuration object
        main_db: climate database
        scenario_db: scenario database

    Returns:
        False if failed, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print('Import BMP Scenario Data... ')
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    bmp_tabs = list()
    bmp_tabs_path = list()
    for f in bmp_files:
        bmp_tabs.append(f.split('.')[0])
        bmp_tabs_path.append(cfg.scenario_dir + os.path.sep + f)
    # initialize if collection not existed
    c_list = scenario_db.collection_names()
    for item in bmp_tabs:
        if not StringClass.string_in_list(item.upper(), c_list):
            scenario_db.create_collection(item.upper())
        else:
            scenario_db.drop_collection(item.upper())
    # Read subbasin.tif and dist2Stream.tif
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
    # End reading
    for j, bmp_txt in enumerate(bmp_tabs_path):
        bmp_tab_name = bmp_tabs[j]
        data_array = read_data_items_from_txt(bmp_txt)
        field_array = data_array[0]
        data_array = data_array[1:]
        for item in data_array:
            dic = dict()
            for i, field_name in enumerate(field_array):
                if MathClass.isnumerical(item[i]):
                    v = float(item[i])
                    if v % 1. == 0.:
                        v = int(v)
                    dic[field_name.upper()] = v
                else:
                    dic[field_name.upper()] = str(item[i]).upper()
            if StringClass.string_in_list(ImportScenario2Mongo._LocalX, list(dic.keys())) and \
                    StringClass.string_in_list(ImportScenario2Mongo._LocalY, list(dic.keys())):
                subbsn_id = subbasin_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                distance = dist2stream_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                if subbsn_id is not None and distance is not None:
                    dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                    dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic, upsert=True)
            else:
                scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic, upsert=True)
    # print('BMP tables are imported.')
    # Write BMP database name into Model workflow database
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info_dic = dict()
    bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic, bmp_info_dic,
                                                             upsert=True)
    return True
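# Sketch of the per-row transformation performed above (hypothetical values, hypothetical
# column names that are assumed to match ImportScenario2Mongo._LocalX/_LocalY): a BMP table
# row parsed as
#   {'NAME': 'POINT_SOURCE', 'LOCALX': 39542.3, 'LOCALY': 3543678.2, ...}
# gains the subbasin ID and downstream distance looked up from subbasin.tif and
# dist2Stream.tif at (LOCALX, LOCALY) before being upserted into the scenario database.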
def test_mathclass_isnumerical():
    assert MathClass.isnumerical('78')
    assert MathClass.isnumerical('1.e-5')
    assert not MathClass.isnumerical(None)
    assert not MathClass.isnumerical('a1.2')
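# For context, a minimal sketch of the kind of check the test exercises. This is an
# illustrative stand-in written for this document, not the actual PyGeoC implementation
# of MathClass.isnumerical:
def _isnumerical_sketch(x):
    """Return True if x can be parsed as a float, False otherwise."""
    if x is None:
        return False
    try:
        float(x)
    except (TypeError, ValueError):
        return False
    return True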
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """Interpolate irregular observed data to regular time interval data.

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field name can be PCP, FLOW, SED;
                 the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of output time_system, the format must be
                         '<time_system> [<time_zone>]', e.g.,
                         'LOCALTIME'
                         'LOCALTIME 8'
                         'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59

    Returns:
        The output data files are located in the same directory with the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available fields
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just add the end time for convenience of computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():
                # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!'
                          % item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # the input is mm/h, and the output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta
    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
def lookup_tables_as_collection_and_gridfs(cfg, maindb):
    """Import lookup tables (from txt file) as Collection and GridFS.

    Args:
        cfg: SEIMS config object
        maindb: workflow model database
    """
    for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
        # import each lookup table as a collection and GridFS file.
        c_list = maindb.collection_names()
        if not StringClass.string_in_list(tablename.upper(), c_list):
            maindb.create_collection(tablename.upper())
        else:
            maindb.drop_collection(tablename.upper())
        # initial bulk operator
        bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
        # delete if the tablename gridfs file existed
        spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
        if spatial.exists(filename=tablename.upper()):
            x = spatial.get_version(filename=tablename.upper())
            spatial.delete(x._id)
        # read data items
        data_items = read_data_items_from_txt(txt_file)
        field_names = data_items[0][0:]
        item_values = list()  # import as gridfs file
        for i, cur_data_item in enumerate(data_items):
            if i == 0:
                continue
            data_import = dict()  # import as Collection
            item_value = list()  # import as gridfs file
            for idx, fld in enumerate(field_names):
                if MathClass.isnumerical(cur_data_item[idx]):
                    tmp_value = float(cur_data_item[idx])
                    data_import[fld] = tmp_value
                    item_value.append(tmp_value)
                else:
                    data_import[fld] = cur_data_item[idx]
            bulk.insert(data_import)
            if len(item_value) > 0:
                item_values.append(item_value)
        MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)
        # begin import gridfs file
        n_row = len(item_values)
        # print(item_values)
        if n_row >= 1:
            n_col = len(item_values[0])
            for i in range(n_row):
                if n_col != len(item_values[i]):
                    raise ValueError('Please check %s to make sure each item has '
                                     'the same numeric dimension. The size of first '
                                     'row is: %d, and the current data item is: %d'
                                     % (tablename, n_col, len(item_values[i])))
                else:
                    item_values[i].insert(0, n_col)
            metadic = {ModelParamDataUtils.item_count: n_row,
                       ModelParamDataUtils.field_count: n_col}
            cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
            header = [n_row]
            fmt = '%df' % 1
            s = pack(fmt, *header)
            cur_lookup_gridfs.write(s)
            fmt = '%df' % (n_col + 1)
            for i in range(n_row):
                s = pack(fmt, *item_values[i])
                cur_lookup_gridfs.write(s)
            cur_lookup_gridfs.close()
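# A hedged sketch of how the packed lookup-table GridFS file written above could be read back.
# It assumes the same layout as the writer (one float for the row count, then per row one float
# for the column count followed by the column values); the helper name read_lookup_gridfs is
# introduced here for illustration only.
from struct import unpack
from gridfs import GridFS

def read_lookup_gridfs(maindb, tablename):
    """Return the numeric lookup table stored by the writer above as a list of rows."""
    spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
    data = spatial.get_version(filename=tablename.upper()).read()
    n_row = int(unpack('1f', data[:4])[0])  # first 4 bytes: number of rows
    rows, offset = list(), 4
    for _ in range(n_row):
        n_col = int(unpack('1f', data[offset:offset + 4])[0])  # per-row column count
        values = unpack('%df' % n_col, data[offset + 4:offset + 4 + 4 * n_col])
        rows.append(list(values))
        offset += 4 * (n_col + 1)
    return rows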
def run(function_name, in_files, wp=None, in_params=None, out_files=None, mpi_params=None,
        log_params=None):
    """Run TauDEM function.

    1. The command will not execute if any input file does not exist.
    2. An error will be detected after running the TauDEM command if
       any output file does not exist;

    Args:
        function_name (str): Full path of TauDEM function.
        in_files (dict, required): Dict of pairs of parameter id (string) and file path
            (string or list) for input files, e.g.::

                {'-z': '/full/path/to/dem.tif'}

        wp (str, optional): Workspace for outputs. If not specified, the directory of the
            first input file in ``in_files`` will be used.
        in_params (dict, optional): Dict of pairs of parameter id (string) and value
            (or None for a flag parameter without a value) for input parameters, e.g.::

                {'-nc': None}
                {'-thresh': threshold}
                {'-m': 'ave' 's', '-nc': None}

        out_files (dict, optional): Dict of pairs of parameter id (string) and file
            path (string or list) for output files, e.g.::

                {'-fel': 'filleddem.tif'}
                {'-maxS': ['harden.tif', 'maxsimi.tif']}

        mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for MPI setting, e.g.::

                {'mpipath': '/soft/bin', 'hostfile': '/soft/bin/cluster.node', 'n': 4}
                {'mpipath': '/soft/bin', 'n': 4}
                {'n': 4}

        log_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for runtime and log output parameters, e.g.::

                {'logfile': '/home/user/log.txt', 'runtimefile': '/home/user/runtime.txt'}

    Returns:
        True if TauDEM run successfully, otherwise False.
    """
    # Check input files
    if in_files is None:
        TauDEM.error('Input files parameter is required!')
    if not isinstance(in_files, dict):
        TauDEM.error('The input files parameter must be a dict!')
    for (pid, infile) in list(in_files.items()):
        if infile is None:
            continue
        if isinstance(infile, list) or isinstance(infile, tuple):
            for idx, inf in enumerate(infile):
                if inf is None:
                    continue
                inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                in_files[pid][idx] = inf
            continue
        if os.path.exists(infile):
            infile, wp = TauDEM.check_infile_and_wp(infile, wp)
            in_files[pid] = os.path.abspath(infile)
        else:
            # For more flexible input file extensions,
            # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
            # In such an unpredictable circumstance, we cannot check the existence of
            # input files, so the developer will check it elsewhere.
            if len(StringClass.split_string(infile, ' ')) > 1:
                continue
            else:
                # the infile should still be an existing file, so check it in the workspace
                if wp is None:
                    TauDEM.error('Workspace should not be None!')
                infile = wp + os.sep + infile
                if not os.path.exists(infile):
                    TauDEM.error('Input files parameter %s: %s is not existed!'
                                 % (pid, infile))
                in_files[pid] = os.path.abspath(infile)
    # Create the workspace directory if it does not exist
    UtilClass.mkdir(wp)
    # Check the log parameter
    log_file = None
    runtime_file = None
    if log_params is not None:
        if not isinstance(log_params, dict):
            TauDEM.error('The log parameter must be a dict!')
        if 'logfile' in log_params and log_params['logfile'] is not None:
            log_file = log_params['logfile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in log_file:
                log_file = wp + os.sep + log_file
            log_file = os.path.abspath(log_file)
        if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
            runtime_file = log_params['runtimefile']
            # If runtime_file is just a file name, then save it in the default workspace.
            if os.sep not in runtime_file:
                runtime_file = wp + os.sep + runtime_file
            runtime_file = os.path.abspath(runtime_file)
    # Remove out_files to avoid any file-IO-related error
    new_out_files = list()
    if out_files is not None:
        if not isinstance(out_files, dict):
            TauDEM.error('The output files parameter must be a dict!')
        for (pid, out_file) in list(out_files.items()):
            if out_file is None:
                continue
            if isinstance(out_file, list) or isinstance(out_file, tuple):
                for idx, outf in enumerate(out_file):
                    if outf is None:
                        continue
                    outf = FileClass.get_file_fullpath(outf, wp)
                    FileClass.remove_files(outf)
                    out_files[pid][idx] = outf
                    new_out_files.append(outf)
            else:
                out_file = FileClass.get_file_fullpath(out_file, wp)
                FileClass.remove_files(out_file)
                out_files[pid] = out_file
                new_out_files.append(out_file)
    # Concatenate the command line
    commands = list()
    # MPI header
    if mpi_params is not None:
        if not isinstance(mpi_params, dict):
            TauDEM.error('The MPI settings parameter must be a dict!')
        if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
            commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
        else:
            commands.append('mpiexec')
        if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                and os.path.isfile(mpi_params['hostfile']):
            commands.append('-f')
            commands.append(mpi_params['hostfile'])
        if 'n' in mpi_params and mpi_params['n'] > 1:
            commands.append('-n')
            commands.append(str(mpi_params['n']))
        else:
            # If the number of processes is no more than 1, do not call mpiexec.
            commands = []
    # Append the TauDEM function name, which can be a full path or just the name
    commands.append(function_name)
    # Append input files
    for (pid, infile) in list(in_files.items()):
        if infile is None:
            continue
        if pid[0] != '-':
            pid = '-' + pid
        commands.append(pid)
        if isinstance(infile, list) or isinstance(infile, tuple):
            commands.append(' '.join(tmpf for tmpf in infile))
        else:
            commands.append(infile)
    # Append input parameters
    if in_params is not None:
        if not isinstance(in_params, dict):
            TauDEM.error('The input parameters must be a dict!')
        for (pid, v) in list(in_params.items()):
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            # Allow for a parameter that is a flag without a value
            if v != '' and v is not None:
                if MathClass.isnumerical(v):
                    commands.append(str(v))
                else:
                    commands.append(v)
    # Append output parameters
    if out_files is not None:
        for (pid, outfile) in list(out_files.items()):
            if outfile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(outfile, list) or isinstance(outfile, tuple):
                commands.append(' '.join(tmpf for tmpf in outfile))
            else:
                commands.append(outfile)
    # Run the command
    runmsg = UtilClass.run_command(commands)
    TauDEM.log(runmsg, log_file)
    TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
    # Check out_files, raise RuntimeError if any of them does not exist.
    for of in new_out_files:
        if not os.path.exists(of):
            TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of))
            return False
    return True
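# A minimal usage sketch (hypothetical paths; pit filling with TauDEM's 'pitremove' tool, whose
# '-z'/'-fel' parameters are the same ones shown in the docstring above). Whether this exact
# call succeeds depends on the local TauDEM and MPI installation.
#
#   TauDEM.run('pitremove',
#              {'-z': '/full/path/to/dem.tif'},
#              wp='/full/path/to/workspace',
#              out_files={'-fel': 'dem_filled.tif'},
#              mpi_params={'mpipath': '/usr/bin', 'n': 4},
#              log_params={'logfile': 'taudem.log', 'runtimefile': 'runtime.log'})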
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """Interpolate irregular observed data to regular time interval data.

    Todo: Not tested yet!

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field name can be PCP, FLOW, SED;
                 the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of output time_system, the format must be
                         '<time_system> [<time_zone>]', e.g.,
                         'LOCALTIME'
                         'LOCALTIME 8'
                         'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59

    Returns:
        The output data files are located in the same directory with the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv.
        Note that `.txt` format is also supported.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available fields
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone // 3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = -1 * int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just add the end time for convenience of computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():
                # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!'
                          % item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # the input is mm/h, and the output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta
    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.csv'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
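# A usage sketch under assumed file names: aggregate irregular precipitation records in
# 'pcp_obs.txt' (formatted as described in the docstring above) to daily (1440-minute) values
# in UTC, with the day boundary at 00:00. Output would be written next to the input file,
# e.g. as PCP_UTCTIME_1440.csv.
#
#   interpolate_observed_data_to_regular_interval('pcp_obs.txt', 1440,
#                                                 '2013-01-01 00:00:00',
#                                                 '2013-12-31 23:59:59',
#                                                 eliminate_zero=False,
#                                                 time_sys_output='UTCTIME',
#                                                 day_divided_hour=0)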