def transform(message, data):
    """Serialize `data` per the message's output type and attach it to the message.

    The message's 'app' field is set to its original 'name'. When the payload
    carries a 'spring.profiles' key, the message name is suffixed with the
    profile ('/' separator for vault storage, '-' otherwise).
    """
    logging.info("Transforming: message=[%s] data=[%s]" % (message, data))
    storage = message['storage']
    out_type = output_type(message['type'], storage)
    logging.info("output type: %s" % out_type)

    # Serialize only for the formats we know; otherwise pass data through.
    if out_type == 'yaml':
        payload = yaml.dump(data)
    elif out_type == 'json':
        payload = json.dumps(data)
    else:
        payload = data

    message['app'] = message['name']
    message['data'] = payload

    # flavor spring?
    if utils.has_key('spring.profiles', data):
        profile = utils.get_key('spring.profiles', data)
        name_pattern = "%s/%s" if storage == 'vault' else "%s-%s"
        message['name'] = name_pattern % (message['name'], profile)
    return message
def get_article_urls(query='', page=0):
    """Yield (page, url, article) tuples from a Harian Metro search-results page.

    Args:
        query: search string inserted into the site's search URL.
        page: results page number; appended as '&page=N' only when it is a
            non-zero int (page 0 / non-int falls back to the first page).

    Yields:
        (page, url, article) for each unique matching article link found.
    """
    yielded = []  # absolute URLs already emitted; de-duplicates within one call
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query,
        '' if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    # NOTE(review): `filter` and `map` are invoked as (iterable, function),
    # the reverse of the builtins' (function, iterable) signature, and
    # has_key/get_key/text_match are used as single-argument factories —
    # these are presumably project-local curried helpers (like open_soup /
    # get_article). Confirm: with the Python builtins this loop would fail.
    # The regex keeps only links shaped like /<section>/<YYYY>/<MM>/<id>/<slug>.
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')),
            text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded:
            continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))
def config_wps(work_root, wps_root, geog_root, config, args):
    """Prepare the WPS working directory and write a customized namelist.wps.

    Copies the stock namelist from `wps_root`, overrides share/geogrid/metgrid
    sections from `config`, writes it into `<work_root>/wps`, and plots the
    domains with NCL.

    Args:
        work_root: root of the run's working tree; namelist goes in <work_root>/wps.
        wps_root: WPS installation root (source of the template namelist.wps).
        geog_root: path assigned to geogrid's geog_data_path.
        config: parsed configuration dict ('custom', 'domains', 'geogrid', ...).
        args: CLI arguments (accepted for interface symmetry; not read here).
    """
    # start_time/end_time are optional, but end_time is required once
    # start_time is given (cli.error presumably aborts — confirm).
    if has_key(config, ('custom', 'start_time')):
        start_time = config['custom']['start_time']
        # Times appear to be pendulum/arrow-like objects exposing .format()
        # with 'YYYY-MM-DD_HH:mm:ss' tokens — TODO confirm the library.
        start_time_str = start_time.format('YYYY-MM-DD_HH:mm:ss')
        if not has_key(config, ('custom', 'end_time')):
            cli.error('custom->end_time does not exist in config file!')
        end_time = config['custom']['end_time']
        end_time_str = end_time.format('YYYY-MM-DD_HH:mm:ss')
    if not has_key(config, ('domains', 'max_dom')):
        cli.error('domains->max_dom does not exist in config file!')
    max_dom = config['domains']['max_dom']

    wps_work_dir = work_root + '/wps'
    if not os.path.isdir(wps_work_dir):
        os.makedirs(wps_work_dir)
    # NOTE: changes the process-wide CWD; later relative paths rely on this.
    os.chdir(wps_work_dir)

    version = wrf_version(wps_root)
    if version < Version('3.9.1'):
        cli.error(
            f'WPS {version} may not handle GFS data correctly! Please use WPS >= 3.9.1.'
        )
    cli.notice('Edit namelist.wps for WPS.')
    copy(f'{wps_root}/namelist.wps', 'namelist.wps')
    namelist_wps = f90nml.read('namelist.wps')
    namelist_wps['share']['max_dom'] = max_dom
    if has_key(config, ('custom', 'start_time')):
        # Same start date replicated for every domain.
        namelist_wps['share']['start_date'] = [
            start_time_str for i in range(max_dom)
        ]
    if has_key(config, ('custom', 'end_time')):
        # Only the outermost domain (i == 0) runs to end_time; nested domains
        # get start_time as their end date (single-time processing).
        namelist_wps['share']['end_date'] = [
            end_time_str if i == 0 else start_time_str for i in range(max_dom)
        ]
    if has_key(config, ('custom', 'background')) and has_key(
            config, ('custom', 'background', 'interval_seconds')):
        namelist_wps['share']['interval_seconds'] = config['custom'][
            'background']['interval_seconds']
    namelist_wps['geogrid']['geog_data_path'] = geog_root
    # Pass through every user geogrid option verbatim.
    for key, value in config['geogrid'].items():
        namelist_wps['geogrid'][key] = value
    namelist_wps['geogrid']['opt_geogrid_tbl_path'] = wps_work_dir
    namelist_wps['metgrid']['opt_metgrid_tbl_path'] = wps_work_dir
    if 'metgrid' in config:
        for key, value in config['metgrid'].items():
            namelist_wps['metgrid'][key] = value
    namelist_wps.write('./namelist.wps', force=True)
    # Plot the configured domains for visual inspection.
    run(f'ncl -Q {script_root}/../plots/plot_domains.ncl')
    cli.notice(f'Check {wps_work_dir}/wps_show_dom.pdf for domains.')
    cli.notice('Succeeded.')
def callback(ch, method, properties, body):
    """RabbitMQ consumer callback: fetch the file named in the message,
    split its content by type, and publish one message per part.

    Args:
        ch: channel (unused, required by the pika callback signature).
        method: delivery method (unused).
        properties: message properties; forwarded to the retry queue on failure.
        body: JSON payload with at least 'id' and 'type' keys.
    """
    p_body = json.loads(body)
    # Renamed locals so the builtins `id` and `type` are not shadowed.
    file_id = p_body['id']
    file_type = p_body['type']
    message = dict(p_body)

    # get file by id
    resp = api_client.get_file(file_id)
    if resp.status_code != 200:
        error_msg = "Failed to get file %s. [HTTP_STATUS=%i] - %s" % (
            file_id, resp.status_code, resp.text)
        rabbitmq.send_to_retry(error_msg, properties)
        # BUG FIX: the original fell through here, publishing the HTTP error
        # body as if it were file content and then deleting the file.
        return

    raw_data = resp.text
    logging.info("raw_file=[%s]" % raw_data)

    def _publish(data):
        # Attach one chunk to the message and publish it.
        message['data'] = data
        rabbitmq.publish(exchange, output_routing_key, message, conn)

    if file_type == 'yaml':
        # Multi-document YAML: one message per '---'-separated document.
        for data in raw_data.split('---'):
            _publish(data)
    elif file_type == 'json':
        j_body = json.loads(raw_data)
        if utils.has_key('seeder.splitter-key', j_body):
            # Split on the list found under the configured splitter key.
            s_key = utils.get_key('seeder.splitter-key', j_body)
            for data in j_body[s_key]:
                _publish(data)
        else:
            _publish(raw_data)
    else:
        # Unknown type: forward the raw content unchanged.
        _publish(raw_data)
    api_client.delete_file(file_id)
def config_wrfda(work_root, wrfda_root, config, args, wrf_work_dir=None, tag=None, fg=None):
    """Build a WRFDA namelist.input for one domain and write it to the work dir.

    The namelist is assembled from the README.namelist template shipped with
    WRFDA, merged with the tutorial namelist.input, then overridden from
    `config`. Grid attributes (NUM_LAND_CAT, HYPSOMETRIC_OPT) are read from an
    existing wrfinput/wrfout file, or from `fg` as a fallback.

    Args:
        work_root: root of the run's working tree.
        wrfda_root: WRFDA installation root.
        config: parsed configuration dict.
        args: CLI arguments (accepted for interface symmetry; not read here).
        wrf_work_dir: optional explicit WRF work dir; derived from
            work_root/tag when None.
        tag: optional suffix distinguishing parallel experiments.
        fg: optional first-guess NetCDF file path used when no wrfinput/wrfout
            is found.
    """
    start_time = config['custom']['start_time']
    end_time = config['custom']['end_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    max_dom = config['domains']['max_dom']

    # Need to take some parameters from wrfinput file.
    if not wrf_work_dir:
        if tag != None:
            wrf_work_dir = f'{work_root}/wrf_{tag}'
        else:
            wrf_work_dir = f'{work_root}/wrf'

    # WRFDA works on one domain at a time; pick it from config when nested.
    if max_dom > 1:
        if not has_key(config, ('custom', 'wrfda', 'dom')):
            cli.error(
                'You need to set custom->wrfda->dom to set which domain to work on!'
            )
        dom_idx = config['custom']['wrfda']['dom']
        # dom is 0-based in config; WRF directory names are 1-based (d01, d02, ...).
        dom_str = 'd' + str(dom_idx + 1).zfill(2)
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}/{dom_str}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda/{dom_str}'
    else:
        dom_idx = 0
        dom_str = 'd01'
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda'
    if not os.path.isdir(wrfda_work_dir):
        os.makedirs(wrfda_work_dir)
    # NOTE: changes the process-wide CWD.
    os.chdir(wrfda_work_dir)

    version = wrf_version(wrfda_root)

    # Grid attributes must match the first-guess file: prefer wrfinput,
    # then a wrfout valid at start_time, then the explicit fg argument.
    if os.path.isfile(f'{wrf_work_dir}/wrfinput_{dom_str}'):
        f = Dataset(f'{wrf_work_dir}/wrfinput_{dom_str}')
    elif os.path.isfile(f'{wrf_work_dir}/wrfout_{dom_str}_{start_time_str}'):
        f = Dataset(f'{wrf_work_dir}/wrfout_{dom_str}_{start_time_str}')
    elif fg:
        f = Dataset(fg)
    else:
        cli.error(
            f'config_wrfda: Cannot find wrfinput or wrfout in {wrf_work_dir} or wrfvar!'
        )
    num_land_cat = f.getncattr('NUM_LAND_CAT')
    hypsometric_opt = f.getncattr('HYPSOMETRIC_OPT')
    f.close()

    # Assimilation time window in minutes, centered on start_time.
    time_window = get_value(config, ('custom', 'wrfda', 'time_window'), 360)

    # Read in namelist template (not exact Fortran namelist format, we need to change it).
    template = open(f'{wrfda_root}/var/README.namelist').read()
    # Strip everything before the first '&' (prose header before the namelist).
    template = re.sub(r'^[^&]*', '', template, flags=re.DOTALL)
    # Strip ';' inline comments and '(...)' annotations so f90nml can parse it.
    template = re.sub(r';.*', '', template)
    template = re.sub(r'\([^\)]*\)', '', template)
    namelist_input = f90nml.read(StringIO(template))
    namelist_input['wrfvar1']['var4d_lbc'] = False
    namelist_input['wrfvar18']['analysis_date'] = start_time_str
    # Window is split evenly around the analysis time.
    namelist_input['wrfvar21']['time_window_min'] = start_time.subtract(
        minutes=time_window / 2).format(datetime_fmt)
    namelist_input['wrfvar22']['time_window_max'] = start_time.add(
        minutes=time_window / 2).format(datetime_fmt)
    # Fix bugs
    namelist_input['wrfvar2']['qc_rej_both'] = False
    namelist_input['wrfvar14']['rtminit_satid'] = -1
    namelist_input['wrfvar14']['rtminit_sensor'] = -1
    if version == Version('3.6.1'):
        # 3.6.1's README uses misspelled keys; rename them to the real ones.
        namelist_input['wrfvar4']['use_iasiobs'] = False
        del namelist_input['wrfvar4']['use_iasisobs']
        namelist_input['wrfvar4']['use_seviriobs'] = False
        del namelist_input['wrfvar4']['use_sevirisobs']
        namelist_input['wrfvar5']['max_omb_spd'] = namelist_input['wrfvar5'][
            'max_omb_sp']
        del namelist_input['wrfvar5']['max_omb_sp']
        namelist_input['wrfvar5']['max_error_spd'] = namelist_input['wrfvar5'][
            'max_error_sp']
        del namelist_input['wrfvar5']['max_error_sp']
    elif version > Version('3.8.1'):
        namelist_input['wrfvar11']['write_detail_grad_fn'] = True
        namelist_input['wrfvar11']['calculate_cg_cost_fn'] = True

    # Merge namelist.input in tutorial.
    tmp = f90nml.read(f'{wrfda_root}/var/test/tutorial/namelist.input')
    for key, value in tmp.items():
        # Only add sections the template did not already provide.
        if not key in namelist_input:
            namelist_input[key] = value
    namelist_input['time_control']['run_hours'] = config['custom'][
        'forecast_hours']
    # Per-domain start/end components, replicated across max_dom entries.
    namelist_input['time_control']['start_year'] = [
        int(start_time.format("Y")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_month'] = [
        int(start_time.format("M")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_day'] = [
        int(start_time.format("D")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_hour'] = [
        int(start_time.format("H")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_year'] = [
        int(end_time.format("Y")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_month'] = [
        int(end_time.format("M")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_day'] = [
        int(end_time.format("D")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_hour'] = [
        int(end_time.format("H")) for i in range(max_dom)
    ]
    namelist_input['time_control']['frames_per_outfile'] = [
        1 for i in range(max_dom)
    ]
    # User overrides win over everything set above.
    for key, value in config['time_control'].items():
        namelist_input['time_control'][key] = value
    for key, value in config['domains'].items():
        namelist_input['domains'][key] = value
    # WRFDA only take grids parameters one domain at a time.
    namelist_input['domains']['max_dom'] = 1
    for key in ('e_we', 'e_sn', 'e_vert', 'dx', 'dy', 'grid_id', 'parent_id',
                'i_parent_start', 'j_parent_start', 'parent_grid_ratio',
                'parent_time_step_ratio'):
        # Collapse per-domain lists down to the selected domain's scalar.
        if key in config['domains']:
            namelist_input['domains'][key] = config['domains'][key][dom_idx]
    namelist_input['domains']['hypsometric_opt'] = hypsometric_opt

    # Sync physics parameters.
    if 'physics' in config:
        for key, value in config['physics'].items():
            namelist_input['physics'][key] = value
    # Must match the wrfinput/first-guess file read above.
    namelist_input['physics']['num_land_cat'] = num_land_cat
    if version == Version('3.9.1'):
        namelist_input['dynamics']['gwd_opt'] = 0

    # Write customized parameters.
    # NOTE(review): this loop rebinds the `tag` parameter; it is not used
    # again afterwards, but the shadowing is worth confirming intentional.
    for tag in range(1, 23):
        section = f'wrfvar{tag}'
        # NOTE(review): raises KeyError if config lacks a wrfvar section —
        # presumably the config schema guarantees wrfvar1..wrfvar22 exist.
        for key, value in config[section].items():
            namelist_input[section][key] = value

    # Validate some parameters.
    for key in ('as1', 'as2', 'as3', 'as4', 'as5'):
        if namelist_input['wrfvar7'][key] == -1:
            cli.error(f'wrfvar7->{key} is -1!')

    namelist_input.write(f'{wrfda_work_dir}/namelist.input', force=True)
    cli.notice('Succeeded.')
def run_wrfda_obsproc(work_root, wrfda_root, littler_root, config, args, wrf_work_dir=None, tag=None):
    """Configure and run WRFDA's obsproc.exe on a LITTLE_R observation file.

    Builds namelist.obsproc from the 3DVar tutorial template, fills the domain
    extent from wrfinput_d01 (falling back to geogrid config), links the
    LITTLE_R input, and submits obsproc.exe.

    Args:
        work_root: root of the run's working tree.
        wrfda_root: WRFDA installation root.
        littler_root: root directory holding LITTLE_R observation files.
        config: parsed configuration dict.
        args: CLI arguments; args.force re-runs even if outputs exist.
        wrf_work_dir: optional explicit WRF work dir; derived when None.
        tag: optional suffix distinguishing parallel experiments.
    """
    start_time = config['custom']['start_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    if not wrf_work_dir:
        if tag != None:
            wrf_work_dir = f'{work_root}/wrf_{tag}'
        else:
            wrf_work_dir = f'{work_root}/wrf'
    if tag != None:
        wrfda_work_dir = f'{work_root}/wrfda_{tag}/obsproc'
    else:
        wrfda_work_dir = f'{work_root}/wrfda/obsproc'
    if not os.path.isdir(wrfda_work_dir):
        os.mkdir(wrfda_work_dir)
    # NOTE: changes the process-wide CWD.
    os.chdir(wrfda_work_dir)

    cli.notice('Use builtin obserr.')
    run(f'ln -sf {wrfda_root}/var/obsproc/obserr.txt {wrfda_work_dir}')

    # Use d01 domain extent.
    if check_files([f'{wrf_work_dir}/wrfinput_d01_{start_time_str}']):
        ncfile = Dataset(f'{wrf_work_dir}/wrfinput_d01_{start_time_str}', 'r')
        # NOTE(review): iproj is read but never written into the namelist
        # below — confirm whether record7 should also receive IPROJ.
        iproj = ncfile.getncattr('MAP_PROJ')
        phic = ncfile.getncattr('CEN_LAT')
        xlonc = ncfile.getncattr('CEN_LON')
        moad_cen_lat = ncfile.getncattr('MOAD_CEN_LAT')
        standard_lon = ncfile.getncattr('STAND_LON')
        ncfile.close()
    else:
        # Fall back to the geogrid config when no wrfinput file exists yet.
        iproj = config['geogrid']['map_proj']
        phic = config['geogrid']['ref_lat']
        xlonc = config['geogrid']['ref_lon']
        moad_cen_lat = config['geogrid']['ref_lat']
        standard_lon = config['geogrid']['ref_lon']

    output_format = get_value(config, ['custom', 'obsproc', 'output_format'], default=2)
    # Assimilation time window in minutes, centered on start_time.
    time_window = get_value(config, ['custom', 'wrfda', 'time_window'], default=360)

    # Only 3DVar is supported; anything else aborts. Default to 3DVar when
    # custom->da->type is absent.
    if has_key(config, ('custom', 'da', 'type')):
        if config['custom']['da']['type'] == '3dvar':
            namelist_obsproc = f90nml.read(
                f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut')
        else:
            cli.error('Currently, we only support 3DVar...')
    else:
        namelist_obsproc = f90nml.read(
            f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut')

    namelist_obsproc['record1'][
        'obs_gts_filename'] = f'obs.gts.{start_time.format("YYYYMMDDHHmm")}'
    namelist_obsproc['record2']['time_window_min'] = start_time.subtract(
        minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record2']['time_analysis'] = start_time.format(
        'YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record2']['time_window_max'] = start_time.add(
        minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record3']['max_number_of_obs'] = 1200000
    namelist_obsproc['record7']['PHIC'] = phic
    namelist_obsproc['record7']['XLONC'] = xlonc
    namelist_obsproc['record7']['MOAD_CEN_LAT'] = moad_cen_lat
    namelist_obsproc['record7']['STANDARD_LON'] = standard_lon
    # NESTIX/NESTJX follow obsproc's (south-north, west-east) convention.
    namelist_obsproc['record8']['NESTIX'] = config['geogrid']['e_sn']
    namelist_obsproc['record8']['NESTJX'] = config['geogrid']['e_we']
    namelist_obsproc['record8']['DIS'] = config['geogrid']['dx']
    namelist_obsproc['record9']['OUTPUT_OB_FORMAT'] = output_format
    namelist_obsproc.write('./namelist.obsproc', force=True)

    cli.stage(f'Run obsproc.exe at {wrfda_work_dir} ...')
    expected_files = [
        f'obs_gts_{start_time.format("YYYY-MM-DD_HH:mm:ss")}.3DVAR'
    ]
    # Skip the run if output already exists, unless --force was given.
    if not check_files(expected_files) or args.force:
        run('rm -f obs_gts_*')
        # Locate the LITTLE_R input: either from configured Jinja-style
        # dir/file patterns, or the default <root>/<YYYYMMDD>/obs.gts.<stamp>.
        if has_key(config, ('custom', 'littler')):
            if 'dir_pattern' in config['custom'][
                    'littler'] and 'file_pattern' in config['custom'][
                        'littler']:
                dir_name = Template(
                    config['custom']['littler']['dir_pattern']).render(
                        time=start_time)
                file_name = Template(
                    config['custom']['littler']['file_pattern']).render(
                        time=start_time)
                littler_path = f'{littler_root}/{dir_name}/{file_name}'
            else:
                cli.error(
                    'No dir_pattern and file_pattern in custom->littler section!'
                )
        else:
            littler_path = f'{littler_root}/{start_time.format("YYYYMMDD")}/obs.gts.{start_time.format("YYYYMMDDHHmm")}'
        if os.path.exists(littler_path):
            run(f'ln -sf {littler_path} {wrfda_work_dir}/obs.gts.{start_time.format("YYYYMMDDHHmm")}'
                )
        else:
            cli.error(f'Failed! {littler_path} Not Found.')
        submit_job(f'{wrfda_root}/var/obsproc/obsproc.exe', 1, config, args, wait=True)
        if not check_files(expected_files):
            cli.error(f'Failed!')
        cli.notice('Succeeded.')
    else:
        cli.notice('File obs_gts_* already exist.')
    run('ls -l obs_gts_*')
# Driver preamble (presumably the body of a main/run routine whose start is
# outside this view): validate inputs, create work subdirectories, configure
# WPS, and prepare a spin-up run.
args.littler_root = os.path.abspath(args.littler_root)
if not os.path.isdir(args.littler_root):
    cli.error(f'Directory {args.littler_root} does not exist!')
start_time = config['custom']['start_time']
end_time = config['custom']['end_time']
datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
start_time_str = start_time.format(datetime_fmt)
end_time_str = end_time.format(datetime_fmt)
# Work areas: fb = background forecast, fa = analysis forecast, ref = reference
# — presumably; TODO confirm against how later code uses them.
if not os.path.isdir(args.work_root + '/fb'):
    os.mkdir(args.work_root + '/fb')
if not os.path.isdir(args.work_root + '/fa'):
    os.mkdir(args.work_root + '/fa')
if not os.path.isdir(args.work_root + '/ref'):
    os.mkdir(args.work_root + '/ref')
if not has_key(config, ('wrfvar7', 'cv_options')):
    cli.error('cv_options in wrfvar7 is not set!')
wrf.config_wps(args.work_root, args.wps_root, args.geog_root, config, args)
wrf.run_wps_geogrid(args.work_root, args.wps_root, config, args)
# Spin up 6 hours.
spinup_hours = get_value(config['custom'], 'spinup_hours', 6)
# Deep-copy so the spin-up's shifted start time never mutates the original config.
spinup_config = copy.deepcopy(config)
spinup_config['custom']['start_time'] = config['custom'][
    'start_time'].subtract(hours=spinup_hours)
spinup_config['custom']['forecast_hours'] += spinup_hours
wrf.config_wps(args.work_root, args.wps_root, args.geog_root, spinup_config, args)
# Run forecast with xb as initial condition.
def is_spring_valid_profile(data):
    """Return True when `data` has no 'spring.profiles' key, or its profile
    equals SPRING_PROFILE.
    """
    has_profile = utils.has_key('spring.profiles', data)
    # No profile at all is always acceptable.
    if has_profile is False:
        return True
    # Otherwise the declared profile must match the one we serve.
    return has_profile and data['spring']['profiles'] == SPRING_PROFILE