예제 #1
0
파일: forecast.py 프로젝트: islenv/wrfxpy
def execute(args):
    """
    Executes a weather/fire simulation.

    The args dictionary contains

    :param args: a dictionary with the following keys
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off
    """
    logging.basicConfig(level=logging.INFO)

    # initialize the job state from the arguments
    js = JobState(args)

    logging.info("job %s starting [%d hours to forecast]." % (js.job_id, js.fc_hrs))
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = f90nml.read(args['wps_namelist_path'])
    js.wrf_nml = f90nml.read(args['wrf_namelist_path'])
    js.fire_nml = f90nml.read(args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in args:
        js.ems_nml = f90nml.read(args['emissions_namelist_path'])
    
    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    num_doms = len(js.domain_conf)
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms
    js.wps_nml['share']['interval_seconds'] = 3600

    logging.info("number of domains defined is %d." % num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    logging.info("cloning WPS into %s" % js.wps_dir)

    # step 1: clone WPS and WRF directories
    cln = WRFCloner(args)
    cln.clone_wps(js.wps_dir, js.grib_source.vtables(), [])

    # step 2: process domain information and patch namelist for geogrid
    js.wps_nml['geogrid']['geog_data_path'] = args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir, js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()
    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    grib_proc = Process(target=retrieve_gribs_and_run_ungrib, args=(js, proc_q))

    geogrid_proc.start()
    grib_proc.start()

    # wait until both tasks are done
    geogrid_proc.join()
    grib_proc.join()

    if proc_q.get() != 'SUCCESS':
        return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    # step 5: execute metgrid after ensuring all grids will be processed
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("cloning WRF into %s" % js.wrf_dir)

    # step 6: clone wrf directory, symlink all met_em* files
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")

    logging.info("running REAL")

    # step 7: patch input namelist, fire namelist, emissions namelist (if required)
    #         and execute real.exe
    time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    update_namelist(js.wrf_nml, js.grib_source.namelist_keys())
    if 'ignitions' in args:
        update_namelist(js.wrf_nml, render_ignitions(js, num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        f90nml.write(js.ems_nml, osp.join(js.wrf_dir, 'namelist.fire_emissions'), force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms

    f90nml.write(js.wrf_nml, osp.join(js.wrf_dir, 'namelist.input'), force=True)

    f90nml.write(js.fire_nml, osp.join(js.wrf_dir, 'namelist.fire'), force=True)

    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens 
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' % str(e))
        Real(js.wrf_dir).execute().check_output()
    

    # step 8: if requested, do fuel moisture DA
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            assimilate_fm10_observations(osp.join(wrf_dir, 'wrfinput_d%02d' % dom), None, js.fmda.token)

    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    # step 8: execute wrf.exe on parallel backend
    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn, js.wall_time_hrs)

    send_email(js, 'wrf_exec', 'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info("WRF job submitted with id %s, waiting for rsl.error.0000" % js.task_id)

    # step 9: wait for appearance of rsl.error.0000 and open it
    wrf_out = None
    while wrf_out is None:
        try:
            wrf_out = open(osp.join(js.wrf_dir, 'rsl.error.0000'))
            break
        except IOError:
            logging.info('forecast: waiting 10 seconds for rsl.error.0000 file')
        
        time.sleep(5)
    
    logging.info('Detected rsl.error.0000')

    # step 10: track log output and check for history writes fro WRF
    pp = None
    already_sent_files, max_pp_dom = [], -1
    if js.postproc is not None:
        js.pp_dir = osp.join(js.workspace_path, js.job_id, "products")
        make_dir(js.pp_dir)
        pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
	max_pp_dom = max([int(x) for x in filter(lambda x: len(x) == 1, js.postproc)])

    while True:
        line = wrf_out.readline().strip()
        if not line:
            time.sleep(0.2)
            continue

        if "SUCCESS COMPLETE WRF" in line:
            send_email(js, 'complete', 'Job %s - wrf job complete SUCCESS.' % js.job_id)
            logging.info("WRF completion detected.")
            break

        if "Timing for Writing wrfout" in line:
            esmf_time,domain_str = re.match(r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):' ,line).groups()
            dom_id = int(domain_str)
            logging.info("Detected history write for domain %d for time %s." % (dom_id, esmf_time))
            if js.postproc is not None and str(dom_id) in js.postproc:
                var_list = [str(x) for x in js.postproc[str(dom_id)]]
                logging.info("Executing postproc instructions for vars %s for domain %d." % (str(var_list), dom_id))
                wrfout_path = find_fresh_wrfout(js.wrf_dir, dom_id)
            try:
                pp.process_vars(wrfout_path, dom_id, esmf_time, var_list)
            except Exception as e:
                logging.warning('Failed to postprocess for time %s with error %s.' % (esmf_time, str(e)))

            # if this is the last processed domain for this timestamp in incremental mode, upload to server
            if dom_id == max_pp_dom and js.postproc.get('shuttle', None) == 'incremental':
                desc = js.postproc['description'] if 'description' in js.postproc else js.job_id
                sent_files_1 = send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc, already_sent_files)
                logging.info('sent %d files to visualization server.'  % len(sent_files_1))
                already_sent_files = filter(lambda x: not x.endswith('json'), already_sent_files + sent_files_1)

    # if we are to send out the postprocessed files after completion, this is the time
    if js.postproc.get('shuttle', None) == 'on_completion':
        desc = js.postproc['description'] if 'description' in js.postproc else js.job_id
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc)
예제 #2
0
파일: forecast.py 프로젝트: auteuov/wrfxpy
def execute(args, job_args):
    """
    Executes a weather/fire simulation.

    :param args: a dictionary with all to start the simulationfollowing keys
    :param job_args: a the original json given the forecast

    Keys in args:
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off
 
    
    """

    logging.info('step 0 initialize the job state from the arguments')
    ## logging.info('args = %s' % json.dumps(jargs, open(osp.join(jobdir,'input.json'),'w'), indent=4, separators=(',', ': ')))
    js = JobState(args)
    ## logging.info('js = %s' % json.dumps(js, open(osp.join(jobdir,'input.json'),'w'), indent=4, separators=(',', ': ')))

    jobdir = osp.abspath(osp.join(js.workspace_path, js.job_id))
    make_clean_dir(jobdir)

    json.dump(job_args,
              open(osp.join(jobdir, 'input.json'), 'w'),
              indent=4,
              separators=(',', ': '))
    jsub = make_job_file(js)
    json.dump(jsub, open(jsub.jobfile, 'w'), indent=4, separators=(',', ': '))

    logging.info("job %s starting [%d hours to forecast]." %
                 (js.job_id, js.fc_hrs))
    sys.stdout.flush()
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = read_namelist(js.args['wps_namelist_path'])
    js.wrf_nml = read_namelist(js.args['wrf_namelist_path'])
    js.fire_nml = read_namelist(js.args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in js.args:
        js.ems_nml = read_namelist(js.args['emissions_namelist_path'])

    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    js.num_doms = len(js.domain_conf)
    js.wps_nml['share']['interval_seconds'] = js.grib_source[
        0].interval_seconds

    logging.info("number of domains defined is %d." % js.num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    #check_obj(args,'args')
    #check_obj(js,'Initial job state')

    logging.info("step 1: clone WPS and WRF directories")
    logging.info("cloning WPS into %s" % js.wps_dir)
    cln = WRFCloner(js.args)
    cln.clone_wps(js.wps_dir, [])
    js.grib_source[0].clone_vtables(js.wps_dir)

    logging.info(
        "step 2: process domain information and patch namelist for geogrid")
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)
                                         ] * js.num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * js.num_doms
    js.wps_nml['geogrid']['geog_data_path'] = js.args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir,
                                       js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()

    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    # grib_proc = Process(target=retrieve_gribs_and_run_ungrib_all, args=(js, proc_q, ref_utc))
    grib_proc = {}
    for grib_source in js.grib_source:
        grib_proc[grib_source.id] = Process(
            target=retrieve_gribs_and_run_ungrib,
            args=(js, grib_source, proc_q))

    logging.info('starting GEOGRID and GRIB2/UNGRIB')

    if js.ungrib_only:
        logging.info(
            'ungrib_only set, skipping GEOGRID, will exit after UNGRIB')
    else:
        geogrid_proc.start()

    for grib_source in js.grib_source:
        grib_proc[grib_source.id].start()

    # wait until all tasks are done
    logging.info('waiting until all tasks are done')

    for grib_source in js.grib_source:
        grib_proc[grib_source.id].join()

    if js.ungrib_only:
        return
    else:
        geogrid_proc.join()

    for grib_source in js.grib_source:
        if proc_q.get() != 'SUCCESS':
            return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    logging.info(
        "step 5: execute metgrid after ensuring all grids will be processed")
    update_namelist(js.wps_nml, js.grib_source[0].namelist_wps_keys())
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    logging.info("namelist.wps for METGRID: %s" %
                 json.dumps(js.wps_nml, indent=4, separators=(',', ': ')))
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("METGRID complete")

    logging.info("cloning WRF into %s" % js.wrf_dir)

    logging.info(
        "step 6: clone wrf directory, symlink all met_em* files, make namelists"
    )
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")
    time_ctrl = update_time_control(js.start_utc, js.end_utc, js.num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    js.wrf_nml['time_control']['interval_seconds'] = js.grib_source[
        0].interval_seconds
    update_namelist(js.wrf_nml, js.grib_source[0].namelist_keys())
    if 'ignitions' in js.args:
        update_namelist(js.wrf_nml, render_ignitions(js, js.num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        logging.debug('namelist.fire_emissions given, turning on tracers')
        f90nml.write(js.ems_nml,
                     osp.join(js.wrf_dir, 'namelist.fire_emissions'),
                     force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * js.num_doms

    f90nml.write(js.wrf_nml,
                 osp.join(js.wrf_dir, 'namelist.input'),
                 force=True)

    f90nml.write(js.fire_nml,
                 osp.join(js.wrf_dir, 'namelist.fire'),
                 force=True)

    # step 7: execute real.exe

    logging.info("running REAL")
    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' %
                      str(e))
        Real(js.wrf_dir).execute().check_output()

    logging.info('step 7b: if requested, do fuel moisture DA')
    logging.info('fmda = %s' % js.fmda)
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            logging.info('assimilate_fm10_observations for domain %s' % dom)
            assimilate_fm10_observations(
                osp.join(js.wrf_dir, 'wrfinput_d%02d' % int(dom)), None,
                js.fmda.token)

    # step 8: execute wrf.exe on parallel backend
    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    jsub.job_num = WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes,
                                                   js.ppn, js.wall_time_hrs)

    send_email(
        js, 'wrf_exec',
        'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info(
        "WRF job %s submitted with id %s, waiting for rsl.error.0000" %
        (jsub.job_num, js.task_id))

    jobfile = osp.abspath(osp.join(js.workspace_path, js.job_id, 'job.json'))
    json.dump(jsub, open(jobfile, 'w'), indent=4, separators=(',', ': '))

    process_output(js.job_id)
예제 #3
0
파일: forecast.py 프로젝트: openwfm/wrfxpy
def execute(args,job_args):
    """
    Executes a weather/fire simulation.

    :param args: a dictionary with all to start the simulationfollowing keys
    :param job_args: a the original json given the forecast

    Keys in args:
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off
 
    
    """

    # step 0 initialize the job state from the arguments
    js = JobState(args)

    jobdir = osp.abspath(osp.join(js.workspace_path, js.job_id))
    make_clean_dir(jobdir)

    json.dump(job_args, open(osp.join(jobdir,'input.json'),'w'), indent=4, separators=(',', ': '))
    jsub = make_job_file(js)
    json.dump(jsub, open(jsub.jobfile,'w'), indent=4, separators=(',', ': '))
 
    logging.info("job %s starting [%d hours to forecast]." % (js.job_id, js.fc_hrs))
    sys.stdout.flush()
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = f90nml.read(js.args['wps_namelist_path'])
    js.wrf_nml = f90nml.read(js.args['wrf_namelist_path'])
    js.fire_nml = f90nml.read(js.args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in js.args:
        js.ems_nml = f90nml.read(js.args['emissions_namelist_path'])
    
    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    num_doms = len(js.domain_conf)
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms
    js.wps_nml['share']['interval_seconds'] = 3600

    logging.info("number of domains defined is %d." % num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    #check_obj(args,'args')
    #check_obj(js,'Initial job state')

    # step 1: clone WPS and WRF directories
    logging.info("cloning WPS into %s" % js.wps_dir)
    cln = WRFCloner(js.args)
    cln.clone_wps(js.wps_dir, js.grib_source.vtables(), [])

    # step 2: process domain information and patch namelist for geogrid
    js.wps_nml['geogrid']['geog_data_path'] = js.args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir, js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()
    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    grib_proc = Process(target=retrieve_gribs_and_run_ungrib, args=(js, proc_q))


    logging.info('starting GEOGRID and GRIB2/UNGRIB')
    geogrid_proc.start()
    grib_proc.start()

    # wait until both tasks are done
    logging.info('waiting until both tasks are done')
    grib_proc.join()
    geogrid_proc.join()

    if proc_q.get() != 'SUCCESS':
        return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    # step 5: execute metgrid after ensuring all grids will be processed
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("cloning WRF into %s" % js.wrf_dir)

    # step 6: clone wrf directory, symlink all met_em* files, make namelists
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")
    time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    update_namelist(js.wrf_nml, js.grib_source.namelist_keys())
    if 'ignitions' in js.args:
        update_namelist(js.wrf_nml, render_ignitions(js, num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        logging.debug('namelist.fire_emissions given, turning on tracers')
        f90nml.write(js.ems_nml, osp.join(js.wrf_dir, 'namelist.fire_emissions'), force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms

    f90nml.write(js.wrf_nml, osp.join(js.wrf_dir, 'namelist.input'), force=True)

    f90nml.write(js.fire_nml, osp.join(js.wrf_dir, 'namelist.fire'), force=True)

    # step 7: execute real.exe
    
    logging.info("running REAL")
    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens 
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' % str(e))
        Real(js.wrf_dir).execute().check_output()
    

    # step 7b: if requested, do fuel moisture DA
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            assimilate_fm10_observations(osp.join(wrf_dir, 'wrfinput_d%02d' % dom), None, js.fmda.token)

    # step 8: execute wrf.exe on parallel backend
    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    jsub.job_num=WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn, js.wall_time_hrs)

    send_email(js, 'wrf_exec', 'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info("WRF job %s submitted with id %s, waiting for rsl.error.0000" % (jsub.job_num, js.task_id))
  
    jobfile = osp.abspath(osp.join(js.workspace_path, js.job_id,'job.json'))
    json.dump(jsub, open(jobfile,'w'), indent=4, separators=(',', ': '))

    process_output(js.job_id)
예제 #4
0
파일: forecast.py 프로젝트: auteuov/wrfxpy
def retrieve_gribs_and_run_ungrib(js, grib_source, q):
    """
    This function retrieves required GRIB files and runs ungrib.

    It returns either 'SUCCESS' or 'FAILURE' on completion.

    :param js: the JobState object containing the forecast configuration
    :param grib_source: the GribSource object containing ungrib configuration
    :param q: the multiprocessing Queue into which we will send either 'SUCCESS' or 'FAILURE'
    """
    wps_dir = osp.abspath(js.wps_dir)
    grib_dir = osp.join(wps_dir, grib_source.id)
    make_clean_dir(grib_dir)
    wps_nml = js.wps_nml
    try:
        logging.info("retrieving GRIB files from %s" % grib_source.id)

        download_whole_cycle = js.get('download_whole_cycle', False)
        manifest = grib_source.retrieve_gribs(js.start_utc, js.end_utc,
                                              js.ref_utc, js.cycle_start_utc,
                                              download_whole_cycle)
        # logging.info('manifest: ' + str(manifest))

        cache_colmet = len(manifest) > 1
        have_all_colmet = False
        if cache_colmet:
            have_all_colmet = len(manifest.colmet_missing) == 0
            colmet_dir = osp.join(grib_source.cache_dir,
                                  manifest.colmet_prefix)

        logging.info('cache colmet %s, have all colmet %s' %
                     (cache_colmet, have_all_colmet))

        if not have_all_colmet:
            # this is also if we do not cache
            grib_source.symlink_gribs(manifest.grib_files, grib_dir)

            send_email(
                js, 'grib2', 'Job %s - %d GRIB2 files downloaded.' %
                (js.job_id, len(manifest)))
            logging.info("running UNGRIB for %s" % grib_source.id)

            logging.info(
                "step 4: patch namelist for ungrib end execute ungrib on %s files"
                % grib_source.id)

            update_namelist(wps_nml, grib_source.namelist_wps_keys())
            if cache_colmet:
                wps_nml['share']['start_date'] = [
                    utc_to_esmf(manifest.colmet_files_utc[0])
                ] * js.num_doms
                wps_nml['share']['end_date'] = [
                    utc_to_esmf(manifest.colmet_files_utc[-1])
                ] * js.num_doms

            # logging.info("namelist.wps for UNGRIB: %s" % json.dumps(wps_nml, indent=4, separators=(',', ': ')))
            f90nml.write(wps_nml,
                         osp.join(grib_dir, 'namelist.wps'),
                         force=True)
            grib_source.clone_vtables(grib_dir)
            symlink_unless_exists(osp.join(wps_dir, 'ungrib.exe'),
                                  osp.join(grib_dir, 'ungrib.exe'))

            print(grib_dir + ':')
            os.system('ls -l %s' % grib_dir)

            Ungrib(grib_dir).execute().check_output()

            print(grib_dir + ':')
            os.system('ls -l %s' % grib_dir)

            if cache_colmet:
                # move output to cache directory
                make_dir(colmet_dir)
                for f in manifest.colmet_files:
                    move(osp.join(grib_dir, f), osp.join(colmet_dir, f))
                # now all colmet files should be in the cache

        if cache_colmet:
            for f in manifest.colmet_files:
                symlink_unless_exists(osp.join(colmet_dir, f),
                                      osp.join(wps_dir, f))
        else:
            # move output
            for f in glob.glob(osp.join(grib_dir, grib_source.prefix() + '*')):
                move(f, wps_dir)

        send_email(js, 'ungrib', 'Job %s - ungrib complete.' % js.job_id)
        logging.info('UNGRIB complete for %s' % grib_source.id)
        q.put('SUCCESS')

    except Exception as e:
        logging.error('GRIB2/UNGRIB step failed with exception %s' % repr(e))
        traceback.print_exc()
        q.put('FAILURE')
예제 #5
0
def execute(args):
    """
    Executes a weather/fire simulation.

    The args dictionary contains

    :param args: a dictionary with the following keys
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off
    """
    logging.basicConfig(level=logging.INFO)

    # initialize the job state from the arguments
    js = JobState(args)

    logging.info("job %s starting [%d hours to forecast]." %
                 (js.job_id, js.fc_hrs))
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = f90nml.read(args['wps_namelist_path'])
    js.wrf_nml = f90nml.read(args['wrf_namelist_path'])
    js.fire_nml = f90nml.read(args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in args:
        js.ems_nml = f90nml.read(args['emissions_namelist_path'])

    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    num_doms = len(js.domain_conf)
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms
    js.wps_nml['share']['interval_seconds'] = 3600

    logging.info("number of domains defined is %d." % num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    logging.info("cloning WPS into %s" % js.wps_dir)

    # step 1: clone WPS and WRF directories
    cln = WRFCloner(args)
    cln.clone_wps(js.wps_dir, js.grib_source.vtables(), [])

    # step 2: process domain information and patch namelist for geogrid
    js.wps_nml['geogrid']['geog_data_path'] = args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir,
                                       js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()
    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    grib_proc = Process(target=retrieve_gribs_and_run_ungrib,
                        args=(js, proc_q))

    geogrid_proc.start()
    grib_proc.start()

    # wait until both tasks are done
    geogrid_proc.join()
    grib_proc.join()

    if proc_q.get() != 'SUCCESS':
        return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    # step 5: execute metgrid after ensuring all grids will be processed
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("cloning WRF into %s" % js.wrf_dir)

    # step 6: clone wrf directory, symlink all met_em* files
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")

    logging.info("running REAL")

    # step 7: patch input namelist, fire namelist, emissions namelist (if required)
    #         and execute real.exe
    time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    update_namelist(js.wrf_nml, js.grib_source.namelist_keys())
    if 'ignitions' in args:
        update_namelist(js.wrf_nml, render_ignitions(js, num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        f90nml.write(js.ems_nml,
                     osp.join(js.wrf_dir, 'namelist.fire_emissions'),
                     force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms

    f90nml.write(js.wrf_nml,
                 osp.join(js.wrf_dir, 'namelist.input'),
                 force=True)

    f90nml.write(js.fire_nml,
                 osp.join(js.wrf_dir, 'namelist.fire'),
                 force=True)

    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' %
                      str(e))
        Real(js.wrf_dir).execute().check_output()

    # step 8: if requested, do fuel moisture DA
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            assimilate_fm10_observations(
                osp.join(wrf_dir, 'wrfinput_d%02d' % dom), None, js.fmda.token)

    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    # step 8: execute wrf.exe on parallel backend
    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn,
                                    js.wall_time_hrs)

    send_email(
        js, 'wrf_exec',
        'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info("WRF job submitted with id %s, waiting for rsl.error.0000" %
                 js.task_id)

    # step 9: wait for appearance of rsl.error.0000 and open it
    wrf_out = None
    while wrf_out is None:
        try:
            wrf_out = open(osp.join(js.wrf_dir, 'rsl.error.0000'))
            break
        except IOError:
            logging.info(
                'forecast: waiting 10 seconds for rsl.error.0000 file')

        time.sleep(5)

    logging.info('Detected rsl.error.0000')

    # step 10: track log output and check for history writes fro WRF
    pp = None
    already_sent_files, max_pp_dom = [], -1
    if js.postproc is not None:
        js.pp_dir = osp.join(js.workspace_path, js.job_id, "products")
        make_dir(js.pp_dir)
        pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
        max_pp_dom = max(
            [int(x) for x in filter(lambda x: len(x) == 1, js.postproc)])

    while True:
        line = wrf_out.readline().strip()
        if not line:
            time.sleep(0.2)
            continue

        if "SUCCESS COMPLETE WRF" in line:
            send_email(js, 'complete',
                       'Job %s - wrf job complete SUCCESS.' % js.job_id)
            logging.info("WRF completion detected.")
            break

        if "Timing for Writing wrfout" in line:
            esmf_time, domain_str = re.match(
                r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):',
                line).groups()
            dom_id = int(domain_str)
            logging.info("Detected history write for domain %d for time %s." %
                         (dom_id, esmf_time))
            if js.postproc is not None and str(dom_id) in js.postproc:
                var_list = [str(x) for x in js.postproc[str(dom_id)]]
                logging.info(
                    "Executing postproc instructions for vars %s for domain %d."
                    % (str(var_list), dom_id))
                wrfout_path = find_fresh_wrfout(js.wrf_dir, dom_id)
            try:
                pp.process_vars(wrfout_path, dom_id, esmf_time, var_list)
            except Exception as e:
                logging.warning(
                    'Failed to postprocess for time %s with error %s.' %
                    (esmf_time, str(e)))

            # if this is the last processed domain for this timestamp in incremental mode, upload to server
            if dom_id == max_pp_dom and js.postproc.get('shuttle',
                                                        None) == 'incremental':
                desc = js.postproc[
                    'description'] if 'description' in js.postproc else js.job_id
                sent_files_1 = send_product_to_server(args, js.pp_dir,
                                                      js.job_id, js.job_id,
                                                      desc, already_sent_files)
                logging.info('sent %d files to visualization server.' %
                             len(sent_files_1))
                already_sent_files = filter(lambda x: not x.endswith('json'),
                                            already_sent_files + sent_files_1)

    # if we are to send out the postprocessed files after completion, this is the time
    if js.postproc.get('shuttle', None) == 'on_completion':
        desc = js.postproc[
            'description'] if 'description' in js.postproc else js.job_id
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc)