Example #1
 def set_priority(self, priority=0):
     """
     Setting priority to jobs
     """
     logging.info('\t---> Setting priority Chunk %d %s %s' %
                  (self.chunk_id, datetime2datewrf(
                      self.start_date), datetime2datewrf(self.end_date)))
     l_jobs = self.job.filter_by(status=Job.Status.SUBMITTED).all()
     if not l_jobs:
         logging.info('\t\tThere are no jobs to set priority.')
     else:
         for job in l_jobs:
             job.dryrun = self.dryrun
             job.set_priority(priority)
Example #2
 def cancel(self, hard=False):
     """
     Delete jobs
     """
     logging.info('\t---> Canceling Chunk %d %s %s' %
                  (self.chunk_id, datetime2datewrf(
                      self.start_date), datetime2datewrf(self.end_date)))
     l_jobs = self.job.filter(
         and_(Job.status != Job.Status.PREPARED,
              Job.status != Job.Status.FINISHED,
              Job.status != Job.Status.FAILED,
              Job.status != Job.Status.CANCEL)).all()
     if not l_jobs:
         logging.info('\t\tThere are no jobs to cancel.')
     else:
         for job in l_jobs:
             job.dryrun = self.dryrun
             job.cancel(hard)
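The two methods above act on the jobs attached to a chunk. A minimal usage sketch, assuming a SQLAlchemy session with the WRF4G Realization and Chunk models mapped as in these examples (the session, query and realization name below are illustrative, not part of the original code):

# Hypothetical driver: raise the priority of submitted chunks and cancel the rest.
rea = session.query(Realization).filter_by(name='test').one()
for chunk in rea.chunk:
    chunk.dryrun = False
    if chunk.status == Chunk.Status.SUBMITTED:
        chunk.set_priority(10)    # Example #1
    else:
        chunk.cancel(hard=False)  # Example #2
session.commit()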
Example #3
def wps2wrf(namelist_wps, namelist_input, sdate, edate, maxdom, chunk_is_restart, timestep_dxfactor='6'):
    nmlw = fn.FortranNamelist(namelist_wps)
    nmli = fn.WrfNamelist(namelist_input)
    nmli.setValue("max_dom", maxdom)
    for var in ["run_days", "run_hours", "run_minutes", "run_seconds"]:
        nmli.setValue(var, 0)
    nmli.setMaxDomValue("start_year",  sdate.year)
    nmli.setMaxDomValue("start_month", sdate.month)
    nmli.setMaxDomValue("start_day",   sdate.day)
    nmli.setMaxDomValue("start_hour",  sdate.hour)
    nmli.setMaxDomValue("end_year",    edate.year)
    nmli.setMaxDomValue("end_month",   edate.month)
    nmli.setMaxDomValue("end_day",     edate.day)
    nmli.setMaxDomValue("end_hour",    edate.hour)
    for var in [ "parent_grid_ratio", "i_parent_start", "j_parent_start", "e_we", "e_sn"]:
        nmli.setValue(var, nmlw.getValue(var))
    nmli.setValue("parent_time_step_ratio", nmlw.getValue("parent_grid_ratio"))
    if exists("met_em.d01.%s.nc" % datetime2datewrf( sdate ) ):
        # If there are met_em files, we need to run real.exe. Otherwise, we
        # cannot get enough info (num_metgrid_*levels) to run real.exe
        nmli.setValue("num_metgrid_levels", get_num_metgrid_levels())
        nmli.setValue("num_metgrid_soil_levels", get_num_metgrid_soil_levels())
    #
    #  Compute the grid spacings. Read them from met_em files if the projection is lat-lon.
    #
    nmli.setValue("grid_id", list(range(1, maxdom+1)))
    # Update parent_id in the namelist
    nmli.setValue("parent_id", nmlw.getValue("parent_id"))

    alldx = []
    for idom in range(1,maxdom + 1):
        thisdx = get_latlon_dx(sdate, "d0%i" % idom)
        alldx.append(thisdx)

    nmli.setValue("dx", alldx)
    nmli.setValue("dy", alldx) # May be an issue for global WRF
    #
    # Compute the time step. 
    #
    if timestep_dxfactor.startswith("manual:"):
        nmli.setValue("time_step", int(timestep_dxfactor[7:]))
    elif timestep_dxfactor.startswith("adaptive:"):
        nmli.setValue("use_adaptive_time_step", ".true.", "domains")
    else:
        nmli.setValue("time_step",
            get_time_step(nmli.getValue("dx")[0], eval(timestep_dxfactor)))
    nmli.setValue("restart", chunk_is_restart)
    #
    #  Trim, check, overwrite the file and ... we are done!
    #
    #nmli.trimMaxDom()
    nmli.wrfCheck()
    #nmli.extendMaxDomVariables()
    nmli.overWriteNamelist()
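A hedged sketch of calling wps2wrf: it rewrites namelist.input in place from the values in namelist.wps plus the chunk dates, and it expects to run in the directory that holds the met_em/wrfinput files. The file names and dates below are illustrative, and the fn namelist utilities imported by the module are assumed to be on PYTHONPATH:

from datetime import datetime

# Illustrative chunk boundaries; in WRF4G they come from the Chunk record.
sdate = datetime(1990, 1, 1, 0)
edate = datetime(1990, 1, 3, 0)

# Rewrite namelist.input for a two-domain, non-restart chunk, with a
# time step of 4 * dx (timestep_dxfactor is eval'ed unless it is
# prefixed with 'manual:' or 'adaptive:').
wps2wrf("namelist.wps", "namelist.input", sdate, edate,
        maxdom=2, chunk_is_restart=".F.", timestep_dxfactor="4")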
Example #4
 def set_restart(self, restart_date):
     """
     Setting restart date.
     """
     try:
         datetime_restart_date = datewrf2datetime(restart_date)
     except:
         raise Exception("ERROR: restart date is malformed")
     else:
         logging.info('---> Setting restart date %s' %
                      datetime2datewrf(datetime_restart_date))
         self.restart = datetime_restart_date
Example #5
 def cycle_chunks(self):
     """
     Create chunks the needed for a realization 
     """
     # Define which calendar is going to be used
     exp_calendar = Calendar(self.cfg['calendar'])
     chunk_id = 1
     chunk_start_date = self.start_date
     while chunk_start_date < self.end_date:
         chunk_end_date = exp_calendar.add(chunk_start_date,
                                           self.chunk_size)
         if chunk_end_date > self.end_date:
             chunk_end_date = self.end_date
         # Check chunk on the database
         ch = self.check_db(rea_id=self.id,
                            chunk_start_date=chunk_start_date,
                            chunk_end_date=chunk_end_date,
                            chunk_id=chunk_id)
         if not ch:
             logging.info("\t\t---> Chunk %d %s %s" %
                          (chunk_id, datetime2datewrf(chunk_start_date),
                           datetime2datewrf(chunk_end_date)))
             # Create Chunk
             ch = Chunk()
             ch.rea_id = self.id
             ch.start_date = chunk_start_date
             ch.end_date = chunk_end_date
             ch.wps = 0
             ch.chunk_id = chunk_id
             ch.status = Chunk.Status.PREPARED
             # Add the chunk to the realization
             self.chunk.append(ch)
         chunk_start_date = chunk_end_date
         chunk_id = chunk_id + 1
     # Set the number of chunks of a realization
     self.nchunks = chunk_id - 1
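The loop above splits [start_date, end_date] into fixed-size chunks, clamping the last one to end_date. A standalone sketch of the same arithmetic, with the WRF4G Calendar replaced by datetime.timedelta purely for illustration:

from datetime import datetime, timedelta

start_date = datetime(1990, 1, 1)
end_date = datetime(1990, 1, 11)
chunk_size = timedelta(days=3)

chunk_id = 1
chunk_start = start_date
while chunk_start < end_date:
    # Clamp the last chunk to the realization end date
    chunk_end = min(chunk_start + chunk_size, end_date)
    print("Chunk %d: %s -> %s" % (chunk_id, chunk_start, chunk_end))
    chunk_start = chunk_end
    chunk_id += 1
nchunks = chunk_id - 1   # 4 chunks: 3 + 3 + 3 + 1 days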
Example #6
def get_latlon_dx(start_date, dom):
    #  Try to get dx from the met_em or wrfinput files. Only
    #  required for lat-lon grids, otherwise it is available
    #  in the namelist.wps file
    file_name = "met_em.%s.%s.nc" % ( dom, datetime2datewrf( start_date ) )
    if exists( file_name ) :
        dxfile = file_name
    file_name = "wrfinput_%s" % dom
    if exists( file_name ) :
        dxfile = file_name
    if dxfile:
        shcmd = "ncdump -h %s | grep 'DX =' | sed -e 's/^\t//' | tr '=;' ' ' | awk '{printf \"%%f\", $2}'" % dxfile
        rval = round(float(os.popen(shcmd).read().strip()), 4)
    else:
        raise Exception('get_latlon_dx: no met_em or wrfinput file found')
    return rval
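A short usage sketch: the function shells out to ncdump to read the DX global attribute, so ncdump must be on PATH and a met_em.d0?.<date>.nc or wrfinput_d0? file for the requested domain must be in the current working directory (the date below is illustrative):

from datetime import datetime

# DX of domain d01 for the chunk start date, read from
# met_em.d01.1990-01-01_00:00:00.nc or wrfinput_d01 in the working directory.
dx = get_latlon_dx(datetime(1990, 1, 1, 0), "d01")
print("d01 grid spacing: %f" % dx)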
Example #7
 def run(self, index, rerun=False, priority=0):
     """ 
     Run a chunk is run a drm4g job
     """
     #Send a gridway's job and save data in table Job
     gw_job = GWJob()
     # create template
     rea_name = self.realization.name
     exp_name = self.realization.experiment.name
     exp_path = join(WRF4G_DIR, 'var', 'submission', exp_name)
     rea_path = join(exp_path, rea_name)
     wrf4g_package = join(exp_path, "WRF4G.tar.gz")
     if not exists(wrf4g_package):
         raise Exception("'%s' file does not exist" % wrf4g_package)
     # files to add for the inputsandbox
     inputsandbox = "file://%s," % wrf4g_package
     inputsandbox += "file://%s/db.conf," % exp_path
     inputsandbox += "file://%s/experiment.wrf4g," % exp_path
     inputsandbox += "file://%s/realization.json," % rea_path
     inputsandbox += "file://%s/namelist.input" % rea_path
     # Add the input file if it exists
     input_files = join(exp_path, 'wrf4g_files.tar.gz')
     if exists(input_files):
         inputsandbox += ",file://%s" % (input_files)
     # files to add for the outputsandbox
     outputsandbox = "log_%d_${JOB_ID}.tar.gz, events.pkl" % self.chunk_id
     arguments = '%s %s %d %s %s %d' % (exp_name, rea_name, self.chunk_id,
                                        datetime2datewrf(self.start_date),
                                        datetime2datewrf(self.end_date),
                                        1 if rerun else 0)
     # Create the job template
     file_template = gw_job.create_template(
         name=rea_name,
         directory=rea_path,
         arguments=arguments,
         np=int(self.realization.cfg.get('np', '1')),
         req=self.realization.cfg.get('requirements', ''),
         environ=self.realization.cfg.get('environment', ''),
         inputsandbox=inputsandbox,
         outputsandbox=outputsandbox)
     # Submit the template
     job = Job()  # create a Job object
     time.sleep(0.1)
     # if this is the first chunk of the realization
     if index == 0:
         job.gw_job = gw_job.submit(priority=priority,
                                    file_template=file_template)
     else:
         # if the chunk is not the first of the realization,
         # its submission depends on the gw_job of the previous chunk's job
         chunk_before_id = self.chunk_id - 1
         chunk_before    = self.realization.chunk.\
                           filter( Chunk.chunk_id == chunk_before_id ).one()
         job_before = chunk_before.job.order_by(Job.id)[-1]
         id_job_before = job_before.id
         gw_job_before = job_before.gw_job
         job.gw_job = gw_job.submit(dep=gw_job_before,
                                    priority=priority,
                                    file_template=file_template)
     job.chunk_id = self.chunk_id
     job.run(rerun)
     self.job.append(job)
     # Update the chunk status
     self.status = Chunk.Status.SUBMITTED
Example #8
 def get_restart(self):
     """
     Get restart date.
     """
     logging.info(datetime2datewrf(self.restart))
Example #9
 def run(self,
         first_chunk_run=None,
         last_chunk_run=None,
         rerun=False,
         priority=0):
     """ 
     Run n_chunk of the realization.
     If n_chunk=0 run every chunk of the realization which haven't finished yet
     else run (n_chunk) chunks since the last one finished
     """
     first_chunk_run = int(first_chunk_run) if first_chunk_run else None
     last_chunk_run = int(last_chunk_run) if last_chunk_run else None
     #Check the status of the realization
     if self.status == Realization.Status.FINISHED and not rerun:
         logging.warn("\tRealization '%s' already finished." % self.name)
     elif (self.status == Realization.Status.SUBMITTED
           or self.status == Realization.Status.RUNNING) and not rerun:
         logging.warn("\tRealization '%s' has been submitted." % self.name)
     elif first_chunk_run and first_chunk_run < 0:
         logging.error("\tERROR: The first chunk to run is '%d'." %
                       first_chunk_run)
     elif last_chunk_run and last_chunk_run < 0:
         logging.error("\tERROR: The last chunk to run is '%d'." %
                       last_chunk_run)
     elif (last_chunk_run
           and first_chunk_run) and last_chunk_run < first_chunk_run:
         logging.error(
             "\tERROR: The last chunk to run is greater than the fist one.")
     elif last_chunk_run and last_chunk_run > self.nchunks:
         logging.error("\tERROR: The last chunk does not exist.")
     elif first_chunk_run and first_chunk_run > self.nchunks:
         logging.error("\tERROR: The first chunk does not exist.")
     else:
         # search first chunk to run
         if rerun and first_chunk_run:
             ch = self.chunk.filter(Chunk.chunk_id == first_chunk_run).one()
             self.restart = ch.start_date
             self.current_date = ch.start_date
             self.current_chunk = first_chunk_run
         elif rerun and not first_chunk_run:
             self.restart = None
             first_chunk_run = self.current_chunk = 1
         else:
             #search first chunk to run
             if not self.restart:  # run every chunk of the realization
                 first_chunk_run = 1
             else:
                 #search chunk with end_date>restart and start_date<restart
                 try:
                     first_chunk = self.chunk.filter(
                         and_(Chunk.start_date <= self.restart,
                              Chunk.end_date >= self.restart)).all()[-1]
                 except:
                     raise Exception('There are no chunks to run.')
                 else:
                     if first_chunk_run and first_chunk.chunk_id != first_chunk_run:
                         raise Exception('Use the option --rerun.')
                     else:
                         first_chunk_run = self.current_chunk = first_chunk.chunk_id
         # search last chunk to run
         if not last_chunk_run:
             # run every chunk up to the last one of the realization
             last_chunk_run = self.nchunks
         #Search chunks to run
         l_chunks = self.chunk.filter(
             and_(Chunk.chunk_id >= first_chunk_run,
                  Chunk.chunk_id <= last_chunk_run)).all()
         #run chunks
         for index, chunk in enumerate(l_chunks):
             #print data of chunks
             logging.info(
                 '\t---> Submitting Chunk %d %s %s' %
                 (chunk.chunk_id, datetime2datewrf(
                     chunk.start_date), datetime2datewrf(chunk.end_date)))
             if not self.dryrun:
                 chunk.run(index, rerun, priority)
         if not self.dryrun:
             # Update realization status
             self.status = Realization.Status.SUBMITTED
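A hedged driver sketch for the method above, assuming a Realization loaded through the WRF4G database layer (the session, query and realization name are illustrative):

# Hypothetical driver: submit chunks 2 to 4 of a realization with priority 5.
rea = session.query(Realization).filter_by(name='test').one()
rea.dryrun = False
rea.run(first_chunk_run=2, last_chunk_run=4, rerun=False, priority=5)
session.commit()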
Example #10
 def file_name_wrf(self):
     return self.file_name + datetime2datewrf(self.date)
Example #11
 def date_wrf(self):
     return datetime2datewrf(self.date)
Example #12
def launch_wrapper(params):
    """
    Prepare and launch the job wrapper
    """
    ##
    # Create log directory
    ##
    try:
        os.makedirs(params.log_path)
    except:
        raise JobError(
            "Error creating the directory"
            "'%s' on the worker node" % params.log_path,
            Job.CodeError.LOG_PATH)
    ##
    # Logging configuration
    ##
    logging.basicConfig(format='%(asctime)s %(message)s',
                        filename=params.log_file,
                        level=params.log_level)
    ##
    # Show information about paths
    ##
    logging.info('Information about directories')

    # Show root path
    logging.info('Root path = %s' % params.root_path)

    # Show local path
    logging.info('Run path  = %s' % params.local_path)

    ##
    # DRM4G won't remove root_path if clean_after_run is 'no'
    ##
    if params.clean_after_run == 'no':
        logging.info("Creating a .lock file")
        f = open(join(params.root_path, '.lock'), 'w')
        f.close()

    ##
    # Get database session
    ##
    job_db = JobDB(params.job_id)
    try:
        ##
        # Check if this job should run
        ##
        if job_db.get_job_status() == Job.Status.CANCEL:
            raise JobError("Error this job should not run",
                           Job.CodeError.JOB_SHOULD_NOT_RUN)

        job_db.set_job_status(Job.Status.RUNNING)
        ##
        # Create a remote tree directory for the realization
        ##
        logging.info("Creating remote tree directory under '%s'" %
                     params.output_path)
        job_db.set_job_status(Job.Status.CREATE_OUTPUT_PATH)

        for remote_path in [
                params.output_path, params.exp_output_path,
                params.rea_output_path, params.out_rea_output_path,
                params.rst_rea_output_path, params.real_rea_output_path,
                params.log_rea_output_path
        ]:
            vcp_dir = VCPURL(remote_path)
            if not vcp_dir.exists():
                logging.info("Creating remote directory '%s'" % remote_path)
                vcp_dir.mkdir()

        ##
        # Copy configured files to the output path
        ##
        logging.info("Copy configured files to '%s'" % params.output_path)

        for conf_file in [
                "db.conf", "experiment.wrf4g", "realization.json",
                "namelist.input"
        ]:
            oring = join(params.root_path, conf_file)
            dest = join(params.rea_output_path, conf_file)
            try:
                copy_file(oring, dest)
            except:
                logging.warning("Error copying file '%s' to '%s'" %
                                (oring, dest))

        ##
        # Setting PATH and LD_LIBRARY_PATH
        ##
        logging.info('Setting PATH and LD_LIBRARY_PATH variables')

        root_bin_path = join(params.root_path, 'bin')
        PATH = '%s:%s' % (root_bin_path, os.environ.get('PATH'))
        logging.info("PATH=%s" % PATH)
        os.environ['PATH'] = PATH
        LD_LIBRARY_PATH = '%s:%s:%s' % (join(
            params.root_path, 'lib'), join(
                params.root_path, 'lib64'), os.environ.get('LD_LIBRARY_PATH'))
        logging.info("LD_LIBRARY_PATH=%s" % LD_LIBRARY_PATH)
        os.environ['LD_LIBRARY_PATH'] = LD_LIBRARY_PATH
        PYTHONPATH = '%s:%s' % (join(params.root_path, 'lib',
                                     'python'), os.environ.get('PYTHONPATH'))
        logging.info("PYTHONPATH=%s" % PYTHONPATH)
        os.environ['PYTHONPATH'] = PYTHONPATH

        if 'wrf_all_in_one' in params.app:
            OPAL_PREFIX = params.root_path
            logging.info("OPAL_PREFIX=%s" % OPAL_PREFIX)
            os.environ['OPAL_PREFIX'] = OPAL_PREFIX

        ##
        # Configure app
        ##
        logging.info('Configure app')
        job_db.set_job_status(Job.Status.CONF_APP)

        archives_path = join(params.root_path, 'archives')
        logging.info("Creating '%s' directory" % archives_path)
        os.makedirs(archives_path)
        for app in params.app.split('\n'):
            app_tag, app_type, app_value = app.split('|', 2)
            if 'bundle' in app_type:
                oring = app_value.strip()
                dest = join(archives_path, basename(app_value.strip()))
                try:
                    logging.info("Trying to copy '%s'" % oring)
                    copy_file(oring, dest)
                except:
                    raise JobError("'%s' has not copied" % oring,
                                   Job.CodeError.COPY_APP)
                else:
                    logging.info("Unpacking '%s' to '%s'" %
                                 (dest, params.root_path))
                    extract(dest, to_path=params.root_path)
            elif 'command' in app_type:
                logging.info('Configuring source script for %s' % app_tag)
                app_cmd = "{ %s; } && env" % app_value.strip()
                code, output = exec_cmd(app_cmd)
                if code:
                    logging.info(output)
                    raise JobError(
                        "Error executing source script for %s" % app_tag,
                        Job.CodeError.SOURCE_SCRIPT)
                for line in output.splitlines():
                    if "=" in line and not "(" in line:
                        try:
                            key, value = line.split("=", 1)
                        except:
                            pass
                        else:
                            logging.debug("%s=%s" % (key, value))
                            os.environ[key] = value
            else:
                raise JobError("Error app type does not exist",
                               Job.CodeError.APP_ERROR)
        wrf4g_files = join(params.root_path, 'wrf4g_files.tar.gz')
        if isfile(wrf4g_files):
            logging.info("Unpacking '%s'" % wrf4g_files)
            extract(wrf4g_files, to_path=params.root_path)

        ##
        # Clean archives directory
        ##
        shutil.rmtree(archives_path)

        ##
        # Make bin files executable
        ##
        logging.info('Making bin files executable')

        for exe_file in os.listdir(root_bin_path):
            os.chmod(join(root_bin_path, exe_file), stat.S_IRWXU)

        if 'wrf_all_in_one' in params.app:
            os.chmod(join(params.root_path, 'WPS', 'ungrib', 'ungrib.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WPS', 'metgrid', 'metgrid.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WRFV3', 'run', 'real.exe'),
                     stat.S_IRWXU)
            os.chmod(join(params.root_path, 'WRFV3', 'run', 'wrf.exe'),
                     stat.S_IRWXU)

        ##
        # This is a little bit tricky: prepare the parallel environment.
        ##
        if ( params.parallel_real == 'yes' or params.parallel_wrf == 'yes' ) and \
           ( params.local_path != params.root_path ) :
            logging.info("Wiping the directory '%s' on all worker nodes" %
                         params.local_path)
            code, output = exec_cmd(
                "%s rm -rf %s" %
                (params.parallel_run_pernode, params.local_path))
            if code:
                logging.info(output)
                raise JobError(
                    "Error wiping the directory '%s' on worker nodes" %
                    (params.local_path), Job.CodeError.LOCAL_PATH)
            code, output = exec_cmd(
                "%s mkdir -p %s" %
                (params.parallel_run_pernode, params.local_path))
            if code:
                logging.info(output)
                raise JobError("Error creating direcory in all worker nodes",
                               Job.CodeError.COPY_FILE)
            for directory in ['WPS', 'WRFV3']:
                exec_cmd(
                    "%s cp -r %s %s" %
                    (params.parallel_run_pernode,
                     join(params.root_path, directory), params.local_path))
                if not exists(join(params.local_path, directory)):
                    raise JobError(
                        "Error copying '%s' directory to all worker nodes" %
                        directory, Job.CodeError.COPY_FILE)

        ##
        # Binaries for execution
        ##
        ungrib_exe = metgrid_exe = real_exe = wrf_exe = None
        if 'wrf_all_in_one' in params.app:
            ungrib_exe = join(params.wps_path, 'ungrib', 'ungrib.exe')
            metgrid_exe = join(params.wps_path, 'metgrid', 'metgrid.exe')
            real_exe = join(params.wrf_run_path, 'real.exe')
            wrf_exe = join(params.wrf_run_path, 'wrf.exe')
        else:
            ungrib_exe = which('ungrib.exe')
            metgrid_exe = which('metgrid.exe')
            real_exe = which('real.exe')
            wrf_exe = which('wrf.exe')
        if not ungrib_exe or not metgrid_exe or not real_exe or not wrf_exe:
            raise JobError("Error finding WRF binaries", Job.CodeError.BINARY)

        ##
        # Obtain information about the WN
        ##
        logging.info('Obtaining information about the worker node')

        # Host info
        logging.info('Host name        = %s' % get_hostname())

        # OS info
        logging.info('Linux release    = %s' % os_release())

        # CPU info
        model_name, number_of_cpus = cpu_info()
        logging.info('CPU (model)      = %s' % model_name)
        logging.info('CPU (processors) = %d' % number_of_cpus)

        # Memory info
        logging.info('RAM Memory       = %s MB' % mem_info())

        # Disk space check
        logging.info('DiskSpace of %s  = %d GB' %
                     (params.root_path, disk_space_check(params.root_path)))

        ##
        # Check the restart date
        ##
        logging.info('Checking restart date')
        rdate = job_db.get_restart_date()
        if not rdate or params.rerun:
            logging.info("Restart date will be '%s'" % params.chunk_sdate)
            if params.nchunk > 1:
                chunk_rerun = ".T."
            else:
                chunk_rerun = ".F."
        elif rdate >= params.chunk_sdate and rdate < params.chunk_edate:
            logging.info("Restart date will be '%s'" % rdate)
            params.chunk_rdate = rdate
            chunk_rerun = ".T."
        elif rdate == params.chunk_edate:
            raise JobError("Restart file is the end date",
                           Job.CodeError.RESTART_MISMATCH)
        else:
            raise JobError("There is a mismatch in the restart date",
                           Job.CodeError.RESTART_MISMATCH)

        if chunk_rerun == ".T.":
            pattern = "wrfrst*" + datetime2dateiso(params.chunk_rdate) + '*'
            files_downloaded = 0
            for file_name in VCPURL(params.rst_rea_output_path).ls(pattern):
                # file will follow the pattern: wrfrst_d01_19900101T000000Z.nc
                orig = join(params.rst_rea_output_path, file_name)
                dest = join(params.wrf_run_path,
                            WRFFile(file_name).file_name_wrf())
                try:
                    logging.info("Downloading file '%s'" % file_name)
                    copy_file(orig, dest)
                except:
                    raise JobError("'%s' has not copied" % file_name,
                                   Job.CodeError.COPY_RST_FILE)
                files_downloaded += 1
            if not files_downloaded:
                raise JobError("No restart file has been downloaded",
                               Job.CodeError.COPY_RST_FILE)
            job_db.set_job_status(Job.Status.DOWN_RESTART)

        ##
        # Either WPS runs or the boundaries and initial conditions are available
        ##

        #Copy namelist.input to wrf_run_path
        shutil.copyfile(join(params.root_path, 'namelist.input'),
                        params.namelist_input)

        if job_db.has_wps():
            logging.info("The boundaries and initial conditions are available")
            orig = join(params.domain_path, basename(params.namelist_wps))
            dest = params.namelist_wps
            try:
                logging.info("Downloading file 'namelist.wps'")
                copy_file(orig, dest)
            except:
                raise JobError("'namelist.wps' has not copied",
                               Job.CodeError.COPY_FILE)
            wps2wrf(params.namelist_wps, params.namelist_input,
                    params.chunk_rdate, params.chunk_edate, params.max_dom,
                    chunk_rerun, params.timestep_dxfactor)
            job_db.set_job_status(Job.Status.DOWN_WPS)
            pattern = "wrf[lbif]*_d\d\d_" + datetime2dateiso(sdate) + "*"
            for file_name in VCPURL(params.real_rea_output_path).ls(pattern):
                orig = join(params.real_rea_output_path, file_name)
                # From wrflowinp_d08_ we remove the _ at the end
                dest = join(params.wrf_run_path,
                            WRFFile(file_name).file_name[:-1])
                try:
                    logging.info("Downloading file '%s'" % file_name)
                    copy_file(orig, dest)
                except:
                    raise JobError("'%s' has not copied" % file_name,
                                   Job.CodeError.COPY_REAL_FILE)
        else:
            logging.info(
                "The boundaries and initial conditions are not available")

            # Change the directory to wps path
            os.chdir(params.wps_path)

            ##
            #  Get geo_em files and namelist.wps
            ##
            logging.info("Download geo_em files and namelist.wps")

            for file_name in VCPURL(params.domain_path).ls('*'):
                if '.nc' in file_name or 'namelist' in file_name:
                    orig = join(params.domain_path, file_name)
                    dest = join(params.wps_path, file_name)
                    try:
                        logging.info("Downloading file '%s'" % file_name)
                        copy_file(orig, dest)
                    except:
                        raise JobError("'%s' has not copied" % file_name,
                                       Job.CodeError.COPY_BOUND)
            job_db.set_job_status(Job.Status.DOWN_BOUND)

            ##
            #  Modify the namelist
            ##
            logging.info("Modify namelist.wps")

            try:
                nmlw = fn.FortranNamelist(params.namelist_wps)
                nmlw.setValue("max_dom", params.max_dom)
                nmlw.setValue(
                    "start_date",
                    params.max_dom * [datetime2datewrf(params.chunk_sdate)])
                nmlw.setValue(
                    "end_date",
                    params.max_dom * [datetime2datewrf(params.chunk_edate)])
                nmlw.setValue("interval_seconds", params.extdata_interval)
                nmlw.overWriteNamelist()
            except Exception as err:
                raise JobError("Error modifying namelist: %s" % err,
                               Job.CodeError.NAMELIST_FAILED)

            ##
            # Preprocessor and Ungrib
            ##
            logging.info("Run preprocessors and ungrib")

            for i, (vt, pp, epath) in enumerate(
                    zip(
                        params.extdata_vtable.replace(' ', '').split(','),
                        params.preprocessor.replace(' ', '').split(','),
                        params.extdata_path.replace(' ', '').split(','))):
                try:
                    nmlw = fn.FortranNamelist(params.namelist_wps)
                    nmlw.setValue("prefix", vt, "ungrib")
                    nmlw.overWriteNamelist()
                except Exception as err:
                    raise JobError("Error modifying namelist: %s" % err,
                                   Job.CodeError.NAMELIST_FAILED)
                vtable = join(params.wps_path, 'Vtable')
                if isfile(vtable):
                    os.remove(vtable)
                # This creates a symbolic link
                os.symlink(
                    join(params.wps_path, 'ungrib', 'Variable_Tables',
                         'Vtable.%s' % vt), vtable)

                ##
                # Execute preprocessor
                ##
                logging.info("Running preprocessor.%s" % pp)

                if not which("preprocessor.%s" % pp):
                    raise JobError("Preprocessor '%s' does not exist" % pp,
                                   Job.CodeError.PREPROCESSOR_FAILED)
                optargs = ""
                for arg in params.preprocessor_optargs.values():
                    optargs = optargs + " " + arg.split(',')[i]
                preprocessor_log = join(params.log_path,
                                        'preprocessor.%s.log' % pp)
                code, output = exec_cmd(
                    "preprocessor.%s %s %s %s %s &> %s" %
                    (pp, datetime2datewrf(params.chunk_rdate),
                     datetime2datewrf(params.chunk_edate), epath, optargs,
                     preprocessor_log))
                if code:
                    logging.info(output)
                    raise JobError("Preprocessor '%s' has failed" % pp,
                                   Job.CodeError.PREPROCESSOR_FAILED)

                grb_data_path = join(params.wps_path, 'grbData')
                for grib_file in glob.glob(join(params.wps_path,
                                                'GRIBFILE.*')):
                    os.remove(grib_file)
                try:
                    for grib_file_to_link, suffixe in zip(
                            glob.glob(join(grb_data_path, '*')),
                            list(
                                map(
                                    ''.join,
                                    itertools.product(string.ascii_uppercase,
                                                      repeat=3)))):
                        try:
                            os.symlink(
                                grib_file_to_link,
                                join(params.wps_path, "GRIBFILE." + suffixe))
                        except:
                            raise JobError("Error linking grib files",
                                           Job.CodeError.LINK_GRIB_FAILED)
                except:
                    raise JobError("Ran out of grib file suffixes",
                                   Job.CodeError.LINK_GRIB_FAILED)

                ##
                # Run Ungrib
                ##
                logging.info("Run ungrib")
                job_db.set_job_status(Job.Status.UNGRIB)

                ungrib_log = join(params.log_path, 'ungrib_%s.log' % vt)
                code, output = exec_cmd("%s > %s" % (ungrib_exe, ungrib_log))
                if code or not 'Successful completion' in open(
                        ungrib_log, 'r').read():
                    logging.info(output)
                    raise JobError("'%s' has failed" % ungrib_exe,
                                   Job.CodeError.UNGRIB_FAILED)
                else:
                    logging.info("ungrib has successfully finished")

            ##
            #  Update namelist.wps
            ##
            logging.info("Update namelist for metgrid")

            try:
                nmlw = fn.FortranNamelist(params.namelist_wps)
                nmlw.setValue(
                    "fg_name",
                    params.extdata_vtable.replace(' ', '').split(','),
                    "metgrid")
                if params.constants_name:
                    nmlw.setValue(
                        "constants_name",
                        params.constants_name.replace(' ', '').split(','),
                        "metgrid")
                for var_to_del in [
                        'opt_output_from_metgrid_path',
                        'opt_output_from_geogrid_path', 'opt_metgrid_tbl_path',
                        'opt_geogrid_tbl_path'
                ]:
                    nmlw.delVariable(var_to_del)
                nmlw.overWriteNamelist()
            except Exception as err:
                raise JobError("Error modifying namelist: %s" % err,
                               Job.CodeError.NAMELIST_FAILED)

            ##
            # Run Metgrid
            ##
            logging.info("Run metgrid")
            job_db.set_job_status(Job.Status.METGRID)

            metgrid_log = join(params.log_path, 'metgrid.log')
            code, output = exec_cmd("%s > %s" % (metgrid_exe, metgrid_log))
            if code or not 'Successful completion' in open(metgrid_log,
                                                           'r').read():
                logging.info(output)
                raise JobError("'%s' has failed" % metgrid_exe,
                               Job.CodeError.METGRID_FAILED)
            else:
                logging.info("metgrid has successfully finished")

            ##
            # Run real
            ##

            # Change the directory to wrf run path
            os.chdir(params.wrf_run_path)

            # Create symbolic links to run real
            met_files = glob.glob(join(params.wps_path, 'met_em.d*'))
            for met_file in met_files:
                os.symlink(met_file,
                           join(params.wrf_run_path, basename(met_file)))
            fix_ptop(params.namelist_input)
            wps2wrf(params.namelist_wps, params.namelist_input,
                    params.chunk_rdate, params.chunk_edate, params.max_dom,
                    chunk_rerun, params.timestep_dxfactor)

            if ( params.parallel_real == 'yes' or params.parallel_wrf == 'yes' ) and \
               ( params.local_path != params.root_path ) :
                logging.info("Copying namelist file to al WNs")
                bk_namelist = join(params.root_path, 'namelist.input.bk')
                shutil.copyfile(params.namelist_input, bk_namelist)
                code, output = exec_cmd(
                    "%s cp %s %s" % (params.parallel_run_pernode, bk_namelist,
                                     params.namelist_input))
                if code:
                    logging.info(output)
                    raise JobError("Error copying namelist to all WNs",
                                   Job.CodeError.COPY_FILE)

            logging.info("Run real")
            job_db.set_job_status(Job.Status.REAL)

            if params.parallel_real == 'yes':
                real_log = join(params.wrf_run_path, 'rsl.out.0000')
                cmd = "%s wrf_launcher.sh %s" % (params.parallel_run, real_exe)
                code, output = exec_cmd(cmd)
                if isfile(real_log):
                    real_rsl_path = join(params.log_path, 'rsl_real')
                    os.mkdir(real_rsl_path)
                    rsl_files = glob.glob(join(params.wrf_run_path, 'rsl.*'))
                    for rsl_file in rsl_files:
                        shutil.copyfile(
                            rsl_file, join(real_rsl_path, basename(rsl_file)))
            else:
                real_log = join(params.log_path, 'real.log')
                code, output = exec_cmd("wrf_launcher.sh %s > %s" %
                                        (real_exe, real_log))
            if code or not 'SUCCESS COMPLETE' in open(real_log, 'r').read():
                logging.info(output)
                raise JobError("'%s' has failed" % real_exe,
                               Job.CodeError.REAL_FAILED)
            else:
                logging.info("real has successfully finished")
            ##
            # Check whether WPS files have to be stored
            ##
            if params.save_wps == 'yes':
                logging.info("Saving wps")
                job_db.set_job_status(Job.Status.UPLOAD_WPS)
                # When saving WPS files, add the date to the name. Three files have to be
                # uploaded: wrfinput_d0?, wrfbdy_d0? and wrflowinp_d0?. For example,
                # "upload_file wps 1990-01-01_00:00:00" will create in the repository
                # files with names such as wrfinput_d01_19900101T000000Z
                suffix = "_" + datetime2dateiso(params.chunk_rdate) + ".nc"
                for wps_file in VCPURL(params.wps_path).ls("wrf[lbif]*_d\d\d"):
                    oiring = wps_file
                    dest = join(params.real_rea_output_path,
                                basename(wps_file) + suffix)
                    try:
                        logging.info("Uploading '%s' file" % oiring)
                        os.chmod(
                            oiring, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP
                            | stat.S_IWGRP | stat.S_IROTH)
                        copy_file(oiring, dest)
                    except:
                        raise JobError("'%s' has not copied" % oiring,
                                       Job.CodeError.COPY_UPLOAD_WPS)
                job_db.set_wps()

        # Change the directory to wrf run path
        os.chdir(params.wrf_run_path)

        ##
        # Start a thread to monitor wrf
        ##
        if params.parallel_wrf == 'yes':
            log_wrf = join(params.wrf_run_path, 'rsl.out.0000')
        else:
            log_wrf = join(params.log_path, 'wrf.log')
        worker = threading.Thread(target=wrf_monitor,
                                  args=(job_db, log_wrf, params))
        worker.setDaemon(True)
        worker.start()

        ##
        # Wipe WPS path
        ##
        if params.clean_after_run == 'yes':
            logging.info("Wiping '%s' directory " % params.wps_path)
            try:
                shutil.rmtree(params.wps_path)
            except:
                logging.info("Error wiping '%s' directory " % params.wps_path)

        ##
        # Run wrf
        ##
        logging.info("Run wrf")
        job_db.set_job_status(Job.Status.WRF)

        if params.parallel_wrf == 'yes':
            cmd = "%s wrf_launcher.sh %s" % (params.parallel_run, wrf_exe)
            code, output = exec_cmd(cmd)
            if isfile(log_wrf):
                wrf_rsl_path = join(params.log_path, 'rsl_wrf')
                os.mkdir(wrf_rsl_path)
                rsl_files = glob.glob(join(params.wrf_run_path, 'rsl.*'))
                for rsl_file in rsl_files:
                    shutil.copyfile(rsl_file,
                                    join(wrf_rsl_path, basename(rsl_file)))
        else:
            code, output = exec_cmd("wrf_launcher.sh %s > %s" %
                                    (wrf_exe, log_wrf))
        if code or not 'SUCCESS COMPLETE' in open(log_wrf, 'r').read():
            logging.info(output)
            raise JobError("'%s' has failed" % wrf_exe,
                           Job.CodeError.WRF_FAILED)
        else:
            logging.info("wrf has successfully finished")
        ##
        # Update current date
        ##
        current_date = get_current_date(log_wrf)
        if not current_date:
            current_date = params.chunk_rdate
        job_db.set_current_date(current_date)

        ##
        # Save all files
        ##
        clean_wrf_files(job_db, params, clean_all=True)

        ##
        # Wipe after run
        ##
        if ( params.parallel_real == 'yes' or params.parallel_wrf == 'yes' ) and \
           ( params.local_path != params.root_path ) and ( params.clean_after_run == 'yes' ) :
            logging.info("Wiping the directory '%s' on all worker nodes" %
                         params.local_path)
            code, output = exec_cmd(
                "%s rm -rf %s" %
                (params.parallel_run_pernode, params.local_path))
            if code:
                logging.info(output)
                logging.error(
                    "Error wiping the directory '%s' on worker nodes" %
                    params.local_path)
        ##
        # Update the status
        ##
        job_db.set_job_status(Job.Status.FINISHED)
        exit_code = 0
    except JobError as err:
        logging.error(err.msg)
        job_db.set_job_status(Job.Status.FAILED)
        exit_code = err.exit_code
    except:
        logging.error("Unexpected error", exc_info=1)
        job_db.set_job_status(Job.Status.FAILED)
        exit_code = 255
    finally:
        ##
        # Create a log bundle
        ##
        os.chdir(params.root_path)
        log_name = "log_%d_%d" % (params.nchunk, params.job_id)
        log_tar = log_name + '.tar.gz'
        try:
            logging.info("Create tar file for logs")
            tar = tarfile.open(log_tar, "w:gz")
            tar.add('log', arcname=log_name)
        finally:
            tar.close()
        # Copy to repository
        oring = join(params.root_path, log_tar)
        dest = join(params.log_rea_output_path, log_tar)
        copy_file(oring, dest)

        ##
        # Close the connection with the database
        ##
        job_db.set_exit_code(exit_code)
        job_db.close(params.root_path)
        sys.exit(exit_code)
Example #13
def clean_wrf_files(job_db, params, clean_all=False):
    """
    Postprocess wrfout files and copy files to the output path 
    """
    with lock:
        for patt in params.files_to_save:
            all_files_patt = glob.glob(join(params.wrf_run_path, patt + '*'))
            if not clean_all:
                if len(all_files_patt) >= (2 * params.max_dom):
                    all_files_patt.sort(key=os.path.getmtime)
                    files = all_files_patt[:params.max_dom]
                else:
                    continue
            else:
                files = all_files_patt
            for file in files:
                logging.info("Checking '%s' file" % file)
                file_name = basename(file)
                # End date extracted from wrfout metadata (if any)
                end_date_file = None
                if file_name == "wrfrst_d01_" + datetime2datewrf(
                        params.chunk_rdate):
                    # Skip the initial restart file
                    logging.info("Skipping initial restart file %s" %
                                 file_name)
                    continue
                elif "wrfout" in file_name and params.postprocessor:
                    code, output = exec_cmd("ncdump -v Times %s" % file_name)
                    if "WRF4G_postprocessor" in output:
                        logging.info("'%s' was already postprocessed" %
                                     file_name)
                        continue
                    try:
                        mo = re.findall(
                            "(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2})", output)
                        end_date_file = mo[-1]
                    except Exception as err:
                        logging.error(
                            "ERROR: Calculating wrfout_name_end_date %s" % err)
                        end_date_file = None
                    ##
                    # Execute postprocessor
                    ##
                    logging.info("Running postprocessor.%s" %
                                 params.postprocessor)

                    if not which("postprocessor.%s" % params.postprocessor):
                        raise JobError(
                            "Postprocessor '%s' does not exist" %
                            params.postprocessor,
                            Job.CodeError.POSTPROCESSOR_FAILED)
                    post_log = join(
                        params.log_path,
                        'postprocessor.%s.log' % params.postprocessor)
                    code, output = exec_cmd(
                        "postprocessor.%s %s &>> %s" %
                        (params.postprocessor, file_name, post_log))
                    if code:
                        logging.info(output)
                        raise JobError(
                            "Error processing '%s' file" % file_name,
                            Job.CodeError.POSTPROCESSOR_FAILED)
                    # Mark the file so later runs know it has been postprocessed
                    exec_cmd(
                        'ncatted -O -a WRF4G_postprocessor,global,o,c,"%s" %s'
                        % (params.postprocessor, file))
                elif "wrfrst" in file_name and "d01" in file_name:
                    restart_date = WRFFile(file_name).date_datetime()
                    logging.info("Setting restart date to '%s'" % restart_date)
                    job_db.set_restart_date(restart_date)
                ##
                # Uploading "wrfout", "wrfrst", "wrfzout", "wrfz2out", "wrfrain", "wrfxtrm", "wrf24hc" files
                ##
                if patt != "wrfrst" and params.wrfout_name_end_date == 'yes' and end_date_file:
                    dest_file = WRFFile(file_name,
                                        end_date_file).file_name_out_iso()
                else:
                    dest_file = WRFFile(file_name).file_name_iso()

                if patt == "wrfrst":
                    dest = join(params.rst_rea_output_path, dest_file)
                else:
                    dest = join(params.out_rea_output_path, dest_file)

                logging.info("Uploading file '%s'" % file)
                os.chmod(
                    file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP
                    | stat.S_IWGRP | stat.S_IROTH)
                try:
                    copy_file(file, dest)
                except:
                    logging.error("'%s' has not copied" % file)
                    time.sleep(10)
                    logging.info("Uploading file '%s' again" % file)
                    try:
                        copy_file(file, dest)
                    except:
                        raise JobError("'%s' has not copied" % file,
                                       Job.CodeError.COPY_OUTPUT_FILE)
                try:
                    os.remove(file)
                except:
                    pass