Example #1
def _read_single_namelist(lines: List[str], parser: Parser,
                          simple: bool) -> Namelist:
    """
    Read a namelist.

    * Simple parser. Assumes one array element per line.
        For example: `val%a(2)%b = value,`
    * Otherwise (or if the simple parser fails) it defaults
        to using f90nml to read it.

    Note that comment lines and blank lines have already been removed.
    """

    nml = None
    if simple:
        try:
            namelist_name = lines[0].lstrip("&").strip().lower()
            nml = Namelist({namelist_name: Namelist({})})
            for line in lines[1:]:
                d = line.split("=", 1)
                if len(d) == 1:
                    if d[0].strip() == "/":
                        break  # end of the namelist
                    else:
                        # something else - not valid
                        raise Exception("invalid line")
                else:  # len(d) == 2, since maxsplit=1
                    if d[0][0] in ("'", '"'):
                        # `=` found inside a string literal - not valid
                        raise Exception("invalid line")
                    else:
                        path = d[0].strip()

                        if ":" in path:
                            raise Exception(
                                "invalid line"
                            )  # can't read multiple entries at once - not valid
                        else:
                            # warning: it will still read lines like
                            # this: `a = 1,2,3` as a single string

                            # convert the string to a Python value:
                            value = _nml_value_to_python_value(
                                d[1].rstrip(", "))

                            # add this value to the namelist:
                            _pathSet(nml[namelist_name], path, value)

        except Exception:
            # the simple parser failed; fall back to f90nml below
            nml = None

    if nml is None:
        nml = parser.reads("\n".join(lines))  # f90nml 1.1 and above

    return nml
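
A minimal usage sketch (the input lines below are made up for illustration; `Parser` and `Namelist` come from f90nml):

# hypothetical namelist input, one array element per line as the
# simple parser expects
lines = ["&my_nml", "val%a(2)%b = 1.5,", "flag = .true.,", "/"]
nml = _read_single_namelist(lines, Parser(), simple=True)
# nml["my_nml"] is a nested Namelist built up via _pathSet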
Example #2
def read_namelist(filename: str,
                  *,
                  n_threads: int = 0,
                  parser: Parser = None,
                  simple: bool = True) -> Namelist:
    """
    Read a namelist quickly.

    For threaded use, set `n_threads` to the number of threads.
    """

    nml = Namelist({})

    def _loop_over_results(r):
        for key, value in r.items():
            if key in nml:
                # array of namelists:
                if isinstance(nml[key], list):
                    nml[key].append(value)
                else:
                    nml[key] = [nml[key], value]
            else:
                nml[key] = value

    if not parser:
        parser = Parser()

    namelists = _split_namelist_file(filename)
    results = []
    results_append = results.append  # local alias avoids attribute lookup in the loop

    if n_threads:
        n_threads = max(1, min(mp.cpu_count(), n_threads))
        pool = mp.Pool(processes=n_threads)
        pool_apply_async = pool.apply_async

        for lines in namelists:
            results_append(
                pool_apply_async(_read_single_namelist,
                                 (lines, parser, simple)))

        pool.close()
        pool.join()

        for r in results:
            _loop_over_results(r.get())
    else:
        for lines in namelists:
            results_append(_read_single_namelist(lines, parser, simple))
        for r in results:
            _loop_over_results(r)

    return nml
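
A hedged example of how this might be called (the filename is illustrative):

# serial parse
nml = read_namelist("input.nml")

# or fan the per-namelist parsing out over 4 worker processes
nml = read_namelist("input.nml", n_threads=4)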
Example #3
    def __init__(self,
                 name,
                 codebase,
                 safe_mode=False,
                 workbase=GFDL_WORK,
                 database=GFDL_DATA):
        super(Experiment, self).__init__()
        self.name = name
        self.codebase = codebase
        self.safe_mode = safe_mode

        # set the default locations of the working directory,
        # run directory, restart file storage, and
        # output data directory.
        self.workdir = P(workbase, 'experiment', self.name)
        self.rundir = P(
            self.workdir,
            'run')  # temporary area where an individual run is performed
        self.datadir = P(
            database,
            self.name)  # where run data will be moved upon completion
        self.restartdir = P(self.datadir,
                            'restarts')  # where restarts will be stored
        self.template_dir = P(_module_directory, 'templates')

        self.env_source = get_env_file()

        self.templates = Environment(
            loader=FileSystemLoader(self.template_dir))

        self.diag_table = DiagTable()
        self.field_table_file = P(self.codebase.srcdir, 'extra', 'model',
                                  self.codebase.name, 'field_table')
        self.inputfiles = []

        self.namelist = Namelist()
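
Assuming a codebase object as used elsewhere in this module, construction might look like this (names are illustrative):

exp = Experiment("my_experiment", codebase=cb)
# directories are derived from GFDL_WORK / GFDL_DATA:
#   exp.rundir     -> <GFDL_WORK>/experiment/my_experiment/run
#   exp.restartdir -> <GFDL_DATA>/my_experiment/restarts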
Example #4
def _pathGet(dictionary: dict,
             path: str,
             sep: str = "%") -> Union[_nml_types, dict, list]:
    """
    Returns an item in a dictionary given the namelist path string.
    Assumes the input path uses Fortran-style 1-based indexing of arrays
    """

    for item in path.split(sep):
        i, arrayname = _get_array_index(item)
        if i is not None:
            # it is an array element:
            # create the array if it isn't there yet
            if arrayname not in dictionary:
                dictionary[arrayname] = [None]
            d = dictionary[arrayname]
            lenx = len(d)
            if lenx < i:
                # have to add this element
                for j in range(lenx, i):
                    d.append(None)

            # make sure it's a dict:
            if not isinstance(d[i - 1], dict):
                d[i - 1] = Namelist({})
            dictionary = d[i - 1]
        else:
            # it is just a normal variable:
            # make sure it's a dict first
            if not isinstance(dictionary, dict):
                dictionary = Namelist({})
            if item not in dictionary:
                dictionary[item] = Namelist({})
            dictionary = dictionary[item]

    return dictionary
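
A short sketch of the path semantics, assuming `_get_array_index` parses a trailing `(i)` subscript as a Fortran-style 1-based index:

nml = Namelist({})
leaf = _pathGet(nml, "val%a(2)%b")
# nml is now {'val': {'a': [None, {'b': {}}]}} and `leaf` is the
# innermost (empty) Namelist, ready to be populated by the caller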
Example #5
def create_inpFile(*inpFiles: InpFile) -> InpFile:
    """Merge one or more InpFiles; later files override earlier ones."""
    if not inpFiles:
        # the original fell through to `return base_inp` unbound here
        raise ValueError("at least one InpFile is required")

    base_inp = copy_inpFile(inpFiles[0])

    for inpfile in inpFiles[1:]:
        for group in inpfile.nml.keys():
            for key in inpfile.nml[group].keys():
                value = inpfile.nml[group][key]

                if key in inpfile.nml[group].start_index:
                    # an indexed array: merge via setlist() so the
                    # Fortran start index is preserved
                    start_index = inpfile.nml[group].start_index[key][0]
                    value = list(value)
                    base_inp.setlist(group,
                                     key,
                                     value,
                                     start_index=start_index)
                else:
                    if group not in base_inp.nml:
                        base_inp.nml[group] = Namelist()
                    base_inp.nml[group][key] = value
    return base_inp
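
A hedged usage sketch, assuming `base` and `override` are already-constructed InpFile objects:

merged = create_inpFile(base, override)
# plain keys from `override` win; indexed arrays are merged through
# setlist(), preserving each variable's Fortran start_index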
Example #6
class Experiment(Logger, EventEmitter):
    """A basic GFDL experiment"""

    RESOLUTIONS = {
        'T170': {
            'lon_max': 512,
            'lat_max': 256,
            'num_fourier': 170,
            'num_spherical': 171
        },
        'T85': {
            'lon_max': 256,
            'lat_max': 128,
            'num_fourier': 85,
            'num_spherical': 86
        },
        'T42': {
            'lon_max': 128,
            'lat_max': 64,
            'num_fourier': 42,
            'num_spherical': 43,
        },
        'T21': {
            'lon_max': 64,
            'lat_max': 32,
            'num_fourier': 21,
            'num_spherical': 22,
        },
    }

    runfmt = 'run%04d'
    restartfmt = 'res%04d.tar.gz'

    def __init__(self,
                 name,
                 codebase,
                 safe_mode=False,
                 workbase=GFDL_WORK,
                 database=GFDL_DATA):
        super(Experiment, self).__init__()
        self.name = name
        self.codebase = codebase
        self.safe_mode = safe_mode

        # set the default locations of the working directory,
        # run directory, restart file storage, and
        # output data directory.
        self.workdir = P(workbase, 'experiment', self.name)
        self.rundir = P(
            self.workdir,
            'run')  # temporary area where an individual run is performed
        self.datadir = P(
            database,
            self.name)  # where run data will be moved upon completion
        self.restartdir = P(self.datadir,
                            'restarts')  # where restarts will be stored
        self.template_dir = P(_module_directory, 'templates')

        self.env_source = get_env_file()

        self.templates = Environment(
            loader=FileSystemLoader(self.template_dir))

        self.diag_table = DiagTable()
        self.field_table_file = P(self.codebase.srcdir, 'extra', 'model',
                                  self.codebase.name, 'field_table')
        self.inputfiles = []

        self.namelist = Namelist()

    @destructive
    def rm_workdir(self):
        try:
            sh.rm(['-r', self.workdir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove working directory but it doesn't exist")

    @destructive
    def rm_datadir(self):
        try:
            sh.rm(['-r', self.datadir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove data directory but it doesn't exist")

    @destructive
    @useworkdir
    def clear_workdir(self):
        self.rm_workdir()
        mkdir(self.workdir)
        self.log.info('Emptied working directory %r' % self.workdir)

    @destructive
    @useworkdir
    def clear_rundir(self):
        #sh.cd(self.workdir)
        try:
            sh.rm(['-r', self.rundir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove run directory but it doesn't exist")
        mkdir(self.rundir)
        self.log.info('Emptied run directory %r' % self.rundir)

    def get_restart_file(self, i):
        return P(self.restartdir, self.restartfmt % i)

    def get_outputdir(self, run):
        return P(self.datadir, self.runfmt % run)

    def set_resolution(self, res, num_levels=None):
        """Set the resolution of the model, based on the standard triangular
        truncations of the spectral core.  For example,
            exp.set_resolution('T85', 25)
        creates a spectral core with enough modes to natively correspond to
        a 256x128 lon-lat resolution."""
        delta = self.RESOLUTIONS[res]
        if num_levels is not None:
            delta['num_levels'] = num_levels
        self.update_namelist({'spectral_dynamics_nml': delta})

    def update_namelist(self, new_vals):
        """Update the namelist sections, overwriting existing values."""
        for sec in new_vals:
            if sec not in self.namelist:
                self.namelist[sec] = {}
            nml = self.namelist[sec]
            nml.update(new_vals[sec])

    def write_namelist(self, outdir):
        namelist_file = P(outdir, 'input.nml')
        self.log.info('Writing namelist to %r' % namelist_file)
        self.namelist.write(namelist_file)

    def write_diag_table(self, outdir):
        outfile = P(outdir, 'diag_table')
        self.log.info('Writing diag_table to %r' % outfile)
        if self.diag_table.is_valid():
            if self.diag_table.calendar is None:
                # diagnose the calendar from the namelist
                cal = self.get_calendar()
                self.diag_table.calendar = cal
            self.diag_table.write(outfile)
        else:
            self.log.error(
                "No output files defined in the DiagTable. Stopping.")
            raise ValueError()

    def write_field_table(self, outdir):
        self.log.info('Writing field_table to %r' % P(outdir, 'field_table'))
        sh.cp(self.field_table_file, P(outdir, 'field_table'))

    def log_output(self, outputstring):
        line = outputstring.strip()
        if 'warning' in line.lower():
            self.log.warning(line)
        else:
            self.log.debug(line)
        #return clean_log_debug(outputstring)

    def delete_restart(self, run):
        resfile = self.get_restart_file(run)
        if os.path.isfile(resfile):
            sh.rm(resfile)
            self.log.info('Deleted restart file %s' % resfile)

    def get_calendar(self):
        """Get the value of 'main_nml/calendar.
        Returns a string name of calendar, or None if not set in namelist.'"""
        if 'main_nml' in self.namelist:
            return self.namelist['main_nml'].get('calendar')
        else:
            return None

    @destructive
    @useworkdir
    def run(self,
            i,
            restart_file=None,
            use_restart=True,
            multi_node=False,
            num_cores=8,
            overwrite_data=False,
            save_run=False,
            run_idb=False,
            nice_score=0):
        """Run the model.
            `num_cores`: Number of mpi cores to distribute over.
            `restart_file` (optional): A path to a valid restart archive.  If None and `use_restart=True`,
                                       restart file (i-1) will be used.
            `save_run`:  If True, copy the entire working directory over to GFDL_DATA
                         so that the run can rerun without the python script.
                         (This uses a lot of data storage!)

        """

        self.clear_rundir()

        indir = P(self.rundir, 'INPUT')
        outdir = P(self.datadir, self.runfmt % i)
        resdir = P(self.rundir, 'RESTART')

        if os.path.isdir(outdir):
            if overwrite_data:
                self.log.warning(
                    'Data for run %d already exists and overwrite_data is True. Overwriting.'
                    % i)
                sh.rm('-r', outdir)
            else:
                self.log.warning(
                    'Data for run %d already exists but overwrite_data is False. Stopping.'
                    % i)
                return False

        # make the output run folder and copy over the input files
        mkdir([indir, resdir, self.restartdir])

        self.codebase.write_source_control_status(
            P(self.rundir, 'git_hash_used.txt'))
        self.write_namelist(self.rundir)
        self.write_field_table(self.rundir)
        self.write_diag_table(self.rundir)

        for filename in self.inputfiles:
            sh.cp([filename, P(indir, os.path.split(filename)[1])])

        mpirun_opts = ''

        if multi_node:
            mpirun_opts += ' -bootstrap pbsdsh -f $PBS_NODEFILE'

        if use_restart:
            if not restart_file:
                # get the restart from previous iteration
                restart_file = self.get_restart_file(i - 1)
            if not os.path.isfile(restart_file):
                self.log.error('Restart file not found, expecting file %r' %
                               restart_file)
                raise IOError('Restart file not found, expecting file %r' %
                              restart_file)
            else:
                self.log.info('Using restart file %r' % restart_file)

            self.extract_restart_archive(restart_file, indir)
        else:
            self.log.info('Running without restart file')
            restart_file = None

        template_vars = {
            'rundir': self.rundir,
            'execdir': self.codebase.builddir,
            'executable': self.codebase.executable_name,
            'env_source': self.env_source,
            'mpirun_opts': mpirun_opts,
            'num_cores': num_cores,
            'run_idb': run_idb,
            'nice_score': nice_score
        }

        runscript = self.templates.get_template('run.sh')

        # render the template into the runscript
        runscript.stream(**template_vars).dump(P(self.rundir, 'run.sh'))

        def _outhandler(line):
            handled = self.emit('run:output', self, line)
            if not handled:  # only log the output when no event handler is used
                self.log_output(line)

        self.emit('run:ready', self, i)
        self.log.info("Beginning run %d" % i)
        try:
            #for line in sh.bash(P(self.rundir, 'run.sh'), _iter=True, _err_to_out=True):
            proc = sh.bash(P(self.rundir, 'run.sh'),
                           _bg=True,
                           _out=_outhandler,
                           _err_to_out=True)
            self.log.info('process running as {}'.format(proc.process.pid))
            proc.wait()
            completed = True
        except KeyboardInterrupt as e:
            self.log.error("Manual interrupt, killing process.")
            proc.process.terminate()
            proc.wait()
            #log.info("Cleaning run directory.")
            #self.clear_rundir()
            raise e
        except sh.ErrorReturnCode as e:
            completed = False
            self.log.error("Run %d failed. See log for details." % i)
            self.log.error("Error: %r" % e)
            self.emit('run:failed', self)
            raise FailedRunError()

        self.emit('run:completed', self, i)
        self.log.info('Run %d complete' % i)
        mkdir(outdir)

        if num_cores > 1:
            # use postprocessing tool to combine the output from several cores
            codebase_combine_script = P(self.codebase.builddir,
                                        'mppnccombine_run.sh')
            if not os.path.exists(codebase_combine_script):
                self.log.warning(
                    'combine script does not exist in the commit you are '
                    'running Isca from. Falling back to the $GFDL_BASE '
                    'mppnccombine_run.sh script')
                sh.ln('-s',
                      P(GFDL_BASE, 'postprocessing', 'mppnccombine_run.sh'),
                      codebase_combine_script)
            combinetool = sh.Command(codebase_combine_script)
            for file in self.diag_table.files:
                netcdf_file = '%s.nc' % file
                filebase = P(self.rundir, netcdf_file)
                combinetool(self.codebase.builddir, filebase)
                # copy the combined netcdf file into the data archive directory
                sh.cp(filebase, P(outdir, netcdf_file))
                # remove all netcdf fragments from the run directory
                sh.rm(glob.glob(filebase + '*'))
                self.log.debug('%s combined and copied to data directory' %
                               netcdf_file)

            for restart in glob.glob(P(resdir, '*.res.nc.0000')):
                restartfile = restart.replace('.0000', '')
                combinetool(self.codebase.builddir, restartfile)
                sh.rm(glob.glob(restartfile + '.????'))
                self.log.debug("Restart file %s combined" % restartfile)

            self.emit('run:combined', self)

        # make the restart archive and delete the restart files
        self.make_restart_archive(self.get_restart_file(i), resdir)
        sh.rm('-r', resdir)

        if save_run:
            # copy the complete run directory to GFDL_DATA so that the run can
            # be recreated without the python script if required
            mkdir(resdir)
            sh.cp(['-a', self.rundir, outdir])
        else:
            # just save some useful diagnostic information
            self.write_namelist(outdir)
            self.write_field_table(outdir)
            self.write_diag_table(outdir)
            self.codebase.write_source_control_status(
                P(outdir, 'git_hash_used.txt'))

        self.clear_rundir()

        return True

    def make_restart_archive(self, archive_file, restart_directory):
        with tarfile.open(archive_file, 'w:gz') as tar:
            tar.add(restart_directory, arcname='.')
        self.log.info("Restart archive created at %s" % archive_file)

    def extract_restart_archive(self, archive_file, input_directory):
        with tarfile.open(archive_file, 'r:gz') as tar:
            tar.extractall(path=input_directory)
        self.log.info("Restart %s extracted to %s" %
                      (archive_file, input_directory))

    def derive(self, new_experiment_name):
        """Derive a new experiment based on this one."""
        new_exp = Experiment(new_experiment_name, self.codebase)
        new_exp.namelist = self.namelist.copy()
        new_exp.diag_table = self.diag_table.copy()
        new_exp.inputfiles = self.inputfiles[:]

        return new_exp
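
To illustrate the intended call pattern, a hedged driver-loop sketch (the experiment name, resolution, calendar value, and core count are illustrative, not defaults):

exp = Experiment("held_suarez", codebase=cb)
exp.set_resolution("T42", 25)  # 128x64 lon-lat grid, 25 levels
exp.update_namelist({"main_nml": {"calendar": "thirty_day"}})

# the first run spins up without a restart; subsequent runs chain
# restart (i-1) -> run i automatically
exp.run(1, use_restart=False, num_cores=8)
for i in range(2, 13):
    exp.run(i, num_cores=8)

# a variant experiment can then be branched off with derive()
perturbed = exp.derive("held_suarez_perturbed")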