def _read_single_namelist(lines: List[str], parser: Parser,
                          simple: bool) -> Namelist:
    """
    Read a namelist.

    * Simple parser. Assumes one array element per line.
      For example: `val%a(2)%b = value,`
    * Otherwise (or if the simple parser fails) it defaults
      to using f90nml to read it.

    Note that comment lines and blank lines have already been removed.
    """

    nml = None
    if simple:
        try:
            namelist_name = lines[0].lstrip("&").strip().lower()
            nml = Namelist({namelist_name: Namelist({})})
            for line in lines[1:]:
                d = line.split("=", 1)
                if len(d) == 1:
                    if d[0].strip() == "/":
                        break  # end of the namelist
                    else:
                        # something else - not valid
                        raise Exception("invalid line")
                elif len(d) >= 2:
                    if d[0][0] == "'" or d[0][0] == '"':
                        # = in a string - not valid
                        raise Exception("invalid line")
                    else:
                        path = d[0].strip()
                        if ":" in path:
                            # can't read multiple entries at once - not valid
                            raise Exception("invalid line")
                        else:
                            # warning: it will still read lines like
                            # this: `a = 1,2,3` as a single string

                            # convert the string to a Python value:
                            value = _nml_value_to_python_value(
                                d[1].rstrip(", "))

                            # add this value to the namelist:
                            _pathSet(nml[namelist_name], path, value)

        except Exception:
            nml = None

    if nml is None:
        nml = parser.reads("\n".join(lines))  # f90nml 1.1 and above

    return nml
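

# A minimal usage sketch of the simple parser path above. The group name,
# variable names, and values are illustrative only; Parser and Namelist are
# assumed to be the f90nml classes imported by this module.
def _example_read_single_namelist():
    lines = [
        "&main_nml",
        "calendar = 'thirty_day',",
        "days = 30,",
        "/",
    ]
    # comment and blank lines must already have been removed, as noted above
    return _read_single_namelist(lines, Parser(), simple=True)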
def read_namelist(filename: str,
                  *,
                  n_threads: int = 0,
                  parser: Parser = None,
                  simple: bool = True) -> Namelist:
    """
    Read a namelist quickly.

    For threaded use, set `n_threads` to the number of threads.
    """

    nml = Namelist({})

    def _loop_over_results(r):
        for key, value in r.items():
            if key in nml:
                # array of namelists:
                if isinstance(nml[key], list):
                    nml[key].append(value)
                else:
                    nml[key] = [nml[key], value]
            else:
                nml[key] = value

    if not parser:
        parser = Parser()

    namelists = _split_namelist_file(filename)

    results = list()
    results_append = results.append

    if n_threads:
        n_threads = max(1, min(mp.cpu_count(), n_threads))
        pool = mp.Pool(processes=n_threads)
        pool_apply_async = pool.apply_async
        for lines in namelists:
            results_append(
                pool_apply_async(_read_single_namelist,
                                 (lines, parser, simple)))
        pool.close()
        pool.join()
        for r in results:
            _loop_over_results(r.get())
    else:
        for lines in namelists:
            results_append(_read_single_namelist(lines, parser, simple))
        for r in results:
            _loop_over_results(r)

    return nml
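

# Usage sketch for the reader above: read the same file serially and with a
# worker pool. The filename "input.nml" is illustrative only.
def _example_read_namelist():
    nml_serial = read_namelist("input.nml")
    nml_parallel = read_namelist("input.nml", n_threads=4)
    assert nml_serial == nml_parallel  # both code paths should agree
    return nml_parallel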
def _pathGet(dictionary: dict, path: str,
             sep: str = "%") -> Union[_nml_types, dict, list]:
    """
    Returns an item in a dictionary given the namelist path string.

    Assumes the input path uses Fortran-style 1-based indexing of arrays.
    """

    for item in path.split(sep):
        i, arrayname = _get_array_index(item)
        if i is not None:
            # it is an array element:
            # create this item since it isn't there
            if arrayname not in dictionary:
                dictionary[arrayname] = [None]
            d = dictionary[arrayname]
            lenx = len(d)
            if lenx < i:
                # have to add this element
                for j in range(lenx, i):
                    d.append(None)
            # make sure it's a dict:
            if not isinstance(d[i - 1], dict):
                d[i - 1] = Namelist({})
            dictionary = d[i - 1]
        else:
            # it is just a normal variable:
            # make sure it's a dict first
            if not isinstance(dictionary, dict):
                dictionary = Namelist({})
            if item not in dictionary:
                dictionary[item] = Namelist({})
            dictionary = dictionary[item]

    return dictionary
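

# Illustrative sketch of how _pathGet walks (and creates) nested entries for
# a Fortran-style path such as "val%a(2)%b", using 1-based array indexing.
def _example_pathGet():
    nml = Namelist({})
    leaf = _pathGet(nml, "val%a(2)%b")
    # nml now holds nested Namelists equivalent to
    # {'val': {'a': [None, {'b': {}}]}}, and `leaf` is the (empty)
    # Namelist stored at nml['val']['a'][1]['b'].
    return nml, leaf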
def create_inpFile(*inpFiles: InpFile) -> InpFile:
    if len(inpFiles) > 0:
        base_inp = copy_inpFile(inpFiles[0])
        for inpfile in inpFiles[1:]:
            for group in inpfile.nml.keys():
                for key in inpfile.nml[group].keys():
                    value = inpfile.nml[group][key]
                    if key in inpfile.nml[group].start_index:
                        start_index = inpfile.nml[group].start_index[key][0]
                        value = list(value)
                        base_inp.setlist(group, key, value,
                                         start_index=start_index)
                    else:
                        if group not in base_inp.nml:
                            base_inp.nml[group] = Namelist()
                        base_inp.nml[group][key] = value
        return base_inp
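

# Usage sketch for the merge above, assuming `defaults` and `overrides` are
# InpFile objects (e.g. read from existing input files). Later arguments
# override earlier ones, group by group and key by key.
def _example_create_inpFile(defaults, overrides):
    merged = create_inpFile(defaults, overrides)
    return merged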
class Experiment(Logger, EventEmitter):
    """A basic GFDL experiment"""

    RESOLUTIONS = {
        'T170': {
            'lon_max': 512,
            'lat_max': 256,
            'num_fourier': 170,
            'num_spherical': 171
        },
        'T85': {
            'lon_max': 256,
            'lat_max': 128,
            'num_fourier': 85,
            'num_spherical': 86
        },
        'T42': {
            'lon_max': 128,
            'lat_max': 64,
            'num_fourier': 42,
            'num_spherical': 43,
        },
        'T21': {
            'lon_max': 64,
            'lat_max': 32,
            'num_fourier': 21,
            'num_spherical': 22,
        },
    }

    runfmt = 'run%04d'
    restartfmt = 'res%04d.tar.gz'

    def __init__(self,
                 name,
                 codebase,
                 safe_mode=False,
                 workbase=GFDL_WORK,
                 database=GFDL_DATA):
        super(Experiment, self).__init__()
        self.name = name
        self.codebase = codebase
        self.safe_mode = safe_mode

        # set the default locations of working directory,
        # executable directory, restart file storage, and
        # output data directory.
        self.workdir = P(workbase, 'experiment', self.name)
        # temporary area an individual run will be performed in
        self.rundir = P(self.workdir, 'run')
        # where run data will be moved to upon completion
        self.datadir = P(database, self.name)
        # where restarts will be stored
        self.restartdir = P(self.datadir, 'restarts')

        self.template_dir = P(_module_directory, 'templates')
        self.env_source = get_env_file()
        self.templates = Environment(
            loader=FileSystemLoader(self.template_dir))

        self.diag_table = DiagTable()
        self.field_table_file = P(self.codebase.srcdir, 'extra', 'model',
                                  self.codebase.name, 'field_table')
        self.inputfiles = []

        self.namelist = Namelist()

    @destructive
    def rm_workdir(self):
        try:
            sh.rm(['-r', self.workdir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove working directory but it doesn't exist")

    @destructive
    def rm_datadir(self):
        try:
            sh.rm(['-r', self.datadir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove data directory but it doesn't exist")

    @destructive
    @useworkdir
    def clear_workdir(self):
        self.rm_workdir()
        mkdir(self.workdir)
        self.log.info('Emptied working directory %r' % self.workdir)

    @destructive
    @useworkdir
    def clear_rundir(self):
        #sh.cd(self.workdir)
        try:
            sh.rm(['-r', self.rundir])
        except sh.ErrorReturnCode:
            self.log.warning(
                "Tried to remove run directory but it doesn't exist")
        mkdir(self.rundir)
        self.log.info('Emptied run directory %r' % self.rundir)

    def get_restart_file(self, i):
        return P(self.restartdir, self.restartfmt % i)

    def get_outputdir(self, run):
        return P(self.datadir, self.runfmt % run)

    def set_resolution(self, res, num_levels=None):
        """Set the resolution of the model, based on the standard
        triangular truncations of the spectral core.

        For example, exp.set_resolution('T85', 25) creates a spectral core
        with enough modes to natively correspond to a 256x128 lon-lat
        resolution."""
        delta = self.RESOLUTIONS[res]
        if num_levels is not None:
            delta['num_levels'] = num_levels
        self.update_namelist({'spectral_dynamics_nml': delta})

    def update_namelist(self, new_vals):
        """Update the namelist sections, overwriting existing values."""
        for sec in new_vals:
            if sec not in self.namelist:
                self.namelist[sec] = {}
            nml = self.namelist[sec]
            nml.update(new_vals[sec])

    def write_namelist(self, outdir):
        namelist_file = P(outdir, 'input.nml')
        self.log.info('Writing namelist to %r' % namelist_file)
        self.namelist.write(namelist_file)

    def write_diag_table(self, outdir):
        outfile = P(outdir, 'diag_table')
        self.log.info('Writing diag_table to %r' % outfile)
        if self.diag_table.is_valid():
            if self.diag_table.calendar is None:
                # diagnose the calendar from the namelist
                cal = self.get_calendar()
                self.diag_table.calendar = cal
            self.diag_table.write(outfile)
        else:
            self.log.error(
                "No output files defined in the DiagTable. Stopping.")
            raise ValueError()

    def write_field_table(self, outdir):
        self.log.info('Writing field_table to %r' % P(outdir, 'field_table'))
        sh.cp(self.field_table_file, P(outdir, 'field_table'))

    def log_output(self, outputstring):
        line = outputstring.strip()
        if 'warning' in line.lower():
            self.log.warn(line)
        else:
            self.log.debug(line)
        #return clean_log_debug(outputstring)

    def delete_restart(self, run):
        resfile = self.get_restart_file(run)
        if os.path.isfile(resfile):
            sh.rm(resfile)
            self.log.info('Deleted restart file %s' % resfile)

    def get_calendar(self):
        """Get the value of 'main_nml/calendar'. Returns a string name of
        the calendar, or None if not set in the namelist."""
        if 'main_nml' in self.namelist:
            return self.namelist['main_nml'].get('calendar')
        else:
            return None

    @destructive
    @useworkdir
    def run(self,
            i,
            restart_file=None,
            use_restart=True,
            multi_node=False,
            num_cores=8,
            overwrite_data=False,
            save_run=False,
            run_idb=False,
            nice_score=0):
        """Run the model.

        `num_cores`: Number of mpi cores to distribute over.

        `restart_file` (optional): A path to a valid restart archive.
        If None and `use_restart=True`, restart file (i-1) will be used.

        `save_run`: If True, copy the entire working directory over to
        GFDL_DATA so that the run can be rerun without the python script.
        (This uses a lot of data storage!)
        """
        self.clear_rundir()

        indir = P(self.rundir, 'INPUT')
        outdir = P(self.datadir, self.runfmt % i)
        resdir = P(self.rundir, 'RESTART')

        if os.path.isdir(outdir):
            if overwrite_data:
                self.log.warning(
                    'Data for run %d already exists and overwrite_data is True. Overwriting.'
                    % i)
                sh.rm('-r', outdir)
            else:
                self.log.warn(
                    'Data for run %d already exists but overwrite_data is False. Stopping.'
                    % i)
                return False

        # make the output run folder and copy over the input files
        mkdir([indir, resdir, self.restartdir])

        self.codebase.write_source_control_status(
            P(self.rundir, 'git_hash_used.txt'))
        self.write_namelist(self.rundir)
        self.write_field_table(self.rundir)
        self.write_diag_table(self.rundir)

        for filename in self.inputfiles:
            sh.cp([filename, P(indir, os.path.split(filename)[1])])

        mpirun_opts = ''
        if multi_node:
            mpirun_opts += ' -bootstrap pbsdsh -f $PBS_NODEFILE'

        if use_restart:
            if not restart_file:
                # get the restart from previous iteration
                restart_file = self.get_restart_file(i - 1)
            if not os.path.isfile(restart_file):
                self.log.error('Restart file not found, expecting file %r' %
                               restart_file)
                raise IOError('Restart file not found, expecting file %r' %
                              restart_file)
            else:
                self.log.info('Using restart file %r' % restart_file)
                self.extract_restart_archive(restart_file, indir)
        else:
            self.log.info('Running without restart file')
            restart_file = None

        vars = {
            'rundir': self.rundir,
            'execdir': self.codebase.builddir,
            'executable': self.codebase.executable_name,
            'env_source': self.env_source,
            'mpirun_opts': mpirun_opts,
            'num_cores': num_cores,
            'run_idb': run_idb,
            'nice_score': nice_score
        }

        runscript = self.templates.get_template('run.sh')

        # employ the template to create a runscript
        t = runscript.stream(**vars).dump(P(self.rundir, 'run.sh'))

        def _outhandler(line):
            handled = self.emit('run:output', self, line)
            if not handled:
                # only log the output when no event handler is used
                self.log_output(line)

        self.emit('run:ready', self, i)
        self.log.info("Beginning run %d" % i)
        try:
            #for line in sh.bash(P(self.rundir, 'run.sh'), _iter=True, _err_to_out=True):
            proc = sh.bash(
                P(self.rundir, 'run.sh'),
                _bg=True,
                _out=_outhandler,
                _err_to_out=True)
            self.log.info('process running as {}'.format(proc.process.pid))
            proc.wait()
            completed = True
        except KeyboardInterrupt as e:
            self.log.error("Manual interrupt, killing process.")
            proc.process.terminate()
            proc.wait()
            #log.info("Cleaning run directory.")
            #self.clear_rundir()
            raise e
        except sh.ErrorReturnCode as e:
            completed = False
            self.log.error("Run %d failed. See log for details." % i)
            self.log.error("Error: %r" % e)
            self.emit('run:failed', self)
            raise FailedRunError()

        self.emit('run:completed', self, i)
        self.log.info('Run %d complete' % i)
        mkdir(outdir)

        if num_cores > 1:
            # use postprocessing tool to combine the output from several cores
            codebase_combine_script = P(self.codebase.builddir,
                                        'mppnccombine_run.sh')
            if not os.path.exists(codebase_combine_script):
                self.log.warning(
                    'combine script does not exist in the commit you are '
                    'running Isca from. Falling back to using $GFDL_BASE '
                    'mppnccombine_run.sh script')
                sh.ln('-s',
                      P(GFDL_BASE, 'postprocessing', 'mppnccombine_run.sh'),
                      codebase_combine_script)
            combinetool = sh.Command(codebase_combine_script)
            for file in self.diag_table.files:
                netcdf_file = '%s.nc' % file
                filebase = P(self.rundir, netcdf_file)
                combinetool(self.codebase.builddir, filebase)
                # copy the combined netcdf file into the data archive directory
                sh.cp(filebase, P(outdir, netcdf_file))
                # remove all netcdf fragments from the run directory
                sh.rm(glob.glob(filebase + '*'))
                self.log.debug('%s combined and copied to data directory' %
                               netcdf_file)

            for restart in glob.glob(P(resdir, '*.res.nc.0000')):
                restartfile = restart.replace('.0000', '')
                combinetool(self.codebase.builddir, restartfile)
                sh.rm(glob.glob(restartfile + '.????'))
                self.log.debug("Restart file %s combined" % restartfile)

            self.emit('run:combined', self)

        # make the restart archive and delete the restart files
        self.make_restart_archive(self.get_restart_file(i), resdir)
        sh.rm('-r', resdir)

        if save_run:
            # copy the complete run directory to GFDL_DATA so that the run can
            # be recreated without the python script if required
            mkdir(resdir)
            sh.cp(['-a', self.rundir, outdir])
        else:
            # just save some useful diagnostic information
            self.write_namelist(outdir)
            self.write_field_table(outdir)
            self.write_diag_table(outdir)
            self.codebase.write_source_control_status(
                P(outdir, 'git_hash_used.txt'))

        self.clear_rundir()

        return True

    def make_restart_archive(self, archive_file, restart_directory):
        with tarfile.open(archive_file, 'w:gz') as tar:
            tar.add(restart_directory, arcname='.')
        self.log.info("Restart archive created at %s" % archive_file)

    def extract_restart_archive(self, archive_file, input_directory):
        with tarfile.open(archive_file, 'r:gz') as tar:
            tar.extractall(path=input_directory)
        self.log.info("Restart %s extracted to %s" % (archive_file,
                                                      input_directory))

    def derive(self, new_experiment_name):
        """Derive a new experiment based on this one."""
        new_exp = Experiment(new_experiment_name, self.codebase)
        new_exp.namelist = self.namelist.copy()
        new_exp.diag_table = self.diag_table.copy()
        new_exp.inputfiles = self.inputfiles[:]
        return new_exp
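

# Usage sketch of the Experiment API above. `codebase` is assumed to be an
# already-compiled codebase object, and the experiment name, resolution and
# namelist values are illustrative only. Note that write_diag_table() above
# requires exp.diag_table to define at least one output file before run()
# will succeed; building the DiagTable is outside this sketch.
def _example_experiment_workflow(codebase, diag_table):
    exp = Experiment('example_experiment', codebase)
    exp.diag_table = diag_table
    exp.set_resolution('T42', 25)
    exp.update_namelist({'main_nml': {'calendar': 'thirty_day'}})
    exp.run(1, use_restart=False, num_cores=8)  # cold start
    for i in range(2, 5):
        exp.run(i, num_cores=8)  # each run restarts from run i-1
    return exp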