def test_write_run_directory_does_not_mutate_config(config_update):
    """Check that writing a run directory leaves the passed config unchanged.

    The base configuration from ``c12_config()`` is merged with
    ``config_update`` and a deep copy is taken before the write, so any
    in-place modification made while writing shows up as an inequality.
    """
    candidate = update_recursive(c12_config(), config_update)
    snapshot = copy.deepcopy(candidate)
    with tempfile.TemporaryDirectory() as scratch_dir:
        fv3config.write_run_directory(candidate, scratch_dir)
    assert candidate == snapshot
def test_restart_directory_exists_and_empty(self):
    """A freshly written run directory has a RESTART folder with no entries."""
    run_path = self.make_run_directory("test_rundir")
    restart_path = os.path.join(run_path, "RESTART")
    fv3config.write_run_directory(DEFAULT_CONFIG.copy(), run_path)
    self.assertTrue(os.path.isdir(restart_path))
    restart_entries = os.listdir(restart_path)
    self.assertEqual(len(restart_entries), 0)
def run_segment(config: dict, rundir: str):
    """Write the run directory for ``config`` and run one segment under MPI.

    Steps performed:

    1. ``fv3config.write_run_directory(config, rundir)`` creates the run
       directory.
    2. Inside ``rundir`` the paths returned by ``find(".")`` are recorded, one
       per line, in ``preexisting_files.txt``.
    3. ``runfile`` is launched through ``mpirun`` / ``mpi4py`` with
       ``layout_x * layout_y * 6`` processes, where the layout comes from
       ``config["namelist"]["fv_core_nml"]["layout"]``.
    4. The combined stdout/stderr of the process is echoed to ``sys.stdout``
       and written to ``logs.txt`` as it arrives.

    Args:
        config: configuration dictionary; must contain
            ``namelist.fv_core_nml.layout`` as a two-element sequence.
        rundir: path of the run directory to create and run in.

    Returns:
        The exit status returned by ``process.wait()``.

    Raises:
        UnicodeDecodeError: if the process output is not valid UTF-8.
    """
    # Function-scope import so this block does not depend on the file header.
    import codecs

    fv3config.write_run_directory(config, rundir)
    # NOTE(review): ``cwd`` is presumably a context manager that switches the
    # working directory to ``rundir`` -- confirm against its definition.
    with cwd(rundir):
        manifest = find(".")
        with open("preexisting_files.txt", "w") as f:
            for path in manifest:
                print(path, file=f)

        x, y = config["namelist"]["fv_core_nml"]["layout"]
        # NOTE(review): the factor 6 is presumably the number of cubed-sphere
        # tiles -- confirm.
        nprocs = x * y * 6

        # The output is read in fixed 32-byte chunks, so a multi-byte UTF-8
        # character can be split across two reads. An incremental decoder
        # buffers the partial bytes instead of raising UnicodeDecodeError.
        decoder = codecs.getincrementaldecoder("utf-8")()
        with open("logs.txt", "w") as f:
            process = subprocess.Popen(
                [
                    "mpirun",
                    "-n",
                    str(nprocs),
                    sys.executable,
                    "-m",
                    "mpi4py",
                    runfile.absolute().as_posix(),
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            for chunk in iter(lambda: process.stdout.read(32), b""):  # type: ignore
                text = decoder.decode(chunk)
                sys.stdout.write(text)
                f.write(text)
            # Flush the decoder; this raises if the stream ended in the
            # middle of a character, matching the original strict decoding.
            tail = decoder.decode(b"", final=True)
            sys.stdout.write(tail)
            f.write(tail)
        return process.wait()
def write_run_directory():
    """Command-line entry point: load a config file and write its run directory.

    Arguments are obtained from ``_parse_write_run_directory_args()``. The
    file at ``args.config`` is opened through ``fsspec`` and parsed with
    ``fv3config.load``; the result is written to ``args.rundir``.
    """
    cli_args = _parse_write_run_directory_args()

    with fsspec.open(cli_args.config) as config_file:
        loaded_config = fv3config.load(config_file)

    fv3config.write_run_directory(loaded_config, cli_args.rundir)
def test_rundir_contains_nudging_asset_if_enabled():
    """With nudging enabled, the expected analysis files appear under INPUT."""
    config = c12_config()
    config["gfs_analysis_data"] = dict(
        url="gs://vcm-fv3config/data/gfs_nudging_data/v1.0",
        filename_pattern="%Y%m%d_%H.nc",
    )
    config["namelist"]["fv_core_nml"]["nudge"] = True

    expected_assets = ("20160801_00.nc", "20160801_06.nc")
    with tempfile.TemporaryDirectory() as scratch_dir:
        fv3config.write_run_directory(config, scratch_dir)
        input_dir = os.path.join(scratch_dir, "INPUT")
        # The directory is listed afresh for every expected file.
        for asset_name in expected_assets:
            assert asset_name in os.listdir(input_dir)
def test_write_default_run_directory(self):
    """The default config produces every required subdirectory and file."""
    run_path = self.make_run_directory("test_rundir")
    fv3config.write_run_directory(DEFAULT_CONFIG.copy(), run_path)
    expected_subpaths = (
        required_run_directory_subdirectories
        + required_default_initial_conditions_filenames
        + required_base_forcing_filenames
        + required_orographic_forcing_filenames
        + additional_required_filenames
    )
    self.assert_subpaths_present(run_path, expected_subpaths)
def test_cache_diag_table(self):
    """Writing a run directory with a remote diag_table populates the cache.

    The cache file must be absent beforehand and present afterwards.
    """
    cached_copy = os.path.join(
        fv3config.get_cache_dir(),
        "fv3config-cache/gs/vcm-fv3config/config/diag_table/default/v1.0/diag_table",
    )
    config = copy.deepcopy(DEFAULT_CONFIG)
    config["diag_table"] = (
        "gs://vcm-fv3config/config/diag_table/default/v1.0/diag_table"
    )
    assert not os.path.isfile(cached_copy)
    with tempfile.TemporaryDirectory() as scratch_dir:
        fv3config.write_run_directory(config, scratch_dir)
        assert os.path.isfile(cached_copy)
def setup_final_run(workdir, firsthalf_config, remove_phy_data=False):
    """Create the ``secondhalf`` run directory that restarts from ``firsthalf``.

    Must be called after the first-half run has completed, because the new
    configuration uses ``<workdir>/firsthalf/RESTART`` as its initial
    conditions.

    Args:
        workdir: directory containing the ``firsthalf`` run directory; the
            ``secondhalf`` run directory is written next to it.
        firsthalf_config: configuration of the first-half run; it is deep
            copied, not modified.
        remove_phy_data: when true, delete ``phy_data.tile1.nc`` through
            ``phy_data.tile6.nc`` from the new ``INPUT`` directory.

    Returns:
        The configuration dictionary used for the second-half run.
    """
    target_dir = join(workdir, 'secondhalf')

    secondhalf_config = deepcopy(firsthalf_config)
    restart_source = join(workdir, 'firsthalf', 'RESTART')
    secondhalf_config['initial_conditions'] = os.path.abspath(restart_source)
    secondhalf_config = fv3config.enable_restart(secondhalf_config)
    fv3config.write_run_directory(secondhalf_config, target_dir)

    if remove_phy_data:
        input_dir = join(target_dir, 'INPUT')
        for tile in range(1, 7):
            os.remove(join(input_dir, f'phy_data.tile{tile}.nc'))

    shutil.copy(RUN_SCRIPT_FILENAME, join(target_dir, RUN_SCRIPT_FILENAME))
    return secondhalf_config
def test_cached_diag_table_is_not_redownloaded(self):
    """A second write leaves the cached diag_table's mtime unchanged."""
    config = copy.deepcopy(DEFAULT_CONFIG)
    config["diag_table"] = (
        "gs://vcm-fv3config/config/diag_table/default/v1.0/diag_table"
    )

    def write_into_scratch_dir():
        # Each write goes to its own throw-away run directory.
        with tempfile.TemporaryDirectory() as scratch_dir:
            fv3config.write_run_directory(config, scratch_dir)

    write_into_scratch_dir()
    cached_copy = os.path.join(
        fv3config.get_cache_dir(),
        "fv3config-cache/gs/vcm-fv3config/config/diag_table/default/v1.0/diag_table",
    )
    assert os.path.isfile(cached_copy)
    mtime_after_first_write = os.path.getmtime(cached_copy)

    write_into_scratch_dir()
    assert os.path.getmtime(cached_copy) == mtime_after_first_write
def setup_initial_runs(workdir, config_template):
    """Set up two run directories initialized from the same template.

    ``<workdir>/fullrun`` is configured for a four-hour run and
    ``<workdir>/firsthalf`` for a two-hour run; both have ``minutes`` set to
    0 and are otherwise deep copies of ``config_template``. The run script
    ``RUN_SCRIPT_FILENAME`` is copied into each directory.

    Args:
        workdir: directory that will hold both run directories; it is
            created, including missing parents, if it does not exist.
        config_template: base configuration; it is deep copied, not modified.

    Returns:
        Tuple ``(fullrun_config, firsthalf_config)`` of the configurations
        written.
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it avoids the
    # check-then-create race and also works when parent directories are missing.
    os.makedirs(workdir, exist_ok=True)

    fullrun_config = deepcopy(config_template)
    fullrun_config['namelist']['coupler_nml']['hours'] = 4
    fullrun_config['namelist']['coupler_nml']['minutes'] = 0

    firsthalf_config = deepcopy(fullrun_config)
    firsthalf_config['namelist']['coupler_nml']['hours'] = 2

    fv3config.write_run_directory(fullrun_config, join(workdir, 'fullrun'))
    fv3config.write_run_directory(firsthalf_config, join(workdir, 'firsthalf'))

    for run in ['fullrun', 'firsthalf']:
        shutil.copy(RUN_SCRIPT_FILENAME, join(workdir, run, RUN_SCRIPT_FILENAME))
    return fullrun_config, firsthalf_config
def test_write_run_directory_with_patch_file(self):
    """A configured patch file is written in addition to the default files."""
    patch_source_dir = self.make_run_directory("sourcedir")
    run_path = self.make_run_directory("test_rundir")
    config = DEFAULT_CONFIG.copy()

    # Create (or leave untouched) an empty file to act as the patch source.
    with open(os.path.join(patch_source_dir, "empty_file"), "a"):
        pass

    config["patch_files"] = dict(
        source_location=patch_source_dir,
        source_name="empty_file",
        target_location="",
        target_name="empty_file",
        copy_method="copy",
    )
    fv3config.write_run_directory(config, run_path)

    expected_subpaths = (
        required_run_directory_subdirectories
        + required_default_initial_conditions_filenames
        + required_base_forcing_filenames
        + required_orographic_forcing_filenames
        + additional_required_filenames
        + ["empty_file"]
    )
    self.assert_subpaths_present(run_path, expected_subpaths)
#!/usr/bin/env python3
"""Write the run directory ``rundir`` from ``example/config.yml``."""
import fv3config
import yaml

# Open the configuration inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left to the garbage
# collector.
with open('example/config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

fv3config.write_run_directory(config, 'rundir')
def write_run_directory(config, dirname):
    """Write the run directory and add the job submission script to it.

    Args:
        config: configuration passed through to
            ``fv3config.write_run_directory``.
        dirname: destination run directory; ``SUBMIT_JOB_FILENAME`` is copied
            into it as ``submit_job.sh``.
    """
    fv3config.write_run_directory(config, dirname)
    submit_script_target = os.path.join(dirname, 'submit_job.sh')
    shutil.copy(SUBMIT_JOB_FILENAME, submit_script_target)
import fv3config
import argparse
import os

if __name__ == "__main__":
    # Command line: <configfile> <outdir>
    cli = argparse.ArgumentParser(description="Create a run directory for FV3.")
    positional_arguments = (
        ("configfile", "yaml configuration path"),
        ("outdir", "output directory"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, type=str, help=argument_help)
    options = cli.parse_args()

    with open(options.configfile, "r") as config_stream:
        run_config = fv3config.load(config_stream)

    fv3config.write_run_directory(run_config, options.outdir)

    # Store a copy of the configuration alongside the generated files.
    dumped_config_path = os.path.join(options.outdir, "fv3config.yml")
    with open(dumped_config_path, "w") as dump_stream:
        fv3config.dump(run_config, dump_stream)
def test_write_run_directory_succeeds_with_diag_table_class():
    """Writing succeeds when ``diag_table`` is a ``fv3config.DiagTable`` object."""
    config = c12_config()
    config["diag_table"] = fv3config.DiagTable(
        "name", datetime.datetime(2000, 1, 1), []
    )
    with tempfile.TemporaryDirectory() as scratch_dir:
        # Passing means the call below completes without raising.
        fv3config.write_run_directory(config, scratch_dir)
def test_rundir_contains_fv3config_yml():
    """The written run directory includes a top-level ``fv3config.yml``."""
    base_config = c12_config()
    with tempfile.TemporaryDirectory() as scratch_dir:
        fv3config.write_run_directory(base_config, scratch_dir)
        top_level_entries = os.listdir(scratch_dir)
        assert "fv3config.yml" in top_level_entries
def run_benchmark(nodes_per_tile_side, threads_per_rank, hyperthreading, executable,
                  rank_layout, blocksize, partition, force, timesteps, wait,
                  module_env, yaml_file, run_directory):
    """Create a benchmark run directory and submit it as a SLURM job.

    The configuration read from ``yaml_file`` is overridden with the
    benchmark parameters, written with ``fv3config.write_run_directory``,
    and a SLURM job script named ``job`` is generated in the run directory
    and submitted with ``sbatch``.

    Args:
        nodes_per_tile_side: the job requests
            ``FV3_NUMBER_OF_TILES * nodes_per_tile_side**2`` nodes.
        threads_per_rank: threads per MPI rank; must divide the number of
            hardware threads per node evenly.
        hyperthreading: use two threads per core instead of one.
        executable: path of the model executable, copied to ``fv3.exe``.
        rank_layout: 1-based index into the list of possible
            ``[ranks_in_x, ranks_in_y]`` decompositions, or ``-1`` to select
            the default (the last one whose x-extent does not exceed
            ``int(sqrt(ranks_per_tile))``).
        blocksize: value for ``atmos_model_nml.blocksize``.
        partition: SLURM partition name.
        force: remove an existing run directory instead of failing.
        timesteps: the run length is set to ``dt_atmos * timesteps`` seconds.
        wait: pass ``--wait`` to ``sbatch``.
        module_env: optional path copied to ``module.env`` in the run
            directory, or ``None``.
        yaml_file: open file object of the YAML configuration (its ``name``
            attribute is also used).
        run_directory: path of the run directory to create.

    Raises:
        AssertionError: if hyperthreading is requested but unavailable, if
            ``threads_per_rank`` does not divide the threads per node, or if
            the run directory exists and ``force`` is false.
        ValueError: if ``DAINT_SECTION`` is neither ``'hybrid'`` nor
            ``'multicore'``.
        FileNotFoundError: if the ``sbatch`` command cannot be found.
    """
    # Function-scope import so this block does not depend on the file header.
    import subprocess

    # echo command
    print('\nRunning command:')
    print('', ' '.join(sys.argv))

    # derived variables for Piz Daint
    if hyperthreading:
        assert HAS_HYPERTHREADING
        threads_per_core = 2
    else:
        threads_per_core = 1
    max_cores_per_node = SOCKETS_PER_NODE * CORES_PER_SOCKET
    max_threads_per_node = max_cores_per_node * threads_per_core

    # define SLURM parameters
    slurm_nodes = FV3_NUMBER_OF_TILES * nodes_per_tile_side**2
    slurm_ntasks_per_node = max_threads_per_node // threads_per_rank
    assert threads_per_rank * slurm_ntasks_per_node == max_threads_per_node
    slurm_ntasks_per_core = threads_per_core
    slurm_cpus_per_task = threads_per_rank
    if hyperthreading:
        slurm_hint = "multithread"
    else:
        slurm_hint = "nomultithread"
    if DAINT_SECTION == 'hybrid':
        slurm_constraint = 'gpu'
    elif DAINT_SECTION == 'multicore':
        slurm_constraint = 'mc'
    else:
        # Previously this case left slurm_constraint unbound and failed much
        # later, after the run directory had already been written.
        raise ValueError(
            f"Unsupported DAINT_SECTION {DAINT_SECTION!r}; "
            "expected 'hybrid' or 'multicore'"
        )

    # define MPI decomposition
    ranks_per_tile = slurm_ntasks_per_node * nodes_per_tile_side**2
    max_ranks_in_x = int(math.sqrt(ranks_per_tile))
    rank_layouts = []
    for ranks_in_x in range(1, ranks_per_tile + 1):
        if ranks_per_tile % ranks_in_x == 0:
            ranks_in_y = ranks_per_tile // ranks_in_x
            rank_layouts.append([ranks_in_x, ranks_in_y])
            if ranks_in_x <= max_ranks_in_x:
                # 1-based index of the most recent layout with x <= sqrt(n)
                default_layout = len(rank_layouts)
    assert rank_layouts, 'Did not find a suitable rank decomposition'
    if rank_layout == -1:
        rank_layout = default_layout
    selected_layout = rank_layouts[rank_layout - 1]
    layout_x, layout_y = selected_layout

    # load namelist
    config = yaml.safe_load(yaml_file)
    npx = config['namelist']['fv_core_nml']['npx']
    # Fixed: this used to read the 'npx' key a second time.
    npy = config['namelist']['fv_core_nml']['npy']
    npz = config['namelist']['fv_core_nml']['npz']
    dt = config['namelist']['coupler_nml']['dt_atmos']

    # override benchmark parameters
    config['namelist']['atmos_model_nml']['blocksize'] = blocksize
    config['namelist']['fv_core_nml']['layout'] = selected_layout
    config['namelist']['fv_core_nml']['io_layout'] = [1, 1]
    config['namelist']['coupler_nml']['atmos_nthreads'] = threads_per_rank
    config['namelist']['coupler_nml']['use_hyper_thread'] = hyperthreading
    config['namelist']['coupler_nml'][
        'ncores_per_node'] = SOCKETS_PER_NODE * CORES_PER_SOCKET
    config['namelist']['coupler_nml']['months'] = 0
    config['namelist']['coupler_nml']['days'] = 0
    config['namelist']['coupler_nml']['hours'] = 0
    config['namelist']['coupler_nml']['minutes'] = 0
    config['namelist']['coupler_nml']['seconds'] = dt * timesteps

    # echo some information
    print('')
    print(f'Configuration file:\n {os.path.abspath(yaml_file.name)}\n')
    print(
        f'Piz Daint resources:\n'
        f' {DAINT_SECTION}, {slurm_nodes} nodes, {slurm_nodes * max_cores_per_node} cores, '
        f'{slurm_nodes * max_threads_per_node} threads\n')
    print(
        f'FV3 configuration:\n'
        f' {FV3_NUMBER_OF_TILES} tiles of {npx-1} x {npy-1} x {npz} gridpoints\n'
        f' {(npx-1) // layout_x} x {(npy-1) // layout_y}'
        f' x {npz} gridpoints per rank on {layout_x} x {layout_y} ranks per tile'
        f' (rank layout {rank_layout} of {len(rank_layouts)})\n'
        f' {threads_per_rank} thread(s) per rank with {threads_per_core} thread(s) per core\n'
    )

    # create run directory
    run_directory = os.path.abspath(run_directory)
    if os.path.isdir(run_directory):
        if force:
            shutil.rmtree(run_directory)
        else:
            # Raised explicitly (same exception type as before) so the check
            # is not stripped when Python runs with -O.
            raise AssertionError(
                f'Run directory ({run_directory}) already exists (use --force to overwrite)')
    print(f'Writing run directory:\n {run_directory}\n')
    fv3config.write_run_directory(config, run_directory)

    # copy executable and some meta-information
    print(f'Copying executable:\n {executable}')
    shutil.copy2(executable, os.path.join(run_directory, 'fv3.exe'))
    # Close the executable deterministically after hashing it.
    with open(executable, 'rb') as executable_file:
        md5_hash = hashlib.md5(executable_file.read()).hexdigest()
    print(f' md5 hash is {md5_hash}\n')
    if module_env is not None:
        shutil.copy2(module_env, os.path.join(run_directory, 'module.env'))
    shutil.copy2(yaml_file.name, os.path.join(run_directory, 'config.yml'))

    # create SLURM job file
    slurm_job = f"""#!/bin/bash -l
#SBATCH --job-name="fv3_bench"
#SBATCH --account="s1053"
#SBATCH --time=00:30:00
#SBATCH --nodes={slurm_nodes}
#SBATCH --ntasks-per-core={slurm_ntasks_per_core}
#SBATCH --ntasks-per-node={slurm_ntasks_per_node}
#SBATCH --cpus-per-task={slurm_cpus_per_task}
#SBATCH --partition={partition}
#SBATCH --constraint={slurm_constraint}
#SBATCH --hint={slurm_hint}

export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export CRAY_CUDA_MPS=1
export OMP_STACKSIZE=32M

if [ -f ./module.env ] ; then
    source ./module.env
fi

t_start=$(date +%s)
srun ./fv3.exe
t_end=$(date +%s)
t_elapsed=$(($t_end - $t_start))
echo "Elapsed[s]=${{t_elapsed}}"

exit 0
"""
    job_file = os.path.join(run_directory, 'job')
    print(f'Writing SLURM job file:\n {job_file}\n')
    with open(job_file, 'w') as f:
        f.write(slurm_job)

    # run SLURM job
    print('Running SLURM job:')
    # An argument list plus cwd= replaces the interpolated shell string, so a
    # run directory containing spaces or shell metacharacters works. As
    # before, the exit status of sbatch is not checked.
    sbatch_command = ['sbatch']
    if wait:
        sbatch_command.append('--wait')
    sbatch_command.append('job')
    subprocess.run(sbatch_command, cwd=run_directory, check=False)