def test_ens_compose_restore(
    simulation_compiled,
    job_restart,
    scheduler,
    tmpdir
):
    """Compare an ensemble restored from disk with one kept in memory.

    Two copies of the same two-member ensemble are composed and run in
    separate directories under ``tmpdir``: one with the default ``compose()``
    and one with ``rm_members_from_memory=False``. After restoring members
    and dropping the per-run job timestamps, the two ensemble objects are
    expected to be identical according to ``deepdiff.DeepDiff``.

    NOTE(review): the working directory is changed with ``os.chdir`` and is
    not restored when the test finishes.
    """
    sim = simulation_compiled
    ens = EnsembleSimulation()
    ens.add(job_restart)
    ens.add([sim, sim])

    # Do not keep the members in memory.
    ens_disk = copy.deepcopy(ens)
    compose_dir = pathlib.Path(tmpdir).joinpath('ensemble_disk')
    os.mkdir(str(compose_dir))
    os.chdir(str(compose_dir))
    ens_disk.compose()
    ens_disk_run_success = ens_disk.run()
    assert ens_disk_run_success == 0

    # Keep the members in memory.
    compose_dir = pathlib.Path(tmpdir).joinpath('ensemble_memory')
    os.mkdir(str(compose_dir))
    os.chdir(str(compose_dir))
    ens.compose(rm_members_from_memory=False)
    ens_run_success = ens.run()
    assert ens_run_success == 0

    # Members that were removed from memory should now be plain strings
    # (presumably paths to the member directories -- confirm in compose()).
    assert all(type(mm) is str for mm in ens_disk.members)
    ens_disk.restore_members()

    # ens_disk already has the data from its run; collect it for the
    # in-memory ensemble as well.
    ens.collect(output=False)
    # The members themselves were kept in memory; per the original author
    # this restores the simulation sub-objects.
    ens.restore_members()

    # Job start/end times will never be the same for two different runs,
    # so remove them before comparing.
    for mem, dsk in zip(ens.members, ens_disk.members):
        del mem.jobs[0].job_end_time, mem.jobs[0].job_start_time
        del dsk.jobs[0].job_end_time, dsk.jobs[0].job_start_time

    assert deepdiff.DeepDiff(ens, ens_disk) == {}
def test_ens_parallel_compose(
    simulation_compiled,
    job_restart,
    scheduler,
    tmpdir
):
    """Exercise ensemble compose with a scheduler, on disk, and in memory.

    The test checks that:

    * ``compose()`` raises when a job has been added but no members have;
    * a scheduler added to the ensemble is present on the members after
      composing with ``rm_members_from_memory=False``;
    * an ensemble composed with the default ``compose()`` runs with exit
      status 0 and leaves plain strings in ``ens.members``;
    * the first job of the first in-memory member matches the expected
      attribute values in ``answer``;
    * the member and ensemble pickles load quickly (wall-clock thresholds).

    NOTE(review): the working directory is changed with ``os.chdir`` and is
    not restored when the test finishes.
    """
    sim = simulation_compiled
    ens = EnsembleSimulation()
    ens.add(job_restart)

    # Composing with a job but without any members is expected to fail.
    with pytest.raises(Exception):
        ens.compose()

    ens.add([sim, sim])

    # Check the scheduler upon compose (do not run with the scheduler).
    ens_w_sched = copy.deepcopy(ens)
    ens_w_sched.add(scheduler)
    compose_dir = pathlib.Path(tmpdir).joinpath('ensemble_compose_sched')
    os.mkdir(str(compose_dir))
    os.chdir(str(compose_dir))
    ens_w_sched.compose(rm_members_from_memory=False)
    assert ens_w_sched.members[0].scheduler.__dict__ == scheduler.__dict__

    # Test a run where the members were not kept in memory.
    # Make a copy to test against later.
    ens_check_members = copy.deepcopy(ens)
    compose_dir = pathlib.Path(tmpdir).joinpath('ensemble_compose')
    os.mkdir(str(compose_dir))
    os.chdir(str(compose_dir))
    ens.compose()
    ens_run_success = ens.run()
    assert ens_run_success == 0

    # Members that were removed from memory should now be plain strings
    # (presumably paths to the member directories -- confirm in compose()).
    assert all(type(mm) is str for mm in ens.members)

    # Why pickle? (Open question from the original author; the pickle
    # written here is the one loaded by the ensemble timing check below.)
    ens.pickle(
        str(pathlib.Path(tmpdir) / 'ensemble_compose/WrfHydroEnsSim.pkl'))

    # The ensemble-in-memory version for checking the members.
    compose_dir = pathlib.Path(tmpdir).joinpath(
        'ensemble_compose_check_members')
    os.mkdir(str(compose_dir))
    os.chdir(str(compose_dir))
    ens_check_members.compose(rm_members_from_memory=False)

    # The job gets heavily modified on compose.
    answer = {
        '_entry_cmd': 'bogus entry cmd',
        '_exe_cmd': './wrf_hydro.exe',
        '_exit_cmd': 'bogus exit cmd',
        '_hrldas_namelist': {
            'noahlsm_offline': {
                'btr_option': 1,
                'canopy_stomatal_resistance_option': 1,
                'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc',
                'indir': './FORCING',
                'restart_filename_requested': './NWM/RESTART/RESTART.2011082600_DOMAIN1',
                'restart_frequency_hours': 24,
                'output_timestep': 86400
            },
            'wrf_hydro_offline': {
                'forc_typ': 1
            }
        },
        '_hrldas_times': {
            'noahlsm_offline': {
                'khour': 282480,
                'restart_frequency_hours': 24,
                'output_timestep': 86400,
                'restart_filename_requested': 'NWM/RESTART/RESTART.2013101300_DOMAIN1',
                'start_day': 14,
                'start_hour': 0,
                'start_min': 0,
                'start_month': 10,
                'start_year': 1984
            }
        },
        '_hydro_namelist': {
            'hydro_nlist': {
                'aggfactrt': 4,
                'channel_option': 2,
                'chanobs_domain': 0,
                'chanrtswcrt': 1,
                'chrtout_domain': 1,
                'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc',
                'restart_file': './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
                'udmp_opt': 1,
                'rst_dt': 1440,
                'out_dt': 1440
            },
            'nudging_nlist': {
                'maxagepairsbiaspersist': 3,
                'minnumpairsbiaspersist': 1,
                'nudginglastobsfile': './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'
            }
        },
        '_hydro_times': {
            'hydro_nlist': {
                'restart_file': 'NWM/RESTART/HYDRO_RST.2013-10-13_00:00_DOMAIN1',
                'rst_dt': 1440,
                'out_dt': 1440
            },
            'nudging_nlist': {
                'nudginglastobsfile': 'NWM/RESTART/nudgingLastObs.2013-10-13_00:00:00.nc'
            }
        },
        '_job_end_time': None,
        '_job_start_time': None,
        '_job_submission_time': None,
        '_model_end_time': pandas.Timestamp('2017-01-04 00:00:00'),
        '_model_start_time': pandas.Timestamp('1984-10-14 00:00:00'),
        'exit_status': None,
        'job_id': 'test_job_1',
        'restart_dir': None,
        '_restart_dir_hydro': None,
        '_restart_dir_hrldas': None,
        'restart_freq_hr_hydro': None,
        'restart_freq_hr_hrldas': None,
        'output_freq_hr_hydro': None,
        'output_freq_hr_hrldas': None,
        'restart': True,
        'restart_file_time': '2013-10-13',
        '_restart_file_time_hrldas': pandas.Timestamp('2013-10-13 00:00:00'),
        '_restart_file_time_hydro': pandas.Timestamp('2013-10-13 00:00:00')
    }

    # For the ensemble where the compose retains the members: per the
    # original author, a direct deepdiff.DeepDiff of `answer` against the
    # job's __dict__ fails, so iterate on the job's attributes to "declass"
    # the comparison. Only attributes present on the job are checked; an
    # attribute missing from `answer` raises KeyError.
    job_attrs = ens_check_members.members[0].jobs[0].__dict__
    for attr_name, attr_value in job_attrs.items():
        assert attr_value == answer[attr_name]

    # The tmpdir gets nuked after the test... ?

    # Test the member pickle size in terms of load speed.
    # Note that the deletion of the model, domain, and output objects are
    # done for the members regardless of not removing the members
    # from memory (currently).
    os.chdir(str(pathlib.Path(tmpdir) / 'ensemble_compose/member_000'))
    time_taken = timeit.timeit(
        setup='import pickle',
        stmt='pickle.load(open("WrfHydroSim.pkl","rb"))',
        number=10000)
    # If your system is busy, this could take longer... and spuriously
    # fail the test.
    # Notes(JLM): coverage is the limiting factor here.
    assert time_taken < 1.25

    # Test the ensemble pickle size in terms of load speed.
    os.chdir(str(pathlib.Path(tmpdir) / 'ensemble_compose/'))
    time_taken = timeit.timeit(
        setup='import pickle',
        stmt='pickle.load(open("WrfHydroEnsSim.pkl","rb"))',
        number=10000)
    # If your system is busy, this could take longer...
    # Notes(JLM): cheyenne scratch is slow sometimes. So is CI.
    assert time_taken < .7