Exemplo n.º 1
0
def test_cycle_ensemble_parallel_compose(ensemble, job_restart, scheduler,
                                         tmpdir, init_times,
                                         restart_dirs_ensemble):
    """Compose and run an ensemble cycle in parallel, then check the result.

    Three copies of the same cycle are exercised:

    * ``cy`` never receives the ensemble; its ``compose()`` must raise.
    * ``cy_ens_compose`` is composed with the default options, run and
      pickled; its casts are expected to have been reduced to strings.
    * ``cy_check_casts`` is composed with casts and members kept in memory,
      so the first job of every member of every cast can be compared,
      attribute by attribute, against a hand-written expected dict.

    Finally the load time of the cast pickle and the cycle pickle is bounded.

    Args (all pytest fixtures):
        ensemble: ensemble added to the two cycles that are composed.
        job_restart: job added to every cycle.
        scheduler: requested but not added to the cycle (see comment below).
        tmpdir: directory under which the compose directories are created.
        init_times: passed to ``CycleSimulation(init_times=...)``.
        restart_dirs_ensemble: passed to
            ``CycleSimulation(restart_dirs=...)``.

    Note:
        The test changes the process working directory and does not restore
        it.
    """
    cy = CycleSimulation(init_times=init_times,
                         restart_dirs=restart_dirs_ensemble,
                         ncores=2)
    cy.add(job_restart)
    # Adding the scheduler ruins the run in CI.
    # cy.add(scheduler)

    # Copies: one is composed/run with defaults, one keeps the casts in
    # memory for checking.
    cy_check_casts = copy.deepcopy(cy)
    cy_ens_compose = copy.deepcopy(cy)

    # No ensemble has been added to ``cy``: composing it must fail.
    with pytest.raises(Exception):
        cy.compose()

    ens_compose_dir = pathlib.Path(tmpdir) / 'cycle_ensemble_compose'
    cy_ens_compose.add(ensemble)
    ens_compose_dir.mkdir()
    os.chdir(str(ens_compose_dir))
    # Despite its name this creates an empty *file* next to the compose
    # directory; the third entry of the patches below points at it.
    pathlib.Path('../dummy_extant_dir').touch()
    cy_ens_compose.compose()

    cy_run_success = cy_ens_compose.run()
    assert cy_run_success == 0
    # Pickle the cycle that was actually composed and run; this pickle is
    # loaded by the timing check at the end of the test. (``cy`` never got
    # the ensemble and its compose() raised above.)
    cy_ens_compose.pickle(str(ens_compose_dir / 'WrfHydroCycleEns.pkl'))

    # The cycle-in-memory version for checking the casts.
    check_casts_dir = pathlib.Path(tmpdir) / 'cycle_compose_check_casts'
    cy_check_casts.add(ensemble)
    check_casts_dir.mkdir()
    os.chdir(str(check_casts_dir))
    pathlib.Path('../dummy_extant_dir').touch()
    cy_check_casts.compose(rm_casts_from_memory=False,
                           rm_members_from_memory=False)

    # The job gets heavily modified on compose.
    answer = {
        '_entry_cmd': 'bogus entry cmd',
        '_exe_cmd': './wrf_hydro.exe',
        '_exit_cmd': 'bogus exit cmd',
        '_hrldas_namelist': {
            'noahlsm_offline': {
                'btr_option': 1,
                'canopy_stomatal_resistance_option': 1,
                'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc',
                'indir': './FORCING',
                'output_timestep': 86400,
                'restart_filename_requested':
                './NWM/RESTART/RESTART.2011082600_DOMAIN1',
                'restart_frequency_hours': 24
            },
            'wrf_hydro_offline': {
                'forc_typ': 1
            }
        },
        '_hrldas_times': {
            'noahlsm_offline': {
                'khour': 282480,
                'restart_frequency_hours': 24,
                'output_timestep': 86400,
                'restart_filename_requested':
                'NWM/RESTART/RESTART.2013101300_DOMAIN1',
                'start_day': 12,
                'start_hour': 0,
                'start_min': 0,
                'start_month': 12,
                'start_year': 2012
            }
        },
        '_hydro_namelist': {
            'hydro_nlist': {
                'aggfactrt': 4,
                'channel_option': 2,
                'chanobs_domain': 0,
                'chanrtswcrt': 1,
                'chrtout_domain': 1,
                'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc',
                'restart_file':
                './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
                'udmp_opt': 1,
                'rst_dt': 1440,
                'out_dt': 1440
            },
            'nudging_nlist': {
                'maxagepairsbiaspersist':
                3,
                'minnumpairsbiaspersist':
                1,
                'nudginglastobsfile':
                './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'
            }
        },
        '_hydro_times': {
            'hydro_nlist': {
                'out_dt': 1440,
                'rst_dt': 1440,
                'restart_file':
                'NWM/RESTART/HYDRO_RST.2013-10-13_00:00_DOMAIN1'
            },
            'nudging_nlist': {
                'nudginglastobsfile':
                'NWM/RESTART/nudgingLastObs.2013-10-13_00:00:00.nc'
            }
        },
        '_job_end_time': None,
        '_job_start_time': None,
        '_job_submission_time': None,
        '_model_end_time': pandas.Timestamp('2045-03-04 00:00:00'),
        '_model_start_time': pandas.Timestamp('2012-12-12 00:00:00'),
        'exit_status': None,
        'job_id': 'test_job_1',
        'restart_freq_hr_hydro': None,
        'restart_freq_hr_hrldas': None,
        'output_freq_hr_hydro': None,
        'output_freq_hr_hrldas': None,
        'restart': True,
        'restart_dir': None,
        '_restart_dir_hydro': None,
        '_restart_dir_hrldas': None,
        'restart_file_time': '2013-10-13',
        '_restart_file_time_hydro': pandas.Timestamp('2013-10-13 00:00:00'),
        '_restart_file_time_hrldas': pandas.Timestamp('2013-10-13 00:00:00')
    }

    # These answer patches respond to the variety of things in
    # restart_dirs_ensemble. Each list holds one entry per cast.
    dum_ext = str(tmpdir) + '/dummy_extant_dir/'
    answer_patches = {
        'cast_index': [0, 1, 2],
        'start_time_patch': [
            pandas.Timestamp('2012-12-12 00:00:00'),
            pandas.Timestamp('2012-12-15 00:00:00'),
            pandas.Timestamp('2012-12-18 00:00:00')
        ],
        'end_time_patch': [
            pandas.Timestamp('2045-03-04 00:00:00'),
            pandas.Timestamp('2045-03-07 00:00:00'),
            pandas.Timestamp('2045-03-10 00:00:00')
        ],

        # These "time patches" reveal the awkwardness of that construct.
        'lsm_times_patch': [
            'NWM/RESTART/RESTART.2013101300_DOMAIN1',
            '../../cast_2012121200/member_000/RESTART.2013101300_DOMAIN1',
            dum_ext + 'RESTART.2013101300_DOMAIN1'
        ],
        'hydro_times_patch': [
            'NWM/RESTART/HYDRO_RST.2013-10-13_00:00_DOMAIN1',
            '../../cast_2012121200/member_000/HYDRO_RST.2013-10-13_00:00_DOMAIN1',
            dum_ext + 'HYDRO_RST.2013-10-13_00:00_DOMAIN1'
        ],
        'ndg_times_patch': [
            'NWM/RESTART/nudgingLastObs.2013-10-13_00:00:00.nc',
            '../../cast_2012121200/member_000/nudgingLastObs.2013-10-13_00:00:00.nc',
            dum_ext + 'nudgingLastObs.2013-10-13_00:00:00.nc'
        ],

        # These namelist patches are consistent with the model times except in the
        # first "do nothing" case which leaves the start time != restart file time
        'lsm_nlst_patch': [
            './NWM/RESTART/RESTART.2011082600_DOMAIN1',
            '../../cast_2012121200/member_000/RESTART.2012121500_DOMAIN1',
            dum_ext + 'RESTART.2012121800_DOMAIN1'
        ],
        'hydro_nlst_patch': [
            './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
            '../../cast_2012121200/member_000/HYDRO_RST.2012-12-15_00:00_DOMAIN1',
            dum_ext + 'HYDRO_RST.2012-12-18_00:00_DOMAIN1'
        ],
        'ndg_nlst_patch': [
            './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc',
            '../../cast_2012121200/member_000/nudgingLastObs.2012-12-15_00:00:00.nc',
            dum_ext + 'nudgingLastObs.2012-12-18_00:00:00.nc'
        ]
    }

    # Where each per-member path patch lands in ``answer``:
    # (section, namelist group, key) -> name of the patch list.
    member_path_patches = (
        (('_hrldas_namelist', 'noahlsm_offline',
          'restart_filename_requested'), 'lsm_nlst_patch'),
        (('_hrldas_times', 'noahlsm_offline',
          'restart_filename_requested'), 'lsm_times_patch'),
        (('_hydro_namelist', 'hydro_nlist', 'restart_file'),
         'hydro_nlst_patch'),
        (('_hydro_namelist', 'nudging_nlist', 'nudginglastobsfile'),
         'ndg_nlst_patch'),
        (('_hydro_times', 'hydro_nlist', 'restart_file'),
         'hydro_times_patch'),
        (('_hydro_times', 'nudging_nlist', 'nudginglastobsfile'),
         'ndg_times_patch'),
    )

    def sub_member(the_string, replace_num, find_num=0):
        """Swap ``member_<find_num>`` for ``member_<replace_num>`` (3 digits)."""
        replace = "member_{:03d}".format(replace_num)
        find = "member_{:03d}".format(find_num)
        return the_string.replace(find, replace)

    # Check the cycle where the compose retains the casts (otherwise nothing
    # is in memory). A direct
    #     deepdiff.DeepDiff(answer, cy.casts[0].jobs[0].__dict__)
    # fails, so iterate on the keys to "declass" instead.
    for ii in answer_patches['cast_index']:
        cc = cy_check_casts.casts[ii]

        # Per-cast expectations: model window and the derived hrldas start
        # fields. These do not depend on the member.
        start_time = answer_patches['start_time_patch'][ii]
        answer['_model_start_time'] = start_time
        answer['_model_end_time'] = answer_patches['end_time_patch'][ii]
        answer['_hrldas_times']['noahlsm_offline'].update(
            start_year=start_time.year,
            start_month=start_time.month,
            start_day=start_time.day,
            start_hour=start_time.hour)

        for mm, member in enumerate(cc.members):
            # Per-member expectations: restart paths carry the member index.
            for (section, group, key), patch_name in member_path_patches:
                answer[section][group][key] = sub_member(
                    answer_patches[patch_name][ii], mm)

            # Actually check
            for attr, actual in member.jobs[0].__dict__.items():
                assert actual == answer[attr], attr

    # Check the scheduler too
    # assert cy_check_casts.casts[0].scheduler.__dict__ == scheduler.__dict__

    # For the cycle where the compose removes the casts, check that the
    # casts are all now plain strings. ``cy`` was never composed, so the
    # composed-and-run copy is the one to inspect.
    assert all(isinstance(cast, str) for cast in cy_ens_compose.casts)

    # the tmpdir gets nuked after the test... ?
    # Test the cast pickle size in terms of load speed.
    # Note that the deletion of the model, domain, and output objects are
    # done for the casts regardless of not removing the casts
    # from memory (currently).
    os.chdir(str(ens_compose_dir / 'cast_2012121200'))
    time_taken = timeit.timeit(
        setup='import pickle',
        # The ``with`` closes each handle instead of leaking 10000 of them.
        stmt='with open("WrfHydroEns.pkl", "rb") as f: pickle.load(f)',
        number=10000)
    # If your system is busy, this could take longer... and spuriously fail the test.
    # Notes(JLM): coverage makes this slow
    assert time_taken < 1.5

    # Test the cycle pickle size in terms of load speed.
    os.chdir(str(ens_compose_dir))
    time_taken = timeit.timeit(
        setup='import pickle',
        stmt='with open("WrfHydroCycleEns.pkl", "rb") as f: pickle.load(f)',
        number=10000)
    # If your system is busy, this could take longer...
    # Notes(JLM): coverage makes this slow
    assert time_taken < 1.5
Exemplo n.º 2
0
def test_cycle_parallel_compose(simulation_compiled, job_restart, scheduler,
                                tmpdir, init_times, restart_dirs):
    """A more comprehensive test of the object composed.

    Two copies of the same cycle are exercised:

    * ``cy`` is composed with the default options, run and pickled; its
      casts are expected to have been reduced to strings.
    * ``cy_check_casts`` is composed with the casts kept in memory so the
      first job of the first cast can be compared, attribute by attribute,
      against a hand-written expected dict.

    Finally the load time of the cast pickle and the cycle pickle is bounded.

    Args (all pytest fixtures):
        simulation_compiled: simulation added to the cycle.
        job_restart: job added to the cycle.
        scheduler: requested but not used in the body.
        tmpdir: directory under which the compose directories are created.
        init_times: passed to ``CycleSimulation(init_times=...)``.
        restart_dirs: passed to ``CycleSimulation(restart_dirs=...)``.

    Note:
        The test changes the process working directory and does not restore
        it.
    """
    # A compiled simulation passed. Successful compose in parallel.
    cy = CycleSimulation(init_times=init_times,
                         restart_dirs=restart_dirs,
                         ncores=2)
    cy.add(job_restart)
    cy.add(simulation_compiled)

    # Make a copy where we keep the casts in memory for checking.
    cy_check_casts = copy.deepcopy(cy)

    compose_dir = pathlib.Path(tmpdir) / 'cycle_compose'
    compose_dir.mkdir()
    os.chdir(str(compose_dir))
    cy.compose()

    cy_run_success = cy.run()
    assert cy_run_success == 0
    # This pickle is loaded by the timing check at the end of the test.
    cy.pickle(str(compose_dir / 'WrfHydroCycleSim.pkl'))

    # The cycle-in-memory version for checking the casts.
    check_casts_dir = pathlib.Path(tmpdir) / 'cycle_compose_check_casts'
    check_casts_dir.mkdir()
    os.chdir(str(check_casts_dir))
    cy_check_casts.compose(rm_casts_from_memory=False)

    # The job gets heavily modified on compose.
    answer = {
        '_entry_cmd': 'bogus entry cmd',
        '_exe_cmd': './wrf_hydro.exe',
        '_exit_cmd': 'bogus exit cmd',
        '_hrldas_namelist': {
            'noahlsm_offline': {
                'btr_option': 1,
                'canopy_stomatal_resistance_option': 1,
                'hrldas_setup_file': './NWM/DOMAIN/wrfinput_d01.nc',
                'indir': './FORCING',
                'output_timestep': 86400,
                'restart_filename_requested':
                './NWM/RESTART/RESTART.2011082600_DOMAIN1',
                'restart_frequency_hours': 24
            },
            'wrf_hydro_offline': {
                'forc_typ': 1
            }
        },
        '_hrldas_times': {
            'noahlsm_offline': {
                'khour': 282480,
                'restart_frequency_hours': 24,
                'output_timestep': 86400,
                'restart_filename_requested':
                'NWM/RESTART/RESTART.2013101300_DOMAIN1',
                'start_day': 12,
                'start_hour': 0,
                'start_min': 0,
                'start_month': 12,
                'start_year': 2012
            }
        },
        '_hydro_namelist': {
            'hydro_nlist': {
                'aggfactrt': 4,
                'channel_option': 2,
                'chanobs_domain': 0,
                'chanrtswcrt': 1,
                'chrtout_domain': 1,
                'geo_static_flnm': './NWM/DOMAIN/geo_em.d01.nc',
                'restart_file':
                './NWM/RESTART/HYDRO_RST.2011-08-26_00:00_DOMAIN1',
                'udmp_opt': 1,
                'rst_dt': 1440,
                'out_dt': 1440
            },
            'nudging_nlist': {
                'maxagepairsbiaspersist':
                3,
                'minnumpairsbiaspersist':
                1,
                'nudginglastobsfile':
                './NWM/RESTART/nudgingLastObs.2011-08-26_00:00:00.nc'
            }
        },
        '_hydro_times': {
            'hydro_nlist': {
                'out_dt': 1440,
                'rst_dt': 1440,
                'restart_file':
                'NWM/RESTART/HYDRO_RST.2013-10-13_00:00_DOMAIN1'
            },
            'nudging_nlist': {
                'nudginglastobsfile':
                'NWM/RESTART/nudgingLastObs.2013-10-13_00:00:00.nc'
            }
        },
        '_job_end_time': None,
        '_job_start_time': None,
        '_job_submission_time': None,
        '_model_end_time': pandas.Timestamp('2045-03-04 00:00:00'),
        '_model_start_time': pandas.Timestamp('2012-12-12 00:00:00'),
        'exit_status': None,
        'job_id': 'test_job_1',
        'restart_freq_hr_hydro': None,
        'restart_freq_hr_hrldas': None,
        'output_freq_hr_hydro': None,
        'output_freq_hr_hrldas': None,
        'restart': True,
        'restart_dir': None,
        '_restart_dir_hydro': None,
        '_restart_dir_hrldas': None,
        'restart_file_time': '2013-10-13',
        '_restart_file_time_hydro': pandas.Timestamp('2013-10-13 00:00:00'),
        '_restart_file_time_hrldas': pandas.Timestamp('2013-10-13 00:00:00')
    }

    # For the cycle where the compose retains the casts: a direct
    #     deepdiff.DeepDiff(answer, cy.casts[0].jobs[0].__dict__)
    # fails, so iterate on the keys to "declass" instead.
    first_job_state = cy_check_casts.casts[0].jobs[0].__dict__
    for attr, actual in first_job_state.items():
        assert actual == answer[attr], attr
    # Check the scheduler too
    # assert cy_check_casts.casts[0].scheduler.__dict__ == scheduler.__dict__

    # For the cycle where the compose removes the casts, check that the
    # casts are all now plain strings.
    assert all(isinstance(cast, str) for cast in cy.casts)

    # the tmpdir gets nuked after the test... ?
    # Test the cast pickle size in terms of load speed.
    # Note that the deletion of the model, domain, and output objects are
    # done for the casts regardless of not removing the casts
    # from memory (currently).
    os.chdir(str(compose_dir / 'cast_2012121200'))
    time_taken = timeit.timeit(
        setup='import pickle',
        # The ``with`` closes each handle instead of leaking 10000 of them.
        stmt='with open("WrfHydroSim.pkl", "rb") as f: pickle.load(f)',
        number=10000)
    # If your system is busy, this could take longer... and spuriously fail the test.
    # Notes(JLM): coverage is the limiting factor
    assert time_taken < 1.5

    # Test the cycle pickle size in terms of load speed.
    os.chdir(str(compose_dir))
    time_taken = timeit.timeit(
        setup='import pickle',
        stmt='with open("WrfHydroCycleSim.pkl", "rb") as f: pickle.load(f)',
        number=10000)
    # If your system is busy, this could take longer...
    # Notes(JLM): coverage is the limiting factor.
    assert time_taken < 1.2