def test_dask_shifter_fail(tmpdir):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")
    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, value=1, shifter=True)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    assert "DASK__dask_worker_2 ERROR Requested to run dask within shifter but shifter not available\n" in lines

    # check simulation_log, make sure it includes events from dask tasks
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        lines = json_file.readlines()
    lines = [json.loads(line.strip()) for line in lines]
    assert len(lines) == 12
    assert lines[-1].get('eventtype') == "IPS_END"
    assert lines[-1].get('comment') == "Simulation Execution Error"

def test_bad_task(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, worker='bad_task_worker')
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()

    assert "ValueError: task binary of wrong type, expected str but found int\n" in lines
    assert "Exception: task binary of wrong type, expected str but found int\n" in lines

    # remove timestamp
    lines = [line[24:] for line in lines]

    assert "WORKER__bad_task_worker_2 ERROR Uncaught Exception in component method.\n" in lines
    assert "DRIVER__driver_1 ERROR Uncaught Exception in component method.\n" in lines

def test_assign_protected_attribute(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, worker='assign_protected_attribute')
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()

    # python 3.10 includes the attribute name in the error message
    assert "AttributeError: can't set attribute\n" in lines or "AttributeError: can't set attribute 'args'\n" in lines
    assert "Exception: can't set attribute\n" in lines or "Exception: can't set attribute 'args'\n" in lines

    # remove timestamp
    lines = [line[24:] for line in lines]

    assert "WORKER__assign_protected_attribute_2 ERROR Uncaught Exception in component method.\n" in lines
    assert "DRIVER__driver_1 ERROR Uncaught Exception in component method.\n" in lines

def test_dask(tmpdir):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir, value=1)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    log = "DASK__dask_worker_2 INFO {}\n"
    assert log.format("cmd = /bin/sleep") in lines
    assert log.format("ret_val = 4") in lines

    # task successful and return 0
    for i in range(4):
        assert log.format(f"task_{i} 0") in lines

    # check simulation_log, make sure it includes events from dask tasks
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        lines = json_file.readlines()
    lines = [json.loads(line.strip()) for line in lines]
    assert len(lines) == 28
    eventtypes = [e.get('eventtype') for e in lines]
    assert eventtypes.count('IPS_LAUNCH_DASK_TASK') == 4
    assert eventtypes.count('IPS_TASK_END') == 5
    launch_dask_comments = [
        e.get('comment') for e in lines
        if e.get('eventtype') == "IPS_LAUNCH_DASK_TASK"
    ]
    for task in range(4):
        assert f'task_name = task_{task}, Target = /bin/sleep 1' in launch_dask_comments
    task_end_comments = [
        e.get('comment')[:-4] for e in lines
        if e.get('eventtype') == "IPS_TASK_END"
    ]
    for task in range(4):
        assert f'task_name = task_{task}, elapsed time = 1' in task_end_comments

def test_srun_openmp_on_cori_pool(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir, "openmp_task_pool")
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check simulation_log
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        comments = [json.loads(line)['comment'].split(', ', maxsplit=5)[3:]
                    for line in json_file.readlines()]

    # check that the process output log files are created
    work_dir = tmpdir.join("work").join("OPENMP__openmp_task_pool_1")

    # 1
    assert comments[3][0] == "Target = srun -N 1 -n 4 -c 8 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[3][1] == "task_name = task_1"
    assert comments[3][2] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '8'}"
    lines = sorted(work_dir.join("log.1").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-7,32-39)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16-23,48-55)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 8-15,40-47)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 24-31,56-63)\n')

    # 2
    assert comments[5][0] == "Target = srun -N 1 -n 4 -c 4 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[5][1] == "task_name = task_2"
    assert comments[5][2] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '4'}"
    lines = sorted(work_dir.join("log.2").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-3,32-35)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16-19,48-51)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 4-7,36-39)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 20-23,52-55)\n')

    # 3
    assert comments[7][0] == "Target = srun -N 1 -n 4 -c 2 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[7][1] == "task_name = task_3"
    assert comments[7][2] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '2'}"
    lines = sorted(work_dir.join("log.3").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0,1,32,33)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16,17,48,49)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 2,3,34,35)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 18,19,50,51)\n')

def test_helloworld(tmpdir, capfd):
    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.ips"),
                            tmpdir.join("hello_world.ips"), tmpdir)
    shutil.copy(os.path.join(data_dir, "platform.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker.py"), tmpdir)

    framework = Framework(
        config_file_list=[os.path.join(tmpdir, "hello_world.ips")],
        log_file_name=str(tmpdir.join('test.log')),
        platform_file_name=os.path.join(tmpdir, "platform.conf"),
        debug=None,
        verbose_debug=None,
        cmd_nodes=0,
        cmd_ppn=0)

    assert framework.log_file_name.endswith('test.log')

    fwk_components = framework.config_manager.get_framework_components()
    assert len(fwk_components) == 1
    assert 'Hello_world_1_FWK@runspaceInitComponent@3' in fwk_components

    component_map = framework.config_manager.get_component_map()
    assert len(component_map) == 1
    assert 'Hello_world_1' in component_map
    hello_world_1 = component_map['Hello_world_1']
    assert len(hello_world_1) == 1
    assert hello_world_1[0].get_class_name() == 'HelloDriver'
    assert hello_world_1[0].get_instance_name().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_seq_num() == 1
    assert hello_world_1[0].get_serialization().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_sim_name() == 'Hello_world_1'

    framework.run()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'hello_worker.HelloWorker'>"
    assert captured_out[3] == 'HelloDriver: init'
    assert captured_out[4] == 'HelloDriver: finished worker init call'
    assert captured_out[5] == 'HelloDriver: beginning step call'
    assert captured_out[6] == 'Hello from HelloWorker'
    assert captured_out[7] == 'HelloDriver: finished worker call'
    assert captured.err == ''

    # check that portal didn't write anything since USE_PORTAL=False
    assert not os.path.exists(tmpdir.join("simulation_log"))
    assert not os.path.exists(tmpdir.join("www"))

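# For reference, a plausible minimal shape for the hello_driver.py /
# hello_worker.py pair whose stdout test_helloworld asserts above.  This is an
# illustrative sketch assuming the standard ipsframework Component API
# (self.services.get_port / self.services.call); it is not a copy of the
# repository's actual data files:
#
#     from ipsframework import Component
#
#     class HelloDriver(Component):
#         def init(self, timestamp=0.0):
#             print('HelloDriver: init')
#             self.worker = self.services.get_port('WORKER')
#             self.services.call(self.worker, 'init', 0.0)
#             print('HelloDriver: finished worker init call')
#
#         def step(self, timestamp=0.0):
#             print('HelloDriver: beginning step call')
#             self.services.call(self.worker, 'step', 0.0)
#             print('HelloDriver: finished worker call')
#
#     class HelloWorker(Component):
#         def step(self, timestamp=0.0):
#             print('Hello from HelloWorker')
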
def test_helloworld_task_pool_dask(tmpdir, capfd):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")
    assert TaskPool.dask is not None

    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.ips"),
                            tmpdir.join("hello_world.ips"), tmpdir,
                            worker="hello_worker_task_pool_dask.py")
    shutil.copy(os.path.join(data_dir, "platform.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker_task_pool_dask.py"), tmpdir)

    framework = Framework(
        config_file_list=[os.path.join(tmpdir, "hello_world.ips")],
        log_file_name=str(tmpdir.join('test.log')),
        platform_file_name=os.path.join(tmpdir, "platform.conf"),
        debug=None,
        verbose_debug=None,
        cmd_nodes=0,
        cmd_ppn=0)
    framework.run()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'hello_worker_task_pool_dask.HelloWorker'>"
    assert captured_out[3] == 'HelloDriver: init'
    assert captured_out[4] == 'HelloDriver: finished worker init call'
    assert captured_out[5] == 'HelloDriver: beginning step call'
    assert captured_out[6] == 'Hello from HelloWorker'
    assert "Running setup of worker" in captured_out
    assert "Running teardown of worker" in captured_out
    assert 'ret_val = 9' in captured_out

    for duration in ("0.2", "0.4", "0.6"):
        for task in ["myFun", "myMethod"]:
            assert f'{task}({duration})' in captured_out

    exit_status = json.loads(captured_out[-3].replace("'", '"'))
    assert len(exit_status) == 9
    for n in range(3):
        for task in ["bin", "meth", "func"]:
            assert f'{task}_{n}' in exit_status
            assert exit_status[f'{task}_{n}'] == 0

def test_dask_shifter_on_cori(tmpdir):
    """
    This test requires the shifter image to be set, e.g.

    #SBATCH --image=continuumio/anaconda3:2020.11
    """
    exe = tmpdir.join("shifter_env.sh")
    exe.write("#!/bin/bash\n"
              "echo Running $1\n"
              "echo SHIFTER_RUNTIME=$SHIFTER_RUNTIME\n"
              "echo SHIFTER_IMAGEREQUEST=$SHIFTER_IMAGEREQUEST\n")
    exe.chmod(448)  # 700

    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, exe=str(exe), logfile='task_{}.log', shifter=True)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    log = "DASK__dask_worker_2 INFO {}\n"
    assert log.format(f"cmd = {exe}") in lines
    assert log.format("ret_val = 4") in lines

    # task successful and return 0
    for i in range(4):
        assert log.format(f"task_{i} 0") in lines

    # check that the process output log files are created
    work_dir = tmpdir.join("work").join("DASK__dask_worker_2")
    for i in range(4):
        log_file = work_dir.join(f"task_{i}.log")
        assert log_file.exists()
        lines = log_file.readlines()
        assert len(lines) == 3
        assert lines[0] == f'Running {i}\n'
        assert lines[1] == 'SHIFTER_RUNTIME=1\n'
        assert lines[2].startswith("SHIFTER_IMAGEREQUEST")

def test_dask_logfile_errfile(tmpdir):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")
    exe = tmpdir.join("stdouterr_write.sh")
    exe.write("#!/bin/bash\necho Running $1\n>&2 echo ERROR $1\n")
    exe.chmod(448)  # 700

    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, exe=str(exe), logfile='task_{}.log', errfile='task_{}.err')
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    log = "DASK__dask_worker_2 INFO {}\n"
    assert log.format(f"cmd = {exe}") in lines
    assert log.format("ret_val = 4") in lines

    # task successful and return 0
    for i in range(4):
        assert log.format(f"task_{i} 0") in lines

    # check that the process output log files are created
    work_dir = tmpdir.join("work").join("DASK__dask_worker_2")
    for i in range(4):
        log_file = work_dir.join(f"task_{i}.log")
        assert log_file.exists()
        lines = log_file.readlines()
        assert len(lines) == 1
        assert lines[0] == f'Running {i}\n'

        err_file = work_dir.join(f"task_{i}.err")
        assert err_file.exists()
        lines = err_file.readlines()
        assert len(lines) == 1
        assert lines[0] == f'ERROR {i}\n'

def test_framework_missing_platform(capfd):
    with pytest.raises(SystemExit) as excinfo:
        Framework(config_file_list=[], log_file_name='log')
    assert excinfo.value.code == 1

    captured = capfd.readouterr()
    assert captured.out.endswith('Need to specify a platform file\n')
    assert captured.err == ''

def test_framework_empty_config_list(tmpdir):
    with pytest.raises(ValueError) as excinfo:
        Framework(config_file_list=[],
                  log_file_name=str(tmpdir.join('test.log')),
                  platform_file_name='platform.conf',
                  debug=None,
                  verbose_debug=None,
                  cmd_nodes=0,
                  cmd_ppn=0)
    assert str(excinfo.value).endswith("Missing config file? Something is very wrong")

    # check output log file
    with open(str(tmpdir.join('test.log')), 'r') as f:
        lines = f.readlines()

    assert len(lines) == 8
    assert "Traceback (most recent call last):\n" in lines
    assert " raise ValueError('Missing config file? Something is very wrong')\n" in lines
    assert "ValueError: Missing config file? Something is very wrong\n" in lines

    # remove timestamp
    lines = [line[24:] for line in lines]
    assert "FRAMEWORK ERROR Missing config file? Something is very wrong\n" in lines
    assert "FRAMEWORK ERROR Problem initializing managers\n" in lines

def test_dataManager_state_file(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output files exist
    for filename in ['state.dat', 'state100.dat']:
        assert os.path.exists(str(tmpdir.join('work').join('DATA_INIT__init_dataManager_1').join(filename)))
        assert os.path.exists(str(tmpdir.join('work').join('DATA_DRIVER__driver_dataManager_2').join(filename)))
        assert os.path.exists(str(tmpdir.join('work').join('state').join(filename)))

    # check state file contents
    test_map = (('DATA_INIT__init_dataManager_1', 'state.dat', 1),
                ('DATA_INIT__init_dataManager_1', 'state100.dat', 100),
                ('DATA_DRIVER__driver_dataManager_2', 'state.dat', 2),
                ('DATA_DRIVER__driver_dataManager_2', 'state100.dat', 101),
                ('state', 'state.dat', 2),
                ('state', 'state100.dat', 101))

    for (direc, filename, result) in test_map:
        with open(str(tmpdir.join('work').join(direc).join(filename)), 'r') as f:
            value = int(f.readline())
        assert value == result

    # check merge_current_state logfile
    logfile = str(tmpdir.join('work').join('DATA_DRIVER__driver_dataManager_2').join('merge_current_state.log'))
    assert os.path.exists(logfile)
    # remove tmpdir from log output
    log = open(logfile).readline().replace(str(tmpdir), '')
    assert log == '-input /work/state/state.dat -updates /work/DATA_DRIVER__driver_dataManager_2/partial_state_file\n'

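# The logging tests below index into a map_log_to_level helper (defined
# elsewhere in this test module) that maps each component logging method to
# the level name written to the log file.  Judging from the assertions in
# these tests and in test_framework_log_output_debug, the mapping is
# presumably equivalent to this sketch (the name below is illustrative):
#
#     example_map_log_to_level = {
#         'log': 'INFO',
#         'debug': 'DEBUG',
#         'info': 'INFO',
#         'warning': 'WARNING',
#         'error': 'ERROR',
#         'exception': 'ERROR',
#         'critical': 'CRITICAL',
#     }
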
def test_component_logging(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    component_id = "LOGGING__logging_tester_1"

    # for log_level=WARNING only WARNING, ERROR and CRITICAL logs should be included,
    # DEBUG and INFO should be excluded
    for method in ["init", "step", "finalize"]:
        for log_type in ["warning", "error", "exception", "critical"]:
            assert f'{component_id} {map_log_to_level[log_type]:8} {method} msg: {log_type}\n' in lines
        for log_type in ["log", "debug", "info"]:
            assert f'{component_id} {map_log_to_level[log_type]:8} {method} msg: {log_type}\n' not in lines

    # check message formatting with arguments
    for log_type in ["warning", "error", "exception", "critical"]:
        assert f'{component_id} {map_log_to_level[log_type]:8} step msg: {log_type} timestamp=0 test\n' in lines
    for log_type in ["log", "debug", "info"]:
        assert f'{component_id} {map_log_to_level[log_type]:8} step msg: {log_type} timestamp=0 test\n' not in lines

    # check stdout redirect
    with open(str(tmpdir.join('test.out')), 'r') as f:
        lines = f.readlines()

    assert lines[0] == "test@init@0.0\n"
    assert lines[1] == "test@step@0.0\n"
    assert lines[2] == "test@finalize@0.0\n"

def test_using_module_components(tmpdir, capfd):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('test.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)

    assert framework.log_file_name.endswith('test.log')
    assert len(framework.config_manager.get_framework_components()) == 2

    component_map = framework.config_manager.get_component_map()
    assert len(component_map) == 1
    assert 'test' in component_map
    test = component_map['test']
    assert len(test) == 1
    assert test[0].get_class_name() == 'HelloDriver'
    assert test[0].get_instance_name().startswith('test@HelloDriver')
    assert test[0].get_serialization().startswith('test@HelloDriver')
    assert test[0].get_sim_name() == 'test'

    # Don't run anything, just check initialization of components
    framework.terminate_all_sims()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'helloworld.hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'helloworld.hello_worker.HelloWorker'>"
    assert captured.err == ''

def test_component_logging_debug(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, debug=True)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    component_id = "LOGGING__logging_tester_1"

    # for log_level=DEBUG all logs should be included
    for method in ["init", "step", "finalize"]:
        for log_type in ["log", "debug", "info", "warning", "error", "exception", "critical"]:
            assert f'{component_id} {map_log_to_level[log_type]:8} {method} msg: {log_type}\n' in lines

    # check message formatting with arguments
    for log_type in ["log", "debug", "info", "warning", "error", "exception", "critical"]:
        assert f'{component_id} {map_log_to_level[log_type]:8} step msg: {log_type} timestamp=0 test\n' in lines

def test_dask_nproc(tmpdir):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir, nproc=2, value=1)

    # Running with NPROC=2 should prevent dask from running and revert to normal task pool
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    log = "DASK__dask_worker_2 INFO {}\n"
    assert log.format("cmd = /bin/sleep") in lines
    assert log.format("ret_val = 4") in lines

    # tasks successful and return 0
    for i in range(4):
        assert log.format(f"task_{i} 0") in lines

    # check for warning message that dask isn't being used
    assert "DASK__dask_worker_2 WARNING Requested use_dask but cannot because multiple processors requested\n" in lines

def test_framework_log_output_debug(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('framework_log_debug_test.log')),
                          platform_file_name=str(platform_file),
                          debug=True,
                          verbose_debug=False,
                          cmd_nodes=0,
                          cmd_ppn=0)

    framework.log("log message")
    framework.debug("debug message")
    framework.info("info message")
    framework.warning("warning message")
    framework.error("error message")
    try:
        raise ValueError("wrong value")
    except ValueError:
        framework.exception("exception message")
    framework.critical("critical message")
    framework.terminate_all_sims()

    # check output log file
    with open(str(tmpdir.join('framework_log_debug_test.log')), 'r') as f:
        lines = f.readlines()

    assert len(lines) == 32
    assert "Traceback (most recent call last):\n" in lines
    assert ' raise ValueError("wrong value")\n' in lines
    assert "ValueError: wrong value\n" in lines

    # remove timestamp
    lines = [line[24:] for line in lines]
    assert "FRAMEWORK INFO log message\n" in lines
    assert "FRAMEWORK DEBUG debug message\n" in lines
    assert "FRAMEWORK INFO info message\n" in lines
    assert "FRAMEWORK WARNING warning message\n" in lines
    assert "FRAMEWORK ERROR error message\n" in lines
    assert "FRAMEWORK ERROR exception message\n" in lines
    assert "FRAMEWORK CRITICAL critical message\n" in lines

def test_helloworld_launch_task(tmpdir, capfd):
    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.ips"),
                            tmpdir.join("hello_world.ips"), tmpdir,
                            worker="hello_worker_launch_task.py")
    shutil.copy(os.path.join(data_dir, "platform.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker_launch_task.py"), tmpdir)

    framework = Framework(
        config_file_list=[os.path.join(tmpdir, "hello_world.ips")],
        log_file_name=str(tmpdir.join('test.log')),
        platform_file_name=os.path.join(tmpdir, "platform.conf"),
        debug=None,
        verbose_debug=None,
        cmd_nodes=0,
        cmd_ppn=0)

    assert framework.log_file_name.endswith('test.log')

    fwk_components = framework.config_manager.get_framework_components()
    assert len(fwk_components) == 1
    assert 'Hello_world_1_FWK@runspaceInitComponent@3' in fwk_components

    component_map = framework.config_manager.get_component_map()
    assert len(component_map) == 1
    assert 'Hello_world_1' in component_map
    hello_world_1 = component_map['Hello_world_1']
    assert len(hello_world_1) == 1
    assert hello_world_1[0].get_class_name() == 'HelloDriver'
    assert hello_world_1[0].get_instance_name().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_seq_num() == 1
    assert hello_world_1[0].get_serialization().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_sim_name() == 'Hello_world_1'

    framework.run()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == 'HelloDriver: init'
    assert captured_out[3] == 'HelloDriver: finished worker init call'
    assert captured_out[4] == 'HelloDriver: beginning step call'
    assert captured_out[5] == 'Hello from HelloWorker'
    assert captured_out[6] == 'Starting tasks = 0'
    assert captured_out[7] == 'Number of tasks = 1'
    assert captured_out[8] == 'wait_task ret_val = 0'
    assert captured_out[9] == 'Number of tasks = 2'
    assert captured_out[10] == 'wait_tasklist ret_val = {2: 0, 3: 0}'
    assert captured_out[11] == 'Number of tasks = 0'
    assert captured_out[12] == 'Number of tasks = 1'
    assert captured_out[13] == 'kill_task'
    assert captured_out[14] == 'Number of tasks = 0'
    assert captured_out[15] == 'Number of tasks = 2'
    assert captured_out[16] == 'kill_all_tasks'
    assert captured_out[17] == 'Number of tasks = 0'
    assert captured_out[18] == 'Timeout task 1 retval = -1'
    assert captured_out[19] == 'Timeout task 2 retval = -9'
    assert captured_out[20] == 'HelloDriver: finished worker call'
    assert captured.err == ''

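# The output asserted above exercises the services task API: launch_task()
# returns a task id, wait_task() blocks on a single id, wait_tasklist()
# returns a {task_id: return_code} dict, and kill_task() / kill_all_tasks()
# terminate running tasks (hence the -1 and -9 retvals for the timed-out and
# killed tasks).  A hedged sketch of the call pattern inside a worker's
# step(), with a hypothetical binary and nproc:
#
#     task_id = self.services.launch_task(1, self.services.get_working_dir(),
#                                         '/bin/sleep', '1')
#     ret_val = self.services.wait_task(task_id)        # 0 on success
#     ret_vals = self.services.wait_tasklist([t1, t2])  # e.g. {2: 0, 3: 0}
#     self.services.kill_task(task_id)
#     self.services.kill_all_tasks()
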
def test_basic_serial_multi(tmpdir, capfd):
    # This is the same as test_basic_serial1 except that 2 simulation files are used at the same time
    datadir = os.path.dirname(__file__)
    copy_config_and_replace("basic_serial1.ips", datadir, tmpdir)
    copy_config_and_replace("basic_serial2.ips", datadir, tmpdir)
    shutil.copy(os.path.join(datadir, "platform.conf"), tmpdir)

    # setup 'input' files
    os.system(f"cd {tmpdir}; touch file1 ofile1 ofile2 sfile1 sfile2")

    framework = Framework(config_file_list=[os.path.join(tmpdir, 'basic_serial1.ips'),
                                            os.path.join(tmpdir, 'basic_serial2.ips')],
                          log_file_name=os.path.join(tmpdir, 'test.log'),
                          platform_file_name=os.path.join(tmpdir, "platform.conf"),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # Check stdout
    # skip checking the output because the simulations sometimes write over the top of each other when running in parallel
    """
    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0] == "Created <class 'small_worker.small_worker'>"
    assert captured_out[1] == "Created <class 'medium_worker.medium_worker'>"
    assert captured_out[2] == "Created <class 'large_worker.large_worker'>"
    assert captured_out[3] == "Created <class 'small_worker.small_worker'>"
    assert captured_out[4] == "Created <class 'medium_worker.medium_worker'>"
    assert captured_out[5] == "Created <class 'large_worker.large_worker'>"
    assert captured_out[7] == "small_worker : init() called"
    assert captured_out[9] == "small_worker : init() called"
    assert captured_out[11] == "medium_worker : init() called"
    assert captured_out[13] == "medium_worker : init() called"
    assert captured_out[15] == "large_worker : init() called"
    assert captured_out[17] == "large_worker : init() called"
    assert captured_out[19] == "Current time = 1.00"
    assert captured_out[20] == "Current time = 1.00"
    assert captured_out[21] == "Current time = 2.00"
    assert captured_out[22] == "Current time = 2.00"
    assert captured_out[23] == "Current time = 3.00"
    assert captured_out[24] == "Current time = 3.00"
    """

    # check files copied and created
    for no in ["1", "2"]:
        # This should also work for 2
        if no == "2":
            continue
        driver_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join(f"test_basic_serial{no}_0/work/drivers_testing_basic_serial1_*/*")))]
        for infile in ["file1", "ofile1", "ofile2", "sfile1", "sfile2"]:
            assert infile in driver_files

        small_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join(f"test_basic_serial{no}_0/work/workers_testing_small_worker_*/*")))]
        medium_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join(f"test_basic_serial{no}_0/work/workers_testing_medium_worker_*/*")))]
        large_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join(f"test_basic_serial{no}_0/work/workers_testing_large_worker_*/*")))]
        for outfile in ["my_out3.50", "my_out3.60", "my_out3.70"]:
            assert outfile in small_worker_files
            assert outfile in medium_worker_files
            assert outfile in large_worker_files

    # check contents of my_out files
    for outfile in ["my_out3.50", "my_out3.60", "my_out3.70"]:
        for worker in ["workers_testing_small_worker_2", "workers_testing_medium_worker_3"]:
            with open(str(tmpdir.join("test_basic_serial1_0/work").join(worker).join(outfile)), 'r') as f:
                lines = f.readlines()
            assert "results = ['Rank 0 slept for 1.0 seconds']\n" in lines
        worker = "workers_testing_large_worker_4"
        with open(str(tmpdir.join("test_basic_serial1_0/work").join(worker).join(outfile)), 'r') as f:
            lines = f.readlines()
        assert "results = ['Rank 0 slept for 1.0 seconds', 'Rank 1 slept for 1.0 seconds']\n" in lines

    for outfile in ["my_out3.40", "my_out3.50", "my_out3.60"]:
        for worker in ["workers_testing_small_worker_6", "workers_testing_medium_worker_7"]:
            with open(str(tmpdir.join("test_basic_serial2_0/work").join(worker).join(outfile)), 'r') as f:
                lines = f.readlines()
            assert "results = ['Rank 0 slept for 1.0 seconds']\n" in lines
        worker = "workers_testing_large_worker_8"
        with open(str(tmpdir.join("test_basic_serial2_0/work").join(worker).join(outfile)), 'r') as f:
            lines = f.readlines()
        assert "results = ['Rank 0 slept for 1.0 seconds', 'Rank 1 slept for 1.0 seconds']\n" in lines

    # check basic_serial1 sim log file
    with open(str(tmpdir.join("test_basic_serial1_0").join("test_basic_serial1_0.log")), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]
    for worker in ["small_worker_2", "medium_worker_3", "large_worker_4"]:
        for timestamp in ["3.50", "3.60", "3.70"]:
            assert f'workers_testing_{worker} INFO Stepping Worker timestamp={timestamp}\n' in lines

    # check basic_serial2 sim log file
    with open(str(tmpdir.join("test_basic_serial2_0").join("test_basic_serial2_0.log")), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]
    for worker in ["small_worker_6", "medium_worker_7", "large_worker_8"]:
        for timestamp in ["3.40", "3.50", "3.60"]:
            assert f'workers_testing_{worker} INFO Stepping Worker timestamp={timestamp}\n' in lines

def test_helloworld_portal(tmpdir, capfd):
    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.ips"),
                            tmpdir.join("hello_world.ips"), tmpdir, portal=True)
    shutil.copy(os.path.join(data_dir, "platform.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker.py"), tmpdir)

    # stand up a simple socketserver to capture data from sendPost.py
    data = []

    class TCPHandler(socketserver.BaseRequestHandler):
        def handle(self):
            data.append(self.request.recv(1024).strip().decode())

    with socketserver.TCPServer(("localhost", 8080), TCPHandler) as server:
        server.timeout = 1

        framework = Framework(
            config_file_list=[os.path.join(tmpdir, "hello_world.ips")],
            log_file_name=str(tmpdir.join('test.log')),
            platform_file_name=os.path.join(tmpdir, "platform.conf"),
            debug=None,
            verbose_debug=None,
            cmd_nodes=0,
            cmd_ppn=0)

        assert framework.log_file_name.endswith('test.log')

        fwk_components = framework.config_manager.get_framework_components()
        assert len(fwk_components) == 2
        assert 'Hello_world_1_FWK@runspaceInitComponent@3' in fwk_components
        assert 'Hello_world_1_FWK@PortalBridge@4' in fwk_components

        component_map = framework.config_manager.get_component_map()
        assert len(component_map) == 1
        assert 'Hello_world_1' in component_map
        hello_world_1 = component_map['Hello_world_1']
        assert len(hello_world_1) == 1
        assert hello_world_1[0].get_class_name() == 'HelloDriver'
        assert hello_world_1[0].get_instance_name().startswith('Hello_world_1@HelloDriver')
        assert hello_world_1[0].get_seq_num() == 1
        assert hello_world_1[0].get_serialization().startswith('Hello_world_1@HelloDriver')
        assert hello_world_1[0].get_sim_name() == 'Hello_world_1'

        framework.run()

        # just get the first 5 events
        for _ in range(5):
            server.handle_request()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'hello_worker.HelloWorker'>"
    assert captured_out[3] == 'HelloDriver: init'
    assert captured_out[4] == 'HelloDriver: finished worker init call'
    assert captured_out[5] == 'HelloDriver: beginning step call'
    assert captured_out[6] == 'Hello from HelloWorker'
    assert captured_out[7] == 'HelloDriver: finished worker call'
    assert captured.err == ''

    # check that portal created output folders
    assert os.path.exists(tmpdir.join("simulation_log"))
    assert os.path.exists(tmpdir.join("www"))

    # check output files exist
    www_files = glob.glob(str(tmpdir.join("www").join("*")))
    assert len(www_files) == 1
    assert os.path.basename(www_files[0]).startswith("Hello_world_1_")
    assert www_files[0].endswith(".html")

    files = glob.glob(str(tmpdir.join("simulation_log").join("*")))
    assert len(files) == 3
    exts = [os.path.splitext(f)[1] for f in files]
    assert '.json' in exts
    assert '.html' in exts
    assert '.eventlog' in exts

    # check data sent to portal
    assert len(data) == 5

    # get first event to check
    event = json.loads(data[0].split('\r\n')[-1])
    assert event['code'] == 'Framework'
    assert event['eventtype'] == 'IPS_START'
    assert event['comment'] == 'Starting IPS Simulation'
    assert event['state'] == 'Running'
    assert event['sim_name'] == 'Hello_world_1'
    assert event['seqnum'] == 0
    assert 'ips_version' in event

    # get last event to check
    event = json.loads(data[-1].split('\r\n')[-1])
    assert event['code'] == 'DRIVERS_HELLO_HelloDriver'
    assert event['eventtype'] == 'IPS_CALL_END'
    assert event['comment'] == 'Target = Hello_world_1@HelloWorker@2:init(0.000)'
    assert event['state'] == 'Running'
    assert event['sim_name'] == 'Hello_world_1'
    assert 'trace' in event
    trace = event['trace']
    assert 'duration' in trace
    assert 'timestamp' in trace
    assert 'id' in trace
    assert trace['id'] == hashlib.md5('Hello_world_1@HelloWorker@2:init(0.000)'.encode()).hexdigest()[:16]
    assert 'traceId' in trace
    assert trace['traceId'] == hashlib.md5(event['portal_runid'].encode()).hexdigest()
    assert 'parentId' in trace
    assert trace['parentId'] == hashlib.md5('Hello_world_1@HelloDriver@1:init(0)'.encode()).hexdigest()[:16]
    assert 'localEndpoint' in trace
    assert trace['localEndpoint']['serviceName'] == 'Hello_world_1@HelloWorker@2'

def test_srun_openmp_on_cori(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir, "openmp_task")
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check simulation_log
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        comments = [json.loads(line)['comment'].split(', ', maxsplit=4)[3:]
                    for line in json_file.readlines()]

    # check that the process output log files are created
    work_dir = tmpdir.join("work").join("OPENMP__openmp_task_1")

    # 0
    for c in (2, 4, 6):
        assert comments[c][0] == "Target = srun -N 1 -n 1 -c 32 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
        assert comments[c][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '32'}"
    for log in ('01', '02', '03'):
        lines = sorted(work_dir.join(f"log.{log}").readlines())
        assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-63)\n')

    # 1
    for c in (8, 10, 12):
        assert comments[c][0] == "Target = srun -N 1 -n 4 -c 8 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
        assert comments[c][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '8'}"
    for log in ('11', '12', '13'):
        lines = sorted(work_dir.join(f"log.{log}").readlines())
        assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-7,32-39)\n')
        assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16-23,48-55)\n')
        assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 8-15,40-47)\n')
        assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 24-31,56-63)\n')

    # 2
    for c in (14, 16, 18):
        assert comments[c][0] == "Target = srun -N 1 -n 32 -c 1 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
        assert comments[c][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '1'}"
    for log in ('21', '22', '23'):
        lines = sorted(work_dir.join(f"log.{log}").readlines(),
                       key=lambda a: int(a.split()[3].replace(',', '')))
        for n, l in enumerate(lines):
            cores = n//2 + n % 2*16
            assert lines[n].startswith(f'Hello from rank {n}') and lines[n].endswith(f'(core affinity = {cores},{cores+32})\n')

    # 31
    assert comments[20][0] == "Target = srun -N 1 -n 4 -c 8 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[20][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '8'}"
    lines = sorted(work_dir.join("log.31").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-7,32-39)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16-23,48-55)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 8-15,40-47)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 24-31,56-63)\n')

    # 32
    assert comments[22][0] == "Target = srun -N 1 -n 4 -c 4 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[22][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '4'}"
    lines = sorted(work_dir.join("log.32").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0-3,32-35)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16-19,48-51)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 4-7,36-39)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 20-23,52-55)\n')

    # 33
    assert comments[24][0] == "Target = srun -N 1 -n 4 -c 2 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-mpi.gnu.cori "
    assert comments[24][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '2'}"
    lines = sorted(work_dir.join("log.33").readlines())
    assert lines[0].startswith('Hello from rank 0') and lines[0].endswith('(core affinity = 0,1,32,33)\n')
    assert lines[1].startswith('Hello from rank 1') and lines[1].endswith('(core affinity = 16,17,48,49)\n')
    assert lines[2].startswith('Hello from rank 2') and lines[2].endswith('(core affinity = 2,3,34,35)\n')
    assert lines[3].startswith('Hello from rank 3') and lines[3].endswith('(core affinity = 18,19,50,51)\n')

    # openmp
    # 41
    assert comments[26][0] == "Target = srun -N 1 -n 4 -c 8 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-hybrid.gnu.cori "
    assert comments[26][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '8'}"
    lines = sorted(work_dir.join("log.41").readlines())
    for n, l in enumerate(lines):
        assert l.startswith(f"Hello from rank {n//8}, thread {n%8}")
        assert l.endswith(f"(core affinity = {n%8 + n//16*8 + n//8%2*16})\n")

    # 42
    assert comments[28][0] == "Target = srun -N 1 -n 4 -c 4 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-hybrid.gnu.cori "
    assert comments[28][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '4'}"
    lines = sorted(work_dir.join("log.42").readlines())
    for n, l in enumerate(lines):
        assert l.startswith(f"Hello from rank {n//4}, thread {n%4}")
        assert l.endswith(f"(core affinity = {n%4 + n//8*4 + n//4%2*16})\n")

    # 43
    assert comments[30][0] == "Target = srun -N 1 -n 4 -c 2 --threads-per-core=1 --cpu-bind=cores /usr/common/software/bin/check-hybrid.gnu.cori "
    assert comments[30][1] == "env = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread', 'OMP_NUM_THREADS': '2'}"
    lines = sorted(work_dir.join("log.43").readlines())
    for n, l in enumerate(lines):
        assert l.startswith(f"Hello from rank {n//2}, thread {n%2}")
        assert l.endswith(f"(core affinity = {n%2 + n//4*2 + n//2%2*16})\n")

def test_helloworld_task_pool(tmpdir, capfd):
    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.ips"),
                            tmpdir.join("hello_world.ips"), tmpdir,
                            worker="hello_worker_task_pool.py")
    shutil.copy(os.path.join(data_dir, "platform.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker_task_pool.py"), tmpdir)

    framework = Framework(
        config_file_list=[os.path.join(tmpdir, "hello_world.ips")],
        log_file_name=str(tmpdir.join('test.log')),
        platform_file_name=os.path.join(tmpdir, "platform.conf"),
        debug=None,
        verbose_debug=None,
        cmd_nodes=0,
        cmd_ppn=0)

    assert framework.log_file_name.endswith('test.log')
    assert len(framework.config_manager.get_framework_components()) == 1

    component_map = framework.config_manager.get_component_map()
    assert len(component_map) == 1
    assert 'Hello_world_1' in component_map
    hello_world_1 = component_map['Hello_world_1']
    assert len(hello_world_1) == 1
    assert hello_world_1[0].get_class_name() == 'HelloDriver'
    assert hello_world_1[0].get_instance_name().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_seq_num() == 1
    assert hello_world_1[0].get_serialization().startswith('Hello_world_1@HelloDriver')
    assert hello_world_1[0].get_sim_name() == 'Hello_world_1'

    framework.run()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'hello_worker_task_pool.HelloWorker'>"
    assert captured_out[3] == 'HelloDriver: init'
    assert captured_out[4] == 'HelloDriver: finished worker init call'
    assert captured_out[5] == 'HelloDriver: beginning step call'
    assert captured_out[6] == 'Hello from HelloWorker'
    assert captured_out[7] == 'ret_val = 3'

    exit_status = json.loads(captured_out[8].replace("'", '"'))
    assert len(exit_status) == 3
    for n in range(3):
        assert f'task_{n}' in exit_status
        assert exit_status[f'task_{n}'] == 0

    assert captured_out[9] == "====== Non Blocking "
    for line in range(9, len(captured_out) - 2):
        if "Nonblock_task" in captured_out[line]:
            assert captured_out[line].endswith("': 0}")
    assert captured_out[-5] == 'Active = 0 Finished = 3'

    # output from remove_task_pool
    assert captured_out[-4] == 'ret_val = 2'
    assert captured_out[-3] == "KeyError('pool')"
    assert captured_out[-2] == 'HelloDriver: finished worker call'
    assert captured.err == ''

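# The task-pool output asserted above follows the services task-pool API:
# create_task_pool() names a pool, add_task() queues tasks into it,
# submit_tasks() runs them (blocking or non-blocking), get_finished_tasks()
# returns completed {task_name: return_code} entries, and remove_task_pool()
# deletes the pool, so a later lookup raises KeyError('pool') as checked
# above.  A hedged sketch of the pattern, with hypothetical task names:
#
#     self.services.create_task_pool('pool')
#     for n in range(3):
#         self.services.add_task('pool', f'task_{n}', 1,
#                                self.services.get_working_dir(),
#                                '/bin/sleep', '1')
#     ret_val = self.services.submit_tasks('pool')      # number of tasks run
#     exit_status = self.services.get_finished_tasks('pool')
#     self.services.remove_task_pool('pool')
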
def test_basic_concurrent1(tmpdir, capfd):
    datadir = os.path.dirname(__file__)
    copy_config_and_replace("basic_concurrent1.ips", datadir, tmpdir)
    shutil.copy(os.path.join(datadir, "platform.conf"), tmpdir)

    # setup 'input' files
    os.system(f"cd {tmpdir}; touch file1 ofile1 ofile2 sfile1 sfile2")

    framework = Framework(config_file_list=[os.path.join(tmpdir, 'basic_concurrent1.ips')],
                          log_file_name=os.path.join(tmpdir, 'test.log'),
                          platform_file_name=os.path.join(tmpdir, "platform.conf"),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # Check stdout
    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'small_worker.small_worker'>"
    assert captured_out[2] == "Created <class 'medium_worker.medium_worker'>"
    assert captured_out[3] == "Created <class 'large_worker.large_worker'>"
    assert captured_out[4] == "small_worker : init() called"
    assert captured_out[6] == "medium_worker : init() called"
    assert captured_out[8] == "large_worker : init() called"
    assert captured_out[10] == "Current time = 3.50"
    assert captured_out[11] == "nonblocking wait_call() invoked before call 10 finished"
    assert captured_out[12] == "Current time = 3.60"
    assert captured_out[13] == "nonblocking wait_call() invoked before call 13 finished"
    assert captured_out[14] == "Current time = 3.70"
    assert captured_out[15] == "nonblocking wait_call() invoked before call 16 finished"

    # check files copied and created
    driver_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join("test_basic_concurrent1_0/work/drivers_testing_basic_concurrent1_*/*")))]
    for infile in ["file1", "ofile1", "ofile2", "sfile1", "sfile2"]:
        assert infile in driver_files

    small_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join("test_basic_concurrent1_0/work/workers_testing_small_worker_*/*")))]
    medium_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join("test_basic_concurrent1_0/work/workers_testing_medium_worker_*/*")))]
    large_worker_files = [os.path.basename(f) for f in glob.glob(str(tmpdir.join("test_basic_concurrent1_0/work/workers_testing_large_worker_*/*")))]
    for outfile in ["my_out3.50", "my_out3.60", "my_out3.70"]:
        assert outfile in small_worker_files
        assert outfile in medium_worker_files
        assert outfile in large_worker_files

    # check contents of my_out files
    for outfile in ["my_out3.50", "my_out3.60", "my_out3.70"]:
        for worker in ["workers_testing_small_worker_2", "workers_testing_medium_worker_3"]:
            with open(str(tmpdir.join("test_basic_concurrent1_0/work").join(worker).join(outfile)), 'r') as f:
                lines = f.readlines()
            assert "results = ['Rank 0 slept for 1.0 seconds']\n" in lines
        worker = "workers_testing_large_worker_4"
        with open(str(tmpdir.join("test_basic_concurrent1_0/work").join(worker).join(outfile)), 'r') as f:
            lines = f.readlines()
        assert "results = ['Rank 0 slept for 1.0 seconds', 'Rank 1 slept for 1.0 seconds']\n" in lines

    # check sim log file
    with open(str(tmpdir.join("test_basic_concurrent1_0").join("test_basic_concurrent1_0.log")), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]
    for worker in ["small_worker_2", "medium_worker_3", "large_worker_4"]:
        for timestamp in ["3.50", "3.60", "3.70"]:
            assert f'workers_testing_{worker} INFO Stepping Worker timestamp={timestamp}\n' in lines

def test_framework_simple(tmpdir, capfd):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('test.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)

    assert framework.log_file_name.endswith('test.log')
    assert len(framework.config_manager.get_framework_components()) == 2

    component_map = framework.config_manager.get_component_map()
    assert len(component_map) == 1
    assert 'test' in component_map
    test = component_map['test']
    assert len(test) == 1
    assert test[0].get_class_name() == 'test_driver'
    assert test[0].get_instance_name().startswith('test@test_driver')
    assert test[0].get_seq_num() == 1
    assert test[0].get_serialization().startswith('test@test_driver')
    assert test[0].get_sim_name() == 'test'

    # check all registered service handlers
    service_handlers = sorted(framework.service_handler.keys())
    assert service_handlers == ['createListener', 'create_simulation', 'existsTopic', 'finish_task',
                                'getSubscription', 'getTopic', 'get_allocation', 'get_config_parameter',
                                'get_port', 'get_time_loop', 'init_call', 'init_task', 'init_task_pool',
                                'launch_task', 'merge_current_plasma_state', 'processEvents',
                                'registerEventListener', 'registerSubscriber', 'release_allocation',
                                'removeSubscription', 'sendEvent', 'set_config_parameter', 'stage_state',
                                'unregisterEventListener', 'unregisterSubscriber', 'update_state', 'wait_call']

    framework.run()

    # check simulation_log
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        json_lines = json_file.readlines()
    assert len(json_lines) == 9

    event0 = json.loads(json_lines[0])
    event1 = json.loads(json_lines[1])
    event2 = json.loads(json_lines[8])
    assert event0['eventtype'] == 'IPS_START'
    assert event1['eventtype'] == 'IPS_RESOURCE_ALLOC'
    assert event2['eventtype'] == 'IPS_END'
    for event in [event0, event1, event2]:
        assert str(event['ok']) == 'True'
        assert event['sim_name'] == 'test'

    captured = capfd.readouterr()
    assert captured.out.startswith('Starting IPS')
    assert captured.err == ''

def test_dask_fake_shifter(tmpdir, monkeypatch):
    pytest.importorskip("dask")
    pytest.importorskip("distributed")

    shifter = tmpdir.join("shifter")
    shifter.write("#!/bin/bash\necho Running $@ in shifter >> shifter.log\n$@\n")
    shifter.chmod(448)  # 700

    old_PATH = os.environ['PATH']
    monkeypatch.setenv("PATH", str(tmpdir), prepend=os.pathsep)
    # need to reimport to get fake shifter
    importlib.reload(ipsframework.services)

    platform_file, config_file = write_basic_config_and_platform_files(
        tmpdir, value=1, shifter=True)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    monkeypatch.setenv("PATH", old_PATH)
    # need to reimport to remove fake shifter
    importlib.reload(ipsframework.services)

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    log = "DASK__dask_worker_2 INFO {}\n"
    assert log.format("cmd = /bin/sleep") in lines
    assert log.format("ret_val = 4") in lines

    # task successful and return 0
    for i in range(4):
        assert log.format(f"task_{i} 0") in lines

    # check simulation_log, make sure it includes events from dask tasks
    json_files = glob.glob(str(tmpdir.join("simulation_log").join("*.json")))
    assert len(json_files) == 1
    with open(json_files[0], 'r') as json_file:
        lines = json_file.readlines()
    lines = [json.loads(line.strip()) for line in lines]
    assert len(lines) == 28
    eventtypes = [e.get('eventtype') for e in lines]
    assert eventtypes.count('IPS_LAUNCH_DASK_TASK') == 4
    assert eventtypes.count('IPS_TASK_END') == 5
    launch_dask_comments = [
        e.get('comment') for e in lines
        if e.get('eventtype') == "IPS_LAUNCH_DASK_TASK"
    ]
    for task in range(4):
        assert f'task_name = task_{task}, Target = /bin/sleep 1' in launch_dask_comments
    task_end_comments = [
        e.get('comment')[:-4] for e in lines
        if e.get('eventtype') == "IPS_TASK_END"
    ]
    for task in range(4):
        assert f'task_name = task_{task}, elapsed time = 1' in task_end_comments

    # check shifter.log file
    with open(str(tmpdir.join('/work/DASK__dask_worker_2').join('shifter.log')), 'r') as f:
        lines = sorted(f.readlines())
    assert lines[0].startswith('Running dask-scheduler --no-dashboard --scheduler-file')
    assert lines[0].endswith('--port 0 in shifter\n')
    assert lines[1].startswith('Running dask-worker --scheduler-file')
    assert lines[1].endswith('--nprocs 1 --nthreads 0 --no-dashboard in shifter\n')

def test_timeloop_checkpoint_restart(tmpdir):
    platform_file, config_file = write_basic_config_and_platform_files(tmpdir)
    framework = Framework(config_file_list=[str(config_file)],
                          log_file_name=str(tmpdir.join('ips.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    for time in ["100.0", "112.5", "125.0", "137.5", "150.0"]:
        assert f"TIMELOOP_COMP__timeloop_comp_2 INFO step({time})\n" in lines
        assert f"TIMELOOP_COMP2__timeloop_comp_3 INFO step({time})\n" in lines
        for comp in ["TIMELOOP__timeloop_driver_1", "TIMELOOP_COMP__timeloop_comp_2", "TIMELOOP_COMP2__timeloop_comp_3"]:
            assert f"{comp} INFO checkpoint({time})\n" in lines

    # check output files
    # state file
    state_files = tmpdir.join("work").join("state").listdir()
    assert len(state_files) == 1
    state_file = state_files[0].readlines()
    assert len(state_file) == 18

    # restart files
    restart_dir = tmpdir.join("restart")
    assert len(restart_dir.listdir()) == 2
    assert restart_dir.join("137.500").join("TIMELOOP_COMP__timeloop_comp").exists()
    assert restart_dir.join("150.000").join("TIMELOOP_COMP__timeloop_comp").exists()
    assert restart_dir.join("137.500").join("TIMELOOP_COMP2__timeloop_comp").exists()
    assert restart_dir.join("150.000").join("TIMELOOP_COMP2__timeloop_comp").exists()

    # 137.500
    restart_files = restart_dir.join("137.500").join("TIMELOOP_COMP__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w1_1.dat").exists()
    assert len(restart_files.join("w1_1.dat").readlines()) == 5
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 14

    restart_files = restart_dir.join("137.500").join("TIMELOOP_COMP2__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w2_1.dat").exists()
    assert len(restart_files.join("w2_1.dat").readlines()) == 5
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 15

    # 150.000
    restart_files = restart_dir.join("150.000").join("TIMELOOP_COMP__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w1_1.dat").exists()
    assert len(restart_files.join("w1_1.dat").readlines()) == 6
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 17

    restart_files = restart_dir.join("150.000").join("TIMELOOP_COMP2__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w2_1.dat").exists()
    assert len(restart_files.join("w2_1.dat").readlines()) == 6
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 18

    # check output from services.stage_output_files
    results_dir = tmpdir.join("simulation_results")
    assert len(results_dir.listdir()) == 8
    for time in ["100.0", "112.5", "125.0", "137.5", "150.0"]:
        assert results_dir.join('TIMELOOP_COMP__timeloop_comp_2').join(f'w1_1_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP__timeloop_comp_2').join(f'w1_2_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP2__timeloop_comp_3').join(f'w2_1_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP2__timeloop_comp_3').join(f'w2_2_{time}.dat').exists()

    # Now do SIMULATION_MODE=RESTART
    platform_file, restart_config_file = write_basic_config_and_platform_files(tmpdir, restart=True)
    framework = Framework(config_file_list=[str(restart_config_file)],
                          log_file_name=str(tmpdir.join('ips_restart.log')),
                          platform_file_name=str(platform_file),
                          debug=None,
                          verbose_debug=None,
                          cmd_nodes=0,
                          cmd_ppn=0)
    framework.run()

    # check output log file
    with open(str(tmpdir.join('sim_restart.log')), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]

    for time in ["162.5", "175.0", "187.5", "200.0"]:
        assert f"TIMELOOP_COMP__timeloop_comp_8 INFO step({time})\n" in lines
        assert f"TIMELOOP_COMP2__timeloop_comp_9 INFO step({time})\n" in lines
        for comp in ["TIMELOOP__timeloop_driver_7", "TIMELOOP_COMP__timeloop_comp_8", "TIMELOOP_COMP2__timeloop_comp_9"]:
            assert f"{comp} INFO checkpoint({time})\n" in lines

    # check output files
    # state file
    state_files = tmpdir.join("work").join("state").listdir()
    assert len(state_files) == 1
    state_file = state_files[0].readlines()
    assert len(state_file) == 33

    # restart files
    restart_dir = tmpdir.join("restart")
    assert len(restart_dir.listdir()) == 2
    assert restart_dir.join("187.500").join("TIMELOOP_COMP__timeloop_comp").exists()
    assert restart_dir.join("200.000").join("TIMELOOP_COMP__timeloop_comp").exists()
    assert restart_dir.join("187.500").join("TIMELOOP_COMP2__timeloop_comp").exists()
    assert restart_dir.join("200.000").join("TIMELOOP_COMP2__timeloop_comp").exists()

    # 187.500
    restart_files = restart_dir.join("187.500").join("TIMELOOP_COMP__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w1_1.dat").exists()
    assert len(restart_files.join("w1_1.dat").readlines()) == 10
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 29

    restart_files = restart_dir.join("187.500").join("TIMELOOP_COMP2__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w2_1.dat").exists()
    assert len(restart_files.join("w2_1.dat").readlines()) == 10
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 30

    # 200.000
    restart_files = restart_dir.join("200.000").join("TIMELOOP_COMP__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w1_1.dat").exists()
    assert len(restart_files.join("w1_1.dat").readlines()) == 11
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 32

    restart_files = restart_dir.join("200.000").join("TIMELOOP_COMP2__timeloop_comp")
    assert len(restart_files.listdir()) == 2
    assert restart_files.join("w2_1.dat").exists()
    assert len(restart_files.join("w2_1.dat").readlines()) == 11
    assert restart_files.join("test_ps.dat").exists()
    assert len(restart_files.join("test_ps.dat").readlines()) == 33

    # work files, w[1,2]_1.dat should include previous data while w[1,2]_2.dat shouldn't
    work_files = tmpdir.join("work")
    assert work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("w1_1.dat").exists()
    assert work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("w1_2.dat").exists()
    assert work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("test_ps.dat").exists()
    assert len(work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("w1_1.dat").readlines()) == 11
    assert len(work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("w1_2.dat").readlines()) == 5
    assert len(work_files.join("TIMELOOP_COMP__timeloop_comp_8").join("test_ps.dat").readlines()) == 32
    assert work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("w2_1.dat").exists()
    assert work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("w2_2.dat").exists()
    assert work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("test_ps.dat").exists()
    assert len(work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("w2_1.dat").readlines()) == 11
    assert len(work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("w2_2.dat").readlines()) == 5
    assert len(work_files.join("TIMELOOP_COMP2__timeloop_comp_9").join("test_ps.dat").readlines()) == 33

    # check output from services.stage_output_files
    results_dir = tmpdir.join("simulation_results")
    assert len(results_dir.listdir()) == 14
    for time in ["162.5", "175.0", "187.5", "200.0"]:
        assert results_dir.join('TIMELOOP_COMP__timeloop_comp_8').join(f'w1_1_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP__timeloop_comp_8').join(f'w1_2_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP2__timeloop_comp_9').join(f'w2_1_{time}.dat').exists()
        assert results_dir.join('TIMELOOP_COMP2__timeloop_comp_9').join(f'w2_2_{time}.dat').exists()

def test_hello_world_nested(tmpdir, capfd):
    data_dir = os.path.dirname(__file__)
    copy_config_and_replace(os.path.join(data_dir, "hello_world.config"),
                            tmpdir.join("hello_world.config"), tmpdir)
    copy_config_and_replace(os.path.join(data_dir, "hello_world_sub.config"),
                            tmpdir.join("hello_world_sub.config"), tmpdir)
    shutil.copy(os.path.join(data_dir, "workstation.conf"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_driver_sub.py"), tmpdir)
    shutil.copy(os.path.join(data_dir, "hello_worker_sub.py"), tmpdir)

    with open(tmpdir.join('input.txt'), 'w') as f:
        f.write("INPUT FILE\n")

    framework = Framework(
        config_file_list=[os.path.join(tmpdir, "hello_world.config")],
        log_file_name=str(tmpdir.join('test.log')),
        platform_file_name=os.path.join(tmpdir, "workstation.conf"),
        debug=None,
        verbose_debug=None,
        cmd_nodes=0,
        cmd_ppn=0)
    framework.run()

    captured = capfd.readouterr()
    captured_out = captured.out.split('\n')
    assert captured_out[0].startswith("Starting IPS")
    assert captured_out[1] == "Created <class 'hello_driver.HelloDriver'>"
    assert captured_out[2] == "Created <class 'hello_worker.HelloWorker'>"
    assert captured_out[3] == "Hello from HelloWorker - new1"
    assert captured_out[4] == "Created <class 'hello_driver_sub.HelloDriver'>"
    assert captured_out[5] == "Created <class 'hello_worker_sub.HelloWorker'>"
    assert captured_out[6] == "Hello from HelloWorker - sub"
    assert captured_out[7] == "made it out of the worker call"
    assert captured.err == ''

    # check sim log file
    with open(str(tmpdir.join("Hello_world_sim.log")), 'r') as f:
        lines = f.readlines()
    # remove timestamp
    lines = [line[24:] for line in lines]
    assert 'WORKERSSUB_HELLO_HelloWorker_6 INFO Hello from HelloWorker - sub\n' in lines

    # check sub workflow results file
    sub_out = tmpdir.join("hello_example_SUPER/work/WORKERS_HELLO_HelloWorker_2/Subflow_01/simulation_results/DRIVERS_HELLOSUB_HelloDriver_5/sub_out_0.0.txt")
    assert os.path.exists(str(sub_out))
    assert os.path.islink(str(sub_out))
    with open(str(sub_out), 'r') as f:
        lines = f.readlines()
    assert lines[0] == "SUB OUTPUT FILE\n"

    # check results file in parent workflow
    sub_out = tmpdir.join("hello_example_SUPER/work/WORKERS_HELLO_HelloWorker_2/sub_out.txt")
    assert os.path.exists(str(sub_out))
    with open(str(sub_out), 'r') as f:
        lines = f.readlines()
    assert lines[0] == "SUB OUTPUT FILE\n"

    # check input file staging
    sub_input = tmpdir.join("hello_example_SUPER/work/WORKERS_HELLO_HelloWorker_2/input.txt")
    assert os.path.exists(str(sub_input))
    with open(str(sub_input), 'r') as f:
        lines = f.readlines()
    assert lines[0] == "SUB INPUT FILE\n"

    sub_input = tmpdir.join("hello_example_SUPER/work/WORKERS_HELLO_HelloWorker_2/HELLO_DRIVER/input.txt")
    assert os.path.exists(str(sub_input))
    with open(str(sub_input), 'r') as f:
        lines = f.readlines()
    assert lines[0] == "SUB INPUT FILE\n"