def test_pre_processor(get_bout_run_setup: Callable[[str], BoutRunSetup]) -> None:
    """
    Check the first batch of nodes after attaching two pre-processors.

    A RunGroup waiting for the function node "1" gets two pre-processors
    attached, and the first item obtained by iterating the graph is expected
    to hold three entries.

    Parameters
    ----------
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    """
    setup = get_bout_run_setup("test_run_group_pre")
    graph = RunGraph()
    graph.add_function_node("1")
    group = RunGroup(graph, setup, name="test_pre", waiting_for="1")

    # Attach two independent no-op pre-processors
    for _ in range(2):
        group.add_pre_processor(
            {"function": lambda: None, "args": None, "kwargs": None}
        )

    first_batch = next(graph)
    assert len(first_batch) == 3
def __init__(self, run_graph: Optional[RunGraph] = None, wait_time: int = 5) -> None:
    """
    Store the wait time and the run graph to be executed.

    Parameters
    ----------
    run_graph : None or RunGraph
        The run graph to be executed
        If None, a new RunGraph is created and a RunGroup built from a default
        BoutRunSetup is attached to it
    wait_time : int
        Time to wait before checking if a job has completed
    """
    self.wait_time = wait_time

    if run_graph is not None:
        self.__run_graph = run_graph
        # Only a warning: a graph without BOUT++ runs is still accepted
        bout_run_nodes = [
            name for name in self.__run_graph.nodes if name.startswith("bout_run")
        ]
        if not bout_run_nodes:
            logging.warning("The provided run_graph does not contain any bout_runs")
    else:
        self.__run_graph = RunGraph()
        _ = RunGroup(self.__run_graph, BoutRunSetup())
def test_post_processor(get_bout_run_setup: Callable[[str], BoutRunSetup]) -> None:
    """
    Check which nodes wait for "1" after attaching two post-processors.

    Parameters
    ----------
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    """
    setup = get_bout_run_setup("test_run_group_post")
    graph = RunGraph()
    graph.add_function_node("1")
    group = RunGroup(graph, setup, name="test_post", waiting_for="1")

    # Attach two independent no-op post-processors
    for _ in range(2):
        group.add_post_processor(
            {"function": lambda: None, "args": None, "kwargs": None}
        )

    # Order is irrelevant here, hence the comparison as sets
    expected_names = {
        "1",
        "bout_run_test_post",
        "post_processor_test_post_0",
        "post_processor_test_post_1",
    }
    assert expected_names == set(graph.get_waiting_for_tuple("1"))
def test_constructor(get_bout_run_setup: Callable[[str], BoutRunSetup]) -> None:
    """
    Check the node names produced by the RunGroup constructor.

    Unnamed groups are expected to get a name ending in an increasing integer,
    named groups a name ending in the given name.

    Parameters
    ----------
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    """
    setup = get_bout_run_setup("test_run_group_constructor")
    graph = RunGraph()
    graph.add_function_node("1")

    # First unnamed group: the suffix after the last underscore is the number
    first_unnamed = RunGroup(graph, setup)
    first_number = int(first_unnamed.bout_run_node_name.rsplit("_", 1)[-1])
    assert first_unnamed.bout_run_node_name == f"bout_run_{first_number}"

    # Explicitly named group
    named = RunGroup(graph, setup, name="test")
    assert named.bout_run_node_name == "bout_run_test"

    # Second unnamed group, this time waiting for node "1"
    second_unnamed = RunGroup(graph, setup, waiting_for="1")
    second_number = int(second_unnamed.bout_run_node_name.rsplit("_", 1)[-1])
    assert second_number > first_number
    assert second_unnamed.bout_run_node_name == f"bout_run_{second_number}"

    expected_waiting = ("1", f"bout_run_{second_number}")
    assert expected_waiting == graph.get_waiting_for_tuple("1")
def test_get_dot_string() -> None:
    """Check the dot string of a graph holding a single function node."""
    graph = RunGraph()
    graph.add_function_node("42")

    # The submitter is rendered with its default repr, which embeds its address
    address = hex(id(graph["42"]["submitter"]))
    submitter_repr = (
        "<bout_runners.submitter.local_submitter.LocalSubmitter object at "
        f"{address}>"
    )
    node_attributes = (
        "args=None, function=None, kwargs=None, path=None, status=ready, "
        f"submitter={submitter_repr}"
    )
    expected_dot = f"strict digraph {{\n42 [{node_attributes}];\n}}\n"

    assert expected_dot == graph.get_dot_string()
def test_function_run(tmp_path: Path) -> None:
    """
    Check that run_function writes a python file for each submitted function.

    Parameters
    ----------
    tmp_path : Path
        Temporary path
    """
    runner = BoutRunner(RunGraph())

    # (file name, function, extra positional arguments passed to run_function)
    cases = (
        ("return_none.py", return_none, ()),
        ("return_sum_of_two.py", return_sum_of_two, ((1, 2),)),
        ("return_sum_of_three.py", return_sum_of_three, ((1, 2), {"number_3": 3})),
    )

    for file_name, function, extra_arguments in cases:
        script_path = tmp_path.joinpath(file_name)
        submitter = runner.run_function(script_path, function, *extra_arguments)
        submitter.wait_until_completed()
        assert script_path.is_file()
def make_run_group(
    name: str,
    make_project: Path,
    run_graph: Optional[RunGraph] = None,
    restart_from: Optional[Path] = None,
    waiting_for: Optional[Union[str, Iterable[str]]] = None,
) -> RunGroup:
    """
    Build a basic RunGroup with nout=0 and a local submitter.

    Parameters
    ----------
    name : str
        Name of RunGroup and DatabaseConnector
        Also used as the name of the destination directory of BOUT.inp
    make_project : Path
        The path to the conduction example
    run_graph : RunGraph or None
        The RunGraph object
        A new RunGraph is created if None
    restart_from : Path or None
        The path to copy the restart files from
    waiting_for : None or str or iterable
        Name of nodes this node will wait for to finish before executing

    Returns
    -------
    RunGroup
        A basic run group
    """
    # Source and destination directories both live under the project directory
    paths = BoutPaths(
        project_path=make_project,
        bout_inp_src_dir=make_project / "data",
        bout_inp_dst_dir=make_project / name,
    )

    # Parameter objects
    run_parameters = RunParameters({"global": {"nout": 0}})
    final_parameters = FinalParameters(DefaultParameters(paths), run_parameters)

    # Executor, database and the setup tying them together
    executor = Executor(
        bout_paths=paths,
        submitter=LocalSubmitter(paths.project_path),
        run_parameters=run_parameters,
        restart_from=restart_from,
    )
    setup = BoutRunSetup(executor, DatabaseConnector(name), final_parameters)

    if run_graph is None:
        run_graph = RunGraph()
    return RunGroup(run_graph, setup, name=name, waiting_for=waiting_for)
def make_graph() -> RunGraph:
    """
    Return a simple graph with six function nodes.

    The nodes are named "0" to "5" and are connected through add_waiting_for.

    Returns
    -------
    RunGraph
        A simple graph
    """
    graph = RunGraph()
    for node_name in map(str, range(6)):
        graph.add_function_node(node_name)

    # Argument pairs forwarded unchanged to add_waiting_for
    waiting_pairs = (
        ("4", "3"),
        ("5", "3"),
        ("3", "2"),
        ("2", "0"),
        ("1", "0"),
    )
    for first, second in waiting_pairs:
        graph.add_waiting_for(first, second)

    return graph
def test_add_edge() -> None:
    """Check that edges can be added and that a cycle-creating edge is rejected."""
    graph = RunGraph()
    for node_name in ("1", "2"):
        graph.add_function_node(node_name)

    graph.add_edge("1", "2")

    # The reverse edge would close a cycle
    with pytest.raises(ValueError):
        graph.add_edge("2", "1")

    assert {"1", "2"} == set(graph.nodes)
def test_get_nodes_orders(
    graph: RunGraph, reverse: bool, expected: Tuple[Tuple[int, ...], ...]
) -> None:
    """
    Check that get_node_orders returns the expected orders.

    Parameters
    ----------
    graph : RunGraph
        The graph to get the node orders from
    reverse : bool
        Forwarded to get_node_orders
    expected : tuple of tuple
        The expected result
    """
    assert graph.get_node_orders(reverse) == expected
def test_constructor(yield_conduction_path) -> None:
    """
    Check construction of BoutRunner with and without a run graph.

    Parameters
    ----------
    yield_conduction_path : Path
        Path to the BOUT++ conduction example
        See the yield_conduction_path for more details
    """
    # Without arguments a BoutRunSetup is expected on the first node
    with change_directory(yield_conduction_path):
        default_runner = BoutRunner()

    node_names = list(default_runner.run_graph.nodes.keys())
    first_node = default_runner.run_graph[node_names[0]]
    assert isinstance(first_node["bout_run_setup"], BoutRunSetup)

    # An empty graph is accepted as is
    empty_runner = BoutRunner(RunGraph())
    assert len(empty_runner.run_graph.nodes) == 0
def simple_graph() -> RunGraph:
    """
    Return a simple graph with four edges.

    Returns
    -------
    RunGraph
        A simple graph
    """
    run_graph = RunGraph()
    edges = (
        ("0", "1"),
        ("0", "2"),
        ("1", "3"),
        ("1", "4"),
    )
    for start, end in edges:
        run_graph.add_edge(start, end)
    return run_graph
def test_add_function_node() -> None:
    """Check that a function node is stored and that re-adding it raises."""
    graph = RunGraph()
    function_dict = {"function": None, "args": ("pass", 42), "kwargs": None}
    graph.add_function_node("test", function_dict=function_dict)

    assert len(graph.nodes) == 1
    node = graph["test"]
    assert node["function"] is None
    assert node["args"] == ("pass", 42)
    assert node["kwargs"] is None
    assert isinstance(node["submitter"], LocalSubmitter)

    # Adding a node with an existing name must fail and leave the graph as is
    with pytest.raises(ValueError):
        graph.add_function_node("test")
    assert len(graph.nodes) == 1
def test_add_bout_run_node(get_bout_run_setup) -> None:
    """
    Check that a bout run node is stored and that its name cannot be reused.

    Parameters
    ----------
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    """
    graph = RunGraph()
    setup = get_bout_run_setup("test_run_graph")
    graph.add_bout_run_node("test", setup)

    assert len(graph.nodes) == 1
    assert isinstance(graph["test"]["bout_run_setup"], BoutRunSetup)

    # Re-using the name for a function node must fail and leave the graph as is
    with pytest.raises(ValueError):
        graph.add_function_node("test")
    assert len(graph.nodes) == 1
def another_complex_graph() -> RunGraph:
    """
    Return a graph with eleven nodes ("0" to "10") and eleven edges.

    Returns
    -------
    RunGraph
        A complex graph
    """
    run_graph = RunGraph()
    edges = (
        ("0", "2"),
        ("0", "3"),
        ("0", "4"),
        ("1", "5"),
        ("1", "6"),
        ("3", "7"),
        ("3", "8"),
        ("4", "9"),
        ("5", "9"),
        ("8", "10"),
        ("9", "10"),
    )
    for start, end in edges:
        run_graph.add_edge(start, end)
    return run_graph
def test_run_bout_run(
    make_project: Path,
    clean_default_db_dir: Path,
    get_bout_run_setup: Callable[[str], BoutRunSetup],
    yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]],
    tear_down_restart_directories: Callable[[Path], None],
) -> None:
    """
    Check run_bout_run for a first run, a repeated run, force and restart.

    Parameters
    ----------
    make_project : Path
        The path to the conduction example
    clean_default_db_dir : Path
        Path to the default database dir
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    yield_number_of_rows_for_all_tables : function
        Function which returns the number of rows for all tables in a schema
    tear_down_restart_directories : function
        Function used for removal of restart directories
    """
    # Requested only for automatic clean-up
    _ = clean_default_db_dir
    # Requested so that the project is made up front
    _ = make_project

    runner = BoutRunner(RunGraph())
    setup = get_bout_run_setup("test_run_bout_run")
    tear_down_restart_directories(setup.bout_paths.bout_inp_dst_dir)
    paths = setup.bout_paths
    connector = setup.db_connector

    # First run
    submitter = runner.run_bout_run(setup)
    if submitter is not None:
        submitter.wait_until_completed()
    reader = assert_first_run(paths, connector)
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=1
    )

    # An identical run must not be executed again
    assert runner.run_bout_run(setup) is None
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=1
    )

    # force=True overrides that behaviour
    submitter = runner.run_bout_run(setup, force=True)
    if submitter is not None:
        submitter.wait_until_completed()
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=2
    )

    # Captured before any restart is made
    dump_parent = paths.bout_inp_dst_dir.parent
    dump_name = paths.bout_inp_dst_dir.name

    # Two consecutive restarts, each expected to make one more entry
    # NOTE: The test in tests.unit.bout_runners.runner.test_bout_runner is
    #       testing restart_all=True, whereas this is testing
    #       restart_from_bout_inp_dst=True
    for restart_number, run_count in enumerate((3, 4)):
        submitter = runner.run_bout_run(setup, restart_from_bout_inp_dst=True)
        if submitter is not None:
            submitter.wait_until_completed()
        assert_tables_have_expected_len(
            reader,
            yield_number_of_rows_for_all_tables,
            expected_run_number=run_count,
            restarted=True,
        )
        assert_dump_files_exist(
            dump_parent.joinpath(f"{dump_name}_restart_{restart_number}")
        )
def test_run_bout_run(
    make_project: Path,
    get_bout_run_setup: Callable[[str], BoutRunSetup],
    yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]],
    file_state_restorer: FileStateRestorer,
) -> None:
    """
    Check run_bout_run for a first run, a repeated run, force and restart.

    Parameters
    ----------
    make_project : Path
        The path to the conduction example
    get_bout_run_setup : function
        Function which returns the BoutRunSetup object based on the conduction
        directory
    yield_number_of_rows_for_all_tables : function
        Function which returns the number of rows for all tables in a schema
    file_state_restorer : FileStateRestorer
        Object for restoring files to original state
    """
    # Requested so that the project is made up front
    _ = make_project

    runner = BoutRunner(RunGraph())
    setup = get_bout_run_setup("test_run_bout_run")
    paths = setup.bout_paths
    connector = setup.db_connector

    # NOTE: setup.bout_paths.bout_inp_dst_dir is, according to the original
    #       note, removed in the yield_bout_path_conduction fixture (through
    #       the get_bout_run_setup fixture), so only the database is marked
    #       for removal here
    file_state_restorer.add(connector.db_path, force_mark_removal=True)

    # First run
    submitter = setup.submitter
    if runner.run_bout_run(setup):
        submitter.wait_until_completed()
    reader = assert_first_run(paths, connector)
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=1
    )

    # An identical run must not be executed again
    assert not runner.run_bout_run(setup)
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=1
    )

    # force=True overrides that behaviour
    if runner.run_bout_run(setup, force=True):
        submitter.wait_until_completed()
    assert_tables_have_expected_len(
        reader, yield_number_of_rows_for_all_tables, expected_run_number=2
    )

    # Captured before any restart is made
    dump_parent = paths.bout_inp_dst_dir.parent
    dump_name = paths.bout_inp_dst_dir.name

    # Two consecutive restarts, each expected to make one more entry
    # NOTE: The test in tests.unit.bout_runners.runner.test_bout_runner is
    #       testing restart_all=True, whereas this test restarts by setting
    #       executor.restart_from
    first_restart_run_number = 3
    for restart_number in range(2):
        setup.executor.restart_from = setup.bout_paths.bout_inp_dst_dir
        copy_restart_files(
            setup.executor.restart_from, setup.bout_paths.bout_inp_dst_dir
        )
        if runner.run_bout_run(setup):
            submitter.wait_until_completed()
        assert_tables_have_expected_len(
            reader,
            yield_number_of_rows_for_all_tables,
            expected_run_number=first_restart_run_number + restart_number,
            restarted=True,
        )
        restart_dir = dump_parent.joinpath(f"{dump_name}_restart_{restart_number}")
        assert_dump_files_exist(restart_dir)
        file_state_restorer.add(restart_dir, force_mark_removal=True)
class BoutRunner: r""" Class for executing a run and store its metadata. Attributes ---------- __run_graph : RunGraph Getter variable for executor the run graph run_graph : Graph The run graph to be executed wait_time : int Time to wait before checking if a job has completed Methods ------- __add_waiting_for(node_name) Add the job_ids to wait for in the submission script __prepare_run(force, restart_all) Prepare the run sequence __updates_when_restart_all_is_true() Update paths and nodes when restart_all is True __inject_copy_restart_files_node(node_with_restart) Inject a node which copy restart files __make_restart_files_node(to_node_name, copy_restart_from, copy_restart_to) Make nodes which copies restart files __next_order_has_local(submitter_dict) Check if the current order of nodes has any local submitters __monitor_runs(submitter_dict, raise_errors) Monitor the runs belonging to the same order __run_status_checker(node_name) Run the StatusChecker __this_order_has_local(submitter_dict) Check if the current order of nodes has any local submitters __update_submitter_dict_after_run_bout_run(node_name, submitted, submitter_dict) Update the submitter dict after calling run_bout_run find_matching_order_number(node_names, node_orders) Return the order matching the node names run_bout_run(bout_run_setup, restart_from_bout_inp_dst, force) Perform the BOUT++ run and capture the related metadata run_function(path, function, args, kwargs, submitter) Submit a function for execution reset() Reset the run_graph release_nodes(nodes_to_release) Release nodes to a submission queue if applicable cluster_node_exist(node_names) Check if any of the nodes have a submitter of type AbstractClusterSubmitter wait_until_completed(self) Wait until all submitted nodes are completed run(restart_all, force, raise_errors) Execute the run Examples -------- The easiest way to use BoutRunner is to run a script from the root directory of the project (i.e. 
where the `Makefile` and `data` directory are normally situated. The script can simply call >>> BoutRunner().run() and `BoutRunner` takes care of the rest. A more elaborate example where all the dependency objects are built manually: Import dependencies >>> from pathlib import Path >>> from bout_runners.executor.bout_paths import BoutPaths >>> from bout_runners.executor.executor import BoutRunExecutor >>> from bout_runners.database.database_connector import DatabaseConnector >>> from bout_runners.parameters.default_parameters import DefaultParameters >>> from bout_runners.parameters.run_parameters import RunParameters >>> from bout_runners.parameters.final_parameters import FinalParameters >>> from bout_runners.submitter.local_submitter import LocalSubmitter >>> from bout_runners.runner.bout_run_setup import BoutRunSetup >>> from bout_runners.runner.run_graph import RunGraph >>> from bout_runners.runner.run_group import RunGroup Create the `bout_paths` object >>> project_path = Path().joinpath('path', 'to', 'project') >>> bout_inp_src_dir = Path().joinpath('path', 'to', 'source', 'BOUT.inp') >>> bout_inp_dst_dir = Path().joinpath('path', 'to', 'destination', 'BOUT.inp') >>> bout_paths = BoutPaths(project_path=project_path, ... bout_inp_src_dir=bout_inp_src_dir, ... bout_inp_dst_dir=bout_inp_dst_dir) Create the input objects >>> default_parameters = DefaultParameters(bout_paths) >>> run_parameters = RunParameters({'global': {'nout': 0}}) >>> final_parameters = FinalParameters(default_parameters, ... run_parameters) >>> executor = BoutRunExecutor( ... bout_paths=bout_paths, ... submitter=LocalSubmitter(bout_paths.project_path), ... 
run_parameters=run_parameters) >>> db_connector = DatabaseConnector('name_of_database', db_root_path=Path()) >>> bout_run_setup = BoutRunSetup(executor, db_connector, final_parameters) >>> run_graph = RunGraph() >>> # The RunGroup can attach pre and post-processors to the run >>> # See the user manual for more info >>> _ = RunGroup(run_graph, bout_run_setup, name='my_test_run') Run the project >>> runner = BoutRunner(run_graph) >>> runner.run() """ def __init__(self, run_graph: Optional[RunGraph] = None, wait_time: int = 5) -> None: """ Set the member data. Parameters ---------- run_graph : None or RunGraph The run graph to be executed If None the run graph will be constructed and added parameters from the default BoutRunSetup wait_time : int Time to wait before checking if a job has completed """ self.wait_time = wait_time if run_graph is None: self.__run_graph = RunGraph() _ = RunGroup(self.__run_graph, BoutRunSetup()) else: self.__run_graph = run_graph if (len([ node for node in self.__run_graph.nodes if node.startswith("bout_run") ]) == 0): logging.warning( "The provided run_graph does not contain any bout_runs") def __add_waiting_for(self, node_name: str) -> None: """ Add the job_ids to wait for in the submission script. Parameters ---------- node_name : str Name of current node """ predecessors = self.__run_graph.predecessors(node_name) waiting_for = ( self.__run_graph[p_name]["submitter"].job_id for p_name in predecessors if isinstance( self.__run_graph[p_name]["submitter"], AbstractClusterSubmitter, ) and not self.__run_graph[p_name]["submitter"].completed()) self.__run_graph[node_name]["submitter"].add_waiting_for(waiting_for) def __prepare_run(self, force: bool, restart_all: bool) -> None: """ Prepare the run sequence. 
If any bout_run nodes contain restart_from this function will create a node which copies the restart files Parameters ---------- restart_all : bool All the BOUT++ runs in the run graph will be restarted force : bool Execute the run even if has been performed with the same parameters Raises ------ RuntimeError If none of the nodes in the `run_graph` has status "ready" """ logging.info("Start: Preparing all runs") if force or restart_all: if restart_all: self.__updates_when_restart_all_is_true() logging.debug("Resetting the graph as %s == True", "force" if force else "restart_all") self.reset() if len(self.__run_graph) == 0: if len(self.__run_graph.nodes) == 0: msg = "The 'run_graph' does not contain any nodes." else: msg = ( "None of the nodes in 'run_graph' has the status 'ready'. " "Reset the 'run_graph' if you'd like to run the original graph" ) logging.critical(msg) raise RuntimeError(msg) for node in tuple(self.__run_graph.nodes): if (node.startswith("bout_run") and self.__run_graph[node] ["bout_run_setup"].executor.restart_from is not None): logging.info( "Found restart_from in node %s, " "will inject node which copies restart files", node, ) self.__inject_copy_restart_files_node(node) logging.info("Done: Preparing all runs") def __updates_when_restart_all_is_true(self) -> None: """Update paths and nodes when restart_all is True.""" logging.info("Updating executor.restart_from as restart_all=True") for node in tuple(self.__run_graph.nodes): if node.startswith("bout_run"): # Input must now point at previous destination self.__run_graph[node][ "bout_run_setup"].bout_paths.bout_inp_src_dir = self.__run_graph[ node]["bout_run_setup"].bout_paths.bout_inp_dst_dir self.__run_graph[node][ "bout_run_setup"].executor.restart_from = self.__run_graph[ node]["bout_run_setup"].bout_paths.bout_inp_src_dir # Any copy restart nodes must be copied for predecessor in self.__run_graph.predecessors(node): if predecessor.startswith("copy_restart_files"): if 
(self.run_graph[predecessor]["function"].__module__ == "bout_runners.utils.file_operations" and self.run_graph[predecessor] ["function"].__name__ == "copy_restart_files"): logging.info( "Updating the arguments to %s as restart_all=True", predecessor, ) old_args = self.run_graph[predecessor]["args"] new_args = ( self.__run_graph[node] ["bout_run_setup"].executor.restart_from, self.__run_graph[node] ["bout_run_setup"].bout_paths.bout_inp_dst_dir, ) logging.debug( "Changing argument 'copy_restart_from' from %s to %s", old_args[0], new_args[0], ) logging.debug( "Changing argument 'copy_restart_to' from %s to %s", old_args[1], new_args[1], ) self.run_graph[predecessor]["args"] = new_args else: logging.warning( "restart_all=True, but node %s waits for %s which is " "a function from %s. No updates to this node will be " "made", node, predecessor, self.run_graph[predecessor] ["function"].__module__ + "." + self.run_graph[predecessor] ["function"].__name__, ) break def __inject_copy_restart_files_node(self, node_with_restart: str) -> None: """ Inject a node which copy restart files. Parameters ---------- node_with_restart : str Name of the node which will wait for a restart node """ logging.info("Start: Injecting node which copies restart files") restart_from = self.__run_graph[node_with_restart][ "bout_run_setup"].executor.restart_from copy_to = self.__run_graph[node_with_restart][ "bout_run_setup"].bout_paths.bout_inp_dst_dir copy_node = self.__make_restart_files_node(node_with_restart, restart_from, copy_to) for predecessor in self.__run_graph.predecessors(node_with_restart): self.__run_graph.remove_edge(predecessor, node_with_restart) self.__run_graph.add_edge(predecessor, copy_node) self.__run_graph.add_edge(copy_node, node_with_restart) logging.info("Done: Injecting node which copies restart files") def __make_restart_files_node(self, to_node_name: str, copy_restart_from: Path, copy_restart_to: Path) -> str: """ Make nodes which copies restart files. 
Parameters ---------- to_node_name : str Name of the node which will wait for a restart node copy_restart_from : Path Path to copy restart files from copy_restart_to : Path Path to copy restart files to Returns ------- current_node_name : str Name of the node which copies files """ current_node_name = ( f"copy_restart_files_from_{copy_restart_from.name}_to_" f"{copy_restart_to.name}_for_{to_node_name}") function_dict: Dict[str, Optional[Union[Callable, Tuple[Any, ...], Dict[str, Any]]]] = { "function": copy_restart_files, "args": (copy_restart_from, copy_restart_to), "kwargs": None, } path = copy_restart_to.joinpath(f"{current_node_name}.py") submitter = get_submitter() if isinstance(submitter, AbstractClusterSubmitter): submitter.store_dir = copy_restart_to submitter.job_name = current_node_name self.__run_graph.add_function_node( name=current_node_name, function_dict=function_dict, path=path, submitter=submitter, ) return current_node_name def __next_order_has_local( self, submitter_dict: Dict[str, Dict[str, Union[Optional[AbstractSubmitter], Union[DatabaseConnector, Path]]], ], ) -> bool: """ Check if the current order of nodes has any local submitters. Parameters ---------- submitter_dict : dict Dict containing the the node names as keys and a new dict as values The new dict contains the keywords 'submitter' with value AbstractSubmitter Returns ------- bool True if the current order has local submitters """ for node_name in submitter_dict.keys(): for successor_name in self.__run_graph.successors(node_name): if isinstance(self.__run_graph[successor_name]["submitter"], LocalSubmitter): logging.info( "%s in the next node order is of local submitter type, " "will monitor this node order", successor_name, ) return True return False def __monitor_runs( self, submitter_dict: Dict[str, Dict[str, Union[Optional[AbstractSubmitter], Union[DatabaseConnector, Path]]], ], raise_errors: bool, ) -> None: """ Monitor the runs belonging to the same order. 
Parameters ---------- submitter_dict : dict Dict containing the the node names as keys and a new dict as values The new dict contains the keywords 'submitter' with value AbstractSubmitter If the submitter contains a bout run, the new dict will also contain the keyword 'db_connector' with the value DatabaseConnector and the keyword 'project_path' with the value Path which will be used in the StatusChecker raise_errors : bool If True the program will raise any error caught when during the running of the nodes If False the program will continue execution, but all nodes depending on the errored node will be marked as errored and not submitted Raises ------ RuntimeError If the types in the dict are unexpected """ logging.info("Start: Monitoring jobs at current order") node_names = list(node_name for node_name in submitter_dict.keys()) while len(node_names) != 0: for node_name in node_names: submitter = submitter_dict[node_name]["submitter"] if not isinstance(submitter, AbstractSubmitter): msg = ( f"The submitter of the '{node_name}' node was expected to be " f"of type 'AbstractSubmitter', but got '{type(submitter)}' " f"instead") logging.critical(msg) raise RuntimeError(msg) if submitter.completed(): if submitter.errored(): self.__run_graph.change_status_node_and_dependencies( node_name) if raise_errors: submitter.raise_error() node_names.remove(node_name) else: logging.debug( "job_id=%s found, %s seems to be running", submitter.job_id, node_name, ) if node_name.startswith("bout_run"): self.__run_status_checker(node_name) sleep(self.wait_time) logging.info("Done: Monitoring jobs at current order") def __run_status_checker(self, node_name: str) -> None: """ Run the StatusChecker. 
        Parameters
        ----------
        node_name : str
            Name of node to run the status checker for

        Raises
        ------
        RuntimeError
            If the types of self.__run_graph[node_name]["db_connector"] or
            self.__run_graph[node_name]["project_path"] are unexpected
        """
        # NOTE(review): The nesting of everything below was reconstructed from
        #               a whitespace-collapsed source - confirm against the
        #               original file
        db_connector = self.__run_graph[node_name]["db_connector"]
        # The isinstance checks double as type guards for the static checker
        if not isinstance(db_connector, DatabaseConnector):
            raise RuntimeError(
                f"The db_connector of the '{node_name}' node was expected "
                f"to be of type 'DatabaseConnector', but got "
                f"'{type(db_connector)}' instead")

        project_path = self.__run_graph[node_name]["project_path"]
        if not isinstance(project_path, Path):
            raise RuntimeError(
                f"The project_path of the '{node_name}' node was expected "
                f"to be of type 'Path', but got '{type(project_path)}' "
                f"instead")

        StatusChecker(db_connector, project_path).check_and_update_status()

    @staticmethod
    def __this_order_has_local(
        submitter_dict: Dict[str, Dict[str,
                                       Union[Optional[AbstractSubmitter],
                                             Union[DatabaseConnector, Path]]], ]
    ) -> bool:
        """
        Check if the current order of nodes has any local submitters.

        Parameters
        ----------
        submitter_dict : dict
            Dict containing the node names as keys and a new dict as values
            The new dict contains the keyword 'submitter' with value
            AbstractSubmitter

        Returns
        -------
        bool
            True if the current order has local submitters
        """
        for node_name in submitter_dict.keys():
            if isinstance(submitter_dict[node_name]["submitter"],
                          LocalSubmitter):
                logging.debug(
                    "%s is of local submitter type, will monitor this node order",
                    node_name,
                )
                # One local submitter is enough to require monitoring
                return True
        return False

    def __update_submitter_dict_after_run_bout_run(
        self,
        node_name: str,
        submitted: bool,
        submitter_dict: Dict[str, Dict[str,
                                       Union[Optional[AbstractSubmitter],
                                             Union[DatabaseConnector, Path]], ], ],
    ) -> None:
        """
        Update the bookkeeping after calling run_bout_run.

        If the run has been submitted the db_connector and the project_path
        of the bout_run_setup are stored on the node in the run graph.
        Else the node name is popped from the submitter_dict (inplace) in
        order not to monitor it.

        Parameters
        ----------
        node_name : str
            Name of current node
        submitted : bool
            Whether or not the run was submitted
        submitter_dict : dict
            Dict containing the node names as keys and a new dict as values
            The new dict contains the keyword 'submitter' with value
            AbstractSubmitter
        """
        if submitted:
            # These two attributes are what __run_status_checker reads
            self.__run_graph[node_name]["db_connector"] = self.__run_graph[
                node_name]["bout_run_setup"].db_connector
            self.__run_graph[node_name]["project_path"] = self.__run_graph[
                node_name]["bout_run_setup"].bout_paths.project_path
        else:
            submitter_dict.pop(node_name)

    @property
    def run_graph(self) -> RunGraph:
        """
        Get the properties of self.run_graph.

        Returns
        -------
        self.__run_graph : RunGraph
            The RunGraph object
        """
        return self.__run_graph

    @staticmethod
    def find_matching_order_number(
            node_names: Tuple[str, ...],
            node_orders: Tuple[Tuple[str, ...], ...]) -> Optional[int]:
        """
        Return the order matching the node names.

        Parameters
        ----------
        node_names : tuple of str
            Node names
        node_orders : tuple of tuple of str
            Ordered tuple of orders

        Returns
        -------
        order_number : int or None
            The (zero-based) index of the first order containing any of the
            node names
            If no match was found None is returned
        """
        # Starts at -1 as the counter is incremented before it is returned
        order_number = -1
        found = False
        for order_nodes in node_orders:
            for node_name in node_names:
                if node_name in order_nodes:
                    found = True
                    break
            order_number += 1
            if found:
                return order_number
        return None

    @staticmethod
    def run_bout_run(
        bout_run_setup: BoutRunSetup,
        force: bool = False,
    ) -> bool:
        """
        Perform the BOUT++ run and capture the related metadata.

        Parameters
        ----------
        bout_run_setup : BoutRunSetup
            The setup for the BOUT++ run
        force : bool
            Execute the run even if has been performed with the same parameters

        Returns
        -------
        bool
            Whether or not the run was submitted
        """
        # Any truthy restart_from marks the run as a restart run
        restart = bool(bout_run_setup.executor.restart_from)
        if restart and force:
            logging.warning(
                "force has been set to True for a run which is to use restart files. "
                "Will therefore ignore force")

        run_id = bout_run_setup.metadata_recorder.capture_new_data_from_run(
            bout_run_setup.executor.submitter.processor_split, restart, force)

        # NOTE(review): run_id is None is treated as "no previous run with
        #               this configuration" - confirm against
        #               capture_new_data_from_run
        if run_id is None:
            if not restart:
                logging.info("Executing the run")
            else:
                logging.info("Executing the run from restart files")
            bout_run_setup.executor.execute(restart)
        else:
            logging.warning(
                "Run with the same configuration has been executed before, "
                "see run with run_id %d",
                run_id,
            )
            if force:
                logging.info("Executing the run as force==True")
                bout_run_setup.executor.execute()
            else:
                return False

        return True

    @staticmethod
    def run_function(
        path: Path,
        submitter: AbstractSubmitter,
        function: Callable,
        args: Optional[Tuple[Any, ...]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
    ) -> AbstractSubmitter:
        """
        Submit a function for execution.

        Parameters
        ----------
        path : Path
            Absolute path to store the python file which holds the function and
            its arguments
        submitter : AbstractSubmitter
            The submitter to submit the function with
        function : function
            The function to call
        args : None or tuple
            The positional arguments
        kwargs : None or dict
            The keyword arguments

        Returns
        -------
        submitter : AbstractSubmitter
            The submitter used
        """
        logging.info(
            "Submitting %s, with positional parameters %s, and keyword parameters %s",
            function.__name__,
            args,
            kwargs,
        )
        # The function is written to a script which is submitted as a command
        submitter.write_python_script(path, function, args, kwargs)
        command = f"python3 {path}"
        submitter.submit_command(command)
        return submitter

    def reset(self) -> None:
        """Reset the run_graph."""
        logging.debug("Resetting the graph")
        self.__run_graph.reset()

    def release_nodes(self, nodes_to_release: Tuple[Tuple[str, ...],
                                                    ...]) -> None:
        """
        Release nodes to a submission queue if applicable.

        Only nodes whose submitter is an AbstractClusterSubmitter are released.

        Parameters
        ----------
        nodes_to_release : iterable
            Orders (iterables of node names) of the nodes to release
        """
        if len(nodes_to_release) != 0:
            logging.info("Start: Releasing held cluster nodes")
            logging.debug("Release order: %s", nodes_to_release)
            for order in nodes_to_release:
                for node in order:
                    if isinstance(self.__run_graph[node]["submitter"],
                                  AbstractClusterSubmitter):
                        self.__run_graph[node]["submitter"].release()
            logging.info("Done: Releasing held cluster nodes")

    def cluster_node_exist(self, node_names: Iterable[str]) -> bool:
        """
        Check if any of the nodes have a submitter of type AbstractClusterSubmitter.

        Parameters
        ----------
        node_names : iterable of str
            Iterable containing node names

        Returns
        -------
        bool
            Whether the iterable contains any cluster nodes
        """
        for node in node_names:
            if isinstance(self.__run_graph[node]["submitter"],
                          AbstractClusterSubmitter):
                return True
        return False

    def wait_until_completed(self) -> None:
        """
        Wait until all submitted nodes are completed.

        Nodes with status "submitted" are waited for and thereafter marked as
        "completed".
        """
        logging.info("Start: Waiting for all submitted jobs to complete")
        for node_name in self.__run_graph.nodes:
            if self.__run_graph[node_name]["status"] == "submitted":
                self.__run_graph[node_name]["submitter"].wait_until_completed()
                self.__run_graph[node_name]["status"] = "completed"
                # NOTE(review): Assumed to be nested under the "submitted"
                #               check - confirm
                if node_name.startswith("bout_run"):
                    self.__run_status_checker(node_name)
        logging.info("Done: Waiting for all submitted jobs to complete")

    def run(self,
            restart_all: bool = False,
            force: bool = False,
            raise_errors: bool = True) -> None:
        """
        Execute all the nodes in the run_graph.

        Parameters
        ----------
        restart_all : bool
            All the BOUT++ runs in the run graph will be restarted
        force : bool
            Execute the run even if has been performed with the same parameters
        raise_errors : bool
            If True the program will raise any error caught during the running
            of the nodes
            If False the program will continue execution, but all nodes
            depending on the errored node will be marked as errored and not
            submitted
        """
        logging.info("Start: Calling .run() in BoutRunners")
        self.__prepare_run(force, restart_all)
        logging.debug("Dot-graph of the run\n%s",
                      self.__run_graph.get_dot_string())

        for nodes_at_current_order in self.__run_graph:
            logging.info("Start: Processing nodes at current order")
            # Holds the nodes submitted in this order (i.e. the ones to monitor)
            submitter_dict: Dict[str, Dict[str,
                                           Union[Optional[AbstractSubmitter],
                                                 Union[DatabaseConnector,
                                                       Path]], ], ] = dict()
            for node_name in nodes_at_current_order:
                if self.__run_graph[node_name]["status"] != "ready":
                    logging.info(
                        "Skipping node '%s' as it has status=%s",
                        node_name,
                        self.__run_graph[node_name]["status"],
                    )
                    continue

                logging.info("Start: Processing %s", node_name)
                if isinstance(
                        self.__run_graph[node_name]["submitter"],
                        AbstractClusterSubmitter,
                ):
                    self.__add_waiting_for(node_name)
                submitter_dict[node_name] = dict()
                submitter_dict[node_name]["submitter"] = self.__run_graph[
                    node_name]["submitter"]
                if node_name.startswith("bout_run"):
                    submitted = self.run_bout_run(
                        self.__run_graph[node_name]["bout_run_setup"],
                        force,
                    )
                    # Pops the node from submitter_dict if it was not submitted
                    self.__update_submitter_dict_after_run_bout_run(
                        node_name, submitted, submitter_dict)
                else:
                    self.run_function(
                        self.__run_graph[node_name]["path"],
                        self.__run_graph[node_name]["submitter"],
                        self.__run_graph[node_name]["function"],
                        self.__run_graph[node_name]["args"],
                        self.__run_graph[node_name]["kwargs"],
                    )
                # NOTE(review): Assumed to follow the if/else (i.e. also set
                #               for bout runs which were not submitted) -
                #               confirm
                self.__run_graph[node_name]["status"] = "submitted"
                logging.info("Done: Processing %s", node_name)

            # We only monitor the runs if any local_submitters are present in
            # the current or the next order
            # Else the clusters will handle the monitoring
            monitor_run = False
            if self.__this_order_has_local(
                    submitter_dict) or self.__next_order_has_local(
                        submitter_dict):
                monitor_run = True

            if monitor_run:
                if self.cluster_node_exist(self.__run_graph.nodes):
                    logging.warning(
                        "Mixed local and cluster nodes found in graph. "
                        "Releasing the cluster nodes up until the order of the "
                        "LocalSubmitter. This can cause a node waiting for one of "
                        "these nodes to be submitted after those nodes have finished "
                        "so that the cluster will reject those jobs.")
                    reverse_sorted_node_orders = self.__run_graph.get_node_orders(
                        reverse=True)
                    # NOTE: order_number is None if no match is found, in
                    #       which case the slice selects all the orders
                    order_number = self.find_matching_order_number(
                        tuple(submitter_dict.keys()),
                        reverse_sorted_node_orders)
                    orders_to_release = reverse_sorted_node_orders[
                        order_number:]
                    self.release_nodes(orders_to_release)
                    # We also need to release the current order in case
                    # the graph is not connected
                    self.release_nodes((tuple(submitter_dict.keys()), ))
                self.__monitor_runs(submitter_dict, raise_errors)
            logging.info("Done: Processing nodes at current order")

        # Release whatever cluster nodes are still held
        if self.cluster_node_exist(self.__run_graph.nodes):
            reverse_sorted_node_orders = self.__run_graph.get_node_orders(
                reverse=True)
            self.release_nodes(reverse_sorted_node_orders)
        logging.info("Done: Calling .run() in BoutRunners")
def test_restart_documentation(
    clean_up_bout_inp_src_and_dst: Callable[[str, str], Tuple[Path, Path, Path]]
) -> None:
    """
    Check that the code in the restart documentation executes without error.

    Parameters
    ----------
    clean_up_bout_inp_src_and_dst : function
        Function which adds temporary BOUT.inp directories to removal.
    """
    # NOTE: We are aware of the number of locals, and are here only testing the docs
    # pylint: disable=too-many-locals
    paths = clean_up_bout_inp_src_and_dst(
        "test_restart_documentation_src", "test_restart_documentation_dst"
    )
    project_path, src_dir, dst_dir = paths
    bout_paths = BoutPaths(
        project_path=project_path,
        bout_inp_src_dir=src_dir,
        bout_inp_dst_dir=dst_dir,
    )

    defaults = DefaultParameters(bout_paths)
    run_parameters = RunParameters({"global": {"nout": 0}})
    final_parameters = FinalParameters(defaults, run_parameters)

    first_submitter = LocalSubmitter(bout_paths.project_path)
    first_executor = Executor(
        bout_paths=bout_paths,
        submitter=first_submitter,
        run_parameters=run_parameters,
    )

    # NOTE: The database is placed in the destination dir as that dir will be
    #       removed later
    database = DatabaseConnector("name_of_database", db_root_path=dst_dir)
    first_setup = BoutRunSetup(first_executor, database, final_parameters)

    graph = RunGraph()
    group_name = "my_restart_runs"
    first_group = RunGroup(graph, first_setup, name=group_name)

    # New section in the documentation: Restart from the first run
    second_submitter = LocalSubmitter(bout_paths.project_path)
    second_executor = Executor(
        bout_paths=bout_paths,
        submitter=second_submitter,
        run_parameters=run_parameters,
        restart_from=bout_paths.bout_inp_dst_dir,
    )
    second_setup = BoutRunSetup(second_executor, database, final_parameters)
    RunGroup(
        graph,
        second_setup,
        name=group_name,
        waiting_for=first_group.bout_run_node_name,
    )

    # New section in the documentation: Restart with changed parameters
    changed_run_parameters = RunParameters({"solver": {"adams_moulton": True}})
    changed_final_parameters = FinalParameters(defaults, run_parameters)
    third_submitter = LocalSubmitter(bout_paths.project_path)
    third_executor = Executor(
        bout_paths=bout_paths,
        submitter=third_submitter,
        run_parameters=changed_run_parameters,
        restart_from=bout_paths.bout_inp_dst_dir,
    )
    # NOTE(review): The setup below is discarded and the previous restart
    #               setup is added to the graph once more - confirm whether
    #               this is intended
    BoutRunSetup(third_executor, database, changed_final_parameters)
    RunGroup(
        graph,
        second_setup,
        name=group_name,
        waiting_for=first_group.bout_run_node_name,
    )

    # New section in the documentation: Inspect the graph
    graph.get_dot_string()

    # New section in the documentation: Execute the graph
    BoutRunner(graph).run()
class BoutRunner:
    r"""
    Class for executing a run and store its metadata.

    Attributes
    ----------
    __run_graph : RunGraph
        The run graph (exposed through the run_graph getter)
    run_graph : RunGraph
        The run graph to be executed

    Methods
    -------
    __reset_bout_inp_dst_dir(bout_run_setup)
        Reset the bout_inp_dst_dir (inplace) to reflect that this is a restart run
    __monitor_runs(submitter_dict, raise_errors, wait_time)
        Monitor the runs belonging to the same order
    copy_restart_files(bout_run_setup)
        Copy the restart files (if any)
    run_bout_run(bout_run_setup, restart_from_bout_inp_dst, force)
        Perform the BOUT++ run and capture the related metadata
    run_function(path, function, args, kwargs, submitter)
        Submit a function for execution
    reset()
        Reset the RunGraph
    run(restart_all, force, raise_errors, wait_time)
        Execute the run

    Examples
    --------
    The easiest way to use BoutRunner is to run a script from the root directory of
    the project (i.e. where the `Makefile` and `data` directory are normally
    situated). The script can simply call

    >>> BoutRunner().run()

    and `BoutRunner` takes care of the rest.

    A more elaborate example where all the dependency objects are built manually:

    Import dependencies

    >>> from pathlib import Path
    >>> from bout_runners.executor.bout_paths import BoutPaths
    >>> from bout_runners.executor.executor import Executor
    >>> from bout_runners.database.database_connector import DatabaseConnector
    >>> from bout_runners.parameters.default_parameters import DefaultParameters
    >>> from bout_runners.parameters.run_parameters import RunParameters
    >>> from bout_runners.parameters.final_parameters import FinalParameters
    >>> from bout_runners.submitter.local_submitter import LocalSubmitter
    >>> from bout_runners.runner.bout_run_setup import BoutRunSetup
    >>> from bout_runners.runner.run_graph import RunGraph
    >>> from bout_runners.runner.run_group import RunGroup

    Create the `bout_paths` object

    >>> project_path = Path().joinpath('path', 'to', 'project')
    >>> bout_inp_src_dir = Path().joinpath('path', 'to', 'source', 'BOUT.inp')
    >>> bout_inp_dst_dir = Path().joinpath('path', 'to', 'destination', 'BOUT.inp')
    >>> bout_paths = BoutPaths(project_path=project_path,
    ...                        bout_inp_src_dir=bout_inp_src_dir,
    ...                        bout_inp_dst_dir=bout_inp_dst_dir)

    Create the input objects

    >>> default_parameters = DefaultParameters(bout_paths)
    >>> run_parameters = RunParameters({'global': {'nout': 0}})
    >>> final_parameters = FinalParameters(default_parameters,
    ...                                    run_parameters)
    >>> executor = Executor(
    ...     bout_paths=bout_paths,
    ...     submitter=LocalSubmitter(bout_paths.project_path),
    ...     run_parameters=run_parameters)
    >>> db_connector = DatabaseConnector('name_of_database', db_root_path=Path())
    >>> bout_run_setup = BoutRunSetup(executor, db_connector, final_parameters)
    >>> run_graph = RunGraph()
    >>> # The RunGroup can attach pre and post-processors to the run
    >>> # See the user manual for more info
    >>> _ = RunGroup(run_graph, bout_run_setup, name='my_test_run')

    Run the project

    >>> runner = BoutRunner(run_graph)
    >>> runner.run()
    """

    def __init__(self, run_graph: Optional[RunGraph] = None) -> None:
        """
        Set the member data.

        Parameters
        ----------
        run_graph : None or RunGraph
            The run graph to be executed
            If None the run graph will be constructed and added parameters from the
            default BoutRunSetup
        """
        if run_graph is None:
            self.__run_graph = RunGraph()
            # The RunGroup registers itself in the graph on construction
            _ = RunGroup(self.__run_graph, BoutRunSetup())
        else:
            self.__run_graph = run_graph
            if not any(
                node.startswith("bout_run") for node in self.__run_graph.nodes
            ):
                logging.warning("The provided run_graph does not contain any bout_runs")

    @property
    def run_graph(self) -> RunGraph:
        """
        Get the properties of self.run_graph.

        Returns
        -------
        self.__run_graph : RunGraph
            The RunGraph object
        """
        return self.__run_graph

    @staticmethod
    def run_bout_run(
        bout_run_setup: BoutRunSetup,
        restart_from_bout_inp_dst: bool = False,
        force: bool = False,
    ) -> Optional[AbstractSubmitter]:
        """
        Perform the BOUT++ run and capture the related metadata.

        Parameters
        ----------
        bout_run_setup : BoutRunSetup
            The setup for the BOUT++ run
            NOTE: The object is altered inplace if the run is a restart run
        restart_from_bout_inp_dst : bool
            Restarts the run from the dump directory
            (bout_run_setup.bout_paths.bout_inp_dst_dir)
            Note that it is also possible to specify the directory to restart from
            in executor.restart_from
            If True it will have precedence over anything specified in
            executor.restart_from
        force : bool
            Execute the run even if has been performed with the same parameters

        Returns
        -------
        submitter : AbstractSubmitter or None
            The submitter used
            If the run is skipped None will be returned
        """
        if (
            restart_from_bout_inp_dst
            and bout_run_setup.executor.restart_from is not None
        ):
            logging.warning(
                "Both restart_from_bout_inp_dst and "
                "bout_run_setup.executor.restart_from specified. "
                "Using restart_from_bout_inp_dst"
            )

        if restart_from_bout_inp_dst:
            bout_run_setup.executor.restart_from = (
                bout_run_setup.bout_paths.bout_inp_dst_dir
            )

        restart = bout_run_setup.executor.restart_from is not None
        if restart:
            # NOTE: bout_run_setup is changed inplace
            BoutRunner.__reset_bout_inp_dst_dir(bout_run_setup)

        if restart and force:
            logging.warning(
                "force has been set to True for a run which is to use restart files. "
                "Will therefore ignore force"
            )

        run_id = bout_run_setup.metadata_recorder.capture_new_data_from_run(
            bout_run_setup.executor.submitter.processor_split, restart, force
        )

        submitter = None
        if run_id is None:
            if not restart:
                logging.info("Executing the run")
            else:
                # The restart files must be in place before the run is started
                BoutRunner.copy_restart_files(bout_run_setup)
                logging.info("Executing the run from restart files")
            submitter = bout_run_setup.executor.execute(restart)
        elif force:
            logging.info("Executing the run as force==True")
            submitter = bout_run_setup.executor.execute()
        else:
            logging.warning(
                "Run with the same configuration has been executed before, "
                "see run with run_id %d",
                run_id,
            )

        return submitter

    @staticmethod
    def copy_restart_files(bout_run_setup: BoutRunSetup) -> None:
        """
        Copy the restart files (if any).

        The files matching BOUT.restart.* are copied from
        bout_run_setup.executor.restart_from to
        bout_run_setup.bout_paths.bout_inp_dst_dir.
        Nothing is done if bout_run_setup.executor.restart_from is None.

        Parameters
        ----------
        bout_run_setup : BoutRunSetup
            The BoutRunSetup object

        Raises
        ------
        FileNotFoundError
            If no restart files are found in bout_run_setup.executor.restart_from
        """
        restart_from = bout_run_setup.executor.restart_from
        if restart_from is None:
            return

        src_list = list(restart_from.glob("BOUT.restart.*"))
        if not src_list:
            msg = f"No restart files files found in " f"{restart_from}"
            logging.error(msg)
            raise FileNotFoundError(msg)

        for src in src_list:
            dst = bout_run_setup.bout_paths.bout_inp_dst_dir.joinpath(src.name)
            shutil.copy(src, dst)
            logging.debug("Copied %s to %s", src, dst)

    @staticmethod
    def __reset_bout_inp_dst_dir(bout_run_setup: BoutRunSetup) -> None:
        r"""
        Reset the bout_inp_dst_dir (inplace) to reflect that this is a restart run.

        The new bout_inp_dst_dir will be the same as
        bout_run_setup.executor.restart_from (stripped for any existing
        _restart_\d+ suffix) with _restart_\d+ appended
        \d+ will be one higher than the highest number found among the sibling
        restart directories (0 if there are none)
        Nothing is done if bout_run_setup.executor.restart_from is None.

        Parameters
        ----------
        bout_run_setup : BoutRunSetup
            BoutRunSetup where bout_run_setup.bout_paths.bout_inp_dst_dir is going
            to be altered
        """
        restart_from = bout_run_setup.executor.restart_from
        if restart_from is None:
            return

        restart_dir_parent = restart_from.parent
        # Restarting from a restart directory must continue the numbering of
        # the original run, hence the search is based on the stripped name
        stripped_restart_dir_name = re.sub(r"_restart_\d+$", "", restart_from.name)
        # NOTE: The quantifier is inside the group in order to capture all the
        #       digits (i.e. _restart_10 must yield 10, not 0)
        number_pattern = re.compile(
            rf"{re.escape(stripped_restart_dir_name)}_restart_(\d+)"
        )

        restart_numbers = list()
        for restart_dir in restart_dir_parent.glob(
            f"{stripped_restart_dir_name}_restart_*"
        ):
            match = number_pattern.fullmatch(restart_dir.name)
            if match is not None:
                # NOTE: The zeroth group is the matching string
                restart_numbers.append(int(match.group(1)))

        restart_number = max(restart_numbers) + 1 if restart_numbers else 0

        prev_inp_dst_dir = bout_run_setup.bout_paths.bout_inp_dst_dir
        new_inp_dst_dir = restart_dir_parent.joinpath(
            f"{stripped_restart_dir_name}_restart_{restart_number}"
        )
        bout_run_setup.bout_paths.bout_inp_dst_dir = new_inp_dst_dir
        logging.info(
            "bout_run_setup.bout_paths.bout_inp_dst_dir set from %s to %s",
            prev_inp_dst_dir,
            new_inp_dst_dir,
        )

    @staticmethod
    def run_function(
        path: Path,
        function: Callable,
        args: Optional[Tuple[Any, ...]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
        submitter: Optional[AbstractSubmitter] = None,
    ) -> AbstractSubmitter:
        """
        Submit a function for execution.

        Parameters
        ----------
        path : Path
            Absolute path to store the python file which holds the function and
            its arguments
        function : function
            The function to call
        args : None or tuple
            The positional arguments
        kwargs : None or dict
            The keyword arguments
        submitter : None or AbstractSubmitter
            The submitter to submit the function with
            Uses the default LocalSubmitter if None

        Returns
        -------
        submitter : AbstractSubmitter
            The submitter used
        """
        logging.info(
            "Submitting %s, with positional parameters %s, and keyword parameters %s",
            function.__name__,
            args,
            kwargs,
        )
        submitter = submitter if submitter is not None else LocalSubmitter()
        # The function is written to a script which is submitted as a command
        submitter.write_python_script(path, function, args, kwargs)
        command = f"python3 {path}"
        submitter.submit_command(command)
        return submitter

    def reset(self) -> None:
        """Reset the run_graph."""
        logging.info("Resetting the graph")
        self.__run_graph.reset()

    def run(
        self,
        restart_all: bool = False,
        force: bool = False,
        raise_errors: bool = False,
        wait_time: int = 1,
    ) -> None:
        """
        Execute all the nodes in the run_graph.

        Parameters
        ----------
        restart_all : bool
            All the BOUT++ runs in the run graph will be restarted
        force : bool
            Execute the run even if has been performed with the same parameters
        raise_errors : bool
            If True the program will raise any error caught during the running
            of the nodes
            If False the program will continue execution, but all nodes depending on
            the errored node will be marked as errored and not submitted
        wait_time : int
            Time to wait before checking if a job has completed

        Raises
        ------
        RuntimeError
            If none of the nodes in the `run_graph` has status "ready"
        """
        if force or restart_all:
            logging.debug(
                "Resetting the graph as %s == True", "force" if force else "restart_all"
            )
            self.reset()

        if len(self.__run_graph) == 0:
            if len(self.__run_graph.nodes) == 0:
                msg = "The 'run_graph' does not contain any nodes."
            else:
                msg = (
                    "None of the nodes in 'run_graph' has the status 'ready'. "
                    "Reset the 'run_graph' if you'd like to run the original graph"
                )
            logging.error(msg)
            raise RuntimeError(msg)

        for nodes_at_current_order in self.__run_graph:
            # Holds what __monitor_runs needs for the nodes of this order
            submitter_dict: Dict[
                str,
                Dict[
                    str,
                    Union[Optional[AbstractSubmitter], Union[DatabaseConnector, Path]],
                ],
            ] = dict()
            for node_name in nodes_at_current_order.keys():
                node = nodes_at_current_order[node_name]
                logging.info("Executing %s", node_name)
                submitter_dict[node_name] = dict()
                if node_name.startswith("bout_run"):
                    submitter = self.run_bout_run(
                        node["bout_run_setup"],
                        restart_all,
                        force,
                    )
                    submitter_dict[node_name]["submitter"] = submitter
                    # The following two are needed by the StatusChecker
                    submitter_dict[node_name]["db_connector"] = node[
                        "bout_run_setup"
                    ].db_connector
                    submitter_dict[node_name]["project_path"] = node[
                        "bout_run_setup"
                    ].bout_paths.project_path
                else:
                    submitter = self.run_function(
                        node["path"],
                        node["function"],
                        node["args"],
                        node["kwargs"],
                        node["submitter"],
                    )
                    submitter_dict[node_name]["submitter"] = submitter

                # The submitter is None if the bout run was skipped
                if submitter is not None:
                    logging.debug(
                        "Node '%s' submitted with pid %s", node_name, submitter.pid
                    )

            # All nodes of an order must finish before the next order starts
            self.__monitor_runs(submitter_dict, raise_errors, wait_time)

    def __monitor_runs(
        self,
        submitter_dict: Dict[
            str,
            Dict[
                str, Union[Optional[AbstractSubmitter], Union[DatabaseConnector, Path]]
            ],
        ],
        raise_errors: bool,
        wait_time: int,
    ) -> None:
        """
        Monitor the runs belonging to the same order.

        Parameters
        ----------
        submitter_dict : dict
            Dict containing the node names as keys and a new dict as values
            The new dict contains the keyword 'submitter' with value
            AbstractSubmitter (or None if the node was not submitted)
            If the submitter contains a bout run, the new dict will also contain the
            keyword 'db_connector' with the value DatabaseConnector and the keyword
            'project_path' with the value Path which will be used in the
            StatusChecker
        raise_errors : bool
            If True the program will raise any error caught during the running
            of the nodes
            If False the program will continue execution, but all nodes depending on
            the errored node will be marked as errored and not submitted
        wait_time : int
            Time to wait between each round of checking the nodes

        Raises
        ------
        RuntimeError
            If the types in the dict are unexpected
        """
        # Nodes without a submitter were never submitted, and are not monitored
        node_names = [
            node_name
            for node_name in submitter_dict.keys()
            if submitter_dict[node_name]["submitter"] is not None
        ]

        while len(node_names) != 0:
            # NOTE: We iterate over a snapshot as completed nodes are removed
            #       from node_names inside the loop (removing from the list
            #       being iterated would skip the subsequent node)
            for node_name in tuple(node_names):
                submitter = submitter_dict[node_name]["submitter"]
                if not isinstance(submitter, AbstractSubmitter):
                    raise RuntimeError(
                        f"The submitter of the '{node_name}' node was expected to be "
                        f"of type 'AbstractSubmitter', but got '{type(submitter)}' "
                        f"instead"
                    )

                if submitter.completed():
                    if submitter.errored():
                        self.__run_graph.change_status_node_and_dependencies(node_name)
                        if raise_errors:
                            submitter.raise_error()
                    node_names.remove(node_name)
                else:
                    logging.debug(
                        "pid=%s found, %s seems to be running", submitter.pid, node_name
                    )

                # The status is also updated for the nodes which just completed
                if node_name.startswith("bout_run"):
                    db_connector = submitter_dict[node_name]["db_connector"]
                    if not isinstance(db_connector, DatabaseConnector):
                        raise RuntimeError(
                            f"The db_connector of the '{node_name}' node was expected "
                            f"to be of type 'DatabaseConnector', but got "
                            f"'{type(db_connector)}' instead"
                        )
                    project_path = submitter_dict[node_name]["project_path"]
                    if not isinstance(project_path, Path):
                        raise RuntimeError(
                            f"The project_path of the '{node_name}' node was expected "
                            f"to be of type 'Path', but got '{type(project_path)}' "
                            f"instead"
                        )
                    StatusChecker(db_connector, project_path).check_and_update_status()

            # NOTE(review): One sleep per polling round - the nesting was
            #               reconstructed from a whitespace-collapsed source
            sleep(wait_time)
def test_add_waiting_for() -> None:
    """Check that a node can be set to wait for one or several other nodes."""
    graph = RunGraph()
    for node_name in ("1", "2", "3"):
        graph.add_function_node(node_name)

    # "2" waits for "1", whereas "3" waits for both "2" and "1"
    graph.add_waiting_for("2", "1")
    graph.add_waiting_for("3", ("2", "1"))

    waiting_for_first = graph.get_waiting_for_tuple("1")
    expected = ("1", "2", "3")
    assert expected == waiting_for_first
def test_restart_documentation(
    make_project: Path,
    copy_bout_inp: Callable[[Path, str], Path],
    file_state_restorer: FileStateRestorer,
) -> None:
    """
    Check that the code in the restart documentation executes without error.

    Parameters
    ----------
    make_project : Path
        The path to the conduction example
    copy_bout_inp : function
        Function which copies BOUT.inp and returns the path to the temporary
        directory
    file_state_restorer : FileStateRestorer
        Object for restoring files to original state
    """
    # NOTE: We are aware of the number of locals, and are here only testing the docs
    # pylint: disable=too-many-locals
    project_path = make_project
    src_dir = copy_bout_inp(project_path, "test_restart_documentation_src")
    dst_dir = project_path.joinpath("test_restart_documentation_dst")
    # NOTE: The source dir is removed by the teardown of copy_bout_inp
    file_state_restorer.add(dst_dir, force_mark_removal=True)

    bout_paths = BoutPaths(
        project_path=project_path,
        bout_inp_src_dir=src_dir,
        bout_inp_dst_dir=dst_dir,
    )

    defaults = DefaultParameters(bout_paths)
    run_parameters = RunParameters({"global": {"nout": 0}})
    final_parameters = FinalParameters(defaults, run_parameters)

    first_submitter = LocalSubmitter(bout_paths.project_path)
    first_executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=first_submitter,
        run_parameters=run_parameters,
    )

    # NOTE: The database is placed in the destination dir as that dir will be
    #       removed later
    database = DatabaseConnector("name_of_database", db_root_path=dst_dir)
    file_state_restorer.add(database.db_path, force_mark_removal=True)

    first_setup = BoutRunSetup(first_executor, database, final_parameters)

    graph = RunGraph()
    group_name = "my_restart_runs"
    first_group = RunGroup(graph, first_setup, name=group_name)

    # New section in the documentation: Restart from the first run
    second_submitter = LocalSubmitter(bout_paths.project_path)
    second_executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=second_submitter,
        run_parameters=run_parameters,
        restart_from=bout_paths.bout_inp_dst_dir,
    )
    file_state_restorer.add(
        second_executor.bout_paths.bout_inp_dst_dir, force_mark_removal=True
    )
    second_setup = BoutRunSetup(second_executor, database, final_parameters)
    RunGroup(
        graph,
        second_setup,
        name=group_name,
        waiting_for=first_group.bout_run_node_name,
    )

    # New section in the documentation: Restart with changed parameters
    changed_run_parameters = RunParameters({"solver": {"adams_moulton": True}})
    changed_final_parameters = FinalParameters(defaults, run_parameters)
    third_submitter = LocalSubmitter(bout_paths.project_path)
    third_executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=third_submitter,
        run_parameters=changed_run_parameters,
        restart_from=bout_paths.bout_inp_dst_dir,
    )
    file_state_restorer.add(
        third_executor.bout_paths.bout_inp_dst_dir,
        force_mark_removal=True,
    )
    # NOTE(review): The setup below is discarded and the previous restart
    #               setup is added to the graph once more - confirm whether
    #               this is intended
    BoutRunSetup(third_executor, database, changed_final_parameters)
    RunGroup(
        graph,
        second_setup,
        name=group_name,
        waiting_for=first_group.bout_run_node_name,
    )

    # New section in the documentation: Inspect the graph
    graph.get_dot_string()

    # New section in the documentation: Execute the graph
    BoutRunner(graph).run()
def make_run_group(
    run_group_parameters: Dict[str, Union[str, Optional[RunGraph],
                                          Optional[Union[str,
                                                         Iterable[str]]]]],
    make_project: Path,
    file_state_restorer: FileStateRestorer,
    restart_from: Optional[Path] = None,
) -> RunGroup:
    """
    Return a basic RunGroup.

    Parameters
    ----------
    run_group_parameters : dict
        Parameters to the run_group containing the keys
        - name : str
            Name of the run_group
            Note that the name will also be used for the destination dir and
            the name of the database
        - run_graph: None or RunGraph
            The run_graph to use
            A new RunGraph is created if None
        - waiting_for : None or str or iterable of str
            Name of nodes this node will wait for to finish before executing
    make_project : Path
        The path to the conduction example
    file_state_restorer : FileStateRestorer
        Object for restoring files to original state
    restart_from : Path or None
        The path to copy the restart files from

    Returns
    -------
    run_group : RunGroup
        A basic run group

    Raises
    ------
    ValueError
        If the shape or types of the run_group_parameters are wrong
    """
    # NOTE: The following is a mypy guard which could be solved with TypedDict
    #       However, TypedDict is new from 3.8
    name = run_group_parameters.get("name")
    if not isinstance(name, str):
        raise ValueError(
            "'name' must be of string type in run_group_parameters")

    if "run_graph" not in run_group_parameters:
        raise ValueError(
            "'run_graph' must be of RunGraph type or None in run_group_parameters"
        )
    run_graph = run_group_parameters["run_graph"]
    if run_graph is not None and not isinstance(run_graph, RunGraph):
        raise ValueError(
            "'run_graph' must be of RunGraph type or None in run_group_parameters"
        )

    if "waiting_for" not in run_group_parameters:
        raise ValueError(
            "'waiting_for' must be None, a string or an iterable of strings in "
            "run_group_parameters")
    waiting_for = run_group_parameters["waiting_for"]
    # NOTE: A RunGraph is explicitly rejected, even if it should be iterable
    if (waiting_for is not None and not hasattr(waiting_for, "__iter__")
        ) or isinstance(waiting_for, RunGraph):
        raise ValueError(
            "'waiting_for' must be None, a string or an iterable of strings in "
            "run_group_parameters")

    # Make project to save time
    project_path = make_project

    # Create the `bout_paths` object
    bout_paths = BoutPaths(
        project_path=project_path,
        bout_inp_src_dir=project_path.joinpath("data"),
        bout_inp_dst_dir=project_path.joinpath(name),
    )

    # Create the input objects
    run_parameters = RunParameters({"global": {"nout": 0}})
    default_parameters = DefaultParameters(bout_paths)
    final_parameters = FinalParameters(default_parameters, run_parameters)
    submitter = get_submitter()
    if isinstance(submitter, LocalSubmitter):
        # Local runs are executed from the project directory
        submitter.run_path = bout_paths.project_path
    executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=submitter,
        run_parameters=run_parameters,
        restart_from=restart_from,
    )
    db_connector = DatabaseConnector(name=name, db_root_path=project_path)
    bout_run_setup = BoutRunSetup(executor, db_connector, final_parameters)

    # Create the `run_group`
    run_group = RunGroup(
        run_graph if run_graph is not None else RunGraph(),
        bout_run_setup,
        name=name,
        waiting_for=waiting_for,
    )

    # Mark everything created by the run for removal
    file_state_restorer.add(executor.bout_paths.bout_inp_dst_dir,
                            force_mark_removal=True)
    file_state_restorer.add(db_connector.db_path, force_mark_removal=True)
    file_state_restorer.add(
        executor.bout_paths.project_path.joinpath("settings_run"),
        force_mark_removal=True,
    )
    return run_group
def complex_graph() -> RunGraph:
    """
    Return a complex graph.

    Returns
    -------
    graph : RunGraph
        A complex graph, where the edges are added in a fixed order
    """
    # (start node, end node) pairs, listed in the order they are added
    edges = (
        ("0", "2"),
        ("1", "2"),
        ("2", "3"),
        ("2", "5"),
        ("2", "6"),
        ("2", "7"),
        ("4", "9"),
        ("6", "9"),
        ("7", "9"),
        ("9", "10"),
        ("4", "8"),
        ("6", "8"),
        ("8", "10"),
        ("12", "11"),
        ("11", "4"),
    )
    graph = RunGraph()
    for start_node, end_node in edges:
        graph.add_edge(start_node, end_node)
    return graph
def test_pre_and_post_documentation(
    clean_up_bout_inp_src_and_dst: Callable[[str, str], Tuple[Path, Path, Path]]
) -> None:
    """
    Check that the pre and post processor documentation example executes.

    A basic run group with two post-processors is chained to a restart run
    group whose pre-processor waits for the last post-processor, whereupon
    the whole graph is run.

    Parameters
    ----------
    clean_up_bout_inp_src_and_dst : function
        Function which adds temporary BOUT.inp directories to removal.
    """
    # NOTE: We are aware of the number of locals, and are here only testing the docs
    # pylint: disable=too-many-locals
    project_path, src_dir, dst_dir = clean_up_bout_inp_src_and_dst(
        "test_pre_post_documentation_src", "test_pre_post_documentation_dst"
    )

    bout_paths = BoutPaths(
        project_path=project_path,
        bout_inp_src_dir=src_dir,
        bout_inp_dst_dir=dst_dir,
    )

    default_parameters = DefaultParameters(bout_paths)
    run_parameters = RunParameters({"global": {"nout": 0}})
    final_parameters = FinalParameters(default_parameters, run_parameters)

    basic_submitter = LocalSubmitter(bout_paths.project_path)
    basic_executor = Executor(
        bout_paths=bout_paths,
        submitter=basic_submitter,
        run_parameters=run_parameters,
    )

    # NOTE: We set the database to bout_inp_dst_dir as this will be removed later
    db_connector = DatabaseConnector("name_of_database", db_root_path=dst_dir)

    basic_setup = BoutRunSetup(basic_executor, db_connector, final_parameters)

    run_graph = RunGraph()
    name = "my_restart_runs"
    basic_run_group = RunGroup(run_graph, basic_setup, name=name)

    # New section in the documentation

    no_op_post_processor = {"function": return_none, "args": None, "kwargs": None}
    basic_run_group.add_post_processor(no_op_post_processor)

    expanded_noise_restarts_dir = (
        bout_paths.bout_inp_dst_dir.parent / "expanded_noise_restarts"
    )
    expand_kwargs = {
        "newNz": 16,
        "path": bout_paths.bout_inp_dst_dir,
        "output": expanded_noise_restarts_dir,
    }
    expand_post_processor = {
        "function": mock_expand,
        "args": None,
        "kwargs": expand_kwargs,
    }
    expand_node_name = basic_run_group.add_post_processor(expand_post_processor)

    # New section in the documentation
    # NOTE: Add these for removal
    clean_up_bout_inp_src_and_dst("expanded_noise_restarts",
                                  "expanded_noise_restarts")

    # Create the RunGroup
    restart_submitter = LocalSubmitter(bout_paths.project_path)
    restart_executor = Executor(
        bout_paths=bout_paths,
        submitter=restart_submitter,
        run_parameters=run_parameters,
        restart_from=expanded_noise_restarts_dir,
    )

    restart_setup = BoutRunSetup(restart_executor, db_connector, final_parameters)

    restart_run_group = RunGroup(run_graph, restart_setup, name=name)

    noise_kwargs = {"path": expanded_noise_restarts_dir, "scale": 1e-5}
    noise_pre_processor = {
        "function": return_none,
        "args": None,
        "kwargs": noise_kwargs,
    }
    restart_run_group.add_pre_processor(
        noise_pre_processor,
        waiting_for=expand_node_name,
    )

    # New section in the documentation

    run_graph.get_dot_string()

    # New section in the documentation

    BoutRunner(run_graph).run()
def test_pre_and_post_documentation(
    make_project: Path,
    copy_bout_inp: Callable[[Path, str], Path],
    file_state_restorer: FileStateRestorer,
) -> None:
    """
    Check that the pre and post processor documentation example executes.

    A basic run group with two post-processors is chained to a restart run
    group whose pre-processor waits for the last post-processor, whereupon
    the whole graph is run. Directories and files created on the way are
    registered in the file state restorer for removal.

    Parameters
    ----------
    make_project : Path
        The path to the conduction example
    copy_bout_inp : function
        Function which copies BOUT.inp and returns the path to the temporary
        directory
    file_state_restorer : FileStateRestorer
        Object for restoring files to original state
    """
    # NOTE: We are aware of the number of locals, and are here only testing the docs
    # pylint: disable=too-many-locals
    project_path = make_project
    src_dir = copy_bout_inp(project_path, "test_pre_post_documentation_src")
    dst_dir = project_path / "test_pre_post_documentation_dst"
    # NOTE: bout_inp_src_dir removed by copy_bout_inp teardown
    file_state_restorer.add(dst_dir, force_mark_removal=True)

    bout_paths = BoutPaths(
        project_path=project_path,
        bout_inp_src_dir=src_dir,
        bout_inp_dst_dir=dst_dir,
    )

    default_parameters = DefaultParameters(bout_paths)
    run_parameters = RunParameters({"global": {"nout": 0}})
    final_parameters = FinalParameters(default_parameters, run_parameters)

    basic_submitter = LocalSubmitter(bout_paths.project_path)
    basic_executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=basic_submitter,
        run_parameters=run_parameters,
    )

    # NOTE: We set the database to bout_inp_dst_dir as this will be removed later
    db_connector = DatabaseConnector("name_of_database", db_root_path=dst_dir)
    file_state_restorer.add(db_connector.db_path, force_mark_removal=True)

    basic_setup = BoutRunSetup(basic_executor, db_connector, final_parameters)

    run_graph = RunGraph()
    name = "my_restart_runs"
    basic_run_group = RunGroup(run_graph, basic_setup, name=name)

    # New section in the documentation

    no_op_post_processor = {"function": return_none, "args": None, "kwargs": None}
    basic_run_group.add_post_processor(no_op_post_processor)

    expanded_noise_restarts_dir = (
        bout_paths.bout_inp_dst_dir.parent / "expanded_noise_restarts"
    )
    file_state_restorer.add(expanded_noise_restarts_dir, force_mark_removal=True)
    expand_kwargs = {
        "newNz": 16,
        "path": bout_paths.bout_inp_dst_dir,
        "output": expanded_noise_restarts_dir,
    }
    expand_post_processor = {
        "function": mock_expand,
        "args": None,
        "kwargs": expand_kwargs,
    }
    expand_node_name = basic_run_group.add_post_processor(expand_post_processor)

    # New section in the documentation
    # Create the RunGroup
    restart_submitter = LocalSubmitter(bout_paths.project_path)
    restart_executor = BoutRunExecutor(
        bout_paths=bout_paths,
        submitter=restart_submitter,
        run_parameters=run_parameters,
        restart_from=expanded_noise_restarts_dir,
    )
    file_state_restorer.add(restart_executor.bout_paths.bout_inp_dst_dir,
                            force_mark_removal=True)

    restart_setup = BoutRunSetup(restart_executor, db_connector, final_parameters)

    restart_run_group = RunGroup(run_graph, restart_setup, name=name)

    noise_kwargs = {"path": expanded_noise_restarts_dir, "scale": 1e-5}
    noise_pre_processor = {
        "function": return_none,
        "args": None,
        "kwargs": noise_kwargs,
    }
    restart_run_group.add_pre_processor(
        noise_pre_processor,
        waiting_for=expand_node_name,
    )

    # New section in the documentation

    run_graph.get_dot_string()

    # New section in the documentation

    BoutRunner(run_graph).run()
def assert_waiting_for_graph(
    node_zero_submitter: AbstractSubmitter,
    node_one_submitter: AbstractSubmitter,
    node_two_submitter: AbstractSubmitter,
    node_three_submitter: AbstractSubmitter,
    save_dir: Path,
) -> None:
    """
    Assert that the graph is running in correct order.

    Four function nodes are added to a graph, where node_two waits for
    node_one and node_three waits for both node_one and node_two.
    After the graph has been run and the submitter of node_three has
    completed, node_three.log must exist in save_dir.

    Parameters
    ----------
    node_zero_submitter : AbstractSubmitter
        Submitter object for node zero
    node_one_submitter : AbstractSubmitter
        Submitter object for node one
    node_two_submitter : AbstractSubmitter
        Submitter object for node two
    node_three_submitter : AbstractSubmitter
        Submitter object for node three
    save_dir : Path
        Path to where the job artifacts are stored
    """
    graph = RunGraph()

    # Every node is registered the same way: the function is called with
    # save_dir as the only argument, and a path named after the node inside
    # save_dir is passed along together with the submitter of the node
    nodes = (
        ("node_zero", node_zero, node_zero_submitter),
        ("node_one", node_one, node_one_submitter),
        ("node_two", node_two, node_two_submitter),
        ("node_three", node_three, node_three_submitter),
    )
    for node_name, function, submitter in nodes:
        graph.add_function_node(
            node_name,
            {"function": function, "args": (save_dir,), "kwargs": None},
            save_dir.joinpath(node_name + ".py"),
            submitter,
        )

    graph.add_waiting_for("node_two", "node_one")
    graph.add_waiting_for("node_three", ("node_one", "node_two"))

    runner = BoutRunner(graph)
    runner.run()

    # NOTE: Only the last node in the chain is waited for
    #       Presumably node_three writes node_three.log - verify against the
    #       definition of node_three
    node_three_submitter.wait_until_completed()
    assert save_dir.joinpath("node_three.log").is_file()