def _filter_pipeline(
    self,
    pipeline: Pipeline,
    tags: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    node_names: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
) -> Pipeline:
    """Return the part of ``pipeline`` that satisfies every given condition.

    Each non-empty argument selects a sub-pipeline of the *original*
    ``pipeline``; the result is the intersection (``&``) of all of them.

    Args:
        pipeline: The pipeline to filter.
        tags: Passed to ``pipeline.only_nodes_with_tags``.
        from_nodes: Passed to ``pipeline.from_nodes``.
        to_nodes: Passed to ``pipeline.to_nodes``.
        node_names: Passed to ``pipeline.only_nodes``.
        from_inputs: Passed to ``pipeline.from_inputs``.

    Returns:
        The filtered pipeline; ``pipeline`` itself when no condition is given
        and it has nodes.

    Raises:
        KedroContextError: If the tag filter leaves no nodes, or if the final
            intersection has no nodes.
    """
    # Every filter is computed against the full pipeline and then intersected,
    # rather than being chained on the previous result, because chaining makes
    # the outcome order-dependent. With nodes 1,2,3, "from 1" combined with
    # "only 1 and 3" would give either 1 & 3 or just 1 depending on the order.
    filtered = pipeline

    # Tags are handled first and separately so that an empty result can be
    # reported with a tag-specific message.
    if tags:
        filtered &= pipeline.only_nodes_with_tags(*tags)
        if not filtered.nodes:
            message = "Pipeline contains no nodes with tags: {}".format(str(tags))
            raise KedroContextError(message)

    # (pipeline method name, selection) pairs, applied in this exact order.
    # The method is looked up only when its selection is non-empty.
    remaining_filters = (
        ("from_nodes", from_nodes),
        ("to_nodes", to_nodes),
        ("only_nodes", node_names),
        ("from_inputs", from_inputs),
    )
    for method_name, selection in remaining_filters:
        if selection:
            filtered &= getattr(pipeline, method_name)(*selection)

    if not filtered.nodes:
        raise KedroContextError("Pipeline contains no nodes")
    return filtered
def _suggest_resume_scenario(
    self, pipeline: Pipeline, done_nodes: Iterable[Node]
) -> None:
    """Log a warning describing how to resume a run that did not finish.

    The warning reports how many nodes of ``pipeline`` are not in
    ``done_nodes`` and shows a ``kedro run`` command. When at least one node
    is done, the command gets a ``--from-inputs`` option listing the inputs of
    the sub-pipeline formed by the remaining nodes.

    Args:
        pipeline: The pipeline that was being run.
        done_nodes: The nodes that have already run. Any iterable is accepted,
            including a one-shot iterator.
    """
    # Materialise once. ``done_nodes`` is only promised to be an iterable: a
    # generator would be exhausted by the set difference below and is always
    # truthy, so testing it directly would take the resume branch even when
    # nothing has run.
    done = set(done_nodes)
    remaining_nodes = set(pipeline.nodes) - done

    command = "kedro run"
    if done:
        node_names = [n.name for n in remaining_nodes]
        resume_pipeline = pipeline.only_nodes(*node_names)
        # Sorted so the suggested command is identical between runs;
        # presumably ``inputs()`` is an unordered collection of dataset
        # names - confirm against the Pipeline API.
        resume_inputs = sorted(resume_pipeline.inputs())
        command += " --from-inputs {}".format(",".join(resume_inputs))

    self._logger.warning(
        "There are %d nodes that have not run.\n"
        "You can resume the pipeline run with the following command:\n%s",
        len(remaining_nodes),
        command,
    )
def _suggest_resume_scenario(
    self, pipeline: Pipeline, done_nodes: Iterable[Node]
) -> None:
    """Log a warning describing how to resume a run that did not finish.

    The warning reports how many nodes of ``pipeline`` are not in
    ``done_nodes`` and shows an argument to append to the previous command.
    When at least one node is done, that argument is ``--from-nodes`` followed
    by the names of the remaining nodes that consume the inputs of the
    remaining sub-pipeline. Otherwise the argument is empty.

    Args:
        pipeline: The pipeline that was being run.
        done_nodes: The nodes that have already run. Any iterable is accepted,
            including a one-shot iterator.
    """
    # Materialise once. ``done_nodes`` is only promised to be an iterable: a
    # generator would be exhausted by the set difference below and is always
    # truthy, so testing it directly would take the resume branch even when
    # nothing has run.
    done = set(done_nodes)
    remaining_nodes = set(pipeline.nodes) - done

    postfix = ""
    if done:
        node_names = (n.name for n in remaining_nodes)
        resume_p = pipeline.only_nodes(*node_names)

        # Keep only the nodes fed by the inputs of the remaining sub-pipeline.
        start_p = resume_p.only_nodes_with_inputs(*resume_p.inputs())
        start_node_names = (n.name for n in start_p.nodes)
        postfix += ' --from-nodes "{}"'.format(",".join(start_node_names))

    self._logger.warning(
        "There are %d nodes that have not run.\n"
        "You can resume the pipeline run by adding the following "
        "argument to your previous command:\n%s",
        len(remaining_nodes),
        postfix,
    )
def test_only_nodes_missing(self, pipeline_list_with_lists, target_node_names):
    """``only_nodes`` must raise ``ValueError`` for the given node names.

    The names come from the ``target_node_names`` fixture, presumably
    parametrised with names absent from the pipeline (fixture not shown here).
    """
    whole_pipeline = Pipeline(pipeline_list_with_lists["nodes"])

    with pytest.raises(ValueError, match=r"Pipeline does not contain nodes"):
        whole_pipeline.only_nodes(*target_node_names)
def test_only_nodes(self, target_node_names, pipeline_list_with_lists):
    """``only_nodes`` must return exactly the nodes that were asked for.

    Compares the sorted node names of the resulting sub-pipeline with the
    sorted requested names.
    """
    whole_pipeline = Pipeline(pipeline_list_with_lists["nodes"])
    sub_pipeline = whole_pipeline.only_nodes(*target_node_names)

    selected_names = sorted(node.name for node in sub_pipeline.nodes)
    expected_names = sorted(target_node_names)
    assert selected_names == expected_names