def run(  # pylint: disable=too-many-arguments,too-many-locals
    self,
    pipeline_name: str = None,
    tags: Iterable[str] = None,
    runner: AbstractRunner = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
    load_versions: Dict[str, str] = None,
    extra_params: Dict[str, Any] = None,
) -> Dict[str, Any]:
    """Run the named pipeline for this session with the specified runner.

    Args:
        pipeline_name: Name of the pipeline that is being run.
        tags: An optional list of node tags which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        runner: An optional parameter specifying the runner that you want
            to run the pipeline with.
        node_names: An optional list of node names which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the
            nodes with these names will be run.
        from_nodes: An optional list of node names which should be used as
            a starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        from_inputs: An optional list of input datasets which should be
            used as a starting point of the new ``Pipeline``.
        load_versions: An optional flag to specify a particular dataset
            version timestamp to load.
        extra_params: Additional run parameters.

    Raises:
        Exception: Any uncaught exception during the run will be re-raised
            after being passed to the ``on_pipeline_error`` hook.

    Returns:
        Any node outputs that cannot be processed by the ``DataCatalog``.
        These are returned in a dictionary, where the keys are defined
        by the node outputs.
    """
    # pylint: disable=protected-access,no-member
    # Report project name
    logging.info("** Kedro project %s", self._project_path.name)

    # The session id serves both as the save version for datasets and as
    # the id identifying this run.
    session_id = self.store["session_id"]
    save_version = session_id
    run_id = session_id
    # Deep-copy so downstream mutations never leak back to the caller.
    extra_params = deepcopy(extra_params) or dict()
    context = self.context

    whole_pipeline = context._get_pipeline(name=pipeline_name)
    filtered_pipeline = context._filter_pipeline(
        pipeline=whole_pipeline,
        tags=tags,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        node_names=node_names,
        from_inputs=from_inputs,
    )

    # Everything the lifecycle hooks need to know about this run.
    record_data = {
        "run_id": run_id,
        "project_path": self._project_path.as_posix(),
        "env": context.env,
        "kedro_version": self.store["kedro_version"],
        "tags": tags,
        "from_nodes": from_nodes,
        "to_nodes": to_nodes,
        "node_names": node_names,
        "from_inputs": from_inputs,
        "load_versions": load_versions,
        "extra_params": extra_params,
        "pipeline_name": pipeline_name,
    }

    catalog = context._get_catalog(
        save_version=save_version, load_versions=load_versions
    )

    # Run the runner
    runner = runner or SequentialRunner()
    hooks = get_hook_manager().hook
    hooks.before_pipeline_run(
        run_params=record_data, pipeline=filtered_pipeline, catalog=catalog
    )

    try:
        run_result = runner.run(filtered_pipeline, catalog, run_id)
    except Exception as error:
        # Give the error hook a chance to react, then propagate unchanged.
        hooks.on_pipeline_error(
            error=error,
            run_params=record_data,
            pipeline=filtered_pipeline,
            catalog=catalog,
        )
        raise

    hooks.after_pipeline_run(
        run_params=record_data,
        run_result=run_result,
        pipeline=filtered_pipeline,
        catalog=catalog,
    )
    return run_result
def run(  # pylint: disable=too-many-arguments
    self,
    tags: Iterable[str] = None,
    runner: AbstractRunner = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
    pipeline: Pipeline = None,
    catalog: DataCatalog = None,
) -> Dict[str, Any]:
    """Execute the (optionally filtered) pipeline with the given runner.

    Args:
        tags: An optional list of node tags which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        runner: An optional parameter specifying the runner that you want
            to run the pipeline with.
        node_names: An optional list of node names which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the
            nodes with these names will be run.
        from_nodes: An optional list of node names which should be used as
            a starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        from_inputs: An optional list of input datasets which should be
            used as a starting point of the new ``Pipeline``.
        pipeline: Optional Pipeline to run, defaults to self.pipeline.
        catalog: Optional DataCatalog to run with, defaults to self.catalog.

    Raises:
        KedroContextError: If the resulting ``Pipeline`` is empty or
            incorrect tags are provided.

    Returns:
        Any node outputs that cannot be processed by the ``DataCatalog``.
        These are returned in a dictionary, where the keys are defined
        by the node outputs.
    """
    # Report project name
    logging.info("** Kedro project %s", self.project_path.name)

    # Narrow the pipeline down to the requested subset of nodes.
    selected_pipeline = self._filter_pipeline(
        pipeline=pipeline,
        tags=tags,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        node_names=node_names,
        from_inputs=from_inputs,
    )
    selected_catalog = catalog or self.catalog

    # Persist the filter arguments alongside dataset versions for audit.
    self._record_version_journal(
        selected_catalog,
        tags=tags,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        node_names=node_names,
        from_inputs=from_inputs,
    )

    # Run the runner
    active_runner = runner or SequentialRunner()
    return active_runner.run(selected_pipeline, selected_catalog)
def run(  # pylint: disable=too-many-arguments,too-many-locals
    self,
    tags: Iterable[str] = None,
    runner: AbstractRunner = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
    load_versions: Dict[str, str] = None,
    pipeline_name: str = None,
) -> Dict[str, Any]:
    """Runs the pipeline with a specified runner.

    Args:
        tags: An optional list of node tags which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        runner: An optional parameter specifying the runner that you want
            to run the pipeline with.
        node_names: An optional list of node names which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the
            nodes with these names will be run.
        from_nodes: An optional list of node names which should be used as
            a starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        from_inputs: An optional list of input datasets which should be
            used as a starting point of the new ``Pipeline``.
        load_versions: An optional flag to specify a particular dataset
            version timestamp to load.
        pipeline_name: Name of the ``Pipeline`` to execute.
            Defaults to "__default__".

    Raises:
        KedroContextError: If the resulting ``Pipeline`` is empty or
            incorrect tags are provided.
        Exception: Any uncaught exception will be re-raised after being
            passed to ``on_pipeline_error``.

    Returns:
        Any node outputs that cannot be processed by the ``DataCatalog``.
        These are returned in a dictionary, where the keys are defined
        by the node outputs.
    """
    # Report project name
    logging.info("** Kedro project %s", self.project_path.name)

    try:
        pipeline = self._get_pipeline(name=pipeline_name)
    except NotImplementedError:
        # Pre-multi-pipeline projects don't implement _get_pipeline; fall
        # back to the single self.pipeline attribute with a deprecation
        # warning (or fail outright if a specific pipeline was requested).
        common_migration_message = (
            "`ProjectContext._get_pipeline(self, name)` method is expected. "
            "Please refer to the 'Modular Pipelines' section of the documentation."
        )
        if pipeline_name:
            raise KedroContextError(
                "The project is not fully migrated to use multiple pipelines. "
                + common_migration_message
            )

        warn(
            "You are using the deprecated pipeline construction mechanism. "
            + common_migration_message,
            DeprecationWarning,
        )
        pipeline = self.pipeline

    filtered_pipeline = self._filter_pipeline(
        pipeline=pipeline,
        tags=tags,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        node_names=node_names,
        from_inputs=from_inputs,
    )

    save_version = self._get_save_version()
    run_id = self.run_id or save_version

    # Everything the hooks and the journal need to know about this run.
    record_data = {
        "run_id": run_id,
        "project_path": str(self.project_path),
        "env": self.env,
        "kedro_version": self.project_version,
        "tags": tags,
        "from_nodes": from_nodes,
        "to_nodes": to_nodes,
        "node_names": node_names,
        "from_inputs": from_inputs,
        "load_versions": load_versions,
        "pipeline_name": pipeline_name,
        "extra_params": self._extra_params,
    }
    journal = Journal(record_data)

    catalog = self._get_catalog(
        save_version=save_version, journal=journal, load_versions=load_versions
    )

    # Run the runner
    runner = runner or SequentialRunner()

    self._hook_manager.hook.before_pipeline_run(  # pylint: disable=no-member
        run_params=record_data, pipeline=filtered_pipeline, catalog=catalog
    )

    try:
        run_result = runner.run(filtered_pipeline, catalog, run_id)
    except Exception as error:
        self._hook_manager.hook.on_pipeline_error(  # pylint: disable=no-member
            error=error,
            run_params=record_data,
            pipeline=filtered_pipeline,
            catalog=catalog,
        )
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (`raise error` was redundant and less idiomatic).
        raise

    self._hook_manager.hook.after_pipeline_run(  # pylint: disable=no-member
        run_params=record_data,
        run_result=run_result,
        pipeline=filtered_pipeline,
        catalog=catalog,
    )
    return run_result
def run(  # pylint: disable=too-many-arguments
    self,
    tags: Iterable[str] = None,
    runner: AbstractRunner = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    pipeline: Pipeline = None,
    catalog: DataCatalog = None,
) -> Dict[str, Any]:
    """Runs the pipeline with a specified runner.

    Args:
        tags: An optional list of node tags which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        runner: An optional parameter specifying the runner that you want
            to run the pipeline with.
        node_names: An optional list of node names which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the
            nodes with these names will be run.
        from_nodes: An optional list of node names which should be used as
            a starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        pipeline: Optional Pipeline to run, defaults to self.pipeline.
        catalog: Optional DataCatalog to run with, defaults to self.catalog.

    Raises:
        KedroContextError: If the resulting ``Pipeline`` is empty or
            incorrect tags are provided.

    Returns:
        Any node outputs that cannot be processed by the ``DataCatalog``.
        These are returned in a dictionary, where the keys are defined
        by the node outputs.
    """
    # Report project name. Lazy %-style args skip the formatting work when
    # INFO is disabled, and match the logging style used elsewhere here.
    logging.info("** Kedro project %s", self.project_path.name)

    # Load the pipeline as the intersection of all filter conditions.
    pipeline = pipeline or self.pipeline

    if tags:
        pipeline = pipeline & self.pipeline.only_nodes_with_tags(*tags)
        if not pipeline.nodes:
            # Failing fast here gives a tags-specific message instead of the
            # generic empty-pipeline error below.
            raise KedroContextError(
                "Pipeline contains no nodes with tags: {}".format(str(tags))
            )

    if from_nodes:
        pipeline = pipeline & self.pipeline.from_nodes(*from_nodes)

    if to_nodes:
        pipeline = pipeline & self.pipeline.to_nodes(*to_nodes)

    if node_names:
        pipeline = pipeline & self.pipeline.only_nodes(*node_names)

    if not pipeline.nodes:
        raise KedroContextError("Pipeline contains no nodes")

    catalog = catalog or self.catalog

    # Run the runner
    runner = runner or SequentialRunner()
    return runner.run(pipeline, catalog)