def pytask_resolve_dependencies(session: Session) -> bool | None:
    """Build the DAG of the session by running the dependency-resolution hooks.

    The hooks are called in a fixed order: create the DAG, modify it, validate it,
    and finally select the execution DAG. The created DAG is stored on
    ``session.dag`` before the remaining hooks receive it.

    Parameters
    ----------
    session : Session
        The session whose ``tasks`` are passed to the DAG creation hook.

    Returns
    -------
    bool | None
        ``True`` when all hooks ran without raising.

    Raises
    ------
    ResolvingDependenciesError
        If any hook raises an :class:`Exception`. The failure is turned into a
        report, logged via ``pytask_resolve_dependencies_log``, and stored on
        ``session.resolving_dependencies_report`` before this error is raised.

    """
    try:
        session.dag = session.hook.pytask_resolve_dependencies_create_dag(
            session=session, tasks=session.tasks
        )
        session.hook.pytask_resolve_dependencies_modify_dag(
            session=session, dag=session.dag
        )
        session.hook.pytask_resolve_dependencies_validate_dag(
            session=session, dag=session.dag
        )
        session.hook.pytask_resolve_dependencies_select_execution_dag(
            session=session, dag=session.dag
        )

    except Exception:
        failure_report = ResolvingDependenciesReport.from_exception(sys.exc_info())
        session.hook.pytask_resolve_dependencies_log(
            session=session, report=failure_report
        )
        session.resolving_dependencies_report = failure_report

        # The original exception is already captured in the report, so the
        # context is suppressed deliberately.
        raise ResolvingDependenciesError from None

    # Only reached when no hook raised, because the handler above always raises.
    return True
def markers(**config_from_cli: Any) -> NoReturn:
    """Print a table of all registered markers and exit.

    Parameters
    ----------
    **config_from_cli : Any
        Options received from the command line. The key ``"command"`` is set to
        ``"markers"`` before the configuration hooks run.

    Notes
    -----
    The function always terminates the process via :func:`sys.exit` using the
    session's exit code. If configuring fails, the exception is printed and the
    exit code is ``ExitCode.CONFIGURATION_FAILED``.

    """
    config_from_cli["command"] = "markers"

    try:
        # Same bootstrap sequence as in :func:`pytask.main.main`.
        plugin_manager = get_plugin_manager()
        from _pytask import cli

        plugin_manager.register(cli)
        plugin_manager.hook.pytask_add_hooks(pm=plugin_manager)

        configuration = plugin_manager.hook.pytask_configure(
            pm=plugin_manager, config_from_cli=config_from_cli
        )
        session = Session.from_config(configuration)

    except (ConfigurationError, Exception):
        console.print_exception()
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        marker_table = Table("Marker", "Description", leading=1)
        for marker_name, marker_description in configuration["markers"].items():
            marker_table.add_row(f"pytask.mark.{marker_name}", marker_description)
        console.print(marker_table)

    sys.exit(session.exit_code)
def pytask_execute_task_process_report(
    session: Session, report: ExecutionReport
) -> bool:
    """Process the execution report of a task.

    For a successful task, the states of the related nodes are updated in the
    database. For any other outcome, every descending task receives a
    ``skip_ancestor_failed`` marker, the failure counter of the session is
    increased, and the session is told to stop when the maximum number of failures
    is reached or when the task raised ``Exit``.

    Parameters
    ----------
    session : Session
        The session holding the DAG, the configuration and the failure counter.
    report : ExecutionReport
        The report of the task which was just executed.

    Returns
    -------
    bool
        Always ``True``.

    """
    task = report.task

    if report.outcome == TaskOutcome.SUCCESS:
        update_states_in_database(session.dag, task.name)
    else:
        for name_of_descendant in descending_tasks(task.name, session.dag):
            descendant = session.dag.nodes[name_of_descendant]["task"]
            # A fresh marker (and kwargs dict) is built per descendant so that
            # no mutable state is shared between tasks.
            skip_marker = Mark(
                "skip_ancestor_failed",
                (),
                {"reason": f"Previous task {task.name!r} failed."},
            )
            descendant.markers.append(skip_marker)

        session.n_tasks_failed += 1
        if session.n_tasks_failed >= session.config["max_failures"]:
            session.should_stop = True

        # An ``Exit`` raised inside the task stops the session regardless of the
        # configured failure limit.
        if report.exc_info and isinstance(report.exc_info[1], Exit):
            session.should_stop = True

    return True
def build_dag(config_from_cli: dict[str, Any]) -> nx.DiGraph | None:
    """Build the DAG.

    This function is the programmatic interface to ``pytask dag``. It configures a
    session, collects the tasks, resolves the dependencies and returns the result
    of ``_refine_dag(session)``.

    Parameters
    ----------
    config_from_cli : dict[str, Any]
        The configuration usually received from the CLI. For example, use
        ``{"paths": "example-directory/"}`` to collect tasks from a directory.

    Returns
    -------
    nx.DiGraph | None
        The refined graph returned by ``_refine_dag``. ``None`` is returned when
        configuring the session failed; in that case the exception has already
        been printed to the console.

    Raises
    ------
    Exception
        Any exception raised after the session was configured successfully
        (missing optional dependency or layout program, collection errors,
        errors while resolving dependencies) propagates to the caller.

    Notes
    -----
    NOTE(review): An earlier version of this docstring described the return value
    as a preprocessed :class:`pydot.Dot`, while the annotation says
    ``nx.DiGraph``. Confirm against ``_refine_dag`` which one is correct.

    """
    try:
        pm = get_plugin_manager()
        from _pytask import cli

        pm.register(cli)
        pm.hook.pytask_add_hooks(pm=pm)

        config = pm.hook.pytask_configure(pm=pm, config_from_cli=config_from_cli)
        session = Session.from_config(config)

    except (ConfigurationError, Exception):
        console.print_exception()
        # Kept for parity with the CLI commands, although the fallback session is
        # not returned to the caller.
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        # Exceptions raised below intentionally propagate to the caller. The
        # former ``except Exception: raise`` wrapper was a no-op and is removed.
        session.hook.pytask_log_session_header(session=session)
        import_optional_dependency("pydot")
        check_for_optional_program(
            session.config["layout"],
            extra="The layout program is part of the graphviz package which you "
            "can install with conda.",
        )
        session.hook.pytask_collect(session=session)
        session.hook.pytask_resolve_dependencies(session=session)
        return _refine_dag(session)

    # Only reached when the configuration failed.
    return None
def pytask_execute(session: Session) -> None:
    """Run the execution phase of a session.

    The start of the execution is logged, the scheduler created by the hook is
    stored on ``session.scheduler``, the build hook is run, and finally the end of
    the execution is logged together with ``session.execution_reports``.

    Parameters
    ----------
    session : Session
        The session whose hooks are called.

    """
    session.hook.pytask_execute_log_start(session=session)

    scheduler = session.hook.pytask_execute_create_scheduler(session=session)
    session.scheduler = scheduler

    session.hook.pytask_execute_build(session=session)

    execution_reports = session.execution_reports
    session.hook.pytask_execute_log_end(session=session, reports=execution_reports)
def pytask_collect_log(
    session: Session, reports: list[CollectionReport], tasks: list[Task]
) -> None:
    """Log the result of the collection.

    The end of the collection is timestamped and the number of collected tasks is
    printed. If at least one report failed, every failed report is rendered with
    its traceback, a summary panel and the session footer are shown, and the
    collection is aborted.

    Parameters
    ----------
    session : Session
        The session providing the configuration and the collection timestamps.
    reports : list[CollectionReport]
        All reports created during the collection.
    tasks : list[Task]
        All collected tasks.

    Raises
    ------
    CollectionError
        If at least one report has the outcome ``CollectionOutcome.FAIL``.

    """
    session.collection_end = time.time()

    plural_suffix = "" if len(tasks) == 1 else "s"
    console.print(f"Collected {len(tasks)} task{plural_suffix}.")

    failed_reports = [r for r in reports if r.outcome == CollectionOutcome.FAIL]
    if not failed_reports:
        return

    counts = count_outcomes(reports, CollectionOutcome)
    fail_style = CollectionOutcome.FAIL.style

    console.print()
    console.rule(Text("Failures during collection", style=fail_style), style=fail_style)

    for failed in failed_reports:
        node = failed.node
        if node is None:
            header = "Error"
        elif isinstance(node, Task):
            short_name = format_task_id(
                node, editor_url_scheme="no_link", short_name=True
            )
            header = f"Could not collect {short_name}"
        else:
            short_name = reduce_node_name(node, session.config["paths"])
            header = f"Could not collect {short_name}"

        console.rule(Text(header, style=fail_style), style=fail_style)
        console.print()
        console.print(
            render_exc_info(*failed.exc_info, session.config["show_locals"])
        )
        console.print()

    summary = create_summary_panel(
        counts, CollectionOutcome, "Collected errors and tasks"
    )
    console.print(summary)

    if counts[CollectionOutcome.FAIL]:
        footer_outcome = CollectionOutcome.FAIL
    else:
        footer_outcome = CollectionOutcome.SUCCESS

    session.hook.pytask_log_session_footer(
        session=session,
        duration=session.collection_end - session.collection_start,
        outcome=footer_outcome,
    )

    raise CollectionError
def collect(**config_from_cli: Any | None) -> NoReturn:
    """Collect tasks, print information about them and exit.

    Parameters
    ----------
    **config_from_cli : Any | None
        Options received from the command line. The key ``"command"`` is set to
        ``"collect"`` before the configuration hooks run.

    Notes
    -----
    The function always terminates the process via :func:`sys.exit` with the exit
    code of the session. The exit code is set to ``CONFIGURATION_FAILED``,
    ``COLLECTION_FAILED``, ``RESOLVING_DEPENDENCIES_FAILED`` or ``FAILED``
    depending on the stage in which an error occurred.

    """
    config_from_cli["command"] = "collect"

    try:
        # Same bootstrap sequence as in :func:`pytask.main.main`.
        plugin_manager = get_plugin_manager()
        from _pytask import cli

        plugin_manager.register(cli)
        plugin_manager.hook.pytask_add_hooks(pm=plugin_manager)

        configuration = plugin_manager.hook.pytask_configure(
            pm=plugin_manager, config_from_cli=config_from_cli
        )
        session = Session.from_config(configuration)

    except (ConfigurationError, Exception):
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED
        console.print_exception()

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)

            selected_tasks = _select_tasks_by_expressions_and_marker(session)
            common_ancestor = _find_common_ancestor_of_all_nodes(
                selected_tasks, session.config["paths"], session.config["nodes"]
            )
            organized_tasks = _organize_tasks(selected_tasks)

            # Nothing is printed for an empty selection except the closing rule.
            if organized_tasks:
                _print_collected_tasks(
                    organized_tasks,
                    session.config["nodes"],
                    session.config["editor_url_scheme"],
                    common_ancestor,
                )

            console.print()
            console.rule(style="neutral")

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED

        except ResolvingDependenciesError:
            session.exit_code = ExitCode.RESOLVING_DEPENDENCIES_FAILED

        except Exception:
            session.exit_code = ExitCode.FAILED
            console.print_exception()
            console.rule(style="failed")

    sys.exit(session.exit_code)
def profile(**config_from_cli: Any) -> NoReturn:
    """Show information about tasks like runtime and memory consumption of products.

    Parameters
    ----------
    **config_from_cli : Any
        Options received from the command line. The key ``"command"`` is set to
        ``"profile"`` before the configuration hooks run.

    Notes
    -----
    The function always terminates the process via :func:`sys.exit` with the exit
    code of the session. The exit code is ``CONFIGURATION_FAILED`` when the session
    cannot be configured, ``COLLECTION_FAILED`` when the collection fails and
    ``FAILED`` for every other exception.

    """
    config_from_cli["command"] = "profile"

    # Pre-bind ``config`` so that the error handler below can tell whether the
    # configuration was already available when the failure happened.
    config: dict[str, Any] | None = None

    try:
        # Duplication of the same mechanism in :func:`pytask.main.main`.
        pm = get_plugin_manager()
        from _pytask import cli

        pm.register(cli)
        pm.hook.pytask_add_hooks(pm=pm)

        config = pm.hook.pytask_configure(pm=pm, config_from_cli=config_from_cli)
        session = Session.from_config(config)

    except (ConfigurationError, Exception):  # pragma: no cover
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED
        exc_info: tuple[
            type[BaseException], BaseException, TracebackType | None
        ] = sys.exc_info()

        # Previously ``config["show_locals"]`` was read unconditionally, which
        # raised ``UnboundLocalError`` and hid the real error whenever the failure
        # happened before ``pytask_configure`` returned.
        if config is None:
            rendered = render_exc_info(*exc_info)
        else:
            rendered = render_exc_info(*exc_info, show_locals=config["show_locals"])
        console.print(rendered)

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)

            # One (initially empty) info mapping per task, filled in by plugins.
            profile_data: dict[str, dict[str, Any]] = {
                task.name: {} for task in session.tasks
            }
            session.hook.pytask_profile_add_info_on_task(
                session=session, tasks=session.tasks, profile=profile_data
            )
            profile_data = _process_profile(profile_data)

            _print_profile_table(profile_data, session.tasks, session.config)

            session.hook.pytask_profile_export_profile(
                session=session, profile=profile_data
            )

            console.rule(style="neutral")

        except CollectionError:  # pragma: no cover
            session.exit_code = ExitCode.COLLECTION_FAILED

        except Exception:  # pragma: no cover
            session.exit_code = ExitCode.FAILED
            console.print_exception()
            console.rule(style="failed")

    sys.exit(session.exit_code)
def dag(**config_from_cli: Any) -> NoReturn:
    """Create a visualization of the project's directed acyclic graph and exit.

    Parameters
    ----------
    **config_from_cli : Any
        Options received from the command line.

    Notes
    -----
    After the session is configured, the optional dependency ``pydot`` and the
    configured layout program are checked, tasks are collected, dependencies are
    resolved, and the refined graph is written to ``session.config["output_path"]``.
    The function always terminates the process via :func:`sys.exit` with the exit
    code of the session.

    """
    try:
        plugin_manager = get_plugin_manager()
        from _pytask import cli

        plugin_manager.register(cli)
        plugin_manager.hook.pytask_add_hooks(pm=plugin_manager)

        configuration = plugin_manager.hook.pytask_configure(
            pm=plugin_manager, config_from_cli=config_from_cli
        )
        session = Session.from_config(configuration)

    except (ConfigurationError, Exception):
        console.print_exception()
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        try:
            session.hook.pytask_log_session_header(session=session)

            import_optional_dependency("pydot")
            check_for_optional_program(
                session.config["layout"],
                extra="The layout program is part of the graphviz package which you "
                "can install with conda.",
            )

            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)

            refined_graph = _refine_dag(session)
            _write_graph(
                refined_graph, session.config["output_path"], session.config["layout"]
            )

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED

        except ResolvingDependenciesError:
            session.exit_code = ExitCode.RESOLVING_DEPENDENCIES_FAILED

        except Exception:
            session.exit_code = ExitCode.FAILED
            # Internal frames are removed before the traceback is shown.
            trimmed_exc_info = remove_internal_traceback_frames_from_exc_info(
                sys.exc_info()
            )
            console.print()
            console.print(Traceback.from_exception(*trimmed_exc_info))
            console.rule(style="failed")

    sys.exit(session.exit_code)
def pytask_collect(session: Session) -> bool:
    """Collect tasks.

    The start of the collection is timestamped, tasks are collected from the
    paths, and the hook for modifying tasks is called. An exception raised by that
    hook is converted into a failed collection report and appended to
    ``session.collection_reports`` instead of propagating. Afterwards, the
    collection is logged.

    Parameters
    ----------
    session : Session
        The session which stores tasks and collection reports.

    Returns
    -------
    bool
        Always ``True`` (unless the logging hook raises).

    """
    session.collection_start = time.time()

    _collect_from_paths(session)

    try:
        session.hook.pytask_collect_modify_tasks(session=session, tasks=session.tasks)
    except Exception:
        failure = CollectionReport.from_exception(
            outcome=CollectionOutcome.FAIL, exc_info=sys.exc_info()
        )
        session.collection_reports.append(failure)

    session.hook.pytask_collect_log(
        session=session,
        reports=session.collection_reports,
        tasks=session.tasks,
    )

    return True
def pytask_execute_task_protocol(session: Session, task: Task) -> ExecutionReport:
    """Follow the protocol to execute a single task.

    The protocol consists of logging the start, running the setup, execution and
    teardown hooks, building an execution report from the result, processing the
    report and logging the end.

    Parameters
    ----------
    session : Session
        The session whose hooks are called.
    task : Task
        The task to execute.

    Returns
    -------
    ExecutionReport
        A report built from the task alone on success, or from the task and the
        exception information when one of the hooks raised.

    """
    session.hook.pytask_execute_task_log_start(session=session, task=task)

    try:
        session.hook.pytask_execute_task_setup(session=session, task=task)
        session.hook.pytask_execute_task(session=session, task=task)
        session.hook.pytask_execute_task_teardown(session=session, task=task)

    except KeyboardInterrupt:
        # A keyboard interrupt is reported without its traceback and stops the
        # whole session.
        exc_info_without_traceback = remove_traceback_from_exc_info(sys.exc_info())
        report = ExecutionReport.from_task_and_exception(
            task, exc_info_without_traceback
        )
        session.should_stop = True

    except Exception:
        report = ExecutionReport.from_task_and_exception(task, sys.exc_info())

    else:
        report = ExecutionReport.from_task(task)

    session.hook.pytask_execute_task_process_report(session=session, report=report)
    session.hook.pytask_execute_task_log_end(
        session=session, task=task, report=report
    )

    return report
def pytask_execute_log_end(session: Session, reports: list[ExecutionReport]) -> bool:
    """Log information on the execution.

    The end of the execution is timestamped. If tracebacks should be shown, the
    reports of failed tasks and of tasks skipped because of a failed predecessor
    are printed. Afterwards, a summary panel and the session footer are shown.

    Parameters
    ----------
    session : Session
        The session providing the configuration and the execution timestamps.
    reports : list[ExecutionReport]
        The reports of all executed tasks.

    Returns
    -------
    bool
        ``True`` if no task failed.

    Raises
    ------
    ExecutionError
        If at least one report has the outcome ``TaskOutcome.FAIL``.

    """
    session.execution_end = time.time()

    counts = count_outcomes(reports, TaskOutcome)
    number_of_failures = counts[TaskOutcome.FAIL]

    if session.config["show_traceback"]:
        console.print()
        if number_of_failures:
            fail_style = TaskOutcome.FAIL.style
            console.rule(Text("Failures", style=fail_style), style=fail_style)
            console.print()

        reportable_outcomes = (TaskOutcome.FAIL, TaskOutcome.SKIP_PREVIOUS_FAILED)
        for execution_report in reports:
            if execution_report.outcome in reportable_outcomes:
                _print_errored_task_report(session, execution_report)

    console.rule(style="dim")

    summary = create_summary_panel(counts, TaskOutcome, "Collected tasks")
    console.print(summary)

    if number_of_failures:
        footer_outcome = TaskOutcome.FAIL
    else:
        footer_outcome = TaskOutcome.SUCCESS

    session.hook.pytask_log_session_footer(
        session=session,
        duration=session.execution_end - session.execution_start,
        outcome=footer_outcome,
    )

    if number_of_failures:
        raise ExecutionError

    return True
def _create_session(config_from_cli: dict[str, Any]) -> Session:
    """Create a session object on which tasks are collected and resolved.

    Parameters
    ----------
    config_from_cli : dict[str, Any]
        The configuration usually received from the CLI.

    Returns
    -------
    Session
        The configured session after collection and dependency resolution. If an
        error occurred, the session is still returned and its ``exit_code``
        records the failed stage: ``CONFIGURATION_FAILED`` (an empty fallback
        session), ``COLLECTION_FAILED``, ``RESOLVING_DEPENDENCIES_FAILED`` or
        ``FAILED``.

    Notes
    -----
    The return annotation used to be ``nx.DiGraph`` although the function returns
    the session in every code path; the annotation now matches the behavior.

    """
    try:
        pm = get_plugin_manager()
        from _pytask import cli

        pm.register(cli)
        pm.hook.pytask_add_hooks(pm=pm)

        config = pm.hook.pytask_configure(pm=pm, config_from_cli=config_from_cli)
        session = Session.from_config(config)

    except (ConfigurationError, Exception):
        console.print_exception()
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            import_optional_dependency("pydot")
            check_for_optional_program(session.config["layout"])
            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED

        except ResolvingDependenciesError:
            session.exit_code = ExitCode.RESOLVING_DEPENDENCIES_FAILED

        except Exception:
            session.exit_code = ExitCode.FAILED
            console.print_exception()
            console.rule(style="failed")

    return session
def clean(**config_from_cli: Any) -> NoReturn:
    """Clean the provided paths by removing files unknown to pytask.

    Parameters
    ----------
    **config_from_cli : Any
        Options received from the command line. The key ``"command"`` is set to
        ``"clean"`` before the configuration hooks run.

    Notes
    -----
    Depending on ``session.config["mode"]``, unknown paths are only listed
    (``"dry-run"``), removed without asking (``"force"``), or removed after the
    user confirmed each path. The function always terminates the process via
    :func:`sys.exit` with the exit code of the session.

    """
    config_from_cli["command"] = "clean"

    try:
        # Same bootstrap sequence as in :func:`pytask.main.main`.
        plugin_manager = get_plugin_manager()
        from _pytask import cli

        plugin_manager.register(cli)
        plugin_manager.hook.pytask_add_hooks(pm=plugin_manager)

        config = plugin_manager.hook.pytask_configure(
            pm=plugin_manager, config_from_cli=config_from_cli
        )
        session = Session.from_config(config)

    except Exception:
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED
        exc_info: tuple[
            type[BaseException], BaseException, TracebackType | None
        ] = sys.exc_info()
        console.print(render_exc_info(*exc_info))

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            session.hook.pytask_collect(session=session)

            known_paths = _collect_all_paths_known_to_pytask(session)
            exclude = session.config["exclude"]
            include_directories = session.config["directories"]
            unknown_paths = _find_all_unknown_paths(
                session, known_paths, exclude, include_directories
            )
            common_ancestor = find_common_ancestor(
                *unknown_paths, *session.config["paths"]
            )

            if unknown_paths:
                targets = "Files"
                if session.config["directories"]:
                    targets += " and directories"
                console.print(f"\n{targets} which can be removed:\n")

                for path in unknown_paths:
                    short_path = relative_to(path, common_ancestor)

                    if session.config["mode"] == "dry-run":
                        console.print(f"Would remove {short_path}")
                        continue

                    # In "force" mode nothing is asked; otherwise the user
                    # decides for every single path.
                    should_be_deleted = session.config[
                        "mode"
                    ] == "force" or click.confirm(
                        f"Would you like to remove {short_path}?"
                    )
                    if not should_be_deleted:
                        continue

                    if not session.config["quiet"]:
                        console.print(f"Remove {short_path}")

                    if path.is_dir():
                        shutil.rmtree(path)
                    else:
                        path.unlink()

            else:
                console.print()
                console.print(
                    "There are no files and directories which can be deleted."
                )

            console.print()
            console.rule(style=None)

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED
            console.rule(style="failed")

        except Exception:
            exc_info = sys.exc_info()
            console.print(
                render_exc_info(*exc_info, show_locals=config["show_locals"])
            )
            console.rule(style="failed")
            session.exit_code = ExitCode.FAILED

    sys.exit(session.exit_code)
def pytask_execute_log_start(session: Session) -> None:
    """Record the start time of the execution and print an empty line.

    Parameters
    ----------
    session : Session
        The session on which ``execution_start`` is set.

    """
    session.execution_start = time.time()

    # The empty line separates the note on collected items from the task statuses.
    console.print()
def main(config_from_cli: dict[str, Any]) -> Session:
    """Run pytask.

    This is the main command to run pytask which usually receives its options from
    the command line interface. It can also be used to run pytask interactively by
    passing the configuration in a dictionary.

    Parameters
    ----------
    config_from_cli : dict[str, Any]
        A dictionary with options passed to pytask. In general, this dictionary
        holds the information passed via the command line interface.

    Returns
    -------
    session : Session
        The session captures all the information of the current run. Its
        ``exit_code`` is set to ``CONFIGURATION_FAILED``, ``COLLECTION_FAILED``,
        ``RESOLVING_DEPENDENCIES_FAILED`` or ``FAILED`` when the corresponding
        stage raised an error.

    """
    try:
        plugin_manager = get_plugin_manager()
        from _pytask import cli

        plugin_manager.register(cli)
        plugin_manager.hook.pytask_add_hooks(pm=plugin_manager)

        configuration = plugin_manager.hook.pytask_configure(
            pm=plugin_manager, config_from_cli=config_from_cli
        )
        session = Session.from_config(configuration)

    except (ConfigurationError, Exception):
        # Internal frames are removed before the traceback is shown.
        trimmed_exc_info = remove_internal_traceback_frames_from_exc_info(
            sys.exc_info()
        )
        console.print(Traceback.from_exception(*trimmed_exc_info))

        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)
            session.hook.pytask_execute(session=session)

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED

        except ResolvingDependenciesError:
            session.exit_code = ExitCode.RESOLVING_DEPENDENCIES_FAILED

        except ExecutionError:
            session.exit_code = ExitCode.FAILED

        except Exception:
            trimmed_exc_info = remove_internal_traceback_frames_from_exc_info(
                sys.exc_info()
            )
            console.print(Traceback.from_exception(*trimmed_exc_info))
            session.exit_code = ExitCode.FAILED

        # Only a successfully configured session is unconfigured; the fallback
        # session of the configuration-failure branch is created without hooks.
        session.hook.pytask_unconfigure(session=session)

    return session