示例#1
0
def cloud(name, project, version, watch, logs):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT    The name of a flow to run                                       [required]
        --project, -p   TEXT    The name of a project that contains the flow                    [required]
        --version, -v   INTEGER A flow version to run
        --watch, -w             Watch current state of the flow run, stream output to stdout
        --logs, -l              Get logs of the flow run, stream output to stdout
    """

    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.",
            fg="red")
        return

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {
                                "_eq": name
                            },
                            "version": {
                                "_eq": version
                            },
                            "project": {
                                "name": {
                                    "_eq": project
                                }
                            },
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {
                "id": True
            }
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_run_id = client.create_flow_run(flow_id=flow_id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {
                                "order_by": {
                                    EnumValue("timestamp"): EnumValue("asc")
                                }
                            },
                        ): {
                            "state": True,
                            "timestamp": True
                        }
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args("logs", {
                "order_by": {
                    EnumValue("timestamp"): EnumValue("asc")
                }
            }): {
                "timestamp": True,
                "message": True,
                "level": True
            },
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {
                            "id": {
                                "_eq": flow_run_id
                            }
                        },
                        "order_by": {
                            EnumValue("start_time"): EnumValue("desc")
                        },
                    },
                ):
                log_query
            }
        }

        while True:
            result = Client().graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:

                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            ))
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output,
                             tablefmt="plain",
                             numalign="left",
                             stralign="left"))

            # Check if state is either Success or Failed, exit if it is
            pk_query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        "state": True
                    }
                }
            }
            result = client.graphql(pk_query)

            if (result.data.flow_run_by_pk.state == "Success"
                    or result.data.flow_run_by_pk.state == "Failed"):
                return

            time.sleep(3)
示例#2
0
    def run(
        self,
        flow_name: str = None,
        project_name: str = None,
        parameters: dict = None,
        idempotency_key: str = None,
        new_flow_context: dict = None,
        run_name: str = None,
        scheduled_start_time: datetime.datetime = None,
    ) -> str:
        """
        Run method for the task; responsible for scheduling the specified flow run.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; if not provided,
                this method will use the flow name provided at initialization
            - project_name (str, optional): the Cloud project in which the flow is located; if
                not provided, this method will use the project provided at initialization. If
                running with Prefect Core's server as the backend, this should not be provided.
            - parameters (dict, optional): the parameters to pass to the flow run being
                scheduled; if not provided, this method will use the parameters provided at
                initialization
            - idempotency_key (str, optional): an optional idempotency key for scheduling the
                flow run; if provided, ensures that only one run is created if this task is retried
                or rerun with the same inputs.  If not provided, the current flow run ID will be used.
            - new_flow_context (dict, optional): the optional run context for the new flow run
            - run_name (str, optional): name to be set for the flow run
            - scheduled_start_time (datetime, optional): the time to schedule the execution
                for; if not provided, defaults to now

        Returns:
            - str: the ID of the newly-scheduled flow run

        Raises:
            - ValueError: if flow was not provided, cannot be found, or if a project name was
                not provided while using Cloud as a backend

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import StartFlowRun

            kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
            ```

        """

        # verify that flow and project names were passed where necessary
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None:
            raise ValueError("Must provide a project name.")

        where_clause = {
            "name": {
                "_eq": flow_name
            },
            "archived": {
                "_eq": False
            },
            "project": {
                "name": {
                    "_eq": project_name
                }
            },
        }

        # find the flow ID to schedule
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {
                            "version": EnumValue("desc")
                        },
                        "limit": 1,
                    },
                ): {"id"}
            }
        }

        client = Client()
        flow = client.graphql(query).data.flow

        # verify that a flow has been returned
        if not flow:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        # grab the ID for the most recent version
        flow_id = flow[0].id

        idem_key = None
        if context.get("flow_run_id"):
            map_index = context.get("map_index")
            default = context.get("flow_run_id") + (f"-{map_index}"
                                                    if map_index else "")
            idem_key = idempotency_key or default

        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        flow_run_id = client.create_flow_run(
            flow_id=flow_id,
            parameters=parameters,
            idempotency_key=idem_key or idempotency_key,
            context=new_flow_context,
            run_name=run_name,
            scheduled_start_time=scheduled_start_time,
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

        if not self.wait:
            return flow_run_id

        while True:
            time.sleep(10)
            flow_run_state = client.get_flow_run_info(flow_run_id).state
            if flow_run_state.is_finished():
                exc = signal_from_state(flow_run_state)(
                    f"{flow_run_id} finished in state {flow_run_state}")
                raise exc
示例#3
0
    def run(self,
            project_name: str = None,
            flow_name: str = None,
            parameters: dict = None) -> str:
        """
        Run method for the task; responsible for scheduling the specified flow run.

        Args:
            - project_name (str, optional): the project in which the flow is located; if not provided, this method
                will use the project provided at initialization
            - flow_name (str, optional): the name of the flow to schedule; if not provided, this method will
                use the project provided at initialization
            - parameters (dict, optional): the parameters to pass to the flow run being scheduled; if not provided,
                this method will use the parameters provided at initialization

        Returns:
            - str: the ID of the newly-scheduled flow run

        Raises:
            - ValueError: if flow or project names were not provided, or if the flow provided cannot be found

        Example:
            ```python
            from prefect.tasks.cloud.flow_run import FlowRunTask

            kickoff_task = FlowRunTask(project_name="My Cloud Project", flow_name="My Cloud Flow")
            ```

        """
        # verify that flow and project names were passed in some capacity or another
        if project_name is None:
            raise ValueError("Must provide a project name.")
        if flow_name is None:
            raise ValueError("Must provide a flow name.")

        # find the flow ID to schedule
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": {
                            "name": {
                                "_eq": flow_name
                            },
                            "project": {
                                "name": {
                                    "_eq": project_name
                                }
                            },
                            "archived": {
                                "_eq": False
                            },
                        },
                        "order_by": {
                            "version": EnumValue("desc")
                        },
                        "limit": 1,
                    },
                ): {"id"}
            }
        }
        client = Client()
        flow = client.graphql(query).data.flow
        # verify that something's been returned
        if not flow:
            raise ValueError("No flow {} found in project {}.".format(
                flow_name, project_name))
        # grab the ID for the most recent version
        flow_id = flow[0].id
        return client.create_flow_run(flow_id=flow_id, parameters=parameters)
示例#4
0
def flow(
    id,
    name,
    project,
    version,
    parameters_file,
    parameters_string,
    run_name,
    context,
    watch,
    logs,
    no_url,
):
    """
    Run a flow that is registered to the Prefect API

    \b
    Options:
        --id, -i                    TEXT        The ID of a flow to run
        --name, -n                  TEXT        The name of a flow to run
        --project, -p               TEXT        The name of a project that contains the flow
        --version, -v               INTEGER     A flow version to run
        --parameters-file, -pf      FILE PATH   A filepath of a JSON file containing
                                                parameters
        --parameters-string, -ps    TEXT        A string of JSON parameters (note: to ensure these are
                                                parsed correctly, it is best to include the full payload
                                                within single quotes)
        --run-name, -rn             TEXT        A name to assign for this run
        --context, -c               TEXT        A string of JSON key / value pairs to include in context
                                                (note: to ensure these are parsed correctly, it is best
                                                to include the full payload within single quotes)
        --watch, -w                             Watch current state of the flow run, stream
                                                output to stdout
        --logs, -l                              Get logs of the flow run, stream output to
                                                stdout
        --no-url                                Only output the flow run id instead of a
                                                link

    \b
    Either `id` or both `name` and `project` must be provided to run a flow.

    \b
    If both `--parameters-file` and `--parameters-string` are provided then the values
    passed in through the string will override the values provided from the file.

    \b
    e.g.
    File contains:  {"a": 1, "b": 2}
    String:         '{"a": 3}'
    Parameters passed to the flow run: {"a": 3, "b": 2}

    \b
    Example:
        $ prefect run flow -n "Test-Flow" -p "My Project" -ps '{"my_param": 42}'
        Flow Run: https://cloud.prefect.io/myslug/flow-run/2ba3rrfd-411c-4d99-bb2a-f64a6dea78f9
    """
    if not id and not (name and project):
        click.secho(
            "A flow ID or some combination of flow name and project must be provided.",
            fg="red",
        )
        return

    if id and (name or project):
        click.secho(
            "Both a flow ID and a name/project combination cannot be provided.",
            fg="red",
        )
        return

    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    client = Client()
    flow_id = id
    if not flow_id:
        where_clause = {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        }

        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {
                            "name": EnumValue("asc"),
                            "version": EnumValue("desc"),
                        },
                        "distinct_on": EnumValue("name"),
                    },
                ): {"id": True}
            }
        }

        result = client.graphql(query)

        flow_data = result.data.flow

        if flow_data:
            flow_id = flow_data[0].id
        else:
            click.secho("{} not found".format(name), fg="red")
            return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    if context:
        context = json.loads(context)
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        context=context,
        parameters={**file_params, **string_params},
        run_name=run_name,
    )

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        flow_run_url = client.get_cloud_url("flow-run", flow_run_id)
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return flow_run_id

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
            "state": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:

                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            if new_run.state == "Success" or new_run.state == "Failed":
                return flow_run_id

            time.sleep(3)

    return flow_run_id
示例#5
0
    def run(self,
            flow_name: str = None,
            project_name: str = None,
            parameters: dict = None) -> str:
        """
        Run method for the task; responsible for scheduling the specified flow run.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; if not provided, this method will
                use the flow name provided at initialization
            - project_name (str, optional): the Cloud project in which the flow is located; if not provided, this method
                will use the project provided at initialization. If running with Prefect Core's server as the backend,
                this should not be provided.
            - parameters (dict, optional): the parameters to pass to the flow run being scheduled; if not provided,
                this method will use the parameters provided at initialization

        Returns:
            - str: the ID of the newly-scheduled flow run

        Raises:
            - ValueError: if flow was not provided, cannot be found, or if a project name was not provided while using
                Cloud as a backend

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import FlowRunTask

            kickoff_task = FlowRunTask(project_name="My Project", flow_name="My Cloud Flow")
            ```

        """
        # verify that flow and project names were passed where necessary
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None and config.backend == "cloud":
            raise ValueError("Must provide a project name.")

        where_clause = {
            "name": {
                "_eq": flow_name
            },
            "archived": {
                "_eq": False
            },
        }

        if project_name:
            where_clause["project"] = {"name": {"_eq": project_name}}

        # find the flow ID to schedule
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {
                            "version": EnumValue("desc")
                        },
                        "limit": 1,
                    },
                ): {"id"}
            }
        }

        client = Client()
        flow = client.graphql(query).data.flow

        # verify that a flow has been returned
        if not flow:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        # grab the ID for the most recent version
        flow_id = flow[0].id
        return client.create_flow_run(flow_id=flow_id, parameters=parameters)
示例#6
0
文件: run.py 项目: zhangxsgit/prefect
def _run_flow(
    name,
    version,
    parameters_file,
    parameters_string,
    run_name,
    watch,
    logs,
    no_url,
    project=None,
):
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    where_clause = {"_and": {"name": {"_eq": name}, "version": {"_eq": version},}}

    if project:
        where_clause["_and"]["project"] = {"name": {"_eq": project}}

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    flow_run_id = client.create_flow_run(
        flow_id=flow_id, parameters={**file_params, **string_params}, run_name=run_name
    )

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        flow_run_url = client.get_cloud_url("flow-run", flow_run_id)
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return flow_run_id

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
            "state": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:

                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            if new_run.state == "Success" or new_run.state == "Failed":
                return flow_run_id

            time.sleep(3)

    return flow_run_id
示例#7
0
def run(
    ctx,
    flow_or_group_id,
    project,
    path,
    module,
    name,
    labels,
    context_vars,
    params,
    execute,
    idempotency_key,
    schedule,
    log_level,
    param_file,
    run_name,
    quiet,
    no_logs,
    watch,
):
    """Run a flow"""
    # Since the old command was a subcommand of this, we have to do some
    # mucking to smoothly deprecate it. Can be removed with `prefect run flow`
    # is removed.
    if ctx.invoked_subcommand is not None:
        if any([params, no_logs, quiet, flow_or_group_id]):
            # These options are not supported by `prefect run flow`
            raise ClickException("Got unexpected extra argument (%s)" %
                                 ctx.invoked_subcommand)
        return

    # Define a simple function so we don't have to have a lot of `if not quiet` logic
    quiet_echo = ((lambda *_, **__: None) if quiet else
                  lambda *args, **kwargs: click.secho(*args, **kwargs))

    # Cast labels to a list instead of a tuple so we can extend it
    labels = list(labels)

    # Ensure that the user has not passed conflicting options
    given_lookup_options = {
        key
        for key, option in {
            "--id": flow_or_group_id,
            "--project": project,
            "--path": path,
            "--module": module,
        }.items() if option is not None
    }
    # Since `name` can be passed in conjunction with several options and also alone
    # it requires a special case here
    if not given_lookup_options and not name:
        raise ClickException("Received no options to look up the flow." +
                             FLOW_LOOKUP_MSG)
    if "--id" in given_lookup_options and name:
        raise ClickException("Received too many options to look up the flow; "
                             "cannot specifiy both `--name` and `--id`" +
                             FLOW_LOOKUP_MSG)
    if len(given_lookup_options) > 1:
        raise ClickException("Received too many options to look up the flow: "
                             f"{', '.join(given_lookup_options)}" +
                             FLOW_LOOKUP_MSG)

    # Load parameters and context ------------------------------------------------------
    context_dict = load_json_key_values(context_vars, "context")

    file_params = {}
    if param_file:

        try:
            with open(param_file) as fp:
                file_params = json.load(fp)
        except FileNotFoundError:
            raise TerminalError(
                f"Parameter file does not exist: {os.path.abspath(param_file)!r}"
            )
        except ValueError as exc:
            raise TerminalError(
                f"Failed to parse JSON at {os.path.abspath(param_file)!r}: {exc}"
            )

    cli_params = load_json_key_values(params, "parameter")
    conflicting_keys = set(cli_params.keys()).intersection(file_params.keys())
    if conflicting_keys:
        quiet_echo(
            "The following parameters were specified by file and CLI, the CLI value "
            f"will be used: {conflicting_keys}")
    params_dict = {**file_params, **cli_params}

    # Local flow run -------------------------------------------------------------------

    if path or module:
        # We can load a flow for local execution immediately if given a path or module,
        # otherwise, we'll lookup the flow then pull from storage for a local run
        with try_error_done("Retrieving local flow...",
                            quiet_echo,
                            traceback=True):
            flow = get_flow_from_path_or_module(path=path,
                                                module=module,
                                                name=name)

        # Set the desired log level
        if no_logs:
            log_level = 100  # CRITICAL is 50 so this should do it

        run_info = ""
        if params_dict:
            run_info += f"└── Parameters: {params_dict}\n"
        if context_dict:
            run_info += f"└── Context: {context_dict}\n"

        if run_info:
            quiet_echo("Configured local flow run")
            quiet_echo(run_info, nl=False)

        quiet_echo("Running flow locally...")
        with temporary_logger_config(
                level=log_level,
                stream_fmt="└── %(asctime)s | %(levelname)-7s | %(message)s",
                stream_datefmt="%H:%M:%S",
        ):
            with prefect.context(**context_dict):
                try:
                    result_state = flow.run(parameters=params_dict,
                                            run_on_schedule=schedule)
                except Exception as exc:
                    quiet_echo("Flow runner encountered an exception!")
                    log_exception(exc, indent=2)
                    raise TerminalError("Flow run failed!")

        if result_state.is_failed():
            quiet_echo("Flow run failed!", fg="red")
            sys.exit(1)
        else:
            quiet_echo("Flow run succeeded!", fg="green")

        return

    # Backend flow run -----------------------------------------------------------------

    if schedule:
        raise ClickException(
            "`--schedule` can only be specified for local flow runs")

    client = Client()

    # Validate the flow look up options we've been given and get the flow from the
    # backend
    with try_error_done("Looking up flow metadata...", quiet_echo):
        flow_view = get_flow_view(
            flow_or_group_id=flow_or_group_id,
            project=project,
            name=name,
        )

    if log_level:
        run_config = flow_view.run_config
        if not run_config.env:
            run_config.env = {}
        run_config.env["PREFECT__LOGGING__LEVEL"] = log_level
    else:
        run_config = None

    if execute:
        # Add a random label to prevent an agent from picking up this run
        labels.append(f"agentless-run-{str(uuid.uuid4())[:8]}")

    try:  # Handle keyboard interrupts during creation
        flow_run_id = None

        # Create a flow run in the backend
        with try_error_done(
                f"Creating run for flow {flow_view.name!r}...",
                quiet_echo,
                traceback=True,
                # Display 'Done' manually after querying for data to display so there is not
                # a lag
                skip_done=True,
        ):
            flow_run_id = client.create_flow_run(
                flow_id=flow_view.flow_id,
                parameters=params_dict,
                context=context_dict,
                # If labels is an empty list pass `None` to get defaults
                # https://github.com/PrefectHQ/server/blob/77c301ce0c8deda4f8771f7e9991b25e7911224a/src/prefect_server/api/runs.py#L136
                labels=labels or None,
                run_name=run_name,
                # We only use the run config for setting logging levels right now
                run_config=run_config,
                idempotency_key=idempotency_key,
            )

        if quiet:
            # Just display the flow run id in quiet mode
            click.echo(flow_run_id)
            flow_run = None
        else:
            # Grab information about the flow run (if quiet we can skip this query)
            flow_run = FlowRunView.from_flow_run_id(flow_run_id)
            run_url = client.get_cloud_url("flow-run", flow_run_id)

            # Display "Done" for creating flow run after pulling the info so there
            # isn't a weird lag
            quiet_echo(" Done", fg="green")
            quiet_echo(
                textwrap.dedent(f"""
                        └── Name: {flow_run.name}
                        └── UUID: {flow_run.flow_run_id}
                        └── Labels: {flow_run.labels}
                        └── Parameters: {flow_run.parameters}
                        └── Context: {flow_run.context}
                        └── URL: {run_url}
                        """).strip())

    except KeyboardInterrupt:
        # If the user interrupts here, they will expect the flow run to be cancelled
        quiet_echo("\nKeyboard interrupt detected! Aborting...", fg="yellow")
        if flow_run_id:
            client.cancel_flow_run(flow_run_id=flow_run_id)
            quiet_echo("Cancelled flow run.")
        else:
            # The flow run was not created so we can just exit
            quiet_echo("Aborted.")
        return

    # Handle agentless execution
    if execute:
        quiet_echo("Executing flow run...")
        try:
            with temporary_logger_config(
                    level=(100 if no_logs or quiet else
                           log_level),  # Disable logging if asked
                    stream_fmt=
                    "└── %(asctime)s | %(levelname)-7s | %(message)s",
                    stream_datefmt="%H:%M:%S",
            ):
                execute_flow_run_in_subprocess(flow_run_id)
        except KeyboardInterrupt:
            quiet_echo("Keyboard interrupt detected! Aborting...", fg="yellow")
            pass

    elif watch:
        try:
            quiet_echo("Watching flow run execution...")
            for log in watch_flow_run(
                    flow_run_id=flow_run_id,
                    stream_logs=not no_logs,
            ):
                level_name = logging.getLevelName(log.level)
                timestamp = log.timestamp.in_tz(tz="local")
                echo_with_log_color(
                    log.level,
                    f"└── {timestamp:%H:%M:%S} | {level_name:<7} | {log.message}",
                )

        except KeyboardInterrupt:
            quiet_echo("Keyboard interrupt detected!", fg="yellow")
            try:
                cancel = click.confirm(
                    "On exit, we can leave your flow run executing or cancel it.\n"
                    "Do you want to cancel this flow run?",
                    default=True,
                )
            except click.Abort:
                # A second keyboard interrupt will exit without cancellation
                pass
            else:
                if cancel:
                    client.cancel_flow_run(flow_run_id=flow_run_id)
                    quiet_echo("Cancelled flow run.", fg="green")
                    return

            quiet_echo("Exiting without cancelling flow run!", fg="yellow")
            raise  # Re-raise the interrupt

    else:
        # If not watching or executing, exit without checking state
        return

    # Get the final flow run state
    flow_run = FlowRunView.from_flow_run_id(flow_run_id)

    # Wait for the flow run to be done up to 3 seconds
    elapsed_time = 0
    while not flow_run.state.is_finished() and elapsed_time < 3:
        time.sleep(1)
        elapsed_time += 1
        flow_run = flow_run.get_latest()

    # Display the final state
    if flow_run.state.is_failed():
        quiet_echo("Flow run failed!", fg="red")
        sys.exit(1)
    elif flow_run.state.is_successful():
        quiet_echo("Flow run succeeded!", fg="green")
    else:
        quiet_echo(f"Flow run is in unexpected state: {flow_run.state}",
                   fg="yellow")
        sys.exit(1)
示例#8
0
    def run(
        self, flow_name: str = None, project_name: str = None, parameters: dict = None
    ) -> str:
        """
        Run method for the task; responsible for scheduling the specified flow run.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; if not provided,
                this method will use the flow name provided at initialization
            - project_name (str, optional): the Cloud project in which the flow is located; if
                not provided, this method will use the project provided at initialization. If
                running with Prefect Core's server as the backend, this should not be provided.
            - parameters (dict, optional): the parameters to pass to the flow run being
                scheduled; if not provided, this method will use the parameters provided at
                initialization

        Returns:
            - str: the ID of the newly-scheduled flow run

        Raises:
            - ValueError: if flow was not provided, cannot be found, or if a project name was
                not provided while using Cloud as a backend

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import FlowRunTask

            kickoff_task = FlowRunTask(project_name="Hello, World!", flow_name="My Cloud Flow")
            ```

        """
        is_hosted_backend = "prefect.io" in urlparse(config.cloud.api).netloc

        # verify that flow and project names were passed where necessary
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None and is_hosted_backend:
            raise ValueError("Must provide a project name.")

        where_clause = {
            "name": {"_eq": flow_name},
            "archived": {"_eq": False},
        }

        if project_name:
            where_clause["project"] = {"name": {"_eq": project_name}}

        # find the flow ID to schedule
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {"version": EnumValue("desc")},
                        "limit": 1,
                    },
                ): {"id"}
            }
        }

        client = Client()
        flow = client.graphql(query).data.flow

        # verify that a flow has been returned
        if not flow:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        # grab the ID for the most recent version
        flow_id = flow[0].id

        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        flow_run_id = client.create_flow_run(
            flow_id=flow_id,
            parameters=parameters,
            idempotency_key=context.get("flow_run_id"),
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

        if not self.wait:
            return flow_run_id

        while True:
            time.sleep(10)
            flow_run_state = client.get_flow_run_info(flow_run_id).state
            if flow_run_state.is_finished():
                exc = signal_from_state(flow_run_state)(
                    f"{flow_run_id} finished in state {flow_run_state}"
                )
                raise exc
示例#9
0
文件: run.py 项目: crawftv/prefect
def cloud(
    name,
    project,
    version,
    parameters_file,
    parameters_string,
    run_name,
    watch,
    logs,
    no_url,
):
    """
    Run a registered flow in Prefect Cloud.

    \b
    Options:
        --name, -n                  TEXT        The name of a flow to run                                       [required]
        --project, -p               TEXT        The name of a project that contains the flow                    [required]
        --version, -v               INTEGER     A flow version to run
        --parameters-file, -pf      FILE PATH   A filepath of a JSON file containing parameters
        --parameters-string, -ps    TEXT        A string of JSON parameters
        --run-name, -rn             TEXT        A name to assign for this run
        --watch, -w                             Watch current state of the flow run, stream output to stdout
        --logs, -l                              Get logs of the flow run, stream output to stdout
        --no-url                                Only output the flow run id instead of a link

    \b
    If both `--parameters-file` and `--parameters-string` are provided then the values passed
    in through the string will override the values provided from the file.

    \b
    e.g.
    File contains:  {"a": 1, "b": 2}
    String:         '{"a": 3}'
    Parameters passed to the flow run: {"a": 3, "b": 2}
    """

    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.",
            fg="red")
        return

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {
                                "_eq": name
                            },
                            "version": {
                                "_eq": version
                            },
                            "project": {
                                "name": {
                                    "_eq": project
                                }
                            },
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {
                "id": True
            }
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    flow_run_id = client.create_flow_run(flow_id=flow_id,
                                         parameters={
                                             **file_params,
                                             **string_params
                                         },
                                         run_name=run_name)

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        # Generate direct link to Cloud run
        tenant_slug = client.get_default_tenant_slug()

        url = (re.sub("api-", "", config.cloud.api)
               if re.search("api-", config.cloud.api) else re.sub(
                   "api", "cloud", config.cloud.api))

        flow_run_url = "/".join(
            [url.rstrip("/"), tenant_slug, "flow-run", flow_run_id])

        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {
                                "order_by": {
                                    EnumValue("timestamp"): EnumValue("asc")
                                }
                            },
                        ): {
                            "state": True,
                            "timestamp": True
                        }
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args("logs", {
                "order_by": {
                    EnumValue("timestamp"): EnumValue("asc")
                }
            }): {
                "timestamp": True,
                "message": True,
                "level": True
            },
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {
                            "id": {
                                "_eq": flow_run_id
                            }
                        },
                        "order_by": {
                            EnumValue("start_time"): EnumValue("desc")
                        },
                    },
                ):
                log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:

                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            ))
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output,
                             tablefmt="plain",
                             numalign="left",
                             stralign="left"))

            # Check if state is either Success or Failed, exit if it is
            pk_query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        "state": True
                    }
                }
            }
            result = client.graphql(pk_query)

            if (result.data.flow_run_by_pk.state == "Success"
                    or result.data.flow_run_by_pk.state == "Failed"):
                return

            time.sleep(3)
示例#10
0
def cloud(name, project, version, watch):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT    The name of a flow to run                                       [required]
        --project, -p   TEXT    The name of a project that contains the flow                    [required]
        --version, -v   INTEGER A flow version to run
        --watch, -w             Watch current state of the flow run, stream output to stdout
    """

    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {
                                "_eq": name
                            },
                            "version": {
                                "_eq": version
                            },
                            "project": {
                                "name": {
                                    "_eq": project
                                }
                            },
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {
                "id": True
            }
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_run_id = client.create_flow_run(flow_id=flow_id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    # TODO: Convert to using a subscription and make output prettier
    if watch:
        current_state = ""
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        "state": True
                    }
                }
            }

            result = client.graphql(query)

            if result.data.flow_run_by_pk.state != current_state:
                current_state = result.data.flow_run_by_pk.state
                if current_state != "Success" and current_state != "Failed":
                    click.echo("{} -> ".format(current_state), nl=False)
                else:
                    click.echo(current_state)
                    break
            time.sleep(3)
示例#11
0
        if idempotency_key is None:
            idempotency_key = context.get("task_run_id", None)
=======
        idem_key = None
        if context.get("flow_run_id"):
            map_index = context.get("map_index")
            default = context.get("flow_run_id") + (
                f"-{map_index}" if map_index else ""
            )
            idem_key = idempotency_key or default
>>>>>>> prefect clone

        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        flow_run_id = client.create_flow_run(
            flow_id=flow_id,
            parameters=parameters,
<<<<<<< HEAD
            run_config=run_config,
            idempotency_key=idempotency_key,
=======
            idempotency_key=idem_key or idempotency_key,
>>>>>>> prefect clone
            context=new_flow_context,
            run_name=run_name,
            scheduled_start_time=scheduled_start_time,
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

<<<<<<< HEAD
示例#12
0
    def run(
        self,
        flow_name: str = None,
        project_name: str = None,
        parameters: dict = None,
        run_config: RunConfig = None,
        new_flow_context: dict = None,
        run_name: str = None,
        idempotency_key: str = None,
        scheduled_start_time: datetime.datetime = None,
    ) -> str:
        """
        Run method for the task; responsible for scheduling the specified flow run.

        Args:
            - flow_name (str, optional): the name of the flow to schedule; if not provided,
                this method will use the flow name provided at initialization
            - project_name (str, optional): the Cloud project in which the flow is located; if
                not provided, this method will use the project provided at initialization. If
                running with Prefect Core's server as the backend, this should not be provided.
            - parameters (dict, optional): the parameters to pass to the flow run being
                scheduled; if not provided, this method will use the parameters provided at
                initialization
            - run_config (RunConfig, optional): a run-config to use for this flow
                run, overriding any existing flow settings.
            - new_flow_context (dict, optional): the optional run context for the new flow run
            - run_name (str, optional): name to be set for the flow run
            - idempotency_key (str, optional): a unique idempotency key for scheduling the
                flow run. Duplicate flow runs with the same idempotency key will only create
                a single flow run. This is useful for ensuring that only one run is created
                if this task is retried. If not provided, defaults to the active `task_run_id`.
            - scheduled_start_time (datetime, optional): the time to schedule the execution
                for; if not provided, defaults to now

        Returns:
            - str: the ID of the newly-scheduled flow run

        Raises:
            - ValueError: if flow was not provided, cannot be found, or if a project name was
                not provided while using Cloud as a backend

        Example:
            ```python
            from prefect.tasks.prefect.flow_run import StartFlowRun

            kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
            ```

        """

        # verify that flow and project names were passed where necessary
        if flow_name is None:
            raise ValueError("Must provide a flow name.")
        if project_name is None:
            raise ValueError("Must provide a project name.")

        where_clause = {
            "name": {
                "_eq": flow_name
            },
            "archived": {
                "_eq": False
            },
            "project": {
                "name": {
                    "_eq": project_name
                }
            },
        }

        # find the flow ID to schedule
        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {
                            "version": EnumValue("desc")
                        },
                        "limit": 1,
                    },
                ): {"id"}
            }
        }

        client = Client()
        flow = client.graphql(query).data.flow

        # verify that a flow has been returned
        if not flow:
            raise ValueError("Flow '{}' not found.".format(flow_name))

        # grab the ID for the most recent version
        flow_id = flow[0].id

        if idempotency_key is None:
            idempotency_key = context.get("task_run_id", None)

        # providing an idempotency key ensures that retries for this task
        # will not create additional flow runs
        flow_run_id = client.create_flow_run(
            flow_id=flow_id,
            parameters=parameters,
            run_config=run_config,
            idempotency_key=idempotency_key,
            context=new_flow_context,
            run_name=run_name,
            scheduled_start_time=scheduled_start_time,
        )

        self.logger.debug(f"Flow Run {flow_run_id} created.")

        self.logger.debug(
            f"Creating link artifact for Flow Run {flow_run_id}.")
        run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
        create_link(urlparse(run_link).path)
        self.logger.info(f"Flow Run: {run_link}")

        if not self.wait:
            return flow_run_id

        while True:
            time.sleep(self.poll_interval.total_seconds())
            flow_run_state = client.get_flow_run_info(flow_run_id).state
            if flow_run_state.is_finished():
                exc = signal_from_state(flow_run_state)(
                    f"{flow_run_id} finished in state {flow_run_state}")
                raise exc