def cloud(name, project, version, watch, logs):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT    The name of a flow to run [required]
        --project, -p   TEXT    The name of a project that contains the flow [required]
        --version, -v   INTEGER A flow version to run
        --watch, -w             Watch current state of the flow run, stream output to stdout
        --logs, -l              Get logs of the flow run, stream output to stdout
    """
    # Both modes poll in a blocking loop, so only one may be requested at a time
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    # Resolve the flow ID: most recent matching version (distinct on name,
    # ordered by version descending)
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {"_eq": name},
                            "version": {"_eq": version},
                            "project": {"name": {"_eq": project}},
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow
    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_run_id = client.create_flow_run(flow_id=flow_id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    if watch:
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Echo any states not yet printed, in timestamp order; terminal
            # states end the stream
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state not in ("Success", "Failed"):
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            # Reuse the authenticated client rather than constructing a new
            # Client() on every polling iteration (matches the watch loop)
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:
                    if not len(all_logs):
                        # First log emitted: print with the table header
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            # Check if state is either Success or Failed, exit if it is
            pk_query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {"state": True}
                }
            }
            result = client.graphql(pk_query)

            if result.data.flow_run_by_pk.state in ("Success", "Failed"):
                return

            time.sleep(3)
def run(
    self,
    flow_name: str = None,
    project_name: str = None,
    parameters: dict = None,
    idempotency_key: str = None,
    new_flow_context: dict = None,
    run_name: str = None,
    scheduled_start_time: datetime.datetime = None,
) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not provided,
            this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is located; if
            not provided, this method will use the project provided at initialization.
            If running with Prefect Core's server as the backend, this should not be
            provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization
        - idempotency_key (str, optional): an optional idempotency key for scheduling the
            flow run; if provided, ensures that only one run is created if this task is
            retried or rerun with the same inputs.  If not provided, the current flow run
            ID will be used.
        - new_flow_context (dict, optional): the optional run context for the new flow run
        - run_name (str, optional): name to be set for the flow run
        - scheduled_start_time (datetime, optional): the time to schedule the execution
            for; if not provided, defaults to now

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project name was
            not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import StartFlowRun

        kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
        ```
    """
    # verify that flow and project names were passed where necessary
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None:
        raise ValueError("Must provide a project name.")

    where_clause = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": project_name}},
    }

    # find the flow ID to schedule
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    flow = client.graphql(query).data.flow

    # verify that a flow has been returned
    if not flow:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # grab the ID for the most recent version (query is ordered version desc)
    flow_id = flow[0].id

    # Default the idempotency key to "<flow_run_id>-<map_index>" so each mapped
    # child gets a distinct key.  The check must be `is not None` rather than
    # truthiness: map index 0 is a valid (falsy) index and must still receive
    # its suffix, otherwise child 0 collides with the unmapped key.
    idem_key = None
    if context.get("flow_run_id"):
        map_index = context.get("map_index")
        default = context.get("flow_run_id") + (
            f"-{map_index}" if map_index is not None else ""
        )
        idem_key = idempotency_key or default

    # providing an idempotency key ensures that retries for this task
    # will not create additional flow runs
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        idempotency_key=idem_key or idempotency_key,
        context=new_flow_context,
        run_name=run_name,
        scheduled_start_time=scheduled_start_time,
    )

    self.logger.debug(f"Flow Run {flow_run_id} created.")

    if not self.wait:
        return flow_run_id

    # Poll every 10 seconds until the child run finishes, then raise the
    # prefect signal matching its terminal state so this task adopts it
    while True:
        time.sleep(10)
        flow_run_state = client.get_flow_run_info(flow_run_id).state
        if flow_run_state.is_finished():
            exc = signal_from_state(flow_run_state)(
                f"{flow_run_id} finished in state {flow_run_state}"
            )
            raise exc
def run(self, project_name: str = None, flow_name: str = None,
        parameters: dict = None) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - project_name (str, optional): the project in which the flow is located; if not
            provided, this method will use the project provided at initialization
        - flow_name (str, optional): the name of the flow to schedule; if not provided,
            this method will use the project provided at initialization
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow or project names were not provided, or if the flow provided
            cannot be found

    Example:
        ```python
        from prefect.tasks.cloud.flow_run import FlowRunTask

        kickoff_task = FlowRunTask(project_name="My Cloud Project", flow_name="My Cloud Flow")
        ```
    """
    # Both identifiers are required, either here or at initialization
    if project_name is None:
        raise ValueError("Must provide a project name.")
    if flow_name is None:
        raise ValueError("Must provide a flow name.")

    # Look up the newest non-archived version of the named flow in the project
    lookup_args = {
        "where": {
            "name": {"_eq": flow_name},
            "project": {"name": {"_eq": project_name}},
            "archived": {"_eq": False},
        },
        "order_by": {"version": EnumValue("desc")},
        "limit": 1,
    }
    query = {"query": {with_args("flow", lookup_args): {"id"}}}

    client = Client()
    matching_flows = client.graphql(query).data.flow

    if not matching_flows:
        raise ValueError(
            "No flow {} found in project {}.".format(flow_name, project_name)
        )

    # Ordered by version descending with limit 1, so index 0 is the latest
    latest = matching_flows[0]
    return client.create_flow_run(flow_id=latest.id, parameters=parameters)
def flow(
    id,
    name,
    project,
    version,
    parameters_file,
    parameters_string,
    run_name,
    context,
    watch,
    logs,
    no_url,
):
    """
    Run a flow that is registered to the Prefect API

    \b
    Options:
        --id, -i                    TEXT        The ID of a flow to run
        --name, -n                  TEXT        The name of a flow to run
        --project, -p               TEXT        The name of a project that contains the flow
        --version, -v               INTEGER     A flow version to run
        --parameters-file, -pf      FILE PATH   A filepath of a JSON file containing parameters
        --parameters-string, -ps    TEXT        A string of JSON parameters
            (note: to ensure these are parsed correctly, it is best to include
            the full payload within single quotes)
        --run-name, -rn             TEXT        A name to assign for this run
        --context, -c               TEXT        A string of JSON key / value pairs to include in context
            (note: to ensure these are parsed correctly, it is best to include
            the full payload within single quotes)
        --watch, -w                             Watch current state of the flow run, stream output to stdout
        --logs, -l                              Get logs of the flow run, stream output to stdout
        --no-url                                Only output the flow run id instead of a link

    \b
    Either `id` or both `name` and `project` must be provided to run a flow.

    \b
    If both `--parameters-file` and `--parameters-string` are provided then
    the values passed in through the string will override the values
    provided from the file.

    \b
    e.g.
    File contains:  {"a": 1, "b": 2}
    String:         '{"a": 3}'
    Parameters passed to the flow run: {"a": 3, "b": 2}

    \b
    Example:
        $ prefect run flow -n "Test-Flow" -p "My Project" -ps '{"my_param": 42}'
        Flow Run: https://cloud.prefect.io/myslug/flow-run/2ba3rrfd-411c-4d99-bb2a-f64a6dea78f9
    """
    # Validate the mutually exclusive/required lookup options: either an ID,
    # or a name+project pair, but never both forms at once
    if not id and not (name and project):
        click.secho(
            "A flow ID or some combination of flow name and project must be provided.",
            fg="red",
        )
        return
    if id and (name or project):
        click.secho(
            "Both a flow ID and a name/project combination cannot be provided.",
            fg="red",
        )
        return
    # Watching state and streaming logs both poll in a blocking loop, so only
    # one of the two may be requested per invocation
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    client = Client()
    flow_id = id
    if not flow_id:
        # No ID given: resolve the flow by name/version/project, taking the
        # most recent version (distinct on name, ordered version desc)
        where_clause = {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        }

        query = {
            "query": {
                with_args(
                    "flow",
                    {
                        "where": where_clause,
                        "order_by": {
                            "name": EnumValue("asc"),
                            "version": EnumValue("desc"),
                        },
                        "distinct_on": EnumValue("name"),
                    },
                ): {"id": True}
            }
        }

        result = client.graphql(query)

        flow_data = result.data.flow

        if flow_data:
            flow_id = flow_data[0].id
        else:
            click.secho("{} not found".format(name), fg="red")
            return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    if context:
        context = json.loads(context)
    # String params take precedence over file params via dict-merge ordering
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        context=context,
        parameters={**file_params, **string_params},
        run_name=run_name,
    )

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        flow_run_url = client.get_cloud_url("flow-run", flow_run_id)
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        # Poll every 3 seconds, printing newly-seen states until a terminal
        # ("Success"/"Failed") state is reached
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return flow_run_id

                    current_states.append(state)

            time.sleep(3)

    if logs:
        # Poll every 3 seconds, printing only logs not seen on a previous
        # iteration; the table header is printed once with the first log
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
            "state": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:
                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            # Stop streaming once the run reaches a terminal state
            if new_run.state == "Success" or new_run.state == "Failed":
                return flow_run_id

            time.sleep(3)

    return flow_run_id
def run(self, flow_name: str = None, project_name: str = None,
        parameters: dict = None) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not provided,
            this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is located; if
            not provided, this method will use the project provided at initialization.
            If running with Prefect Core's server as the backend, this should not be
            provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project name was
            not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import FlowRunTask

        kickoff_task = FlowRunTask(project_name="My Project", flow_name="My Cloud Flow")
        ```
    """
    # A flow name is always required; a project name only when Cloud is the backend
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None and config.backend == "cloud":
        raise ValueError("Must provide a project name.")

    filters = {"name": {"_eq": flow_name}, "archived": {"_eq": False}}
    if project_name:
        filters["project"] = {"name": {"_eq": project_name}}

    # Fetch only the newest matching version
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": filters,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    matches = client.graphql(query).data.flow

    if not matches:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # Ordered by version desc with limit 1, so this is the most recent version
    return client.create_flow_run(flow_id=matches[0].id, parameters=parameters)
def _run_flow(
    name,
    version,
    parameters_file,
    parameters_string,
    run_name,
    watch,
    logs,
    no_url,
    project=None,
):
    """
    Look up a flow by name (and optionally version/project), create a flow run
    for it, and optionally stream its states or logs to stdout.

    Returns the flow run ID, or None when validation/lookup fails.
    """
    # Watching state and streaming logs both poll in a blocking loop, so only
    # one of the two may be requested per invocation
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    # Project is optional here (e.g. server backend); only filter on it if given
    where_clause = {"_and": {"name": {"_eq": name}, "version": {"_eq": version},}}
    if project:
        where_clause["_and"]["project"] = {"name": {"_eq": project}}

    # Resolve the most recent matching flow version (distinct on name,
    # ordered by version descending)
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow

    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    # String params override file params via dict-merge ordering
    flow_run_id = client.create_flow_run(
        flow_id=flow_id, parameters={**file_params, **string_params}, run_name=run_name
    )

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        flow_run_url = client.get_cloud_url("flow-run", flow_run_id)
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        # Poll every 3 seconds, printing newly-seen states until a terminal
        # ("Success"/"Failed") state is reached
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return flow_run_id

                    current_states.append(state)

            time.sleep(3)

    if logs:
        # Poll every 3 seconds, printing only logs not seen on a previous
        # iteration; the table header is printed once with the first log
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
            "state": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            # NOTE(review): rebinding `logs` shadows the boolean parameter;
            # harmless here since we are already inside the `if logs:` branch
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:
                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            # Stop streaming once the run reaches a terminal state
            if new_run.state == "Success" or new_run.state == "Failed":
                return flow_run_id

            time.sleep(3)

    return flow_run_id
def run(
    ctx,
    flow_or_group_id,
    project,
    path,
    module,
    name,
    labels,
    context_vars,
    params,
    execute,
    idempotency_key,
    schedule,
    log_level,
    param_file,
    run_name,
    quiet,
    no_logs,
    watch,
):
    """Run a flow"""
    # Since the old command was a subcommand of this, we have to do some
    # mucking to smoothly deprecate it. Can be removed when `prefect run flow`
    # is removed.
    if ctx.invoked_subcommand is not None:
        if any([params, no_logs, quiet, flow_or_group_id]):
            # These options are not supported by `prefect run flow`
            raise ClickException(
                "Got unexpected extra argument (%s)" % ctx.invoked_subcommand
            )
        return

    # Define a simple function so we don't have to have a lot of `if not quiet` logic
    quiet_echo = (
        (lambda *_, **__: None)
        if quiet
        else lambda *args, **kwargs: click.secho(*args, **kwargs)
    )

    # Cast labels to a list instead of a tuple so we can extend it
    labels = list(labels)

    # Ensure that the user has not passed conflicting options
    given_lookup_options = {
        key
        for key, option in {
            "--id": flow_or_group_id,
            "--project": project,
            "--path": path,
            "--module": module,
        }.items()
        if option is not None
    }
    # Since `name` can be passed in conjunction with several options and also alone
    # it requires a special case here
    if not given_lookup_options and not name:
        raise ClickException(
            "Received no options to look up the flow." + FLOW_LOOKUP_MSG
        )
    if "--id" in given_lookup_options and name:
        raise ClickException(
            "Received too many options to look up the flow; "
            "cannot specifiy both `--name` and `--id`" + FLOW_LOOKUP_MSG
        )
    if len(given_lookup_options) > 1:
        raise ClickException(
            "Received too many options to look up the flow: "
            f"{', '.join(given_lookup_options)}" + FLOW_LOOKUP_MSG
        )

    # Load parameters and context ------------------------------------------------------

    context_dict = load_json_key_values(context_vars, "context")

    file_params = {}
    if param_file:
        try:
            with open(param_file) as fp:
                file_params = json.load(fp)
        except FileNotFoundError:
            raise TerminalError(
                f"Parameter file does not exist: {os.path.abspath(param_file)!r}"
            )
        except ValueError as exc:
            raise TerminalError(
                f"Failed to parse JSON at {os.path.abspath(param_file)!r}: {exc}"
            )

    # CLI parameters win over file parameters on key conflicts (warn the user)
    cli_params = load_json_key_values(params, "parameter")
    conflicting_keys = set(cli_params.keys()).intersection(file_params.keys())
    if conflicting_keys:
        quiet_echo(
            "The following parameters were specified by file and CLI, the CLI value "
            f"will be used: {conflicting_keys}"
        )
    params_dict = {**file_params, **cli_params}

    # Local flow run -------------------------------------------------------------------

    if path or module:
        # We can load a flow for local execution immediately if given a path or module,
        # otherwise, we'll lookup the flow then pull from storage for a local run
        with try_error_done("Retrieving local flow...", quiet_echo, traceback=True):
            flow = get_flow_from_path_or_module(path=path, module=module, name=name)

        # Set the desired log level
        if no_logs:
            log_level = 100  # CRITICAL is 50 so this should do it

        run_info = ""
        if params_dict:
            run_info += f"└── Parameters: {params_dict}\n"
        if context_dict:
            run_info += f"└── Context: {context_dict}\n"

        if run_info:
            quiet_echo("Configured local flow run")
            quiet_echo(run_info, nl=False)

        quiet_echo("Running flow locally...")
        with temporary_logger_config(
            level=log_level,
            stream_fmt="└── %(asctime)s | %(levelname)-7s | %(message)s",
            stream_datefmt="%H:%M:%S",
        ):
            with prefect.context(**context_dict):
                try:
                    result_state = flow.run(
                        parameters=params_dict, run_on_schedule=schedule
                    )
                except Exception as exc:
                    quiet_echo("Flow runner encountered an exception!")
                    log_exception(exc, indent=2)
                    raise TerminalError("Flow run failed!")

        if result_state.is_failed():
            quiet_echo("Flow run failed!", fg="red")
            sys.exit(1)
        else:
            quiet_echo("Flow run succeeded!", fg="green")

        return

    # Backend flow run -----------------------------------------------------------------

    if schedule:
        raise ClickException("`--schedule` can only be specified for local flow runs")

    client = Client()

    # Validate the flow look up options we've been given and get the flow from the
    # backend
    with try_error_done("Looking up flow metadata...", quiet_echo):
        flow_view = get_flow_view(
            flow_or_group_id=flow_or_group_id,
            project=project,
            name=name,
        )

    # A custom log level is delivered to the agent via the run config env
    if log_level:
        run_config = flow_view.run_config
        if not run_config.env:
            run_config.env = {}
        run_config.env["PREFECT__LOGGING__LEVEL"] = log_level
    else:
        run_config = None

    if execute:
        # Add a random label to prevent an agent from picking up this run
        labels.append(f"agentless-run-{str(uuid.uuid4())[:8]}")

    try:  # Handle keyboard interrupts during creation
        flow_run_id = None

        # Create a flow run in the backend
        with try_error_done(
            f"Creating run for flow {flow_view.name!r}...",
            quiet_echo,
            traceback=True,
            # Display 'Done' manually after querying for data to display so there is not
            # a lag
            skip_done=True,
        ):
            flow_run_id = client.create_flow_run(
                flow_id=flow_view.flow_id,
                parameters=params_dict,
                context=context_dict,
                # If labels is an empty list pass `None` to get defaults
                # https://github.com/PrefectHQ/server/blob/77c301ce0c8deda4f8771f7e9991b25e7911224a/src/prefect_server/api/runs.py#L136
                labels=labels or None,
                run_name=run_name,
                # We only use the run config for setting logging levels right now
                run_config=run_config,
                idempotency_key=idempotency_key,
            )

        if quiet:
            # Just display the flow run id in quiet mode
            click.echo(flow_run_id)
            flow_run = None
        else:
            # Grab information about the flow run (if quiet we can skip this query)
            flow_run = FlowRunView.from_flow_run_id(flow_run_id)
            run_url = client.get_cloud_url("flow-run", flow_run_id)

            # Display "Done" for creating flow run after pulling the info so there
            # isn't a weird lag
            quiet_echo(" Done", fg="green")
            quiet_echo(
                textwrap.dedent(
                    f"""
                    └── Name: {flow_run.name}
                    └── UUID: {flow_run.flow_run_id}
                    └── Labels: {flow_run.labels}
                    └── Parameters: {flow_run.parameters}
                    └── Context: {flow_run.context}
                    └── URL: {run_url}
                    """
                ).strip()
            )

    except KeyboardInterrupt:
        # If the user interrupts here, they will expect the flow run to be cancelled
        quiet_echo("\nKeyboard interrupt detected! Aborting...", fg="yellow")
        if flow_run_id:
            client.cancel_flow_run(flow_run_id=flow_run_id)
            quiet_echo("Cancelled flow run.")
        else:
            # The flow run was not created so we can just exit
            quiet_echo("Aborted.")
        return

    # Handle agentless execution
    if execute:
        quiet_echo("Executing flow run...")
        try:
            with temporary_logger_config(
                # Disable logging if asked
                level=(100 if no_logs or quiet else log_level),
                stream_fmt="└── %(asctime)s | %(levelname)-7s | %(message)s",
                stream_datefmt="%H:%M:%S",
            ):
                execute_flow_run_in_subprocess(flow_run_id)
        except KeyboardInterrupt:
            quiet_echo("Keyboard interrupt detected! Aborting...", fg="yellow")
            pass
    elif watch:
        try:
            quiet_echo("Watching flow run execution...")
            for log in watch_flow_run(
                flow_run_id=flow_run_id,
                stream_logs=not no_logs,
            ):
                level_name = logging.getLevelName(log.level)
                timestamp = log.timestamp.in_tz(tz="local")
                echo_with_log_color(
                    log.level,
                    f"└── {timestamp:%H:%M:%S} | {level_name:<7} | {log.message}",
                )
        except KeyboardInterrupt:
            quiet_echo("Keyboard interrupt detected!", fg="yellow")
            try:
                cancel = click.confirm(
                    "On exit, we can leave your flow run executing or cancel it.\n"
                    "Do you want to cancel this flow run?",
                    default=True,
                )
            except click.Abort:
                # A second keyboard interrupt will exit without cancellation
                pass
            else:
                if cancel:
                    client.cancel_flow_run(flow_run_id=flow_run_id)
                    quiet_echo("Cancelled flow run.", fg="green")
                    return

            quiet_echo("Exiting without cancelling flow run!", fg="yellow")
            raise  # Re-raise the interrupt
    else:
        # If not watching or executing, exit without checking state
        return

    # Get the final flow run state
    flow_run = FlowRunView.from_flow_run_id(flow_run_id)

    # Wait for the flow run to be done up to 3 seconds
    elapsed_time = 0
    while not flow_run.state.is_finished() and elapsed_time < 3:
        time.sleep(1)
        elapsed_time += 1
        flow_run = flow_run.get_latest()

    # Display the final state
    if flow_run.state.is_failed():
        quiet_echo("Flow run failed!", fg="red")
        sys.exit(1)
    elif flow_run.state.is_successful():
        quiet_echo("Flow run succeeded!", fg="green")
    else:
        quiet_echo(f"Flow run is in unexpected state: {flow_run.state}", fg="yellow")
        sys.exit(1)
def run(
    self, flow_name: str = None, project_name: str = None, parameters: dict = None
) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not provided,
            this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is located; if
            not provided, this method will use the project provided at initialization.
            If running with Prefect Core's server as the backend, this should not be
            provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided at
            initialization

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project name was
            not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import FlowRunTask

        kickoff_task = FlowRunTask(project_name="Hello, World!", flow_name="My Cloud Flow")
        ```
    """
    # Project names are only mandatory when pointed at the hosted Cloud backend
    is_hosted_backend = "prefect.io" in urlparse(config.cloud.api).netloc

    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None and is_hosted_backend:
        raise ValueError("Must provide a project name.")

    filters = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
    }
    if project_name:
        filters["project"] = {"name": {"_eq": project_name}}

    # Fetch only the newest non-archived matching version
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": filters,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    candidates = client.graphql(query).data.flow
    if not candidates:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # Ordered by version desc with limit 1, so this is the most recent version
    flow_id = candidates[0].id

    # Keying idempotency on the current flow run ID prevents task retries from
    # spawning duplicate child runs
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        idempotency_key=context.get("flow_run_id"),
    )

    self.logger.debug(f"Flow Run {flow_run_id} created.")

    if not self.wait:
        return flow_run_id

    # Poll until the child run finishes, then raise the prefect signal that
    # matches its terminal state so this task adopts the child's outcome
    while True:
        time.sleep(10)
        flow_run_state = client.get_flow_run_info(flow_run_id).state
        if flow_run_state.is_finished():
            exc = signal_from_state(flow_run_state)(
                f"{flow_run_id} finished in state {flow_run_state}"
            )
            raise exc
def cloud(
    name,
    project,
    version,
    parameters_file,
    parameters_string,
    run_name,
    watch,
    logs,
    no_url,
):
    """
    Run a registered flow in Prefect Cloud.

    \b
    Options:
        --name, -n                  TEXT        The name of a flow to run [required]
        --project, -p               TEXT        The name of a project that contains the flow [required]
        --version, -v               INTEGER     A flow version to run
        --parameters-file, -pf      FILE PATH   A filepath of a JSON file containing parameters
        --parameters-string, -ps    TEXT        A string of JSON parameters
        --run-name, -rn             TEXT        A name to assign for this run
        --watch, -w                             Watch current state of the flow run, stream output to stdout
        --logs, -l                              Get logs of the flow run, stream output to stdout
        --no-url                                Only output the flow run id instead of a link

    \b
    If both `--parameters-file` and `--parameters-string` are provided then
    the values passed in through the string will override the values
    provided from the file.

    \b
    e.g.
    File contains:  {"a": 1, "b": 2}
    String:         '{"a": 3}'
    Parameters passed to the flow run: {"a": 3, "b": 2}
    """
    # Watching state and streaming logs both poll in a blocking loop, so only
    # one of the two may be requested per invocation
    if watch and logs:
        click.secho(
            "Streaming state and logs not currently supported together.", fg="red"
        )
        return

    # Resolve the most recent matching flow version (distinct on name,
    # ordered by version descending)
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": {
                        "_and": {
                            "name": {"_eq": name},
                            "version": {"_eq": version},
                            "project": {"name": {"_eq": project}},
                        }
                    },
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    result = client.graphql(query)

    flow_data = result.data.flow
    if flow_data:
        flow_id = flow_data[0].id
    else:
        click.secho("{} not found".format(name), fg="red")
        return

    # Load parameters from file if provided
    file_params = {}
    if parameters_file:
        with open(parameters_file) as params_file:
            file_params = json.load(params_file)

    # Load parameters from string if provided
    string_params = {}
    if parameters_string:
        string_params = json.loads(parameters_string)

    # String params override file params via dict-merge ordering
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters={**file_params, **string_params},
        run_name=run_name,
    )

    if no_url:
        click.echo("Flow Run ID: {}".format(flow_run_id))
    else:
        # Generate direct link to Cloud run
        # (rewrite the API hostname into the UI hostname:
        #  "api-foo" -> "foo", otherwise "api" -> "cloud")
        tenant_slug = client.get_default_tenant_slug()
        url = (
            re.sub("api-", "", config.cloud.api)
            if re.search("api-", config.cloud.api)
            else re.sub("api", "cloud", config.cloud.api)
        )
        flow_run_url = "/".join(
            [url.rstrip("/"), tenant_slug, "flow-run", flow_run_id]
        )
        click.echo("Flow Run: {}".format(flow_run_url))

    if watch:
        # Poll every 3 seconds, printing newly-seen states until a terminal
        # ("Success"/"Failed") state is reached
        current_states = []
        while True:
            query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {
                        with_args(
                            "states",
                            {"order_by": {EnumValue("timestamp"): EnumValue("asc")}},
                        ): {"state": True, "timestamp": True}
                    }
                }
            }

            result = client.graphql(query)

            # Filter through retrieved states and output in order
            for state_index in result.data.flow_run_by_pk.states:
                state = state_index.state
                if state not in current_states:
                    if state != "Success" and state != "Failed":
                        click.echo("{} -> ".format(state), nl=False)
                    else:
                        click.echo(state)
                        return

                    current_states.append(state)

            time.sleep(3)

    if logs:
        # Poll every 3 seconds, printing only logs not seen on a previous
        # iteration; the table header is printed once with the first log
        all_logs = []

        log_query = {
            with_args(
                "logs", {"order_by": {EnumValue("timestamp"): EnumValue("asc")}}
            ): {"timestamp": True, "message": True, "level": True},
            "start_time": True,
        }

        query = {
            "query": {
                with_args(
                    "flow_run",
                    {
                        "where": {"id": {"_eq": flow_run_id}},
                        "order_by": {EnumValue("start_time"): EnumValue("desc")},
                    },
                ): log_query
            }
        }

        while True:
            result = client.graphql(query)

            flow_run = result.data.flow_run
            if not flow_run:
                click.secho("{} not found".format(flow_run_id), fg="red")
                return

            new_run = flow_run[0]
            logs = new_run.logs
            output = []

            for i in logs:
                if [i.timestamp, i.level, i.message] not in all_logs:
                    if not len(all_logs):
                        click.echo(
                            tabulate(
                                [[i.timestamp, i.level, i.message]],
                                headers=["TIMESTAMP", "LEVEL", "MESSAGE"],
                                tablefmt="plain",
                                numalign="left",
                                stralign="left",
                            )
                        )
                        all_logs.append([i.timestamp, i.level, i.message])
                        continue

                    output.append([i.timestamp, i.level, i.message])
                    all_logs.append([i.timestamp, i.level, i.message])

            if output:
                click.echo(
                    tabulate(output, tablefmt="plain", numalign="left", stralign="left")
                )

            # Check if state is either Success or Failed, exit if it is
            pk_query = {
                "query": {
                    with_args("flow_run_by_pk", {"id": flow_run_id}): {"state": True}
                }
            }
            result = client.graphql(pk_query)

            if (
                result.data.flow_run_by_pk.state == "Success"
                or result.data.flow_run_by_pk.state == "Failed"
            ):
                return

            time.sleep(3)
def cloud(name, project, version, watch):
    """
    Run a deployed flow in Prefect Cloud.

    \b
    Options:
        --name, -n      TEXT    The name of a flow to run [required]
        --project, -p   TEXT    The name of a project that contains the flow [required]
        --version, -v   INTEGER A flow version to run
        --watch, -w             Watch current state of the flow run, stream output to stdout
    """
    # Build the lookup for the requested flow: filter by name/version/project,
    # order versions descending and take one row per name so index 0 is the
    # most recent matching version.
    where_clause = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    lookup = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {
                        "name": EnumValue("asc"),
                        "version": EnumValue("desc"),
                    },
                    "distinct_on": EnumValue("name"),
                },
            ): {"id": True}
        }
    }

    client = Client()
    matches = client.graphql(lookup).data.flow

    # Guard clause: nothing matched the filters, report and stop.
    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_run_id = client.create_flow_run(flow_id=matches[0].id)
    click.echo("Flow Run ID: {}".format(flow_run_id))

    if not watch:
        return

    # TODO: Convert to using a subscription and make output prettier
    # Poll the run's state every few seconds, echoing each state transition
    # inline until a terminal state (Success/Failed) is reached.
    state_query = {
        "query": {
            with_args("flow_run_by_pk", {"id": flow_run_id}): {"state": True}
        }
    }
    last_state = ""
    while True:
        new_state = client.graphql(state_query).data.flow_run_by_pk.state
        if new_state != last_state:
            last_state = new_state
            if last_state in ("Success", "Failed"):
                click.echo(last_state)
                break
            click.echo("{} -> ".format(last_state), nl=False)
        time.sleep(3)
if idempotency_key is None: idempotency_key = context.get("task_run_id", None) ======= idem_key = None if context.get("flow_run_id"): map_index = context.get("map_index") default = context.get("flow_run_id") + ( f"-{map_index}" if map_index else "" ) idem_key = idempotency_key or default >>>>>>> prefect clone # providing an idempotency key ensures that retries for this task # will not create additional flow runs flow_run_id = client.create_flow_run( flow_id=flow_id, parameters=parameters, <<<<<<< HEAD run_config=run_config, idempotency_key=idempotency_key, ======= idempotency_key=idem_key or idempotency_key, >>>>>>> prefect clone context=new_flow_context, run_name=run_name, scheduled_start_time=scheduled_start_time, ) self.logger.debug(f"Flow Run {flow_run_id} created.") <<<<<<< HEAD
def run(
    self,
    flow_name: str = None,
    project_name: str = None,
    parameters: dict = None,
    run_config: RunConfig = None,
    new_flow_context: dict = None,
    run_name: str = None,
    idempotency_key: str = None,
    scheduled_start_time: datetime.datetime = None,
) -> str:
    """
    Run method for the task; responsible for scheduling the specified flow run.

    Args:
        - flow_name (str, optional): the name of the flow to schedule; if not
            provided, this method will use the flow name provided at initialization
        - project_name (str, optional): the Cloud project in which the flow is
            located; if not provided, this method will use the project provided at
            initialization. If running with Prefect Core's server as the backend,
            this should not be provided.
        - parameters (dict, optional): the parameters to pass to the flow run being
            scheduled; if not provided, this method will use the parameters provided
            at initialization
        - run_config (RunConfig, optional): a run-config to use for this flow run,
            overriding any existing flow settings.
        - new_flow_context (dict, optional): the optional run context for the new
            flow run
        - run_name (str, optional): name to be set for the flow run
        - idempotency_key (str, optional): a unique idempotency key for scheduling
            the flow run. Duplicate flow runs with the same idempotency key will only
            create a single flow run. This is useful for ensuring that only one run
            is created if this task is retried. If not provided, defaults to the
            active `task_run_id`.
        - scheduled_start_time (datetime, optional): the time to schedule the
            execution for; if not provided, defaults to now

    Returns:
        - str: the ID of the newly-scheduled flow run

    Raises:
        - ValueError: if flow was not provided, cannot be found, or if a project
            name was not provided while using Cloud as a backend

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import StartFlowRun

        kickoff_task = StartFlowRun(project_name="Hello, World!",
                                    flow_name="My Cloud Flow")
        ```
    """
    # verify that flow and project names were passed where necessary
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    # NOTE(review): the docstring says project_name should not be provided when
    # running against Prefect Core's server, but this check requires it
    # unconditionally — confirm which behavior is intended.
    if project_name is None:
        raise ValueError("Must provide a project name.")

    # Match only unarchived flows with this name inside the given project.
    where_clause = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": project_name}},
    }

    # find the flow ID to schedule; ordering by version descending with
    # limit 1 selects the latest version of the flow
    query = {
        "query": {
            with_args(
                "flow",
                {
                    "where": where_clause,
                    "order_by": {"version": EnumValue("desc")},
                    "limit": 1,
                },
            ): {"id"}
        }
    }

    client = Client()
    flow = client.graphql(query).data.flow

    # verify that a flow has been returned
    if not flow:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # grab the ID for the most recent version
    flow_id = flow[0].id

    # Default the idempotency key to this task run's ID so retries of this
    # task reuse (rather than duplicate) the scheduled flow run.
    if idempotency_key is None:
        idempotency_key = context.get("task_run_id", None)

    # providing an idempotency key ensures that retries for this task
    # will not create additional flow runs
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        run_config=run_config,
        idempotency_key=idempotency_key,
        context=new_flow_context,
        run_name=run_name,
        scheduled_start_time=scheduled_start_time,
    )

    self.logger.debug(f"Flow Run {flow_run_id} created.")

    # Publish a clickable link artifact pointing at the new run's UI page.
    self.logger.debug(f"Creating link artifact for Flow Run {flow_run_id}.")
    run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
    create_link(urlparse(run_link).path)
    self.logger.info(f"Flow Run: {run_link}")

    # Fire-and-forget mode: return the ID without waiting for completion.
    if not self.wait:
        return flow_run_id

    # Block until the child flow run reaches a finished state, polling at
    # self.poll_interval. The terminal state is converted into the matching
    # prefect signal and raised — presumably so this task run mirrors the
    # child run's outcome (raised even on success); confirm against
    # signal_from_state's semantics.
    while True:
        time.sleep(self.poll_interval.total_seconds())
        flow_run_state = client.get_flow_run_info(flow_run_id).state
        if flow_run_state.is_finished():
            exc = signal_from_state(flow_run_state)(
                f"{flow_run_id} finished in state {flow_run_state}"
            )
            raise exc