def emit(self, record) -> None:  # type: ignore
    """
    Convert a log record into a dictionary and enqueue it via `self.put`.

    Nothing is emitted when `prefect.context.config.logging.log_to_cloud`
    is falsy. Any exception raised while building or enqueueing the log is
    reported through `self.logger` and an error log is enqueued instead.

    Args:
        - record: the log record to forward; its `__dict__` is copied and
            the remaining attributes are shipped under the `"info"` key
    """
    # if we shouldn't log to cloud, don't emit
    if not prefect.context.config.logging.log_to_cloud:
        return

    try:
        # imported lazily inside the handler rather than at module level
        from prefect.client import Client

        if self.client is None:
            self.client = Client()  # type: ignore

        assert isinstance(self.client, Client)  # mypy assert

        attrs = record.__dict__.copy()
        payload = {
            "flow_run_id": prefect.context.get("flow_run_id", None),
            "task_run_id": prefect.context.get("task_run_id", None),
            "timestamp": pendulum.from_timestamp(
                attrs.pop("created", time.time())
            ).isoformat(),
            "name": attrs.pop("name", None),
            "message": attrs.pop("message", None),
            "level": attrs.pop("levelname", None),
        }

        # fold the formatted traceback text into the message
        if attrs.get("exc_text") is not None:
            payload["message"] += "\n" + attrs.pop("exc_text", "")
            attrs.pop("exc_info", None)

        # whatever is left of the record travels along as extra info
        payload["info"] = attrs
        self.put(payload)
    except Exception as exc:
        failure = "Failed to write log with error: {}".format(str(exc))
        self.logger.critical(failure)
        self.put(self._make_error_log(failure))
def execute_cloud_flow():
    """
    Load the storage and environment of the current flow run and execute it.

    The flow run ID is read from `prefect.context`. If it is missing, or if
    the backend returns no flow run for that ID, a message is echoed and the
    function returns without executing anything.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a cloud context.")
        return

    query = {
        "query": {
            with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                "flow": {"name": True, "storage": True, "environment": True}
            }
        }
    }

    result = Client().graphql(query)

    # Guard against an empty result set: indexing `[0]` on it would raise an
    # uninformative IndexError.
    flow_runs = result.data.flow_run
    if not flow_runs:
        click.echo("Flow run {} not found".format(flow_run_id))
        return

    flow_data = flow_runs[0].flow

    # Deserialize the storage and environment recorded for the flow.
    storage_schema = prefect.serialization.storage.StorageSchema()
    storage = storage_schema.load(flow_data.storage)

    environment_schema = prefect.serialization.environment.EnvironmentSchema()
    environment = environment_schema.load(flow_data.environment)

    environment.execute(storage=storage, flow_location=storage.flows[flow_data.name])
def get_key_value(key: str) -> Any:
    """
    Look up the value stored under a key.

    Args:
        - key (str): the name of the key

    Returns:
        - value (Any): A json compatible value

    Raises:
        - ValueError: if the specified key does not exist
        - ClientError: if using Prefect Server instead of Cloud
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    selector = with_args("key_value", {"where": {"key": {"_eq": key}}})
    response = Client().graphql({"query": {selector: {"value"}}})  # type: Any

    matches = response.data.key_value
    if len(matches) == 0:
        raise ValueError(f"No value found for key: {key}")
    return matches[0].value
def run(self, flow_run_id: str, flow_run_name: str) -> bool:
    """
    Rename a flow run through the client.

    Args:
        - flow_run_id (str): The ID of the flow run to rename; must not be `None`
        - flow_run_name (str): The new flow run name; must not be `None`

    Returns:
        - bool: Boolean representing whether the flow run was renamed successfully or not.

    Raises:
        - ValueError: If flow_run_id or name is not provided

    Example:
        ```python
        from prefect.tasks.prefect.flow_rename import FlowRenameTask

        rename_flow = FlowRenameTask(flow_run_id="id123", flow_name="A new flow run name")
        ```
    """
    # Validate both arguments before a client is created.
    for supplied, complaint in (
        (flow_run_id, "Must provide a flow run ID."),
        (flow_run_name, "Must provide a flow name."),
    ):
        if supplied is None:
            raise ValueError(complaint)

    return Client().set_flow_run_name(flow_run_id, flow_run_name)
def set_key_value(key: str, value: Any) -> str:
    """
    Set key value pair, overwriting values for existing key

    Args:
        - key (str): the name of the key
        - value (Any): A json compatible value

    Returns:
        - id (str): the id of the key value pair

    Raises:
        - ClientError: if using Prefect Server instead of Cloud
        - ValueError: if `value` exceeds 10 KB limit
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    # check value is under size limit
    # note this will be enforced by the API
    # Measure the serialized payload itself: `sys.getsizeof` reports the
    # in-memory footprint of the Python string object (header included), which
    # is not the number of bytes in the payload.
    value_size = len(json.dumps(value).encode("utf-8"))
    if value_size > 10000:  # 10 KB max
        raise ValueError("Value payload exceeds 10 KB limit.")

    mutation = {
        "mutation($input: set_key_value_input!)": {
            "set_key_value(input: $input)": {"id"}
        }
    }

    client = Client()
    result = client.graphql(
        query=mutation, variables=dict(input=dict(key=key, value=value))
    )

    return result.data.set_key_value.id
def _hash_flow(self, flow: Flow) -> str:
    """
    Build a stable identifier for a flow group.

    In Prefect Cloud, all versions of a flow in a project are tied together
    by a `flow_group_id`. This is the unique identifier used to store flows
    in Saturn. Since this library registers a flow with Saturn Cloud before
    registering it with Prefect Cloud, it can't rely on the `flow_group_id`
    generated by Prefect Cloud.

    Instead, the SHA-256 hex digest of these pickled values is returned:

        * project name
        * flow name
        * tenant id

    The identifier produced here should uniquely identify all versions of a
    flow with a given name, in a given Prefect Cloud project, for a given
    Prefect Cloud tenant.
    """
    fingerprint_source = [
        self.prefect_cloud_project_name,
        flow.name,
        Client()._active_tenant_id,  # pylint: disable=protected-access
    ]
    return hashlib.sha256(cloudpickle.dumps(fingerprint_source)).hexdigest()
def cloud_flow():
    """
    Execute a flow's environment in the context of Prefect Cloud.

    Note: this is a command that runs during Cloud execution of flows and is not meant
    for local use.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        no_context = "Not currently executing a flow within a Cloud context."
        click.echo(no_context)
        raise Exception(no_context)

    selector = with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}})
    query = {
        "query": {
            selector: {
                "flow": {"name": True, "storage": True, "environment": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        not_found = "Flow run {} not found".format(flow_run_id)
        click.echo(not_found)
        raise ValueError(not_found)

    try:
        flow_data = flow_run[0].flow

        # Rebuild the storage and environment objects from their serialized form.
        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )
        environment = prefect.serialization.environment.EnvironmentSchema().load(
            flow_data.environment
        )

        environment.setup(storage=storage)
        environment.execute(
            storage=storage, flow_location=storage.flows[flow_data.name]
        )
    except Exception as exc:
        # Mark the flow run as Failed before surfacing the error.
        msg = "Failed to load and execute Flow's environment: {}".format(repr(exc))
        failed = prefect.engine.state.Failed(message=msg)
        client.set_flow_run_state(
            flow_run_id=flow_run_id,
            version=result.data.flow_run[0].version,
            state=failed,
        )
        click.echo(str(exc))
        raise exc
def run(self, flow_run_id: str, flow_run_name: str) -> bool:
    """
    Rename a flow run, defaulting to the flow run found in `prefect.context`.

    Args:
        - flow_run_id (str, optional): The ID of the flow run to rename. If falsy,
            the `flow_run_id` from `prefect.context` will be used as default value
        - flow_run_name (str, optional): The new flow run name

    Returns:
        - bool: Boolean representing whether the flow run was renamed successfully or not.

    Raises:
        - ValueError: If `flow_run_id` is not provided and `flow_run_id` does not exist
            in `prefect.context`
        - ValueError: If `flow_run_name` is not provided

    Example:
        ```python
        from prefect.tasks.prefect.flow_rename import FlowRenameTask

        rename_flow = FlowRenameTask(flow_name="A new flow run name")
        ```
    """
    # Fall back to the ambient flow run when no explicit ID was given.
    if not flow_run_id:
        flow_run_id = prefect.context.get("flow_run_id")

    if not flow_run_id:
        raise ValueError(
            "`flow_run_id` must be explicitly provided or available in the context"
        )

    if flow_run_name is None:
        raise ValueError("Must provide a flow name.")

    return Client().set_flow_run_name(flow_run_id, flow_run_name)
def __init__(self, flow: Flow, state_handlers: Iterable[Callable] = None) -> None:
    """
    Create a `Client` for this runner, then initialize the parent runner with
    `CloudTaskRunner` as its task runner class.

    Args:
        - flow (Flow): passed to the parent initializer as `flow`
        - state_handlers (Iterable[Callable], optional): passed to the parent
            initializer as `state_handlers`
    """
    self.client = Client()
    parent_kwargs = dict(
        flow=flow,
        task_runner_cls=CloudTaskRunner,
        state_handlers=state_handlers,
    )
    super().__init__(**parent_kwargs)
def emit(self, record) -> None:  # type: ignore
    """
    Send a log record to the backend with `Client.write_run_log`.

    The client is created on first use. Any exception raised along the way is
    reported through `self.logger` instead of propagating.

    Args:
        - record: the log record to forward; a copy of its `__dict__` is sent
            as the `info` payload
    """
    try:
        # imported lazily inside the handler rather than at module level
        from prefect.client import Client

        if self.client is None:
            self.client = Client()  # type: ignore

        assert isinstance(self.client, Client)  # mypy assert

        attrs = record.__dict__.copy()
        run_ids = {
            "flow_run_id": prefect.context.get("flow_run_id", None),
            "task_run_id": prefect.context.get("task_run_id", None),
        }
        logged_at = pendulum.from_timestamp(attrs.get("created", time.time()))
        logger_name = attrs.get("name", None)
        text = attrs.get("message", None)
        severity = attrs.get("levelname", None)

        # append the formatted traceback text to the message
        if attrs.get("exc_text") is not None:
            text += "\n" + attrs["exc_text"]
            attrs.pop("exc_info", None)

        self.client.write_run_log(
            timestamp=logged_at,
            name=logger_name,
            message=text,
            level=severity,
            info=attrs,
            **run_ids,
        )
    except Exception as exc:
        self.logger.critical("Failed to write log with error: {}".format(str(exc)))
def __init__(
    self,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
) -> None:
    """
    Configure the agent: identity, labels, client, and a stdout logger.

    Args:
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; falls back to the
            `labels` entry of `config.cloud.agent`, parsed with `ast.literal_eval`
        - env_vars (dict, optional): stored as `self.env_vars`; an empty dict
            when not given
        - max_polls (int, optional): stored as `self.max_polls`
    """
    agent_config = config.cloud.agent

    self.name = name or agent_config.get("name", "agent")
    self.labels = list(labels or ast.literal_eval(agent_config.get("labels", "[]")))
    self.env_vars = env_vars or {}
    self.max_polls = max_polls
    self.log_to_cloud = config.logging.log_to_cloud

    token = agent_config.get("auth_token")
    self.client = Client(api_token=token)

    # Token verification and agent registration only happen on the cloud backend.
    if config.backend == "cloud":
        self._verify_token(token)
        self.client.attach_headers({"X-PREFECT-AGENT-ID": self._register_agent()})

    agent_logger = logging.getLogger(self.name)
    agent_logger.setLevel(agent_config.get("level"))

    # Attach a stdout handler only if the logger has no stream handler yet.
    has_stream_handler = any(
        isinstance(existing, logging.StreamHandler)
        for existing in agent_logger.handlers
    )
    if not has_stream_handler:
        stdout_handler = logging.StreamHandler(sys.stdout)
        fmt = logging.Formatter(context.config.logging.format)
        fmt.converter = time.gmtime  # type: ignore
        stdout_handler.setFormatter(fmt)
        agent_logger.addHandler(stdout_handler)

    self.logger = agent_logger
    self.submitting_flow_runs = set()  # type: Set[str]
def set_key_value(key: str, value: Any) -> str:
    """
    Store a value under a key, overwriting values for an existing key.

    Args:
        - key (str): the name of the key
        - value (Any): A json compatible value

    Returns:
        - id (str): the id of the key value pair

    Raises:
        - ClientError: if using Prefect Server instead of Cloud
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    mutation = {
        "mutation($input: set_key_value_input!)": {
            "set_key_value(input: $input)": {"id"}
        }
    }
    variables = {"input": {"key": key, "value": value}}

    response = Client().graphql(query=mutation, variables=variables)
    return response.data.set_key_value.id
def __init__(
    self, name: str = None, labels: Iterable[str] = None, env_vars: dict = None
) -> None:
    """
    Configure the agent: identity, labels, verified client, and a stdout logger.

    Args:
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; falls back to the
            `labels` entry of `config.cloud.agent`, parsed with `ast.literal_eval`
        - env_vars (dict, optional): stored as `self.env_vars`; an empty dict
            when not given
    """
    agent_config = config.cloud.agent

    self.name = name or agent_config.get("name", "agent")
    self.labels = list(labels or ast.literal_eval(agent_config.get("labels", "[]")))
    self.env_vars = env_vars or {}
    self.log_to_cloud = config.logging.log_to_cloud

    token = agent_config.get("auth_token")
    self.client = Client(api_token=token)
    self._verify_token(token)

    agent_logger = logging.getLogger(self.name)
    agent_logger.setLevel(agent_config.get("level"))

    # Attach a stdout handler only if the logger has no stream handler yet.
    has_stream_handler = any(
        isinstance(existing, logging.StreamHandler)
        for existing in agent_logger.handlers
    )
    if not has_stream_handler:
        stdout_handler = logging.StreamHandler(sys.stdout)
        fmt = logging.Formatter(context.config.logging.format)
        fmt.converter = time.gmtime  # type: ignore
        stdout_handler.setFormatter(fmt)
        agent_logger.addHandler(stdout_handler)

    self.logger = agent_logger
def flows(name, version, project):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Filter on name, version and project; the ordering plus `distinct_on`
    # keeps one row per flow name.
    filters = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    ordering = {"name": EnumValue("asc"), "version": EnumValue("desc")}
    fields = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }
    selector = with_args(
        "flow",
        {"where": filters, "order_by": ordering, "distinct_on": EnumValue("name")},
    )

    response = Client().graphql({"query": {selector: fields}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return
    click.echo(matches[0])
def emit(self, record) -> None:  # type: ignore
    """
    Send a log record to the backend with `Client.write_run_log`.

    Logging is best-effort: the first failure sets `self.errored_out`, after
    which every later call returns immediately without contacting the backend.

    Args:
        - record: the log record to forward; a copy of its `__dict__` is sent
            as the `info` payload
    """
    try:
        # Once the handler has failed, skip all further work.
        if self.errored_out is True:
            return

        # imported lazily inside the handler rather than at module level
        from prefect.client import Client

        if self.client is None:
            self.client = Client()  # type: ignore

        assert isinstance(self.client, Client)  # mypy assert

        record_dict = record.__dict__.copy()
        flow_run_id = prefect.context.get("flow_run_id", None)
        task_run_id = prefect.context.get("task_run_id", None)
        timestamp = pendulum.from_timestamp(record_dict.get("created", time.time()))
        name = record_dict.get("name", None)
        message = record_dict.get("message", None)
        # `logging.LogRecord` stores the textual level under `levelname`; it
        # has no `level` attribute, so reading "level" always produced None.
        level = record_dict.get("levelname", None)

        self.client.write_run_log(
            flow_run_id=flow_run_id,
            task_run_id=task_run_id,
            timestamp=timestamp,
            name=name,
            message=message,
            level=level,
            info=record_dict,
        )
    except Exception:
        # Deliberately swallow errors so logging never breaks the caller, but
        # let KeyboardInterrupt / SystemExit propagate (a bare `except:` would
        # have swallowed those too).
        self.errored_out = True
def test_serialize_with_attributes(self):
    """Dumping a handler keeps its type and service URL but omits the client."""
    result_handler = CloudResultHandler(result_handler_service="http://foo.bar")
    result_handler.client = Client()

    dumped = ResultHandlerSchema().dump(result_handler)

    assert isinstance(dumped, dict)
    assert dumped["type"] == "CloudResultHandler"
    assert dumped["result_handler_service"] == "http://foo.bar"
    assert "client" not in dumped
def projects(name):
    """
    Query information regarding your Prefect projects.

    \b
    Options:
        --name, -n      TEXT    A project name to query
    """
    selector = with_args(
        "project",
        {
            "where": {"_and": {"name": {"_eq": name}}},
            "order_by": {"name": EnumValue("asc")},
        },
    )
    # Count flows per project, de-duplicated by flow name.
    flow_counter = with_args("flows_aggregate", {"distinct_on": EnumValue("name")})
    fields = {
        "name": True,
        "created": True,
        "description": True,
        flow_counter: {EnumValue("aggregate"): EnumValue("count")},
    }

    response = Client().graphql({"query": {selector: fields}})

    # One table row per project returned by the query.
    rows = [
        [
            entry.name,
            entry.flows_aggregate.aggregate.count,
            pendulum.parse(entry.created).diff_for_humans(),
            entry.description,
        ]
        for entry in response.data.project
    ]

    table = tabulate(
        rows,
        headers=["NAME", "FLOW COUNT", "AGE", "DESCRIPTION"],
        tablefmt="plain",
        numalign="left",
        stralign="left",
    )
    click.echo(table)
def flow_runs(name, flow_name, output):
    """
    Describe a Prefect flow run.

    \b
    Options:
        --name, -n          TEXT    A flow run name to query [required]
        --flow-name, -fn    TEXT    A flow name to query
        --output, -o        TEXT    Output style, currently supports `json`.
                                    Defaults to Python dictionary format.
    """
    filters = {
        "_and": {
            "name": {"_eq": name},
            "flow": {"name": {"_eq": flow_name}},
        }
    }
    fields = {
        "name": True,
        "flow": {"name": True},
        "created": True,
        "parameters": True,
        "auto_scheduled": True,
        "scheduled_start_time": True,
        "start_time": True,
        "end_time": True,
        "duration": True,
        "heartbeat": True,
        "serialized_state": True,
    }
    selector = with_args("flow_run", {"where": filters})

    response = Client().graphql({"query": {selector: fields}})
    matches = response.data.flow_run

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Only the first match is shown, as JSON when requested.
    first = matches[0]
    click.echo(json.dumps(first) if output == "json" else first)
def tasks(name, version, project):
    """
    Describe tasks from a Prefect flow. This command is similar to `prefect describe flow`
    but instead of flow metadata it outputs task metadata.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    filters = {
        "_and": {
            "name": {"_eq": name},
            "version": {"_eq": version},
            "project": {"name": {"_eq": project}},
        }
    }
    ordering = {"name": EnumValue("asc"), "version": EnumValue("desc")}
    task_fields = {
        "name": True,
        "created": True,
        "slug": True,
        "description": True,
        "type": True,
        "max_retries": True,
        "retry_delay": True,
        "mapped": True,
    }
    selector = with_args(
        "flow",
        {"where": filters, "order_by": ordering, "distinct_on": EnumValue("name")},
    )

    response = Client().graphql({"query": {selector: {"tasks": task_fields}}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Tasks of the first matching flow, echoed one per line.
    flow_tasks = matches[0].tasks
    if not flow_tasks:
        click.secho("No tasks found for flow {}".format(name), fg="red")
        return

    for task_entry in flow_tasks:
        click.echo(task_entry)
def __init__(
    self,
    task: Task,
    state_handlers: Iterable[Callable] = None,
    flow_result: Result = None,
) -> None:
    """
    Create a `Client` for this runner, then initialize the parent runner.

    Args:
        - task (Task): passed to the parent initializer as `task`
        - state_handlers (Iterable[Callable], optional): passed to the parent
            initializer as `state_handlers`
        - flow_result (Result, optional): passed to the parent initializer as
            `flow_result`
    """
    self.client = Client()
    parent_kwargs = dict(
        task=task,
        state_handlers=state_handlers,
        flow_result=flow_result,
    )
    super().__init__(**parent_kwargs)
def __init__(
    self,
    task: Task,
    state_handlers: Iterable[Callable] = None,
    result_handler: ResultHandler = None,
) -> None:
    """
    Create a `Client` for this runner, then initialize the parent runner.

    Args:
        - task (Task): passed to the parent initializer as `task`
        - state_handlers (Iterable[Callable], optional): passed to the parent
            initializer as `state_handlers`
        - result_handler (ResultHandler, optional): passed to the parent
            initializer as `result_handler`
    """
    self.client = Client()
    parent_kwargs = dict(
        task=task,
        state_handlers=state_handlers,
        result_handler=result_handler,
    )
    super().__init__(**parent_kwargs)
def test_deserialize_cloud_result_handler(self):
    """A dump/load round trip restores the handler but not its `_client`."""
    schema = ResultHandlerSchema()
    original = CloudResultHandler(result_handler_service="http://foo.bar")
    original._client = Client()

    dumped = schema.dump(original)
    restored = schema.load(dumped)

    assert isinstance(restored, CloudResultHandler)
    assert hasattr(restored, "logger")
    assert restored.logger.name == "prefect.CloudResultHandler"
    assert restored.result_handler_service == "http://foo.bar"
    assert restored._client is None
def flows(name, version, project, output):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
        --output, -o    TEXT    Output format, one of {'json', 'yaml'}.
                                Defaults to json.
    """
    # The ordering plus `distinct_on` keeps one row per flow name.
    query_args = {
        "where": {
            "_and": {
                "name": {"_eq": name},
                "version": {"_eq": version},
                "project": {"name": {"_eq": project}},
            }
        },
        "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
        "distinct_on": EnumValue("name"),
    }
    fields = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }

    response = Client().graphql({"query": {with_args("flow", query_args): fields}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return
    display_output(matches[0].to_dict(), output=output)
def _execute_flow_run():
    """
    Load the flow for the flow run in `prefect.context` and run its environment.

    Raises:
        - Exception: if no `flow_run_id` is present in `prefect.context`
        - ValueError: if the backend returns no flow run for that ID

    Any error raised while loading or executing the flow marks the flow run
    as `Failed`, is echoed, and is then re-raised.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        no_context = "Not currently executing a flow within a Cloud context."
        click.echo(no_context)
        raise Exception(no_context)

    selector = with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}})
    query = {
        "query": {
            selector: {
                "flow": {"name": True, "storage": True, "environment": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        not_found = "Flow run {} not found".format(flow_run_id)
        click.echo(not_found)
        raise ValueError(not_found)

    try:
        flow_data = flow_run[0].flow
        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret_name in storage.secrets:
            secrets[secret_name] = PrefectSecret(name=secret_name).run()

        with prefect.context(secrets=secrets, loading_flow=True):
            flow = storage.get_flow(storage.flows[flow_data.name])
            flow_environment = flow.environment

            flow_environment.setup(flow)
            flow_environment.execute(flow)
    except Exception as exc:
        # Mark the flow run as Failed before surfacing the error.
        msg = "Failed to load and execute Flow's environment: {}".format(repr(exc))
        failed = prefect.engine.state.Failed(message=msg)
        client.set_flow_run_state(flow_run_id=flow_run_id, state=failed)
        click.echo(str(exc))
        raise exc
def flow_runs(name, flow_name, output):
    """
    Describe a Prefect flow run.

    \b
    Options:
        --name, -n          TEXT    A flow run name to query [required]
        --flow-name, -fn    TEXT    A flow name to query
        --output, -o        TEXT    Output format, one of {'json', 'yaml'}.
                                    Defaults to json.
    """
    filters = {
        "_and": {
            "name": {"_eq": name},
            "flow": {"name": {"_eq": flow_name}},
        }
    }
    fields = {
        "name": True,
        "flow": {"name": True},
        "created": True,
        "parameters": True,
        "auto_scheduled": True,
        "scheduled_start_time": True,
        "start_time": True,
        "end_time": True,
        "duration": True,
        "heartbeat": True,
        "serialized_state": True,
    }
    selector = with_args("flow_run", {"where": filters})

    response = Client().graphql({"query": {selector: fields}})
    matches = response.data.flow_run

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return
    # Only the first match is displayed.
    display_output(matches[0].to_dict(), output=output)
def create_prefect_project(environment: str, prefect_token_secret_name: str):
    """
    Create the Prefect project named `<environment>_dataflow_automation`.

    The client is authenticated with the token returned by
    `get_prefect_token` for the given secret name.

    Parameters:
        environment [str] -- environment to create the prefect project
        prefect_token_secret_name [str] -- aws secret name for the prefect token
    """
    api_token = get_prefect_token(secret_name=prefect_token_secret_name)
    project_name = f"{environment}_dataflow_automation"
    Client(api_token=api_token).create_project(project_name=project_name)
def load_active_run_config():
    """
    Fetch the `run_config` of the flow run identified by
    `prefect.context.flow_run_id` and deserialize it with `RunConfigSchema`.
    """
    client = Client()
    selector = with_args("flow_run_by_pk", {"id": prefect.context.flow_run_id})
    response = client.graphql({"query": {selector: {"run_config": True}}})
    serialized = response.data.flow_run_by_pk.run_config
    return RunConfigSchema().load(serialized)
def load_and_run_flow() -> None:
    """
    Loads a flow (and the corresponding environment), then runs the flow with
    the environment.

    This is useful for environments whose `execute` method schedules a job that
    later needs to run the flow.

    Any exception is logged through the "Environment" logger and re-raised.

    Raises:
        - ValueError: if no `flow_run_id` is found in context
    """
    logger = logging.get_logger("Environment")
    try:
        flow_run_id = prefect.context.get("flow_run_id")
        if not flow_run_id:
            raise ValueError("No flow run ID found in context.")

        selector = with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}})
        query = {"query": {selector: {"flow": {"name": True, "storage": True}}}}

        response = Client().graphql(query)
        flow_data = response.data.flow_run[0].flow

        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret_name in storage.secrets:
            secrets[secret_name] = prefect.tasks.secrets.PrefectSecret(
                name=secret_name
            ).run()

        # Load and run the flow with the collected secrets in context.
        with prefect.context(secrets=secrets):
            flow = storage.get_flow(flow_data.name)
            flow.environment.run(flow)
    except Exception as exc:
        logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def project(name, description, skip_if_exists):
    """
    Create projects with the Prefect API that organize flows. Does nothing if
    the project already exists.

    \b
    Arguments:
        name                TEXT    The name of a project to create     [required]

    \b
    Options:
        --description, -d   TEXT    A project description
        --skip-if-exists            Optionally skip creation call if project already exists

    """
    if skip_if_exists:
        # Look the project up by name first and stop early if it is there.
        selector = with_args("project", {"where": {"name": {"_eq": name}}})
        lookup = Client().graphql(query={"query": {selector: {"id": True}}})
        if lookup.data.project:
            click.secho("{} already exists".format(name), fg="green")
            return

    try:
        Client().create_project(project_name=name, project_description=description)
    except ClientError as exc:
        click.echo(f"{type(exc).__name__}: {exc}")
        click.secho("Error creating project", fg="red")
        return

    click.secho("{} created".format(name), fg="green")
def flow_runs(name, flow_name):
    """
    Describe a Prefect flow run.

    \b
    Options:
        --name, -n          TEXT    A flow run name to query [required]
        --flow-name, -fn    TEXT    A flow name to query
    """
    filters = {
        "_and": {
            "name": {"_eq": name},
            "flow": {"name": {"_eq": flow_name}},
        }
    }
    fields = {
        "name": True,
        "flow": {"name": True},
        "created": True,
        "parameters": True,
        "auto_scheduled": True,
        "scheduled_start_time": True,
        "start_time": True,
        "end_time": True,
        "duration": True,
        "heartbeat": True,
        "serialized_state": True,
    }
    selector = with_args("flow_run", {"where": filters})

    response = Client().graphql({"query": {selector: fields}})
    matches = response.data.flow_run

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return
    # Only the first match is echoed.
    click.echo(matches[0])