def __init__(
    self,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
) -> None:
    """
    Set up the agent's name, labels, API client and logger.

    Args:
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; when falsy, the `labels`
            entry of `config.cloud.agent` is parsed with `ast.literal_eval`
        - env_vars (dict, optional): stored on the agent as `env_vars`; an empty
            dict is used when falsy
    """
    self.name = name or config.cloud.agent.get("name", "agent")

    if not labels:
        # The config value is a string literal such as "[]", so it must be parsed
        labels = ast.literal_eval(config.cloud.agent.get("labels", "[]"))
    self.labels = list(labels)

    self.env_vars = env_vars or dict()
    self.log_to_cloud = config.logging.log_to_cloud

    auth_token = config.cloud.agent.get("auth_token")
    self.client = Client(api_token=auth_token)
    self._verify_token(auth_token)

    agent_logger = logging.getLogger(self.name)
    agent_logger.setLevel(config.cloud.agent.get("level"))

    # Only attach a stdout handler when the named logger has no stream handler yet
    already_streaming = any(
        isinstance(existing, logging.StreamHandler)
        for existing in agent_logger.handlers
    )
    if not already_streaming:
        stdout_handler = logging.StreamHandler(sys.stdout)
        utc_formatter = logging.Formatter(context.config.logging.format)
        utc_formatter.converter = time.gmtime  # type: ignore
        stdout_handler.setFormatter(utc_formatter)
        agent_logger.addHandler(stdout_handler)

    self.logger = agent_logger
    self.submitting_flow_runs = set()  # type: Set[str]
def __init__(self, flow: Flow, state_handlers: Iterable[Callable] = None) -> None:
    """
    Create a `Client` for this runner and initialize the parent runner.

    Args:
        - flow (Flow): forwarded to the parent constructor
        - state_handlers (Iterable[Callable], optional): forwarded to the parent
            constructor
    """
    self.client = Client()
    # Task runs are always delegated to CloudTaskRunner
    parent_kwargs = dict(
        flow=flow,
        task_runner_cls=CloudTaskRunner,
        state_handlers=state_handlers,
    )
    super().__init__(**parent_kwargs)
def run(self, flow_run_id: str, flow_run_name: str) -> bool:
    """
    Rename a flow run through the Prefect client.

    Args:
        - flow_run_id (str): The ID of the flow run to rename; must not be `None`
        - flow_run_name (str): The new flow run name; must not be `None`

    Returns:
        - bool: the value returned by `Client.set_flow_run_name`, representing
            whether the flow run was renamed successfully or not

    Raises:
        - ValueError: If `flow_run_id` or `flow_run_name` is `None`

    Example:
        ```python
        from prefect.tasks.prefect.flow_rename import FlowRenameTask

        rename_flow = FlowRenameTask(flow_run_id="id123", flow_name="A new flow run name")
        ```
    """
    # Validate in the same order as the arguments so the first missing one is reported
    required = (
        (flow_run_id, "Must provide a flow run ID."),
        (flow_run_name, "Must provide a flow name."),
    )
    for value, complaint in required:
        if value is None:
            raise ValueError(complaint)

    return Client().set_flow_run_name(flow_run_id, flow_run_name)
def cloud_flow():
    """
    Execute a flow's environment in the context of Prefect Cloud.

    Note: this is a command that runs during Cloud execution of flows and is not meant
    for local use.
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        missing_context = "Not currently executing a flow within a Cloud context."
        click.echo(missing_context)
        raise Exception(missing_context)

    # Look up the flow run by ID, pulling the flow's storage and the run version
    run_filter = {"where": {"id": {"_eq": flow_run_id}}}
    query = {
        "query": {
            with_args("flow_run", run_filter): {
                "flow": {"name": True, "storage": True, "environment": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        not_found = "Flow run {} not found".format(flow_run_id)
        click.echo(not_found)
        raise ValueError(not_found)

    try:
        flow_data = flow_run[0].flow
        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )

        flow = storage.get_flow(storage.flows[flow_data.name])
        environment = flow.environment

        environment.setup(storage=storage)
        environment.execute(
            storage=storage, flow_location=storage.flows[flow_data.name]
        )
    except Exception as exc:
        # Report the failure to the backend before re-raising it to the caller
        failed_state = prefect.engine.state.Failed(
            message="Failed to load and execute Flow's environment: {}".format(
                repr(exc)
            )
        )
        version = result.data.flow_run[0].version
        client.set_flow_run_state(
            flow_run_id=flow_run_id, version=version, state=failed_state
        )
        click.echo(str(exc))
        raise exc
def __init__(
    self,
    task: Task,
    state_handlers: Iterable[Callable] = None,
    flow_result: Result = None,
) -> None:
    """
    Create a `Client` for this runner and initialize the parent runner.

    Args:
        - task (Task): forwarded to the parent constructor
        - state_handlers (Iterable[Callable], optional): forwarded to the parent
            constructor
        - flow_result (Result, optional): forwarded to the parent constructor
    """
    self.client = Client()
    parent_kwargs = dict(
        task=task,
        state_handlers=state_handlers,
        flow_result=flow_result,
    )
    super().__init__(**parent_kwargs)
def __init__(
    self,
    task: Task,
    state_handlers: Iterable[Callable] = None,
    result_handler: ResultHandler = None,
) -> None:
    """
    Create a `Client` for this runner and initialize the parent runner.

    Args:
        - task (Task): forwarded to the parent constructor
        - state_handlers (Iterable[Callable], optional): forwarded to the parent
            constructor
        - result_handler (ResultHandler, optional): forwarded to the parent
            constructor
    """
    self.client = Client()
    parent_kwargs = dict(
        task=task,
        state_handlers=state_handlers,
        result_handler=result_handler,
    )
    super().__init__(**parent_kwargs)
class CloudFlowRunner(FlowRunner):
    """
    FlowRunners handle the execution of Flows and determine the State of a Flow
    before, during and after the Flow is run.

    In particular, through the FlowRunner you can specify which tasks should be
    the first tasks to run, which tasks should be returned after the Flow is finished,
    and what states each task should be initialized with.

    Args:
        - flow (Flow): the `Flow` to be run
        - state_handlers (Iterable[Callable], optional): A list of state change handlers
            that will be called whenever the flow changes state, providing an
            opportunity to inspect or modify the new state. The handler
            will be passed the flow runner instance, the old (prior) state, and the new
            (current) state, with the following signature:

            ```
                state_handler(
                    flow_runner: FlowRunner,
                    old_state: State,
                    new_state: State) -> State
            ```

            If multiple functions are passed, then the `new_state` argument will be the
            result of the previous handler.

    Note: new FlowRunners are initialized within the call to `Flow.run()` and in general,
    this is the endpoint through which FlowRunners will be interacted with most frequently.

    Example:
    ```python
    @task
    def say_hello():
        print('hello')

    with Flow("My Flow") as f:
        say_hello()

    fr = FlowRunner(flow=f)
    flow_state = fr.run()
    ```
    """

    def __init__(self, flow: Flow, state_handlers: Iterable[Callable] = None) -> None:
        # Each runner owns a Client; `_heartbeat` below uses it to report heartbeats
        self.client = Client()
        # Task runs are always delegated to CloudTaskRunner
        super().__init__(
            flow=flow, task_runner_cls=CloudTaskRunner, state_handlers=state_handlers
        )

    def _heartbeat(self) -> bool:
        # NOTE(review): this snippet ends inside the `try` block; the handler and the
        # return statement are not visible here, so the failure behavior is unknown.
        try:
            # use empty string for testing purposes
            flow_run_id = prefect.context.get("flow_run_id", "")  # type: str
            # Report a heartbeat for the flow run found in the current context
            self.client.update_flow_run_heartbeat(flow_run_id)
def _execute_flow_run():
    """
    Load the flow for the flow run in context and run its environment.

    The flow run ID is read from `prefect.context`. The flow's storage is loaded
    from the backend, the secrets named by the storage are resolved into the
    context, and the flow's environment is then set up and executed. Any failure
    while loading or executing is reported as a `Failed` flow run state before
    the exception is re-raised.

    Raises:
        - Exception: if there is no `flow_run_id` in context
        - ValueError: if the backend returns no flow run for the ID
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        missing_context = "Not currently executing a flow within a Cloud context."
        click.echo(missing_context)
        raise Exception(missing_context)

    run_filter = {"where": {"id": {"_eq": flow_run_id}}}
    query = {
        "query": {
            with_args("flow_run", run_filter): {
                "flow": {"name": True, "storage": True, "environment": True},
                "version": True,
            }
        }
    }

    client = Client()
    result = client.graphql(query)
    flow_run = result.data.flow_run

    if not flow_run:
        not_found = "Flow run {} not found".format(flow_run_id)
        click.echo(not_found)
        raise ValueError(not_found)

    try:
        flow_data = flow_run[0].flow
        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret_name in storage.secrets:
            secrets[secret_name] = PrefectSecret(name=secret_name).run()

        with prefect.context(secrets=secrets, loading_flow=True):
            flow = storage.get_flow(storage.flows[flow_data.name])
            environment = flow.environment

            environment.setup(flow)
            environment.execute(flow)
    except Exception as exc:
        # Report the failure to the backend before re-raising it to the caller
        failed_state = prefect.engine.state.Failed(
            message="Failed to load and execute Flow's environment: {}".format(
                repr(exc)
            )
        )
        client.set_flow_run_state(flow_run_id=flow_run_id, state=failed_state)
        click.echo(str(exc))
        raise exc
def load_and_run_flow() -> None:
    """
    Loads a flow (and the corresponding environment), then runs the flow with
    the environment.

    This is useful for environments whose `execute` method schedules a job that
    later needs to run the flow. Every exception raised along the way is logged
    with its traceback and then re-raised.

    Raises:
        - ValueError: if no `flow_run_id` is found in context
    """
    logger = logging.get_logger("Environment")
    try:
        flow_run_id = prefect.context.get("flow_run_id")
        if not flow_run_id:
            raise ValueError("No flow run ID found in context.")

        run_filter = {"where": {"id": {"_eq": flow_run_id}}}
        query = {
            "query": {
                with_args("flow_run", run_filter): {
                    "flow": {"name": True, "storage": True},
                }
            }
        }

        response = Client().graphql(query)
        flow_data = response.data.flow_run[0].flow

        storage = prefect.serialization.storage.StorageSchema().load(
            flow_data.storage
        )

        # populate global secrets
        secrets = prefect.context.get("secrets", {})
        for secret_name in storage.secrets:
            secrets[secret_name] = prefect.tasks.secrets.PrefectSecret(
                name=secret_name
            ).run()

        with prefect.context(secrets=secrets):
            flow = storage.get_flow(flow_data.name)
            flow.environment.run(flow)
    except Exception as exc:
        logger.exception("Unexpected error raised during flow run: {}".format(exc))
        raise exc
def create_prefect_project(environment: str, prefect_token_secret_name: str):
    """
    Create the Prefect project named `<environment>_dataflow_automation`.

    Parameters:
        environment [str] -- environment to create the prefect project
        prefect_token_secret_name [str] -- secret name passed to
            `get_prefect_token` to obtain the Prefect API token
    """
    api_token = get_prefect_token(secret_name=prefect_token_secret_name)
    prefect_client = Client(api_token=api_token)
    project_name = f"{environment}_dataflow_automation"
    prefect_client.create_project(project_name=project_name)
def load_active_run_config():
    """
    Fetch and deserialize the run config of the flow run in context.

    The flow run is looked up by primary key using `prefect.context.flow_run_id`,
    and its `run_config` field is loaded with `RunConfigSchema`.

    Returns:
        - the object produced by `RunConfigSchema().load`
    """
    client = Client()
    selector = with_args("flow_run_by_pk", {"id": prefect.context.flow_run_id})
    response = client.graphql({"query": {selector: {"run_config": True}}})
    serialized_run_config = response.data.flow_run_by_pk.run_config
    return RunConfigSchema().load(serialized_run_config)
def __init__(
    self,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
    agent_address: str = None,
    no_cloud_logs: bool = False,
) -> None:
    """
    Set up the agent's settings, logger and API client.

    Args:
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; falls back to the `labels`
            config entry, which is parsed with `ast.literal_eval` if it is a string
        - env_vars (dict, optional): stored as `env_vars`; falls back to the
            `env_vars` config entry, then to an empty dict
        - max_polls (int, optional): stored as `max_polls`
        - agent_address (str, optional): stored as `agent_address`; falls back to
            the `agent_address` config entry, then to `""`
        - no_cloud_logs (bool, optional): when truthy, `log_to_cloud` is `False`
    """
    self.name = name or config.cloud.agent.get("name", "agent")

    resolved_labels = labels or config.cloud.agent.get("labels", [])
    # quick hack in case config has not been evaluated to a list yet
    if isinstance(resolved_labels, str):
        resolved_labels = ast.literal_eval(resolved_labels)
    self.labels = resolved_labels

    self.env_vars = env_vars or config.cloud.agent.get("env_vars", dict())
    self.max_polls = max_polls
    self.log_to_cloud = not no_cloud_logs
    self.agent_address = agent_address or config.cloud.agent.get("agent_address", "")

    self._api_server = None  # type: ignore
    self._api_server_loop = None  # type: Optional[IOLoop]
    self._api_server_thread = None  # type: Optional[threading.Thread]

    agent_logger = logging.getLogger(self.name)
    agent_logger.setLevel(config.cloud.agent.get("level"))

    # Only attach a stdout handler when the named logger has no stream handler yet
    already_streaming = any(
        isinstance(existing, logging.StreamHandler)
        for existing in agent_logger.handlers
    )
    if not already_streaming:
        stdout_handler = logging.StreamHandler(sys.stdout)
        utc_formatter = logging.Formatter(context.config.logging.format)
        utc_formatter.converter = time.gmtime  # type: ignore
        stdout_handler.setFormatter(utc_formatter)
        agent_logger.addHandler(stdout_handler)

    self.logger = agent_logger
    self.submitting_flow_runs = set()  # type: Set[str]

    self.logger.debug("Verbose logs enabled")
    self.logger.debug(f"Environment variables: {[*self.env_vars]}")
    self.logger.debug(f"Max polls: {self.max_polls}")
    self.logger.debug(f"Agent address: {self.agent_address}")
    self.logger.debug(f"Log to Cloud: {self.log_to_cloud}")

    auth_token = config.cloud.agent.get("auth_token")

    self.logger.debug(f"Prefect backend: {config.backend}")

    self.client = Client(api_token=auth_token)
    if config.backend == "cloud":
        # Token verification and agent registration only happen on the cloud backend
        self._verify_token(auth_token)
        self.client.attach_headers({"X-PREFECT-AGENT-ID": self._register_agent()})
def __init__(
    self,
    agent_config_id: str = None,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
    agent_address: str = None,
    no_cloud_logs: bool = None,
    flow_run_batch_limit: bool = None,
) -> None:
    """
    Set up the agent's API client, settings, background-task slots and logger.

    Args:
        - agent_config_id (str, optional): stored as `agent_config_id`
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; falls back to a list built
            from the `labels` config entry
        - env_vars (dict, optional): stored as `env_vars`; falls back to the
            `env_vars` config entry, then to an empty dict
        - max_polls (int, optional): stored as `max_polls`
        - agent_address (str, optional): stored as `agent_address`; falls back to
            the `agent_address` config entry
        - no_cloud_logs (bool, optional): when `None`, `log_to_cloud` is read from
            `config.cloud.send_flow_run_logs`; otherwise it is the negation of this
            flag
        - flow_run_batch_limit (bool, optional): stored as `flow_run_batch_limit`
    """
    # NOTE(review): `flow_run_batch_limit` is annotated `bool` but the name suggests
    # a count; confirm the intended type against the code that consumes it.

    # Load token for backwards compatibility
    legacy_token = config.cloud.agent.get("auth_token")
    # Auth with an API key will be loaded from the config or disk by the Client
    self.client = Client(api_server=config.cloud.api, api_token=legacy_token)

    self.agent_config_id = agent_config_id
    self._agent_config: Optional[dict] = None

    self.name = name or config.cloud.agent.get("name", "agent")
    self.labels = labels or list(config.cloud.agent.get("labels", []))
    self.env_vars = env_vars or config.cloud.agent.get("env_vars", dict())
    self.max_polls = max_polls
    self.flow_run_batch_limit = flow_run_batch_limit

    # An unset flag defers to the config; an explicit flag always wins
    self.log_to_cloud = (
        config.cloud.send_flow_run_logs if no_cloud_logs is None else not no_cloud_logs
    )

    self.heartbeat_period = 60  # exposed for testing
    self.agent_address = agent_address or config.cloud.agent.get("agent_address")

    # These track background task objects so we can tear them down on exit
    self._api_server: Optional[HTTPServer] = None
    self._api_server_loop: Optional[IOLoop] = None
    self._api_server_thread: Optional[threading.Thread] = None
    self._heartbeat_thread: Optional[threading.Thread] = None

    # Create the default logger
    self.logger = self._get_logger()

    # Store a set of flows that are being submitted to prevent duplicate submissions
    self.submitting_flow_runs: Set[str] = set()

    # Log configuration options
    self.logger.debug(f"Environment variables: {[*self.env_vars]}")
    self.logger.debug(f"Max polls: {self.max_polls}")
    self.logger.debug(f"Agent address: {self.agent_address}")
    self.logger.debug(f"Log to Cloud: {self.log_to_cloud}")
    self.logger.debug(f"Prefect backend: {config.backend}")
def __init__(self) -> None:
    """
    Create the agent's API client and its DEBUG-level `"agent"` logger.

    The client is authenticated with the `auth_token` entry of
    `config.cloud.agent`. A stream handler is attached to the logger only when
    it has none yet.
    """
    self.client = Client(api_token=config.cloud.agent.get("auth_token"))

    logger = logging.getLogger("agent")
    logger.setLevel(logging.DEBUG)

    # `logging.getLogger` returns the same logger object for a given name, so adding
    # a handler unconditionally would emit every message once per agent created.
    if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    self.logger = logger
def emit(self, record) -> None:  # type: ignore
    """
    Post the attributes of `record` to the logger server, best effort.

    The client is created lazily on first use. Once any error occurs,
    `errored_out` is set and every later call returns without doing anything.

    Args:
        - record: the log record whose `__dict__` is sent as keyword arguments
            to `Client.post`
    """
    try:
        if self.errored_out is True:
            return

        from prefect.client import Client

        if self.client is None:
            self.client = Client()  # type: ignore
        assert isinstance(self.client, Client)  # mypy assert

        self.client.post(path="", server=self.logger_server, **record.__dict__)
    except Exception:
        # Logging must never break the caller, but KeyboardInterrupt and SystemExit
        # are deliberately not caught so that shutdown requests still propagate.
        self.errored_out = True
def emit(self, record):
    """
    Post the attributes of `record` to the logger server, best effort.

    The client is created lazily on first use. Once any error occurs,
    `errored_out` is set and every later call returns without doing anything.

    Args:
        - record: the log record whose `__dict__` is sent as keyword arguments
            to `Client.post`
    """
    try:
        if self.errored_out is True:
            return

        if self.client is None:
            # Imported here rather than at module import time
            from prefect.client import Client

            self.client = Client()

        self.client.post(path="", server=self.logger_server, **record.__dict__)
    except Exception:
        # Logging must never break the caller, but KeyboardInterrupt and SystemExit
        # are deliberately not caught so that shutdown requests still propagate.
        self.errored_out = True
class CloudHandler(logging.StreamHandler):
    """
    Logging handler that writes log records to the backend through
    `Client.write_run_log`.

    Failures to write a log are reported on the handler's own `"CloudHandler"`
    logger instead of being raised to the code that logged the record.
    """

    def __init__(self) -> None:
        super().__init__()
        # Created lazily in `emit`
        self.client = None
        self.logger = logging.getLogger("CloudHandler")

        # `logging.getLogger` returns the same logger object for a given name, so
        # adding a handler unconditionally would repeat each internal message once
        # per CloudHandler created.
        if not any(
            isinstance(h, logging.StreamHandler) for h in self.logger.handlers
        ):
            handler = logging.StreamHandler()
            formatter = logging.Formatter(context.config.logging.format)
            formatter.converter = time.gmtime  # type: ignore
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        self.logger.setLevel(context.config.logging.level)

    def emit(self, record) -> None:  # type: ignore
        """
        Write `record` to the backend, tagged with the flow run and task run IDs
        found in `prefect.context`.

        Does nothing when `log_to_cloud` is disabled in the context config.

        Args:
            - record: the log record to write
        """
        # if we shouldn't log to cloud, don't emit
        if not prefect.context.config.logging.log_to_cloud:
            return

        try:
            from prefect.client import Client

            if self.client is None:
                self.client = Client()  # type: ignore

            assert isinstance(self.client, Client)  # mypy assert

            record_dict = record.__dict__.copy()
            flow_run_id = prefect.context.get("flow_run_id", None)
            task_run_id = prefect.context.get("task_run_id", None)
            timestamp = pendulum.from_timestamp(
                record_dict.get("created", time.time())
            )
            name = record_dict.get("name", None)
            message = record_dict.get("message", None)
            level = record_dict.get("levelname", None)

            if record_dict.get("exc_text") is not None:
                # `message` may be None here; concatenating onto None would raise a
                # TypeError and the traceback text would never be written.
                message = (message or "") + "\n" + record_dict["exc_text"]
                record_dict.pop("exc_info", None)

            self.client.write_run_log(
                flow_run_id=flow_run_id,
                task_run_id=task_run_id,
                timestamp=timestamp,
                name=name,
                message=message,
                level=level,
                info=record_dict,
            )
        except Exception as exc:
            self.logger.critical("Failed to write log with error: {}".format(str(exc)))
def run(self, flow_run_id: str = None) -> bool:
    """
    Cancel a flow run through the Prefect client.

    Args:
        - flow_run_id (str, optional): The ID of the flow run to cancel; when
            falsy, the `flow_run_id` found in `prefect.context` is used instead

    Returns:
        - bool: the value returned by `Client.cancel_flow_run`, i.e. whether the
            flow run was canceled successfully or not

    Raises:
        - ValueError: if no flow run ID is given and none is found in context
    """
    target_run_id = flow_run_id or prefect.context.get("flow_run_id")
    if target_run_id:
        return Client().cancel_flow_run(target_run_id)
    raise ValueError("Can't cancel a flow run without flow run ID.")
def __init__(
    self,
    agent_config_id: str = None,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
    agent_address: str = None,
    no_cloud_logs: bool = False,
) -> None:
    """
    Set up the agent's API client, settings, background-task slots and logger.

    Args:
        - agent_config_id (str, optional): stored as `agent_config_id`
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
        - labels (Iterable[str], optional): agent labels; falls back to a list built
            from the `labels` config entry
        - env_vars (dict, optional): stored as `env_vars`; falls back to the
            `env_vars` config entry, then to an empty dict
        - max_polls (int, optional): stored as `max_polls`
        - agent_address (str, optional): stored as `agent_address`; falls back to
            the `agent_address` config entry, then to `""`
        - no_cloud_logs (bool, optional): when truthy, `log_to_cloud` is `False`
    """
    # Load token and initialize client
    auth_token = config.cloud.agent.get("auth_token")
    self.client = Client(api_server=config.cloud.api, api_token=auth_token)

    self.agent_config_id = agent_config_id
    self.name = name or config.cloud.agent.get("name", "agent")
    self.labels = labels or list(config.cloud.agent.get("labels", []))
    self.env_vars = env_vars or config.cloud.agent.get("env_vars", dict())
    self.max_polls = max_polls
    self.log_to_cloud = not no_cloud_logs
    self.heartbeat_period = 60  # exposed for testing
    self.agent_address = agent_address or config.cloud.agent.get("agent_address", "")

    self._api_server = None  # type: ignore
    self._api_server_loop = None  # type: Optional[IOLoop]
    self._api_server_thread = None  # type: Optional[threading.Thread]
    self._heartbeat_thread = None  # type: Optional[threading.Thread]

    agent_logger = logging.getLogger(self.name)
    agent_logger.setLevel(config.cloud.agent.get("level"))

    # Only attach a stdout handler when the named logger has no stream handler yet
    already_streaming = any(
        isinstance(existing, logging.StreamHandler)
        for existing in agent_logger.handlers
    )
    if not already_streaming:
        stdout_handler = logging.StreamHandler(sys.stdout)
        utc_formatter = logging.Formatter(context.config.logging.format)
        utc_formatter.converter = time.gmtime  # type: ignore
        stdout_handler.setFormatter(utc_formatter)
        agent_logger.addHandler(stdout_handler)

    self.logger = agent_logger
    self.submitting_flow_runs = set()  # type: Set[str]

    self.logger.debug("Verbose logs enabled")
    self.logger.debug(f"Environment variables: {[*self.env_vars]}")
    self.logger.debug(f"Max polls: {self.max_polls}")
    self.logger.debug(f"Agent address: {self.agent_address}")
    self.logger.debug(f"Log to Cloud: {self.log_to_cloud}")
    self.logger.debug(f"Prefect backend: {config.backend}")
def list_keys() -> List[str]:
    """
    List all keys

    Returns:
        - keys (list): A sorted list of the keys returned by the `key_value` query

    Raises:
        - ClientError: if using Prefect Server instead of Cloud
    """
    if prefect.config.backend != "cloud":
        raise ClientError(NON_CLOUD_BACKEND_ERROR_MESSAGE)

    response = Client().graphql({"query": {"key_value": {"key"}}})  # type: ignore
    keys = [entry["key"] for entry in response.data.key_value]
    keys.sort()
    return keys
def execute_cloud_flow():
    """
    Load the storage and environment of the flow run in context and execute the
    environment.

    The flow run ID is read from `prefect.context`; when it is missing a message
    is echoed and the function returns without doing anything.

    Raises:
        - ValueError: if the backend returns no flow run for the ID in context
    """
    flow_run_id = prefect.context.get("flow_run_id")
    if not flow_run_id:
        click.echo("Not currently executing a flow within a cloud context.")
        return

    query = {
        "query": {
            with_args("flow_run", {"where": {"id": {"_eq": flow_run_id}}}): {
                "flow": {"name": True, "storage": True, "environment": True}
            }
        }
    }

    result = Client().graphql(query)
    flow_runs = result.data.flow_run

    # Without this check an unknown ID surfaced as a bare IndexError from `[0]`
    if not flow_runs:
        not_found = "Flow run {} not found".format(flow_run_id)
        click.echo(not_found)
        raise ValueError(not_found)

    flow_data = flow_runs[0].flow

    storage_schema = prefect.serialization.storage.StorageSchema()
    storage = storage_schema.load(flow_data.storage)

    environment_schema = prefect.serialization.environment.EnvironmentSchema()
    environment = environment_schema.load(flow_data.environment)

    environment.execute(storage=storage, flow_location=storage.flows[flow_data.name])
def flows(name, version, project):
    """
    Describe a Prefect flow.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Newest version first, one row per flow name
    selector = with_args(
        "flow",
        {
            "where": {
                "_and": {
                    "name": {"_eq": name},
                    "version": {"_eq": version},
                    "project": {"name": {"_eq": project}},
                }
            },
            "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
            "distinct_on": EnumValue("name"),
        },
    )
    requested_fields = {
        "name": True,
        "version": True,
        "project": {"name": True},
        "created": True,
        "description": True,
        "parameters": True,
        "archived": True,
        "storage": True,
        "environment": True,
    }

    response = Client().graphql({"query": {selector: requested_fields}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return
    click.echo(matches[0])
def emit(self, record) -> None:  # type: ignore
    """
    Convert `record` into a log dictionary and hand it to `self.put`.

    The dictionary carries the flow run and task run IDs found in
    `prefect.context`, an ISO-formatted timestamp, the logger name, message and
    level, and the remaining record attributes under `info`. Does nothing when
    `log_to_cloud` is disabled in the context config. If building the log fails,
    the error is reported on `self.logger` and an error log is put instead.

    Args:
        - record: the log record to convert
    """
    # if we shouldn't log to cloud, don't emit
    if not prefect.context.config.logging.log_to_cloud:
        return

    try:
        from prefect.client import Client

        if self.client is None:
            self.client = Client()  # type: ignore

        assert isinstance(self.client, Client)  # mypy assert

        record_dict = record.__dict__.copy()
        log = dict()
        log["flow_run_id"] = prefect.context.get("flow_run_id", None)
        log["task_run_id"] = prefect.context.get("task_run_id", None)
        log["timestamp"] = pendulum.from_timestamp(
            record_dict.pop("created", time.time())
        ).isoformat()
        log["name"] = record_dict.pop("name", None)
        log["message"] = record_dict.pop("message", None)
        log["level"] = record_dict.pop("levelname", None)

        if record_dict.get("exc_text") is not None:
            # The message may be None here; concatenating onto None would raise a
            # TypeError and the traceback text would never be written.
            log["message"] = (
                (log["message"] or "") + "\n" + record_dict.pop("exc_text", "")
            )
            record_dict.pop("exc_info", None)

        log["info"] = record_dict
        self.put(log)
    except Exception as exc:
        message = "Failed to write log with error: {}".format(str(exc))
        self.logger.critical(message)

        self.put(self._make_error_log(message))
def _hash_flow(self, flow: Flow) -> str:
    """
    Compute a stable identifier for the flow group that `flow` belongs to.

    In Prefect Cloud, all versions of a flow in a project are tied together
    by a `flow_group_id`. This is the unique identifier used to store flows
    in Saturn.

    Since this library registers a flow with Saturn Cloud before registering
    it with Prefect Cloud, it can't rely on the `flow_group_id` generated by
    Prefect Cloud. Instead, this function hashes these pieces of information
    that uniquely identify a flow group:

    * project name
    * flow name
    * tenant id

    The identifier produced here should uniquely identify all versions of a
    flow with a given name, in a given Prefect Cloud project, for a given
    Prefect Cloud tenant.

    Returns:
        - str: hex-encoded SHA-256 digest of the pickled identifying values
    """
    project_name = self.prefect_cloud_project_name
    flow_name = flow.name
    tenant_id = Client()._active_tenant_id  # pylint: disable=protected-access

    # The values are pickled as a list, in this order, before hashing
    pickled_identity = cloudpickle.dumps([project_name, flow_name, tenant_id])
    return hashlib.sha256(pickled_identity).hexdigest()
def test_serialize_with_attributes(self):
    """Dumping a handler keeps its type and service URL but omits the client."""
    service_url = "http://foo.bar"
    result_handler = CloudResultHandler(result_handler_service=service_url)
    result_handler.client = Client()

    payload = ResultHandlerSchema().dump(result_handler)

    assert isinstance(payload, dict)
    assert payload["type"] == "CloudResultHandler"
    assert payload["result_handler_service"] == service_url
    assert "client" not in payload
def __init__(self, name: str = None) -> None:
    """
    Set up the agent's name, API client and logger.

    Args:
        - name (str, optional): agent name; falls back to the `name` entry of
            `config.cloud.agent`, then to `"agent"`
    """
    self.name = name or config.cloud.agent.get("name", "agent")

    token = config.cloud.agent.get("auth_token")
    self.client = Client(api_token=token)
    self._verify_token(token)

    logger = logging.getLogger(self.name)
    logger.setLevel(config.cloud.agent.get("level"))

    # `logging.getLogger` returns the same logger object for a given name, so adding
    # a handler unconditionally would emit every message once per agent created
    # with that name.
    if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
        ch = logging.StreamHandler()
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    self.logger = logger
def tasks(name, version, project):
    """
    Describe tasks from a Prefect flow. This command is similar to `prefect describe flow`
    but instead of flow metadata it outputs task metadata.

    \b
    Options:
        --name, -n      TEXT    A flow name to query [required]
        --version, -v   INTEGER A flow version to query
        --project, -p   TEXT    The name of a project to query
    """
    # Newest version first, one row per flow name
    selector = with_args(
        "flow",
        {
            "where": {
                "_and": {
                    "name": {"_eq": name},
                    "version": {"_eq": version},
                    "project": {"name": {"_eq": project}},
                }
            },
            "order_by": {"name": EnumValue("asc"), "version": EnumValue("desc")},
            "distinct_on": EnumValue("name"),
        },
    )
    task_fields = {
        "name": True,
        "created": True,
        "slug": True,
        "description": True,
        "type": True,
        "max_retries": True,
        "retry_delay": True,
        "mapped": True,
    }

    response = Client().graphql({"query": {selector: {"tasks": task_fields}}})
    matches = response.data.flow

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    flow_tasks = matches[0].tasks
    if not flow_tasks:
        click.secho("No tasks found for flow {}".format(name), fg="red")
        return

    for flow_task in flow_tasks:
        click.echo(flow_task)
def projects(name):
    """
    Query information regarding your Prefect projects.

    \b
    Options:
        --name, -n      TEXT    A project name to query
    """
    selector = with_args(
        "project",
        {
            "where": {"_and": {"name": {"_eq": name}}},
            "order_by": {"name": EnumValue("asc")},
        },
    )
    project_fields = {
        "name": True,
        "created": True,
        "description": True,
        # Count of flows per project, de-duplicated by flow name
        with_args("flows_aggregate", {"distinct_on": EnumValue("name")}): {
            EnumValue("aggregate"): EnumValue("count")
        },
    }

    response = Client().graphql({"query": {selector: project_fields}})

    rows = [
        [
            project.name,
            project.flows_aggregate.aggregate.count,
            pendulum.parse(project.created).diff_for_humans(),
            project.description,
        ]
        for project in response.data.project
    ]

    table = tabulate(
        rows,
        headers=["NAME", "FLOW COUNT", "AGE", "DESCRIPTION"],
        tablefmt="plain",
        numalign="left",
        stralign="left",
    )
    click.echo(table)
def flow_runs(name, flow_name, output):
    """
    Describe a Prefect flow run.

    \b
    Options:
        --name, -n          TEXT    A flow run name to query [required]
        --flow-name, -fn    TEXT    A flow name to query
        --output, -o        TEXT    Output style, currently supports `json`.
                                    Defaults to Python dictionary format.
    """
    selector = with_args(
        "flow_run",
        {
            "where": {
                "_and": {
                    "name": {"_eq": name},
                    "flow": {"name": {"_eq": flow_name}},
                }
            }
        },
    )
    run_fields = {
        "name": True,
        "flow": {"name": True},
        "created": True,
        "parameters": True,
        "auto_scheduled": True,
        "scheduled_start_time": True,
        "start_time": True,
        "end_time": True,
        "duration": True,
        "heartbeat": True,
        "serialized_state": True,
    }

    response = Client().graphql({"query": {selector: run_fields}})
    matches = response.data.flow_run

    if not matches:
        click.secho("{} not found".format(name), fg="red")
        return

    # Only the first match is shown, as JSON when requested
    first_match = matches[0]
    click.echo(json.dumps(first_match) if output == "json" else first_match)
def test_deserialize_cloud_result_handler(self):
    """A dump/load round trip rebuilds the handler without carrying its client."""
    schema = ResultHandlerSchema()
    service_url = "http://foo.bar"
    original = CloudResultHandler(result_handler_service=service_url)
    original._client = Client()

    serialized = schema.dump(original)
    restored = schema.load(serialized)

    assert isinstance(restored, CloudResultHandler)
    assert hasattr(restored, "logger")
    assert restored.logger.name == "prefect.CloudResultHandler"
    assert restored.result_handler_service == service_url
    assert restored._client is None