def execute(  # type: ignore
    self, storage: "Storage", flow_location: str, **kwargs: Any  # type: ignore
) -> None:
    """
    Prepare the provider kwargs, create the Dask cluster, then run the flow.

    Two best-effort preparation steps run before the cluster is created; a
    failure in either one is logged at INFO level and otherwise ignored:

    1. If an `on_execute` callable was provided, it is called with the flow
       run's parameters and `_provider_kwargs` so it can adjust the cluster
       settings for this particular run.
    2. If `_provider_kwargs` holds no (truthy) `image`, the image reference is
       built from the flow's storage metadata fetched from the API.

    Args:
        - storage (Storage): passed through to the parent `execute`
        - flow_location (str): passed through to the parent `execute`
        - **kwargs (Any): passed through to the parent `execute`
    """
    run_id = prefect.context.get("flow_run_id")
    run_info = None

    if self._on_execute:
        # Hand the run's parameters to the user callback so that it can resize
        # the cluster (by mutating _provider_kwargs) for this specific run.
        try:
            run_info = Client().get_flow_run_info(run_id)
            run_parameters = run_info.parameters or {}  # type: ignore
            self._on_execute(run_parameters, self._provider_kwargs)
        except Exception as exc:
            self.logger.info(
                "Failed to retrieve flow run info with error: {}".format(repr(exc))
            )

    # A missing key and an empty/None value are treated the same way.
    if not self._provider_kwargs.get("image"):
        # Fall back to the Flow's own image so that dependencies are identical
        # on all containers: Flow runner, Dask scheduler, and Dask workers.
        flow_id = prefect.context.get("flow_id")
        try:
            client = Client()
            if not flow_id:
                # flow_id has been observed to be None in context; recover it
                # from the flow run info (reusing the earlier lookup if any).
                if not run_info:
                    run_info = client.get_flow_run_info(run_id)
                flow_id = run_info.flow_id

            query = """query { flow(where: {id: {_eq: "%s"}}) { storage } }""" % flow_id
            response = client.graphql(query)
            flow_storage = response["data"]["flow"][0]["storage"]
            image = "{}/{}:{}".format(
                flow_storage["registry_url"],
                flow_storage["image_name"],
                flow_storage["image_tag"],
            )
            self.logger.info(
                "Using Flow's Docker image for Dask scheduler & workers: {}".format(
                    image
                )
            )
            self._provider_kwargs["image"] = image
        except Exception as exc:
            self.logger.info(
                "Failed to retrieve flow info with error: {}".format(repr(exc))
            )

    self._create_dask_cluster()

    scheduler_address = self.executor_kwargs["address"]
    self.logger.info(
        "Executing on dynamically created Dask Cluster with scheduler address: {}".format(
            scheduler_address
        )
    )
    super().execute(storage, flow_location, **kwargs)
weather_data = json.loads(response.text) logger.debug(weather_data) return weather_data else: raise Exception("Failed to query " + api_endpoint) with Flow( "Get Paris' weather", storage=Docker( base_url="unix:///var/run/docker.sock", registry_url="172.17.0.1:5000", base_image="172.17.0.1:5000/weather/base_image:latest", ignore_healthchecks=True ), ) as flow: woeid = get_woeid("Paris") weather_data = get_weather(woeid) if __name__ == "__main__": try: client = Client() client.create_project(project_name="weather") except prefect.utilities.exceptions.ClientError as e: logger.info("Project already exists") flow.register(project_name="weather", labels=["development"], add_default_labels=False) # Optionally run the code now flow.run()
# Script: attach an hourly cron schedule to the `reddit_daily` flow and deploy it.
from prefect import Client
from prefect.schedules import CronSchedule
from reddit_daily import flow

# NOTE(review): `c` is not referenced again in this script; presumably it is
# created only to validate client configuration — confirm before removing.
c = Client()

# Cron expression "0 * * * *": minute 0 of every hour.
s = CronSchedule("0 * * * *")
flow.schedule = s

# Deploy the scheduled flow into the named project.
flow.deploy(project="Dylan's Project")
# Script: look up the id of the flow(s) named `name` through the GraphQL API.
from prefect import Client
from prefect.utilities.graphql import with_args

c = Client()
name = "my_flow"

# The query is built as a nested dict; `with_args` attaches the `where` filter
# to the `flow` field. The response is not stored or printed here.
c.graphql({"query": {with_args("flow", {"where": {"name": {"_eq": name}}}): "id"}})

# Earlier attempt with a hand-written query string, left disabled:
# c.graphql({"query": "'query' {'flow'('where': { 'name': { '_eq': 'ltest' } }) {'id'}}"})
# Script: start a run of one specific, already-registered flow with a fixed
# time window and target environment.
from datetime import datetime

from prefect import Client

client = Client()

# Window boundaries are sent as ISO-8601 strings built from naive datetimes.
window_start = datetime(2020, 11, 23, 7).isoformat()
window_end = datetime(2020, 12, 8, 22).isoformat()

run_parameters = {
    'start': window_start,
    'end_inclusive': window_end,
    'environment': 'prod',
    # 'environment': 'staging'
}

client.create_flow_run(
    flow_id="a4bab79c-055c-49b8-b7a7-cf66fa8bd6d2",
    parameters=run_parameters,
)
class ResourceManager:
    """
    The resource manager is responsible for cleaning up old completed/failed k8s jobs
    and pods from the cluster. This is optional and does not need to be used for the
    agent to work.
    """

    # Container "waiting" reasons that are treated as an image pull failure.
    # Kubernetes spells the back-off reason "ImagePullBackOff"; the previously
    # used spelling "ImagePullBackoff" is kept so nothing that matched before
    # stops matching.
    _IMAGE_PULL_ERROR_REASONS = ("ImagePullBackOff", "ErrImagePull", "ImagePullBackoff")

    def __init__(self) -> None:
        """
        Read the loop interval, auth token and namespace from configuration /
        environment, set up a stdout logger and load the Kubernetes configuration
        (in-cluster first, falling back to the local kube config).
        """
        self.loop_interval = prefect_config.cloud.agent.resource_manager.get(
            "loop_interval"
        )
        self.client = Client(api_token=prefect_config.cloud.agent.get("auth_token"))
        self.namespace = os.getenv("NAMESPACE", "default")

        logger = logging.getLogger("resource-manager")
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        formatter = logging.Formatter(context.config.logging.format)
        formatter.converter = time.gmtime  # type: ignore
        ch.setFormatter(formatter)
        logger.addHandler(ch)

        self.logger = logger

        from kubernetes import client, config

        try:
            config.load_incluster_config()
        except config.config_exception.ConfigException as exc:
            self.logger.warning(
                "{} Using out of cluster configuration option.".format(exc)
            )
            config.load_kube_config()

        # The `kubernetes.client` module itself is stored; API objects are
        # instantiated from it on demand.
        self.k8s_client = client

    def start(self) -> None:
        """
        Main loop which waits on a `LOOP_INTERVAL` and looks for finished jobs to clean
        """
        self.logger.info("Starting {}".format(type(self).__name__))
        while True:
            try:
                self.clean_resources()
            except Exception as exc:
                # Never let a single failed pass stop the loop.
                self.logger.exception(exc)
            time.sleep(self.loop_interval)

    # IDENTIFICATION

    def clean_resources(self) -> None:
        """
        Find jobs that are either completed or failed to delete from the cluster.

        Failed jobs are reported before deletion. When the namespace contains no
        jobs at all, leftover pods are cleaned up instead.
        """
        batch_client = self.k8s_client.BatchV1Api()

        try:
            jobs = batch_client.list_namespaced_job(namespace=self.namespace)
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to list jobs in namespace {}".format(self.namespace)
            )
            return

        for job in jobs.items:
            if job.status.succeeded or job.status.failed:
                # `labels` is None for objects without labels; treat as empty
                # so one unlabeled job cannot abort the whole pass.
                identifier = (job.metadata.labels or {}).get("identifier")
                name = job.metadata.name

                if job.status.failed:
                    self.logger.info(
                        "Found failed job {} in namespace {}".format(
                            name, self.namespace
                        )
                    )
                    self.report_failed_job(identifier=identifier)

                self.delete_job(name=name)
                self.delete_pods(job_name=name, identifier=identifier)

        if not jobs.items:
            self.clean_extra_pods()

    def clean_extra_pods(self) -> None:
        """
        Any runaway pods which failed due to unexpected reasons will be cleaned up here.
        ImagePullBackoffs, Evictions, etc...

        Every pod in the namespace whose phase is not "Running" is reported
        (depending on its phase) and then deleted.
        """
        core_client = self.k8s_client.CoreV1Api()

        try:
            pods = core_client.list_namespaced_pod(namespace=self.namespace)
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to list pods in namespace {}".format(self.namespace)
            )
            return

        for pod in pods.items:
            phase = pod.status.phase
            if phase == "Running":
                continue

            name = pod.metadata.name

            if phase == "Failed":
                self.report_failed_pod(pod=pod)

            if phase == "Unknown":
                self.report_unknown_pod(pod=pod)

            # Pending pods only carry container statuses once a container has
            # been attempted, which is where pull errors show up.
            if phase == "Pending" and pod.status.container_statuses:
                self.report_pod_image_pull_error(pod=pod)

            self.delete_extra_pod(name=name)

    # DELETION

    def delete_job(self, name: str) -> None:
        """
        Delete a job based on the name
        """
        batch_client = self.k8s_client.BatchV1Api()
        self.logger.info("Deleting job {} in namespace {}".format(name, self.namespace))

        try:
            batch_client.delete_namespaced_job(
                name=name,
                namespace=self.namespace,
                body=self.k8s_client.V1DeleteOptions(),
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to delete job {} in namespace {}".format(
                    name, self.namespace
                )
            )

    def delete_pods(self, job_name: str, identifier: str) -> None:
        """
        Delete the pods of a job, selected by the job's `identifier` label.

        `job_name` is used for logging only.
        """
        core_client = self.k8s_client.CoreV1Api()
        try:
            pods = core_client.list_namespaced_pod(
                namespace=self.namespace,
                label_selector="identifier={}".format(identifier),
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to list pods in namespace {}".format(self.namespace)
            )
            return

        # Test the item list, not the list object (which is always truthy).
        if pods.items:
            self.logger.info(
                "Deleting {} pods for job {} in namespace {}".format(
                    len(pods.items), job_name, self.namespace
                )
            )
        for pod in pods.items:
            name = pod.metadata.name
            try:
                core_client.delete_namespaced_pod(
                    name=name,
                    namespace=self.namespace,
                    body=self.k8s_client.V1DeleteOptions(),
                )
            except self.k8s_client.rest.ApiException:
                self.logger.exception(
                    "Error attempting to delete pod {} in namespace {}".format(
                        name, self.namespace
                    )
                )

    def delete_extra_pod(self, name: str) -> None:
        """
        Delete a pod based on the name
        """
        core_client = self.k8s_client.CoreV1Api()
        self.logger.info(
            "Deleting extra pod {} in namespace {}".format(name, self.namespace)
        )
        try:
            core_client.delete_namespaced_pod(
                name=name,
                namespace=self.namespace,
                body=self.k8s_client.V1DeleteOptions(),
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to delete pod {} in namespace {}".format(
                    name, self.namespace
                )
            )

    # REPORTING

    def _write_error_log(self, pod: "kubernetes.client.V1Pod", message: str) -> None:
        """
        Write one ERROR-level run log for the flow run that `pod` is labeled with.

        HTTP errors from the API are logged and swallowed so reporting can never
        interrupt cleanup.
        """
        # deferred import to reduce import time for prefect
        from requests.exceptions import HTTPError

        try:
            self.client.write_run_logs(
                [
                    dict(
                        # Pods without labels expose `labels` as None.
                        flow_run_id=(pod.metadata.labels or {}).get("flow_run_id"),
                        # Sent as a UTC ISO-8601 string rather than a datetime
                        # object so the payload is plain serializable data.
                        timestamp=pendulum.now("UTC").isoformat(),
                        name="resource-manager",
                        message=message,
                        level="ERROR",
                        info={},
                    )
                ]
            )
        except HTTPError as exc:
            self.logger.exception(exc)

    def report_failed_job(self, identifier: str) -> None:
        """
        Report jobs that failed for reasons outside of a flow run
        """
        core_client = self.k8s_client.CoreV1Api()
        try:
            pods = core_client.list_namespaced_pod(
                namespace=self.namespace,
                label_selector="identifier={}".format(identifier),
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                "Error attempting to list pods in namespace {}".format(self.namespace)
            )
            return

        for pod in pods.items:
            if pod.status.phase == "Failed":
                self.report_failed_pod(pod)

    def report_failed_pod(self, pod: "kubernetes.client.V1Pod") -> None:
        """
        Report pods that failed for reasons outside of a flow run. Write cloud log.

        For evicted pods a fixed explanation is reported; otherwise the pod's
        own logs are used as the message.
        """
        core_client = self.k8s_client.CoreV1Api()
        name = pod.metadata.name

        if pod.status.reason == "Evicted":
            logs = "Pod was evicted due to cluster resource constraints / auto scaling."
        else:
            try:
                logs = core_client.read_namespaced_pod_log(
                    namespace=self.namespace, name=name
                )
            except self.k8s_client.rest.ApiException:
                self.logger.exception(
                    "Error attempting to read pod logs for {} in namespace {}".format(
                        name, self.namespace
                    )
                )
                return

        self.logger.info(
            "Reporting failed pod {} in namespace {}".format(name, self.namespace)
        )
        self._write_error_log(pod, logs)

    def report_unknown_pod(self, pod: "kubernetes.client.V1Pod") -> None:
        """
        Write cloud log of pods that entered unknown states
        """
        name = pod.metadata.name
        self.logger.info(
            "Reporting unknown pod {} in namespace {}".format(name, self.namespace)
        )
        self._write_error_log(
            pod,
            "Flow run pod {} entered an unknown state in namespace {}".format(
                name, self.namespace
            ),
        )

    def report_pod_image_pull_error(self, pod: "kubernetes.client.V1Pod") -> None:
        """
        Write cloud log of pods that had image pull errors.

        One log entry is written per container whose waiting reason is one of
        `_IMAGE_PULL_ERROR_REASONS`.
        """
        for status in pod.status.container_statuses:
            waiting = status.state.waiting
            if waiting and waiting.reason in self._IMAGE_PULL_ERROR_REASONS:
                self.logger.info(
                    "Reporting image pull error for pod {} in namespace {}".format(
                        pod.metadata.name, self.namespace
                    )
                )
                self._write_error_log(
                    pod,
                    "Flow run image pull error for pod {} in namespace {}".format(
                        pod.metadata.name, self.namespace
                    ),
                )
def create_flow_run(
    flow_id: str = None,
    flow_name: str = None,
    project_name: str = "",
    parameters: dict = None,
    context: dict = None,
    labels: Iterable[str] = None,
    run_name: str = None,
    run_config: Optional[RunConfig] = None,
    scheduled_start_time: Optional[
        Union[
            pendulum.DateTime,
            datetime.datetime,
            pendulum.Duration,
            datetime.timedelta,
        ]
    ] = None,
    idempotency_key: str = None,
) -> str:
    """
    Task to create a flow run in the Prefect backend.

    The flow to run must be registered and an agent must be available to deploy
    the flow run. Exactly one of `flow_id` / `flow_name` must be given.

    Args:
        - flow_id: The flow or flow group uuid to lookup the flow to run
        - flow_name: The flow name to lookup the flow to run
        - project_name: The project name to lookup the flow to run. For use with
            `flow_name` if you have flows with the same name in multiple projects
        - parameters: An optional dictionary of parameters to pass to the flow run
        - context: An optional dictionary of context variables to pass to the
            flow run
        - labels: An optional iterable of labels to set on the flow run; if not
            provided, the default set of labels for the flow will be used
        - run_name: An optional name for the flow run; if not provided and the
            current context has a `flow_run_name`, the name will be generated as
            "{current_run_name}-{flow_name}"
        - run_config: An optional run config to use for the flow run; will
            override any existing run config settings
        - scheduled_start_time: An optional time in the future to schedule flow
            run execution for. A duration / timedelta is interpreted relative to
            the current UTC time. If not provided, the flow run will be scheduled
            to start now
        - idempotency_key: a unique idempotency key for scheduling the flow run.
            Duplicate flow runs with the same idempotency key will only create a
            single flow run. This is useful for ensuring that only one run is
            created if this task is retried. If not provided, defaults to the
            active `task_run_id`.

    Returns:
        str: The UUID of the created flow run

    Raises:
        ValueError: if both or neither of `flow_id` and `flow_name` are given
    """
    # Exactly one flow identifier is accepted.
    if flow_id and flow_name:
        raise ValueError(
            "Received both `flow_id` and `flow_name`. Only one flow identifier "
            "can be passed."
        )
    if not (flow_id or flow_name):
        raise ValueError(
            "Both `flow_id` and `flow_name` are null. You must pass a flow "
            "identifier"
        )

    logger = prefect.context.logger
    logger.debug("Looking up flow metadata...")

    # The checks above guarantee that exactly one identifier is set.
    flow = (
        FlowView.from_id(flow_id)
        if flow_id
        else FlowView.from_flow_name(flow_name, project_name=project_name)
    )

    # Generate a 'sub-flow' run name from the parent run, when there is one.
    if not run_name:
        parent_run_name = prefect.context.get("flow_run_name")
        if parent_run_name:
            run_name = f"{parent_run_name}-{flow.name}"

    # Name used in log output only; `run_name` itself may legitimately be empty.
    display_name = run_name or "<generated-name>"
    logger.info(f"Creating flow run {display_name!r} for flow {flow.name!r}...")

    if idempotency_key is None:
        idempotency_key = prefect.context.get("task_run_id", None)

    # Relative start times are resolved against the current UTC time.
    relative_types = (pendulum.Duration, datetime.timedelta)
    if isinstance(scheduled_start_time, relative_types):
        scheduled_start_time = pendulum.now("utc") + scheduled_start_time

    client = Client()
    flow_run_id = client.create_flow_run(
        flow_id=flow.flow_id,
        parameters=parameters,
        context=context,
        labels=labels,
        run_name=run_name,
        run_config=run_config,
        scheduled_start_time=scheduled_start_time,
        idempotency_key=idempotency_key,
    )

    run_url = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
    logger.info(f"Created flow run {display_name!r}: {run_url}")
    return flow_run_id
def run(
    self,
    flow_name: str = None,
    project_name: str = None,
    parameters: dict = None,
    run_config: RunConfig = None,
    new_flow_context: dict = None,
    run_name: str = None,
    idempotency_key: str = None,
    scheduled_start_time: datetime.datetime = None,
) -> str:
    """
    Run method for the task; schedules a run of the newest unarchived version of
    the named flow and, when `self.wait` is set, blocks until that run finishes.

    NOTE(review): as written here, `flow_name` and `project_name` must both be
    non-None when this method executes. Any fallback to values given at
    initialization is presumably applied outside this method — confirm.

    Args:
        - flow_name (str, optional): the name of the flow to schedule
        - project_name (str, optional): the project in which the flow is located
        - parameters (dict, optional): the parameters to pass to the flow run
            being scheduled
        - run_config (RunConfig, optional): a run-config to use for this flow
            run, overriding any existing flow settings.
        - new_flow_context (dict, optional): the optional run context for the
            new flow run
        - run_name (str, optional): name to be set for the flow run
        - idempotency_key (str, optional): a unique idempotency key for
            scheduling the flow run. Duplicate flow runs with the same
            idempotency key will only create a single flow run. This is useful
            for ensuring that only one run is created if this task is retried.
            If not provided, defaults to the active `task_run_id`.
        - scheduled_start_time (datetime, optional): the time to schedule the
            execution for; passed through unchanged to the client

    Returns:
        - str: the ID of the newly-scheduled flow run (only when `self.wait` is
            falsy; otherwise the method ends by raising a signal)

    Raises:
        - ValueError: if the flow name or project name is missing, or if no
            matching flow can be found
        - a state signal built from the run's final state, once the awaited flow
            run has finished

    Example:
        ```python
        from prefect.tasks.prefect.flow_run import StartFlowRun

        kickoff_task = StartFlowRun(project_name="Hello, World!", flow_name="My Cloud Flow")
        ```
    """
    # Both identifiers are required to locate the flow.
    if flow_name is None:
        raise ValueError("Must provide a flow name.")
    if project_name is None:
        raise ValueError("Must provide a project name.")

    # Newest unarchived flow with this name in this project.
    flow_filter = {
        "name": {"_eq": flow_name},
        "archived": {"_eq": False},
        "project": {"name": {"_eq": project_name}},
    }
    flow_field = with_args(
        "flow",
        {
            "where": flow_filter,
            "order_by": {"version": EnumValue("desc")},
            "limit": 1,
        },
    )
    query = {"query": {flow_field: {"id"}}}

    client = Client()
    matches = client.graphql(query).data.flow

    if not matches:
        raise ValueError("Flow '{}' not found.".format(flow_name))

    # `limit: 1` + descending version order => first hit is the latest version.
    flow_id = matches[0].id

    if idempotency_key is None:
        idempotency_key = prefect.context.get("task_run_id", None)

    # Supplying an idempotency key means retries of this task will not create
    # additional flow runs.
    flow_run_id = client.create_flow_run(
        flow_id=flow_id,
        parameters=parameters,
        run_config=run_config,
        idempotency_key=idempotency_key,
        context=new_flow_context,
        run_name=run_name,
        scheduled_start_time=scheduled_start_time,
    )
    self.logger.debug(f"Flow Run {flow_run_id} created.")

    self.logger.debug(f"Creating link artifact for Flow Run {flow_run_id}.")
    run_link = client.get_cloud_url("flow-run", flow_run_id, as_user=False)
    # Only the path component of the URL is stored in the artifact.
    create_link_artifact(urlparse(run_link).path)
    self.logger.info(f"Flow Run: {run_link}")

    if not self.wait:
        return flow_run_id

    # Poll until the run reaches a finished state, then surface that state as
    # the matching signal.
    poll_seconds_source = self.poll_interval
    while True:
        time.sleep(poll_seconds_source.total_seconds())
        final_state = client.get_flow_run_info(flow_run_id).state
        if final_state.is_finished():
            raise signal_from_state(final_state)(
                f"{flow_run_id} finished in state {final_state}"
            )
def register_workflow(prefect_register_token_secret_name: str) -> None:
    """
    Registers the workflow to Prefect Cloud.

    Imports the top-level `flow` module, prefixes the flow's name with the
    current environment, configures it to run on Fargate from a Docker image
    stored in ECR, then registers it with a token read from AWS Secrets Manager.

    The imported flow object is modified in place (name, environment, storage).

    Parameters:
        prefect_register_token_secret_name [str]
            -- name of aws secrets manager secret where prefect register token is stored
    """
    # Function-scope import: the file's import block is outside this block.
    import importlib

    # importlib.import_module is the documented replacement for calling
    # __import__ directly; for a top-level module name both return the module.
    flow_module = importlib.import_module("flow")
    flow = flow_module.flow

    # Environment-prefixed name, also used for the task definition, ECR
    # repository / image and log stream prefix.
    flow_name = f"{env}_{flow.name}"
    flow.name = flow_name

    log_configuration = {
        "logDriver": "awslogs",
        "options": {
            "awslogs-region": aws_region,
            "awslogs-group": f"{env}_dataflow_automation_workflows",
            "awslogs-stream-prefix": flow_name,
        },
    }
    flow.environment = FargateTaskEnvironment(
        requiresCompatibilities=["FARGATE"],
        region=aws_region,
        labels=[f"{env}_dataflow_automation"],
        taskDefinition=flow_name,
        family=flow_name,
        cpu="512",
        memory="3072",
        networkMode="awsvpc",
        networkConfiguration={
            "awsvpcConfiguration": {
                "assignPublicIp": "ENABLED",
                "subnets": subnets,
                "securityGroups": [],
            }
        },
        containerDefinitions=[{"logConfiguration": log_configuration}],
        executionRoleArn=execution_role_arn,
        taskRoleArn=task_role_arn,
        cluster=f"{env}_dataflow_automation_workflows",
    )

    # Set the flow storage. Where to get the code from
    flow.storage = Docker(
        registry_url=f"{account_id}.dkr.ecr.{aws_region}.amazonaws.com",
        image_name=flow_name,
        image_tag="latest",
        python_dependencies=["boto3"],
        env_vars={"PYTHONPATH": "/opt/prefect/flows"},
    )

    # Authenticate to ECR as the registration process pushes the image to AWS
    ecr_authenticate()

    # Instantiate the prefect client
    prefect_client = Client(
        api_token=get_prefect_token(secret_name=prefect_register_token_secret_name)
    )

    # Create ECR repository
    create_ecr_repository(flow_name=flow_name)

    # Register the Workflow
    prefect_client.register(flow=flow, project_name=f"{env}_dataflow_automation")
class ResourceManager:
    """
    Cleans up old completed/failed k8s jobs (and, through foreground deletion,
    their pods) from the cluster. Running it is optional; the agent works
    without it.

    DEPRECATED: The resource manager is deprecated and its main functionality is
    now present in the Kubernetes agent.
    """

    def __init__(self) -> None:
        """
        Load settings, build a stdout logger and load the Kubernetes configuration,
        preferring in-cluster configuration over the local kube config.
        """
        agent_config = prefect_config.cloud.agent
        self.loop_interval = agent_config.resource_manager.get("loop_interval")
        self.client = Client(api_token=agent_config.get("auth_token"))
        self.namespace = os.getenv("NAMESPACE", "default")

        # Log to stdout with UTC timestamps.
        log_format = logging.Formatter(context.config.logging.format)
        log_format.converter = time.gmtime  # type: ignore
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setLevel(logging.DEBUG)
        stdout_handler.setFormatter(log_format)

        self.logger = logging.getLogger("resource-manager")
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(stdout_handler)

        from kubernetes import client, config

        try:
            config.load_incluster_config()
        except config.config_exception.ConfigException as exc:
            self.logger.warning(f"{exc} Using out of cluster configuration option.")
            config.load_kube_config()

        # Keep the `kubernetes.client` module; API objects are created from it
        # when needed.
        self.k8s_client = client

    def start(self) -> None:
        """
        Run forever: clean resources, then sleep for the configured loop interval.
        """
        self.logger.warning("DEPRECATED: The resource manager is deprecated")
        self.logger.info(f"Starting {type(self).__name__}")

        while True:
            try:
                self.clean_resources()
            except Exception as exc:
                # A failed pass is logged; the loop keeps going.
                self.logger.exception(exc)
            time.sleep(self.loop_interval)

    def clean_resources(self) -> None:
        """
        Page through the labeled jobs in the namespace and delete every job that
        has succeeded or failed, reporting failed ones first.
        """
        batch_api = self.k8s_client.BatchV1Api()

        page_token = ""
        has_more = True
        while has_more:
            try:
                page = batch_api.list_namespaced_job(
                    namespace=self.namespace,
                    label_selector="prefect.io/identifier",
                    limit=20,
                    _continue=page_token,
                )
                page_token = page.metadata._continue
                has_more = bool(page_token)
            except self.k8s_client.rest.ApiException as exc:
                if exc.status != 410:
                    self.logger.exception(
                        f"Error attempting to list jobs in namespace {self.namespace}"
                    )
                    return
                # HTTP 410: the continue token is no longer valid, so start
                # listing again from the beginning.
                self.logger.debug("List jobs continue token expired, relisting")
                page_token = ""
                continue

            for job in page.items:
                if not (job.status.succeeded or job.status.failed):
                    # Not finished yet; leave it alone.
                    continue

                identifier = job.metadata.labels.get("prefect.io/identifier")
                job_name = job.metadata.name

                if job.status.failed:
                    self.logger.info(
                        f"Found failed job {job_name} in namespace {self.namespace}"
                    )
                    self.report_failed_job(identifier=identifier)

                self.delete_job(name=job_name)

    def delete_job(self, name: str) -> None:
        """
        Delete the named job using the "Foreground" propagation policy.
        """
        self.logger.info(f"Deleting job {name} in namespace {self.namespace}")

        delete_options = self.k8s_client.V1DeleteOptions
        try:
            self.k8s_client.BatchV1Api().delete_namespaced_job(
                name=name,
                namespace=self.namespace,
                body=delete_options(propagation_policy="Foreground"),
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                f"Error attempting to delete job {name} in namespace {self.namespace}"
            )

    def report_failed_job(self, identifier: str) -> None:
        """
        Report every failed pod that carries the given job identifier label.
        """
        selector = f"prefect.io/identifier={identifier}"
        try:
            pod_list = self.k8s_client.CoreV1Api().list_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )
        except self.k8s_client.rest.ApiException:
            self.logger.exception(
                f"Error attempting to list pods in namespace {self.namespace}"
            )
            return

        failed_pods = [pod for pod in pod_list.items if pod.status.phase == "Failed"]
        for pod in failed_pods:
            self.report_failed_pod(pod)

    def report_failed_pod(self, pod: "kubernetes.client.V1Pod") -> None:
        """
        Write an ERROR run log for a failed pod.

        The message is a fixed explanation for evicted pods and the pod's own
        logs otherwise. HTTP errors while writing the log are logged and
        swallowed.
        """
        # deferred import to reduce import time for prefect
        from requests.exceptions import HTTPError

        pod_name = pod.metadata.name

        if pod.status.reason != "Evicted":
            try:
                message = self.k8s_client.CoreV1Api().read_namespaced_pod_log(
                    namespace=self.namespace, name=pod_name
                )
            except self.k8s_client.rest.ApiException:
                self.logger.exception(
                    f"Error attempting to read pod logs for {pod_name} "
                    f"in namespace {self.namespace}"
                )
                return
        else:
            message = (
                "Pod was evicted due to cluster resource constraints / auto scaling."
            )

        self.logger.info(
            f"Reporting failed pod {pod_name} in namespace {self.namespace}"
        )

        try:
            log_entry = dict(
                flow_run_id=pod.metadata.labels.get("prefect.io/flow_run_id"),
                timestamp=pendulum.now("UTC").isoformat(),
                name="resource-manager",
                message=message,
                level="ERROR",
                info={},
            )
            self.client.write_run_logs([log_entry])
        except HTTPError as exc:
            self.logger.exception(exc)
df = ph.read_clickhouse(query, connection=connection) return df @prefect.task(max_retries=5, retry_delay=timedelta(seconds=2)) def agregate(df): agg = df.groupby(['SalesDate', 'UserID'], as_index=False).sum() return agg # schedule to run every 12 hours schedule = IntervalSchedule( start_date=datetime.utcnow() + timedelta(seconds=1), # interval=timedelta(hours=12)) interval=timedelta(minutes=5), end_date=datetime.utcnow() + timedelta(minutes=10)) with prefect.Flow( name="SQL", schedule=schedule, # state_handlers=[handler], ) as flow: dataframes = download(connection) fin = agregate(dataframes) # client = Client() client.create_project(project_name='SQL') flow.register(project_name='SQL') # flow.run()