def load_flows_from_json(path: str) -> "List[dict]":
    """Load every flow stored in the flows JSON file at `path`.

    The flows are handed back as plain dicts rather than deserialized
    objects, because `FlowSchema` doesn't roundtrip without mutation.

    Args:
        - path (str): location of the flows JSON file; its contents are
            fetched with `read_bytes_from_path`

    Returns:
        - List[dict]: the value stored under the `flows` key of the file

    Raises:
        - TerminalError: if the path doesn't exist, reading it fails, the
            contents are not a valid flows file, or the declared version
            is anything other than 1
    """
    try:
        raw = read_bytes_from_path(path)
    except FileNotFoundError:
        raise TerminalError(f"Path {path!r} doesn't exist")
    except Exception as exc:
        # Show the underlying failure to the user before giving up
        click.secho(f"Error loading {path!r}:", fg="red")
        log_exception(exc, indent=2)
        raise TerminalError from exc

    try:
        schema = FlowsJSONSchema()
        parsed = schema.load(json.loads(raw))
    except Exception:
        raise TerminalError(
            f"{path!r} is not a valid Prefect flows `json` file.")

    version = parsed["version"]
    if version != 1:
        raise TerminalError(
            f"{path!r} is version {version}, only version 1 is supported"
        )

    return parsed["flows"]
def test_read_local_file(self, tmpdir, scheme):
    """Check that `read_bytes_from_path` returns the bytes of a local file.

    With `scheme=None` the plain filesystem path is passed through;
    otherwise the path is rewritten into an `agent://` URL first.
    """
    # The URL form below drops the drive component of the path, so the
    # scheme-based variant cannot address Windows files; skip it there.
    if scheme and sys.platform == "win32":
        pytest.skip("Scheme not supported for Windows file paths")

    path = str(tmpdir.join("test.yaml"))
    with open(path, "wb") as f:
        f.write(b"hello")

    path_arg = (
        path
        if scheme is None
        else "agent://" + os.path.splitdrive(path)[1].replace("\\", "/")
    )
    res = read_bytes_from_path(path_arg)
    assert res == b"hello"
def test_read_s3(self, monkeypatch):
    """Check that an `s3://` path is split into bucket and key for download.

    The boto client factory is replaced with a mock, so no network access
    takes place; the test is skipped when `prefect.utilities.aws` cannot
    be imported.
    """
    pytest.importorskip("prefect.utilities.aws")

    boto_client = MagicMock()
    client_factory = MagicMock(return_value=boto_client)
    monkeypatch.setattr("prefect.utilities.aws.get_boto_client",
                        client_factory)

    payload = read_bytes_from_path("s3://mybucket/path/to/thing.yaml")

    # Keyword arguments of the most recent `download_fileobj` call
    download_kwargs = boto_client.download_fileobj.call_args[1]
    assert download_kwargs["Bucket"] == "mybucket"
    assert download_kwargs["Key"] == "path/to/thing.yaml"
    assert isinstance(payload, bytes)
def test_read_http_file(self, monkeypatch, scheme):
    """Check that a `<scheme>://` URL is fetched through `requests.get`.

    `requests.get` is replaced with a mock whose response carries the
    bytes `b"testing"`; the test is skipped when `requests` is missing.
    """
    pytest.importorskip("requests")

    fake_response = MagicMock(content=b"testing")
    fake_get = MagicMock(return_value=fake_response)
    monkeypatch.setattr("requests.get", fake_get)

    target = f"{scheme}://some/file.json"
    body = read_bytes_from_path(target)

    # The URL must be passed as the only positional argument
    assert fake_get.call_args[0] == (target, )
    assert body == b"testing"
def test_read_gcs(self, monkeypatch, scheme):
    """Check that a `<scheme>://bucket/key` path is read through the GCS client.

    The storage client factory is replaced with a mock; the test is
    skipped when `prefect.utilities.gcp` cannot be imported.
    """
    pytest.importorskip("prefect.utilities.gcp")

    storage_client = MagicMock()
    monkeypatch.setattr("prefect.utilities.gcp.get_storage_client",
                        MagicMock(return_value=storage_client))

    result = read_bytes_from_path(f"{scheme}://mybucket/path/to/thing.yaml")

    # Walk the mocked call chain: client.bucket(...) -> get_blob(...) -> download
    bucket_call = storage_client.bucket
    assert bucket_call.call_args[0] == ("mybucket", )

    get_blob_call = bucket_call.return_value.get_blob
    assert get_blob_call.call_args[0] == ("path/to/thing.yaml", )

    download_call = get_blob_call.return_value.download_as_bytes
    assert download_call.called
    assert download_call.return_value is result
def test_read_local_file(self, tmpdir, scheme):
    """Check that `read_bytes_from_path` returns the bytes of a local file.

    With `scheme=None` the plain filesystem path is used; otherwise the
    path is turned into an `agent://` URL. The scheme-based variant is
    skipped on Windows.
    """
    on_windows = sys.platform == "win32"
    if scheme and on_windows:
        pytest.skip("Scheme not supported for Windows file paths")

    local_path = str(tmpdir.join("test.yaml"))
    with open(local_path, "wb") as handle:
        handle.write(b"hello")

    if scheme is None:
        target = local_path
    else:
        # Drop any drive component and normalise separators for the URL form
        _, tail = os.path.splitdrive(local_path)
        target = "agent://" + tail.replace("\\", "/")

    assert read_bytes_from_path(target) == b"hello"
def generate_job_spec_from_run_config(self, flow_run: GraphQLResult,
                                      run_config: KubernetesRun) -> dict:
    """Generate a k8s job spec for a flow run.

    The job starts from a template (the run config's inline template, else
    the run config's template path, else the agent's template path) and is
    then filled in with identifying metadata, service account, image pull
    secrets, image, command, environment variables and resource settings.

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (KubernetesRun): The flow run's run_config

    Returns:
        - dict: a dictionary representation of a k8s job for flow execution
    """
    from copy import deepcopy

    if run_config.job_template:
        # Work on a copy: everything below mutates `job` in place, and the
        # caller's run config must not pick up values from this flow run.
        job = deepcopy(run_config.job_template)
    else:
        job_template_path = run_config.job_template_path or self.job_template_path
        self.logger.debug("Loading job template from %r", job_template_path)
        template_bytes = read_bytes_from_path(job_template_path)
        job = yaml.safe_load(template_bytes)

    identifier = uuid.uuid4().hex[:8]

    job_name = f"prefect-job-{identifier}"

    # Populate job metadata for identification
    k8s_labels = {
        "prefect.io/identifier": identifier,
        "prefect.io/flow_run_id": flow_run.id,  # type: ignore
        "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
    }
    _get_or_create(job, "metadata.labels")
    _get_or_create(job, "spec.template.metadata.labels")
    job["metadata"]["name"] = job_name
    job["metadata"]["labels"].update(**k8s_labels)
    job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

    pod_spec = job["spec"]["template"]["spec"]

    # Configure `service_account_name` if specified
    if run_config.service_account_name is not None:
        # On run-config, always override
        service_account_name = (
            run_config.service_account_name
        )  # type: Optional[str]
    elif "serviceAccountName" in pod_spec and (
            run_config.job_template or run_config.job_template_path):
        # On run-config job-template, no override
        service_account_name = None
    else:
        # Use agent value, if provided
        service_account_name = self.service_account_name
    if service_account_name is not None:
        pod_spec["serviceAccountName"] = service_account_name

    # Configure `image_pull_secrets` if specified
    if run_config.image_pull_secrets is not None:
        # On run-config, always override
        image_pull_secrets = (
            run_config.image_pull_secrets
        )  # type: Optional[Iterable[str]]
    elif "imagePullSecrets" in pod_spec and (
            run_config.job_template or run_config.job_template_path):
        # On run-config job template, no override
        image_pull_secrets = None
    else:
        # Use agent, if provided
        image_pull_secrets = self.image_pull_secrets
    if image_pull_secrets is not None:
        pod_spec["imagePullSecrets"] = [{"name": s} for s in image_pull_secrets]

    # Default restartPolicy to Never
    _get_or_create(job, "spec.template.spec.restartPolicy", "Never")

    # Get the first container, which is used for the prefect job
    containers = _get_or_create(job, "spec.template.spec.containers", [])
    if not containers:
        containers.append({})
    container = containers[0]

    # Set container image
    container["image"] = image = get_flow_image(
        flow_run, default=container.get("image"))

    # Set flow run command
    container["args"] = get_flow_run_command(flow_run).split()

    # Populate environment variables from the following sources,
    # with precedence:
    # - Values required for flow execution, hardcoded below
    # - Values set on the KubernetesRun object
    # - Values set using the `--env` CLI flag on the agent
    # - Values in the job template
    env = {"PREFECT__LOGGING__LEVEL": config.logging.level}
    env.update(self.env_vars)
    if run_config.env:
        env.update(run_config.env)
    env.update({
        "PREFECT__BACKEND": config.backend,
        "PREFECT__CLOUD__AGENT__LABELS": str(self.labels),
        "PREFECT__CLOUD__API": config.cloud.api,
        "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
        "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
        "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
        "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
        "PREFECT__CONTEXT__IMAGE": image,
        "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    })
    # Template entries are kept only when nothing above defines the same name
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("env", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["env"] = container_env

    # Set resource requirements if provided
    _get_or_create(container, "resources.requests")
    _get_or_create(container, "resources.limits")
    resources = container["resources"]
    if run_config.memory_request:
        resources["requests"]["memory"] = run_config.memory_request
    if run_config.memory_limit:
        resources["limits"]["memory"] = run_config.memory_limit
    if run_config.cpu_request:
        resources["requests"]["cpu"] = run_config.cpu_request
    if run_config.cpu_limit:
        resources["limits"]["cpu"] = run_config.cpu_limit

    return job
def mock(path):
    """Return `data` for `s3_path`; defer to `read_bytes_from_path` otherwise."""
    if path == s3_path:
        return data
    return read_bytes_from_path(path)
def generate_task_definition(self, flow_run: GraphQLResult,
                             run_config: ECSRun) -> Dict[str, Any]:
    """Build the ECS task definition used to execute a flow run.

    The starting template is, in order of preference, the run config's
    inline task definition, the file at the run config's task definition
    path, or the agent's default task definition.

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (ECSRun): The flow's run config

    Returns:
        - dict: a dictionary representation of an ECS task definition
    """
    if run_config.task_definition:
        taskdef = deepcopy(run_config.task_definition)
    elif run_config.task_definition_path:
        template_path = run_config.task_definition_path
        self.logger.debug("Loading task definition template from %r",
                          template_path)
        taskdef = yaml.safe_load(
            read_bytes_from_path(run_config.task_definition_path))
    else:
        taskdef = deepcopy(self.task_definition)

    # The family name is derived from the flow name, capped so that the
    # "prefect-" prefix still fits within 255 characters
    family_slug = slugify.slugify(
        flow_run.flow.name,
        max_length=255 - len("prefect-"),
        word_boundary=True,
        save_order=True,
    )
    prefect_tags = self.get_task_definition_tags(flow_run)
    taskdef["family"] = f"prefect-{family_slug}"

    # Prefect's tags come first; template tags survive unless the key clashes
    merged_tags = [{"key": key, "value": value}
                   for key, value in prefect_tags.items()]
    merged_tags.extend(tag for tag in taskdef.get("tags", [])
                       if tag["key"] not in prefect_tags)
    taskdef["tags"] = merged_tags

    # Get the flow container (creating one if it doesn't already exist)
    containers = taskdef.setdefault("containerDefinitions", [])
    container = next(
        (item for item in containers if item.get("name") == "flow"), None)
    if container is None:
        container = {"name": "flow"}
        containers.append(container)

    # Set flow image
    image = get_flow_image(flow_run)
    container["image"] = image

    # Set flow run command
    container["command"] = ["/bin/sh", "-c", get_flow_run_command(flow_run)]

    # Set taskRoleArn if configured
    if run_config.task_role_arn:
        taskdef["taskRoleArn"] = run_config.task_role_arn

    # Populate static environment variables from the following sources,
    # with precedence:
    # - Static environment variables, hardcoded below
    # - Values in the task definition template
    static_env = {
        "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
        "PREFECT__CONTEXT__IMAGE": image,
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    }
    environment = [{"name": name, "value": value}
                   for name, value in static_env.items()]
    environment.extend(item for item in container.get("environment", [])
                       if item["name"] not in static_env)
    container["environment"] = environment

    # Set resource requirements, if provided
    # Also ensure that cpu/memory are strings not integers
    for resource in ("cpu", "memory"):
        requested = getattr(run_config, resource)
        if requested:
            taskdef[resource] = str(requested)
        elif resource in taskdef:
            taskdef[resource] = str(taskdef[resource])

    return taskdef
def __init__(  # type: ignore
    self,
    agent_config_id: str = None,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
    agent_address: str = None,
    no_cloud_logs: bool = False,
    task_definition_path: str = None,
    run_task_kwargs_path: str = None,
    aws_access_key_id: str = None,
    aws_secret_access_key: str = None,
    aws_session_token: str = None,
    region_name: str = None,
    cluster: str = None,
    launch_type: str = None,
    task_role_arn: str = None,
    botocore_config: dict = None,
) -> None:
    """Set up the agent: boto clients, default task definition and
    default `run_task` kwargs.

    Args:
        - agent_config_id, name, labels, env_vars, max_polls, agent_address,
            no_cloud_logs: forwarded unchanged to the parent initializer
        - task_definition_path (str): where to load the default task
            definition from; `DEFAULT_TASK_DEFINITION_PATH` when not given
        - run_task_kwargs_path (str): where to load default `run_task`
            kwargs from; an empty dict is used when not given
        - aws_access_key_id, aws_secret_access_key, aws_session_token,
            region_name: passed on to `get_boto_client`
        - cluster (str): stored on the agent as `cluster`
        - launch_type (str): upper-cased and stored; "FARGATE" when not given
        - task_role_arn (str): if set, written to `taskRoleArn` of the
            default task definition
        - botocore_config (dict): keyword arguments for botocore's `Config`

    Raises:
        - Exception: any failure while loading the default task definition
            or the `run_task` kwargs is logged and re-raised
    """
    super().__init__(
        agent_config_id=agent_config_id,
        name=name,
        labels=labels,
        env_vars=env_vars,
        max_polls=max_polls,
        agent_address=agent_address,
        no_cloud_logs=no_cloud_logs,
    )

    from botocore.config import Config

    from prefect.utilities.aws import get_boto_client

    self.cluster = cluster
    if launch_type:
        self.launch_type = launch_type.upper()
    else:
        self.launch_type = "FARGATE"
    self.task_role_arn = task_role_arn

    # Load boto configuration. We want to use the standard retry mode by
    # default (which isn't boto's default due to backwards compatibility).
    # The logic below lets the user override our default retry mode either
    # in `botocore_config` or in their aws config file.
    #
    # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
    # for more info.
    client_config = Config(**(botocore_config or {}))
    if not client_config.retries:
        client_config.retries = {"mode": "standard"}

    self.boto_kwargs = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "aws_session_token": aws_session_token,
        "region_name": region_name,
        "config": client_config,
    }  # type: Dict[str, Any]

    self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
    self.rgtag_client = get_boto_client("resourcegroupstaggingapi",
                                        **self.boto_kwargs)

    # Load default task definition
    task_definition_path = task_definition_path or DEFAULT_TASK_DEFINITION_PATH
    try:
        definition_bytes = read_bytes_from_path(task_definition_path)
        self.task_definition = yaml.safe_load(definition_bytes)
    except Exception:
        self.logger.error(
            "Failed to load default task definition from %r",
            task_definition_path,
            exc_info=True,
        )
        raise

    # Load default run_task kwargs
    if not run_task_kwargs_path:
        self.run_task_kwargs = {}
    else:
        try:
            kwargs_bytes = read_bytes_from_path(run_task_kwargs_path)
            self.run_task_kwargs = yaml.safe_load(kwargs_bytes)
        except Exception:
            self.logger.error(
                "Failed to load default `run_task` kwargs from %r",
                run_task_kwargs_path,
                exc_info=True,
            )
            raise

    # If `task_role_arn` is configured on the agent, add it to the default
    # template. The agent default `task_role_arn` is only applied if using
    # the agent's default template.
    if self.task_role_arn:
        self.task_definition["taskRoleArn"] = self.task_role_arn

    # If running on fargate, auto-configure `networkConfiguration` for the
    # user if they didn't configure it themselves.
    needs_network_config = (
        self.launch_type == "FARGATE"
        and not self.run_task_kwargs.get("networkConfiguration")
    )
    if needs_network_config:
        self.run_task_kwargs["networkConfiguration"] = (
            self.infer_network_configuration()
        )
def generate_task_definition(self, flow_run: GraphQLResult,
                             run_config: ECSRun) -> Dict[str, Any]:
    """Generate an ECS task definition from a flow run

    The starting template is, in order of preference, the run config's
    inline task definition, the file at the run config's task definition
    path, or the agent's default task definition.

    Args:
        - flow_run (GraphQLResult): A flow run object
        - run_config (ECSRun): The flow's run config

    Returns:
        - dict: a dictionary representation of an ECS task definition
    """
    if run_config.task_definition:
        taskdef = deepcopy(run_config.task_definition)
    elif run_config.task_definition_path:
        self.logger.debug(
            "Loading task definition template from %r",
            run_config.task_definition_path,
        )
        template_bytes = read_bytes_from_path(run_config.task_definition_path)
        taskdef = yaml.safe_load(template_bytes)
    else:
        taskdef = deepcopy(self.task_definition)

    slug = slugify.slugify(
        f"{flow_run.flow.name}-{flow_run.id}",
        max_length=255 - len("prefect-"),
        word_boundary=True,
        save_order=True,
    )
    taskdef["family"] = f"prefect-{slug}"

    # Add some metadata tags for easier tracking by users. Template tags
    # that reuse one of these keys are dropped first, so the resulting list
    # never carries the same key twice (ECS expects tag keys to be unique
    # per resource).
    prefect_tags = {
        "prefect:flow-id": flow_run.flow.id,
        "prefect:flow-version": str(flow_run.flow.version),
    }
    template_tags = [
        entry for entry in taskdef.setdefault("tags", [])
        if entry.get("key") not in prefect_tags
    ]
    taskdef["tags"] = template_tags + [
        {"key": k, "value": v} for k, v in prefect_tags.items()
    ]

    # Get the flow container (creating one if it doesn't already exist)
    containers = taskdef.setdefault("containerDefinitions", [])
    for container in containers:
        if container.get("name") == "flow":
            break
    else:
        container = {"name": "flow"}
        containers.append(container)

    # Set flow image
    container["image"] = image = get_flow_image(
        flow_run, default=container.get("image"))

    # Add `PREFECT__CONTEXT__IMAGE` environment variable
    env = {"PREFECT__CONTEXT__IMAGE": image}
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("environment", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["environment"] = container_env

    # Ensure that cpu/memory are strings not integers
    if "cpu" in taskdef:
        taskdef["cpu"] = str(taskdef["cpu"])
    if "memory" in taskdef:
        taskdef["memory"] = str(taskdef["memory"])

    # If we're using Fargate, we need to explicitly set an executionRoleArn on the
    # task definition. If one isn't present, then try to load it from the run_config
    # and then the agent's default.
    if "executionRoleArn" not in taskdef:
        if run_config.execution_role_arn:
            taskdef["executionRoleArn"] = run_config.execution_role_arn
        elif self.execution_role_arn:
            taskdef["executionRoleArn"] = self.execution_role_arn

    # Set requiresCompatibilities if not already set if self.launch_type is set
    if "requiresCompatibilities" not in taskdef and self.launch_type:
        taskdef["requiresCompatibilities"] = [self.launch_type]

    return taskdef
def generate_job_spec_from_run_config(self, flow_run: GraphQLResult) -> dict:
    """Generate a k8s job spec for a flow run.

    The run config is deserialized from `flow_run.flow.run_config`. The job
    starts from a template (the run config's inline template, else the run
    config's template path, else the agent's template path) and is then
    filled in with identifying metadata, image, command, environment
    variables and resource settings.

    Args:
        - flow_run (GraphQLResult): A flow run object

    Returns:
        - dict: a dictionary representation of a k8s job for flow execution
    """
    from copy import deepcopy

    run_config = RunConfigSchema().load(flow_run.flow.run_config)

    if run_config.job_template:
        # Work on a copy: everything below mutates `job` in place, and the
        # template held by the run config must stay untouched.
        job = deepcopy(run_config.job_template)
    else:
        job_template_path = run_config.job_template_path or self.job_template_path
        self.logger.debug("Loading job template from %r", job_template_path)
        template_bytes = read_bytes_from_path(job_template_path)
        job = yaml.safe_load(template_bytes)

    identifier = uuid.uuid4().hex[:8]

    job_name = f"prefect-job-{identifier}"

    # Populate job metadata for identification
    k8s_labels = {
        "prefect.io/identifier": identifier,
        "prefect.io/flow_run_id": flow_run.id,  # type: ignore
        "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
    }
    _get_or_create(job, "metadata.labels")
    _get_or_create(job, "spec.template.metadata.labels")
    job["metadata"]["name"] = job_name
    job["metadata"]["labels"].update(**k8s_labels)
    job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

    # Get the first container, which is used for the prefect job
    containers = _get_or_create(job, "spec.template.spec.containers", [])
    if not containers:
        containers.append({})
    container = containers[0]

    # Set container image
    container["image"] = image = get_flow_image(flow_run)

    # Set flow run command
    container["args"] = [get_flow_run_command(flow_run)]

    # Populate environment variables from the following sources,
    # with precedence:
    # - Values required for flow execution, hardcoded below
    # - Values set on the KubernetesRun object
    # - Values set using the `--env` CLI flag on the agent
    # - Values in the job template
    env = self.env_vars.copy()
    if run_config.env:
        env.update(run_config.env)
    env.update({
        "PREFECT__CLOUD__API": config.cloud.api,
        "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
        "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
        "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
        "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
        "PREFECT__CONTEXT__IMAGE": image,
        "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
        "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
        "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
    })
    # Template entries are kept only when nothing above defines the same name
    container_env = [{"name": k, "value": v} for k, v in env.items()]
    for entry in container.get("env", []):
        if entry["name"] not in env:
            container_env.append(entry)
    container["env"] = container_env

    # Set resource requirements if provided
    _get_or_create(container, "resources.requests")
    _get_or_create(container, "resources.limits")
    resources = container["resources"]
    if run_config.memory_request:
        resources["requests"]["memory"] = run_config.memory_request
    if run_config.memory_limit:
        resources["limits"]["memory"] = run_config.memory_limit
    if run_config.cpu_request:
        resources["requests"]["cpu"] = run_config.cpu_request
    if run_config.cpu_limit:
        resources["limits"]["cpu"] = run_config.cpu_limit

    return job
aws_session_token=aws_session_token, region_name=region_name, config=boto_config, ) # type: Dict[str, Any] self.ecs_client = get_boto_client("ecs", **self.boto_kwargs) self.rgtag_client = get_boto_client( "resourcegroupstaggingapi", **self.boto_kwargs ) # Load default task definition if not task_definition_path: task_definition_path = DEFAULT_TASK_DEFINITION_PATH try: self.task_definition = yaml.safe_load( read_bytes_from_path(task_definition_path) ) except Exception: self.logger.error( "Failed to load default task definition from %r", task_definition_path, exc_info=True, ) raise # Load default run_task kwargs if run_task_kwargs_path: try: self.run_task_kwargs = yaml.safe_load( read_bytes_from_path(run_task_kwargs_path) )