Example #1
def load_flows_from_json(path: str) -> "List[dict]":
    """Given a path to a JSON file containing flows, load all flows.

    Note that since `FlowSchema` doesn't roundtrip without mutation, we keep
    the flow objects as dicts.
    """
    try:
        contents = read_bytes_from_path(path)
    except FileNotFoundError:
        raise TerminalError(f"Path {path!r} doesn't exist")
    except Exception as exc:
        click.secho(f"Error loading {path!r}:", fg="red")
        log_exception(exc, indent=2)
        raise TerminalError from exc
    try:
        flows_json = FlowsJSONSchema().load(json.loads(contents))
    except Exception:
        raise TerminalError(
            f"{path!r} is not a valid Prefect flows `json` file.")

    if flows_json["version"] != 1:
        raise TerminalError(
            f"{path!r} is version {flows_json['version']}, only version 1 is supported"
        )

    return flows_json["flows"]
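
All of the examples on this page center on `read_bytes_from_path`, which returns raw bytes from a local path or a URL-style path. As a rough mental model only (a simplified sketch, not Prefect's actual implementation), the function dispatches on the path's scheme:

from urllib.parse import urlparse


def read_bytes_sketch(path: str) -> bytes:
    # Local filesystem covers both bare paths and "agent://" paths,
    # which are read from the agent's own disk.
    parsed = urlparse(path)
    if parsed.scheme in ("", "agent"):
        local_path = path if parsed.scheme == "" else parsed.netloc + parsed.path
        with open(local_path, "rb") as f:
            return f.read()
    # The real function also handles s3://, gcs://, and http(s)://;
    # the tests in the later examples exercise those branches.
    raise ValueError(f"Unsupported scheme {parsed.scheme!r}")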
Example #2
    def test_read_local_file(self, tmpdir, scheme):
        path = str(tmpdir.join("test.yaml"))
        with open(path, "wb") as f:
            f.write(b"hello")

        path_arg = (path if scheme is None else "agent://" +
                    os.path.splitdrive(path)[1].replace("\\", "/"))
        res = read_bytes_from_path(path_arg)
        assert res == b"hello"
Example #3
    def test_read_s3(self, monkeypatch):
        pytest.importorskip("prefect.utilities.aws")
        client = MagicMock()
        monkeypatch.setattr(
            "prefect.utilities.aws.get_boto_client",
            MagicMock(return_value=client),
        )
        res = read_bytes_from_path("s3://mybucket/path/to/thing.yaml")
        assert client.download_fileobj.call_args[1]["Bucket"] == "mybucket"
        assert client.download_fileobj.call_args[1]["Key"] == "path/to/thing.yaml"
        assert isinstance(res, bytes)
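
For reference, the S3 branch exercised by this test downloads the object into an in-memory buffer via boto3's `download_fileobj`, which is why the mock's `Bucket` and `Key` keyword arguments are asserted. A minimal standalone sketch of that pattern (the bucket and key below are just the test's values):

import io

import boto3


def read_s3_bytes(bucket: str, key: str) -> bytes:
    # download_fileobj streams the object into any writable file-like target
    client = boto3.client("s3")
    stream = io.BytesIO()
    client.download_fileobj(Bucket=bucket, Key=key, Fileobj=stream)
    return stream.getvalue()


# data = read_s3_bytes("mybucket", "path/to/thing.yaml")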
Example #4
    def test_read_http_file(self, monkeypatch, scheme):
        pytest.importorskip("requests")

        url = f"{scheme}://some/file.json"

        requests_get = MagicMock(return_value=MagicMock(content=b"testing"))
        monkeypatch.setattr("requests.get", requests_get)

        res = read_bytes_from_path(url)
        assert requests_get.call_args[0] == (url,)
        assert res == b"testing"
Example #5
    def test_read_gcs(self, monkeypatch, scheme):
        pytest.importorskip("prefect.utilities.gcp")
        client = MagicMock()
        monkeypatch.setattr(
            "prefect.utilities.gcp.get_storage_client",
            MagicMock(return_value=client),
        )
        res = read_bytes_from_path(f"{scheme}://mybucket/path/to/thing.yaml")
        assert client.bucket.call_args[0] == ("mybucket",)
        bucket = client.bucket.return_value
        assert bucket.get_blob.call_args[0] == ("path/to/thing.yaml",)
        blob = bucket.get_blob.return_value
        assert blob.download_as_bytes.called
        assert blob.download_as_bytes.return_value is res
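
The mock assertions here spell out the google-cloud-storage call chain: client, bucket, blob, then `download_as_bytes`. Written out directly (the missing-blob check is an added assumption):

from google.cloud import storage


def read_gcs_bytes(bucket_name: str, blob_name: str) -> bytes:
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.get_blob(blob_name)  # returns None if the blob doesn't exist
    if blob is None:
        raise FileNotFoundError(f"gcs://{bucket_name}/{blob_name}")
    return blob.download_as_bytes()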
Example #6
    def test_read_local_file(self, tmpdir, scheme):
        if scheme and sys.platform == "win32":
            pytest.skip("Scheme not supported for Windows file paths")

        path = str(tmpdir.join("test.yaml"))
        with open(path, "wb") as f:
            f.write(b"hello")

        path_arg = (path if scheme is None else "agent://" +
                    os.path.splitdrive(path)[1].replace("\\", "/"))
        res = read_bytes_from_path(path_arg)
        assert res == b"hello"
Example #7
    def generate_job_spec_from_run_config(self, flow_run: GraphQLResult,
                                          run_config: KubernetesRun) -> dict:
        """Generate a k8s job spec for a flow run.

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (KubernetesRun): The flow run's run_config

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        if run_config.job_template:
            job = run_config.job_template
        else:
            job_template_path = run_config.job_template_path or self.job_template_path
            self.logger.debug("Loading job template from %r",
                              job_template_path)
            template_bytes = read_bytes_from_path(job_template_path)
            job = yaml.safe_load(template_bytes)

        identifier = uuid.uuid4().hex[:8]

        job_name = f"prefect-job-{identifier}"

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        _get_or_create(job, "metadata.labels")
        _get_or_create(job, "spec.template.metadata.labels")
        job["metadata"]["name"] = job_name
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)
        pod_spec = job["spec"]["template"]["spec"]

        # Configure `service_account_name` if specified
        if run_config.service_account_name is not None:
            # On run-config, always override
            service_account_name = run_config.service_account_name  # type: Optional[str]
        elif "serviceAccountName" in pod_spec and (
                run_config.job_template or run_config.job_template_path):
            # On run-config job-template, no override
            service_account_name = None
        else:
            # Use agent value, if provided
            service_account_name = self.service_account_name
        if service_account_name is not None:
            pod_spec["serviceAccountName"] = service_account_name

        # Configure `image_pull_secrets` if specified
        if run_config.image_pull_secrets is not None:
            # On run-config, always override
            image_pull_secrets = run_config.image_pull_secrets  # type: Optional[Iterable[str]]
        elif "imagePullSecrets" in pod_spec and (run_config.job_template or
                                                 run_config.job_template_path):
            # On run-config job template, no override
            image_pull_secrets = None
        else:
            # Use agent, if provided
            image_pull_secrets = self.image_pull_secrets
        if image_pull_secrets is not None:
            pod_spec["imagePullSecrets"] = [{
                "name": s
            } for s in image_pull_secrets]

        # Default restartPolicy to Never
        _get_or_create(job, "spec.template.spec.restartPolicy", "Never")

        # Get the first container, which is used for the prefect job
        containers = _get_or_create(job, "spec.template.spec.containers", [])
        if not containers:
            containers.append({})
        container = containers[0]

        # Set container image
        container["image"] = image = get_flow_image(
            flow_run, default=container.get("image"))

        # Set flow run command
        container["args"] = get_flow_run_command(flow_run).split()

        # Populate environment variables from the following sources,
        # with precedence:
        # - Values required for flow execution, hardcoded below
        # - Values set on the KubernetesRun object
        # - Values set using the `--env` CLI flag on the agent
        # - Values in the job template
        env = {"PREFECT__LOGGING__LEVEL": config.logging.level}
        env.update(self.env_vars)
        if run_config.env:
            env.update(run_config.env)
        env.update({
            "PREFECT__BACKEND": config.backend,
            "PREFECT__CLOUD__AGENT__LABELS": str(self.labels),
            "PREFECT__CLOUD__API": config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE": image,
            "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        })
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("env", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["env"] = container_env

        # Set resource requirements if provided
        _get_or_create(container, "resources.requests")
        _get_or_create(container, "resources.limits")
        resources = container["resources"]
        if run_config.memory_request:
            resources["requests"]["memory"] = run_config.memory_request
        if run_config.memory_limit:
            resources["limits"]["memory"] = run_config.memory_limit
        if run_config.cpu_request:
            resources["requests"]["cpu"] = run_config.cpu_request
        if run_config.cpu_limit:
            resources["limits"]["cpu"] = run_config.cpu_limit

        return job
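
The job-spec builder leans on a private helper, `_get_or_create`, whose implementation isn't shown on this page. Based solely on how it is called above, it walks a dotted path into the nested job dict, creating intermediate dicts as needed, and seeds the leaf with a default only when the leaf is absent. A sketch consistent with that usage:

from typing import Any


def _get_or_create_sketch(obj: dict, path: str, default: Any = None) -> Any:
    # `_get_or_create(job, "metadata.labels")` ensures job["metadata"]["labels"]
    # exists; a default (e.g. [] or "Never") is used only if the leaf is missing.
    *parents, leaf = path.split(".")
    for key in parents:
        obj = obj.setdefault(key, {})
    return obj.setdefault(leaf, {} if default is None else default)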
Example #8
    def mock(path):
        return data if path == s3_path else read_bytes_from_path(path)
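
This two-line fake returns canned `data` for one specific S3 path and falls through to the real reader for everything else. In a test it would typically be installed with `monkeypatch`, patching the name in whichever module the code under test imports it from (the target below is illustrative only):

# Illustrative target; patch wherever read_bytes_from_path was imported.
monkeypatch.setattr("prefect.agent.ecs.agent.read_bytes_from_path", mock)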
Example #9
    def generate_task_definition(self, flow_run: GraphQLResult,
                                 run_config: ECSRun) -> Dict[str, Any]:
        """Generate an ECS task definition from a flow run

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (ECSRun): The flow's run config

        Returns:
            - dict: a dictionary representation of an ECS task definition
        """
        if run_config.task_definition:
            taskdef = deepcopy(run_config.task_definition)
        elif run_config.task_definition_path:
            self.logger.debug(
                "Loading task definition template from %r",
                run_config.task_definition_path,
            )
            template_bytes = read_bytes_from_path(
                run_config.task_definition_path)
            taskdef = yaml.safe_load(template_bytes)
        else:
            taskdef = deepcopy(self.task_definition)

        slug = slugify.slugify(
            flow_run.flow.name,
            max_length=255 - len("prefect-"),
            word_boundary=True,
            save_order=True,
        )
        family = f"prefect-{slug}"

        tags = self.get_task_definition_tags(flow_run)

        taskdef["family"] = family

        taskdef_tags = [{"key": k, "value": v} for k, v in tags.items()]
        for entry in taskdef.get("tags", []):
            if entry["key"] not in tags:
                taskdef_tags.append(entry)
        taskdef["tags"] = taskdef_tags

        # Get the flow container (creating one if it doesn't already exist)
        containers = taskdef.setdefault("containerDefinitions", [])
        for container in containers:
            if container.get("name") == "flow":
                break
        else:
            container = {"name": "flow"}
            containers.append(container)

        # Set flow image
        container["image"] = image = get_flow_image(flow_run)

        # Set flow run command
        container["command"] = [
            "/bin/sh", "-c", get_flow_run_command(flow_run)
        ]

        # Set taskRoleArn if configured
        if run_config.task_role_arn:
            taskdef["taskRoleArn"] = run_config.task_role_arn

        # Populate static environment variables from the following sources,
        # with precedence:
        # - Static environment variables, hardcoded below
        # - Values in the task definition template
        env = {
            "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
            "PREFECT__CONTEXT__IMAGE": image,
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        }
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("environment", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["environment"] = container_env

        # Set resource requirements, if provided
        # Also ensure that cpu/memory are strings not integers
        if run_config.cpu:
            taskdef["cpu"] = str(run_config.cpu)
        elif "cpu" in taskdef:
            taskdef["cpu"] = str(taskdef["cpu"])
        if run_config.memory:
            taskdef["memory"] = str(run_config.memory)
        elif "memory" in taskdef:
            taskdef["memory"] = str(taskdef["memory"])

        return taskdef
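
The environment-merge idiom above (the Kubernetes agent uses the same one for `env`) gives the hardcoded values precedence while preserving any template entries they don't shadow. A tiny self-contained demonstration:

# New values win; template entries survive only when not shadowed.
env = {"PREFECT__CONTEXT__IMAGE": "my-image:latest"}
template_env = [
    {"name": "PREFECT__CONTEXT__IMAGE", "value": "old-image"},  # shadowed
    {"name": "CUSTOM_SETTING", "value": "kept"},                # preserved
]

merged = [{"name": k, "value": v} for k, v in env.items()]
for entry in template_env:
    if entry["name"] not in env:
        merged.append(entry)

assert merged == [
    {"name": "PREFECT__CONTEXT__IMAGE", "value": "my-image:latest"},
    {"name": "CUSTOM_SETTING", "value": "kept"},
]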
Example #10
    def __init__(  # type: ignore
        self,
        agent_config_id: str = None,
        name: str = None,
        labels: Iterable[str] = None,
        env_vars: dict = None,
        max_polls: int = None,
        agent_address: str = None,
        no_cloud_logs: bool = False,
        task_definition_path: str = None,
        run_task_kwargs_path: str = None,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        aws_session_token: str = None,
        region_name: str = None,
        cluster: str = None,
        launch_type: str = None,
        task_role_arn: str = None,
        botocore_config: dict = None,
    ) -> None:
        super().__init__(
            agent_config_id=agent_config_id,
            name=name,
            labels=labels,
            env_vars=env_vars,
            max_polls=max_polls,
            agent_address=agent_address,
            no_cloud_logs=no_cloud_logs,
        )

        from botocore.config import Config
        from prefect.utilities.aws import get_boto_client

        self.cluster = cluster
        self.launch_type = launch_type.upper() if launch_type else "FARGATE"
        self.task_role_arn = task_role_arn

        # Load boto configuration. We want to use the standard retry mode by
        # default (which isn't boto's default due to backwards compatibility).
        # The logic below lets the user override our default retry mode either
        # in `botocore_config` or in their aws config file.
        #
        # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
        # for more info.
        boto_config = Config(**(botocore_config or {}))
        if not boto_config.retries:
            boto_config.retries = {"mode": "standard"}

        self.boto_kwargs = dict(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=aws_session_token,
            region_name=region_name,
            config=boto_config,
        )  # type: Dict[str, Any]

        self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
        self.rgtag_client = get_boto_client("resourcegroupstaggingapi",
                                            **self.boto_kwargs)

        # Load default task definition
        if not task_definition_path:
            task_definition_path = DEFAULT_TASK_DEFINITION_PATH
        try:
            self.task_definition = yaml.safe_load(
                read_bytes_from_path(task_definition_path))
        except Exception:
            self.logger.error(
                "Failed to load default task definition from %r",
                task_definition_path,
                exc_info=True,
            )
            raise

        # Load default run_task kwargs
        if run_task_kwargs_path:
            try:
                self.run_task_kwargs = yaml.safe_load(
                    read_bytes_from_path(run_task_kwargs_path))
            except Exception:
                self.logger.error(
                    "Failed to load default `run_task` kwargs from %r",
                    run_task_kwargs_path,
                    exc_info=True,
                )
                raise
        else:
            self.run_task_kwargs = {}

        # If `task_role_arn` is configured on the agent, add it to the default
        # template. The agent default `task_role_arn` is only applied if using
        # the agent's default template.
        if self.task_role_arn:
            self.task_definition["taskRoleArn"] = self.task_role_arn

        # If running on fargate, auto-configure `networkConfiguration` for the
        # user if they didn't configure it themselves.
        if self.launch_type == "FARGATE" and not self.run_task_kwargs.get(
                "networkConfiguration"):
            self.run_task_kwargs[
                "networkConfiguration"] = self.infer_network_configuration()
Example #11
    def generate_task_definition(self, flow_run: GraphQLResult,
                                 run_config: ECSRun) -> Dict[str, Any]:
        """Generate an ECS task definition from a flow run

        Args:
            - flow_run (GraphQLResult): A flow run object
            - run_config (ECSRun): The flow's run config

        Returns:
            - dict: a dictionary representation of an ECS task definition
        """
        if run_config.task_definition:
            taskdef = deepcopy(run_config.task_definition)
        elif run_config.task_definition_path:
            self.logger.debug(
                "Loading task definition template from %r",
                run_config.task_definition_path,
            )
            template_bytes = read_bytes_from_path(
                run_config.task_definition_path)
            taskdef = yaml.safe_load(template_bytes)
        else:
            taskdef = deepcopy(self.task_definition)
        slug = slugify.slugify(
            f"{flow_run.flow.name}-{flow_run.id}",
            max_length=255 - len("prefect-"),
            word_boundary=True,
            save_order=True,
        )
        taskdef["family"] = f"prefect-{slug}"

        # Add some metadata tags for easier tracking by users
        taskdef.setdefault("tags", []).extend([
            {
                "key": "prefect:flow-id",
                "value": flow_run.flow.id
            },
            {
                "key": "prefect:flow-version",
                "value": str(flow_run.flow.version)
            },
        ])

        # Get the flow container (creating one if it doesn't already exist)
        containers = taskdef.setdefault("containerDefinitions", [])
        for container in containers:
            if container.get("name") == "flow":
                break
        else:
            container = {"name": "flow"}
            containers.append(container)

        # Set flow image
        container["image"] = image = get_flow_image(
            flow_run, default=container.get("image"))

        # Add `PREFECT__CONTEXT__IMAGE` environment variable
        env = {"PREFECT__CONTEXT__IMAGE": image}
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("environment", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["environment"] = container_env

        # Ensure that cpu/memory are strings not integers
        if "cpu" in taskdef:
            taskdef["cpu"] = str(taskdef["cpu"])
        if "memory" in taskdef:
            taskdef["memory"] = str(taskdef["memory"])

        # If we're using Fargate, we need to explicitly set an executionRoleArn on the
        # task definition. If one isn't present, then try to load it from the run_config
        # and then the agent's default.
        if "executionRoleArn" not in taskdef:
            if run_config.execution_role_arn:
                taskdef["executionRoleArn"] = run_config.execution_role_arn
            elif self.execution_role_arn:
                taskdef["executionRoleArn"] = self.execution_role_arn

        # Set requiresCompatibilities from the agent's launch type, if not already set
        if "requiresCompatibilities" not in taskdef and self.launch_type:
            taskdef["requiresCompatibilities"] = [self.launch_type]

        return taskdef
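
The `family` string has to be a valid, bounded-length ECS family name, which is why the flow name and run id are run through `slugify` with a `max_length` budget that leaves room for the `prefect-` prefix. For example (the flow name and run id here are hypothetical):

import slugify

family = "prefect-" + slugify.slugify(
    "My ETL Flow!-4a9b1c2d",           # hypothetical "<flow name>-<run id>"
    max_length=255 - len("prefect-"),  # leave room for the prefix
    word_boundary=True,                # don't truncate mid-word
    save_order=True,                   # keep leading words when truncating
)
print(family)  # prefect-my-etl-flow-4a9b1c2d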
Example #12
    def generate_job_spec_from_run_config(self,
                                          flow_run: GraphQLResult) -> dict:
        """Generate a k8s job spec for a flow run.

        Args:
            - flow_run (GraphQLResult): A flow run object

        Returns:
            - dict: a dictionary representation of a k8s job for flow execution
        """
        run_config = RunConfigSchema().load(flow_run.flow.run_config)

        if run_config.job_template:
            job = run_config.job_template
        else:
            job_template_path = run_config.job_template_path or self.job_template_path
            self.logger.debug("Loading job template from %r",
                              job_template_path)
            template_bytes = read_bytes_from_path(job_template_path)
            job = yaml.safe_load(template_bytes)

        identifier = uuid.uuid4().hex[:8]

        job_name = f"prefect-job-{identifier}"

        # Populate job metadata for identification
        k8s_labels = {
            "prefect.io/identifier": identifier,
            "prefect.io/flow_run_id": flow_run.id,  # type: ignore
            "prefect.io/flow_id": flow_run.flow.id,  # type: ignore
        }
        _get_or_create(job, "metadata.labels")
        _get_or_create(job, "spec.template.metadata.labels")
        job["metadata"]["name"] = job_name
        job["metadata"]["labels"].update(**k8s_labels)
        job["spec"]["template"]["metadata"]["labels"].update(**k8s_labels)

        # Get the first container, which is used for the prefect job
        containers = _get_or_create(job, "spec.template.spec.containers", [])
        if not containers:
            containers.append({})
        container = containers[0]

        # Set container image
        container["image"] = image = get_flow_image(flow_run)

        # Set flow run command
        container["args"] = [get_flow_run_command(flow_run)]

        # Populate environment variables from the following sources,
        # with precedence:
        # - Values required for flow execution, hardcoded below
        # - Values set on the KubernetesRun object
        # - Values set using the `--env` CLI flag on the agent
        # - Values in the job template
        env = self.env_vars.copy()
        if run_config.env:
            env.update(run_config.env)
        env.update({
            "PREFECT__CLOUD__API": config.cloud.api,
            "PREFECT__CLOUD__AUTH_TOKEN": config.cloud.agent.auth_token,
            "PREFECT__CLOUD__USE_LOCAL_SECRETS": "false",
            "PREFECT__CONTEXT__FLOW_RUN_ID": flow_run.id,
            "PREFECT__CONTEXT__FLOW_ID": flow_run.flow.id,
            "PREFECT__CONTEXT__IMAGE": image,
            "PREFECT__LOGGING__LOG_TO_CLOUD": str(self.log_to_cloud).lower(),
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS": "prefect.engine.cloud.CloudTaskRunner",
        })
        container_env = [{"name": k, "value": v} for k, v in env.items()]
        for entry in container.get("env", []):
            if entry["name"] not in env:
                container_env.append(entry)
        container["env"] = container_env

        # Set resource requirements if provided
        _get_or_create(container, "resources.requests")
        _get_or_create(container, "resources.limits")
        resources = container["resources"]
        if run_config.memory_request:
            resources["requests"]["memory"] = run_config.memory_request
        if run_config.memory_limit:
            resources["limits"]["memory"] = run_config.memory_limit
        if run_config.cpu_request:
            resources["requests"]["cpu"] = run_config.cpu_request
        if run_config.cpu_limit:
            resources["limits"]["cpu"] = run_config.cpu_limit

        return job
Example #13
            aws_session_token=aws_session_token,
            region_name=region_name,
            config=boto_config,
        )  # type: Dict[str, Any]

        self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
        self.rgtag_client = get_boto_client(
            "resourcegroupstaggingapi", **self.boto_kwargs
        )

        # Load default task definition
        if not task_definition_path:
            task_definition_path = DEFAULT_TASK_DEFINITION_PATH
        try:
            self.task_definition = yaml.safe_load(
                read_bytes_from_path(task_definition_path)
            )
        except Exception:
            self.logger.error(
                "Failed to load default task definition from %r",
                task_definition_path,
                exc_info=True,
            )
            raise

        # Load default run_task kwargs
        if run_task_kwargs_path:
            try:
                self.run_task_kwargs = yaml.safe_load(
                    read_bytes_from_path(run_task_kwargs_path)
                )