Example #1
# Imports reconstructed so this test snippet stands alone; module paths assume Prefect 0.x
import os
from unittest.mock import MagicMock

import prefect
from prefect import Flow
from prefect.environments import Environment
from prefect.environments.execution.base import load_and_run_flow
from prefect.environments.storage import Local
from prefect.utilities.configuration import set_temporary_config
from prefect.utilities.graphql import GraphQLResult


def test_load_and_run_flow(monkeypatch, tmpdir):
    myflow = Flow("test-flow")

    # This is gross. Since the flow is pickled/unpickled, there's no easy way
    # to access the same object to set a flag. Resort to setting an environment
    # variable as a global flag that won't get copied eagerly through
    # cloudpickle.
    monkeypatch.setenv("TEST_RUN_CALLED", "FALSE")

    class MyEnvironment(Environment):
        def run(self, flow):
            assert flow is myflow
            os.environ["TEST_RUN_CALLED"] = "TRUE"

    myflow.environment = MyEnvironment()

    storage = Local(str(tmpdir))
    myflow.storage = storage
    storage.add_flow(myflow)

    gql_return = MagicMock(
        return_value=MagicMock(
            data=MagicMock(
                flow_run=[
                    GraphQLResult(
                        {
                            "flow": GraphQLResult(
                                {"name": myflow.name, "storage": storage.serialize()}
                            )
                        }
                    )
                ],
            )
        )
    )
    client = MagicMock()
    client.return_value.graphql = gql_return
    monkeypatch.setattr("prefect.environments.execution.base.Client", client)

    with set_temporary_config({"cloud.auth_token": "test"}), prefect.context(
        {"flow_run_id": "id"}
    ):
        load_and_run_flow()
    assert os.environ["TEST_RUN_CALLED"] == "TRUE"
Example #2
#!/usr/bin/env python

import prefect
from prefect import task, Flow
from prefect.environments.storage import Docker


@task
def hello_task():
    logger = prefect.context.get("logger")
    logger.info("test message")


flow = Flow("prefect-base-logs", tasks=[hello_task])
base_image = "prefect-base-logs:latest"

flow.storage = Docker(base_image=base_image, local_image=True)
flow.register(project_name="test")
Example #3
        name="SQL-stuff"
        # commit: bool = False,
)
#--------------------------------------------------------------
# Flow context
#--------------------------------------------------------------
# storage = GitHub(
#     repo="peyton-trp/prefect-test",
#     path="simple_flow.py",
#     secrets=["GITHUB_ACCESS_TOKEN"]
# )

flow = Flow("imp_flow")
flow.storage = GitHub(
    repo="peyton-trp/prefect-test",
    path="imperative_flow.py",
    secrets=["GITHUB_ACCESS_TOKEN"]
)
# password = EnvVarSecret(prefect.config.sql_server.password_var)
# thing = Parameter("thing", default=["Thing 1"])

# flow.add_task()

#--------------------------------------------------------------
# Closing Details
#--------------------------------------------------------------
# f.run_config = LocalRun(
#     env={
#         "PREFECT__USER_CONFIG_PATH": '/Users/peytonrunyan/TRP/prefect/config.toml'})

Example #4
from prefect import task, Flow
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import S3


@task
def get_value():
    return "Example!"


@task
def output_value(value):
    print(value)


flow = Flow("dk8s-debug", )

# set task dependencies using imperative API
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

flow.storage = S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"])
flow.environment = DaskKubernetesEnvironment(
    metadata={"image": "joshmeek18/flows:all_extras9"})
flow.register(project_name="Demo")
Example #5
import prefect
from prefect import task, Flow
from prefect.environments.storage import Docker


@task
def hello_task():
    logger = prefect.context.get("logger")
    logger.info("Hello, Cloud!")


flow = Flow("hello-flow", tasks=[hello_task])

flow.storage = Docker(registry_url="docker.io/joshmeek18")
flow.register(project_name="Hello, World!")
Example #6
from prefect import task, Flow
from prefect.environments.storage import GitHub

@task
def t():
    raise Exception("NONONONO")

f = Flow("filetest", tasks=[t])

# You would still configure storage object on flow
# Maybe we should update storage.add_flow to take a filepath as well as flow object
f.storage = GitHub(repo="joshmeek/flow_storage_test")

# f.serialize(build=True)

# we might want a way to register a file from the command line
# prefect register -f file.py
#   Load flow
#   Register it
# would avoid having to run script directly

# idea: secondary storage where you say "flow is here"
# no need in this case to do the file magic

# possible idea for extra commands: check a flag in the env
# that would be set during a run so they wouldn't run again

# things that could be parameterized:
# name of the file, where it's currently stored,
# where we want to move it to
Example #7
    def register_flow_with_saturn(
        self,
        flow: Flow,
        dask_cluster_kwargs: Optional[Dict[str, Any]] = None,
        dask_adapt_kwargs: Optional[Dict[str, Any]] = None,
        instance_size: Optional[str] = None,
    ) -> Flow:
        """
        Given a flow, set up all the details needed to run it on
        a Saturn Dask cluster.

        :param flow: A Prefect ``Flow`` object
        :param dask_cluster_kwargs: Dictionary of keyword arguments
            to the ``dask_saturn.SaturnCluster`` constructor. If ``None``
            (the default), the cluster will be created with
            one worker (``{"n_workers": 1}``).
        :param dask_adapt_kwargs: Dictionary of keyword arguments
            to pass to ``dask_saturn.SaturnCluster.adapt()``. If
            ``None`` (the default), adaptive scaling will not be used.
        :param instance_size: Instance size for the flow run. Does not affect
            the size of dask workers. If ``None``, the smallest available size
            will be used.

        Prefect components
        ------------------

        This method modifies the following components of ``Flow`` objects
        passed to it.

        * ``.storage``: a ``Webhook`` storage instance is added

        If using ``prefect<0.14.0``

        * ``.environment``: a ``KubernetesJobEnvironment`` with a ``DaskExecutor``
            is added. This environment will use the same image as the notebook
            from which this code is run.

        If using ``prefect>=0.14.0``

        * ``.run_config``: a ``KubernetesRun`` is added, which by default will use
            the same image, start script, and environment variables as the notebook
            from which this code is run.
        * ``.executor``: a ``DaskExecutor``, which uses the same image as the notebook
            from which this code is run.

        Adaptive scaling is off by default
        ----------------------------------

        Dask's `adaptive scaling <https://docs.dask.org/en/latest/setup/adaptive.html>`_
        can improve resource utilization by allowing Dask to scale the number of
        workers up and down based on your workload.

        This is off by default in the ``DaskExecutor`` created by ``prefect-saturn``
        because in some cases, the interaction between Dask and Prefect can lead
        adaptive scaling to make choices that interfere with the way Prefect executes
        flows.
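
        If you do want adaptive scaling, pass bounds through ``dask_adapt_kwargs``.
        The keyword names below follow Dask's standard ``adapt()`` interface and the
        values are purely illustrative:

        .. code-block:: python

            flow = integration.register_flow_with_saturn(
                flow=flow,
                dask_adapt_kwargs={"minimum": 1, "maximum": 3}
            )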

        Dask cluster is not closed at the end of each flow run
        ------------------------------------------------------

        The first time a flow runs in Saturn, it will look for a specific Dask cluster. If
        that cluster isn't found, it will start one. By default, the Dask cluster will not
        be shut down when the flow is done running. All runs of one flow are executed on the
        same Saturn Dask cluster. Autoclosing is off by default to avoid the situation
        where two runs of the same flow happen at the same time and one run shuts down
        the Dask cluster that the other run is still using.

        If you are not worried about concurrent flow runs and want to be sure that the Dask
        cluster will be shut down at the end of each flow run, you can override this default
        behavior with the parameter ``autoclose``. Setting this to ``True`` will tell Saturn
        to close down the Dask cluster at the end of a flow run.

        .. code-block:: python

            flow = integration.register_flow_with_saturn(
                flow=flow,
                dask_cluster_kwargs={
                    "n_workers": 4,
                    "autoclose": True
                }
            )

        Instance size
        -------------

        Use ``prefect_saturn.describe_sizes()`` to get the available ``instance_size`` options.
        The returned dict maps each ``instance_size`` to a short description of the resources
        available on that size (e.g. ``{"medium": "Medium - 2 cores - 4 GB RAM", ...}``).
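
        For example (illustrative values), you might inspect the available sizes and
        then request one when registering the flow:

        .. code-block:: python

            import prefect_saturn

            print(prefect_saturn.describe_sizes())

            flow = integration.register_flow_with_saturn(
                flow=flow,
                instance_size="medium"
            )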
        """
        default_cluster_kwargs = {"n_workers": 1, "autoclose": False}

        if dask_cluster_kwargs is None:
            dask_cluster_kwargs = default_cluster_kwargs
        elif dask_cluster_kwargs != {}:
            default_cluster_kwargs.update(dask_cluster_kwargs)
            dask_cluster_kwargs = default_cluster_kwargs

        if dask_adapt_kwargs is None:
            dask_adapt_kwargs = {}

        self._set_flow_metadata(flow, instance_size=instance_size)

        storage = self._get_storage()
        flow.storage = storage

        if RUN_CONFIG_AVAILABLE:
            flow.executor = DaskExecutor(
                cluster_class="dask_saturn.SaturnCluster",
                cluster_kwargs=dask_cluster_kwargs,
                adapt_kwargs=dask_adapt_kwargs,
            )
            flow.run_config = KubernetesRun(
                job_template=self._flow_run_job_spec,
                labels=self._saturn_flow_labels,
                image=self._saturn_image,
            )
        else:
            flow.environment = self._get_environment(
                cluster_kwargs=dask_cluster_kwargs, adapt_kwargs=dask_adapt_kwargs
            )

        return flow
Example #8
from prefect import Flow, task
from prefect import storage


@task
def a():
    print("A")


f = Flow("0.14.0-test", tasks=[a])

f.storage = storage.Local(
    stored_as_script=True,
    path="/Users/josh/Desktop/code/Dummy-Flows/fttest.py")

# f.register("Demo")