def test_load_and_run_flow(monkeypatch, tmpdir):
    """Verify that ``load_and_run_flow`` fetches the stored flow and runs it.

    A flow with a custom environment is saved to ``Local`` storage under
    ``tmpdir``. The ``Client`` used by ``prefect.environments.execution.base``
    is replaced with a mock whose ``graphql`` call returns a flow run pointing
    at that storage. The test passes when the environment's ``run`` method
    flips the ``TEST_RUN_CALLED`` environment variable to ``"TRUE"``.
    """
    myflow = Flow("test-flow")

    # The flow goes through a pickle round trip, so there is no convenient
    # shared object on which to record "run was called". An environment
    # variable serves as a process-wide flag instead, because it is not
    # eagerly copied by cloudpickle.
    monkeypatch.setenv("TEST_RUN_CALLED", "FALSE")

    class MyEnvironment(Environment):
        def run(self, flow):
            assert flow is myflow
            os.environ["TEST_RUN_CALLED"] = "TRUE"

    myflow.environment = MyEnvironment()

    local_storage = Local(str(tmpdir))
    myflow.storage = local_storage
    local_storage.add_flow(myflow)

    # Assemble the fake GraphQL response from the inside out.
    flow_payload = GraphQLResult(
        {"name": myflow.name, "storage": local_storage.serialize()}
    )
    flow_run_record = GraphQLResult({"flow": flow_payload})
    query_response = MagicMock(data=MagicMock(flow_run=[flow_run_record]))

    fake_client_cls = MagicMock()
    fake_client_cls.return_value.graphql = MagicMock(return_value=query_response)
    monkeypatch.setattr(
        "prefect.environments.execution.base.Client", fake_client_cls
    )

    with set_temporary_config({"cloud.auth_token": "test"}):
        with prefect.context({"flow_run_id": "id"}):
            load_and_run_flow()

    assert os.environ["TEST_RUN_CALLED"] == "TRUE"
def register_flow_with_saturn(
    self,
    flow: Flow,
    dask_cluster_kwargs: Optional[Dict[str, Any]] = None,
    dask_adapt_kwargs: Optional[Dict[str, Any]] = None,
    instance_size: Optional[str] = None,
) -> Flow:
    """
    Configure a flow with everything it needs to run on a Saturn Dask
    cluster, and return that same flow.

    :param flow: A Prefect ``Flow`` object
    :param dask_cluster_kwargs: Dictionary of keyword arguments to the
        ``dask_saturn.SaturnCluster`` constructor. If ``None`` (the
        default), the cluster will be created with one worker
        (``{"n_workers": 1}``).
    :param dask_adapt_kwargs: Dictionary of keyword arguments to pass to
        ``dask_saturn.SaturnCluster.adapt()``. If ``None`` (the default),
        adaptive scaling will not be used.
    :param instance_size: Instance size for the flow run. Does not affect
        the size of dask workers. If ``None``, the smallest available size
        will be used.

    Prefect components
    ------------------

    The following components of the ``Flow`` passed in are modified.

    * ``.storage``: a ``Webhook`` storage instance is added

    If using ``prefect<0.14.0``

    * ``.environment``: a ``KubernetesJobEnvironment`` with a
      ``DaskExecutor`` is added. This environment will use the same image
      as the notebook from which this code is run.

    If using ``prefect>=0.14.0``

    * ``run_config``: a ``KubernetesRun`` is added, which by default will
      use the same image, start script, and environment variables as the
      notebook from which this code is run.
    * ``executor``: a ``DaskExecutor``, which uses the same image as the
      notebook from which this code is run.

    Adaptive scaling is off by default
    ----------------------------------

    Dask's `adaptive scaling
    <https://docs.dask.org/en/latest/setup/adaptive.html>`_ can improve
    resource utilization by allowing Dask to spin things up and down based
    on your workload.

    This is off by default in the ``DaskExecutor`` created by
    ``prefect-saturn`` because in some cases, the interaction between Dask
    and Prefect can lead adaptive scaling to make choices that interfere
    with the way Prefect executes flows.

    Dask cluster is not closed at the end of each flow run
    ------------------------------------------------------

    The first time a flow runs in Saturn, it will look for a specific Dask
    cluster. If that cluster isn't found, it will start one. By default,
    the Dask cluster will not be shut down when the flow is done running.
    All runs of one flow are executed on the same Saturn Dask cluster.
    Autoclosing is off by default to avoid the situation where you have two
    runs of the same flow happening at the same time, and one flow kills
    the Dask cluster the other flow is still running on.

    If you are not worried about concurrent flow runs and want to know that
    the Dask cluster will be shut down at the end of each flow run, you can
    override this default behavior with the parameter ``autoclose``.
    Setting this to ``True`` will tell Saturn to close down the Dask
    cluster at the end of a flow run.

    .. code-block:: python

        flow = integration.register_flow_with_saturn(
            flow=flow,
            dask_cluster_kwargs={
                "n_workers": 4,
                "autoclose": True
            }
        )

    Instance size
    -------------

    Use ``prefect_saturn.describe_sizes()`` to get the available
    instance_size options. The returned dict maps instance_size to a short
    description of the resources available on that size (e.g.
    {"medium": "Medium - 2 cores - 4 GB RAM", ...})
    """
    # Resolve the cluster kwargs. ``None`` yields the defaults, a non-empty
    # dict is layered on top of the defaults, and an explicitly empty dict
    # is passed through untouched.
    cluster_kwargs = dask_cluster_kwargs
    if cluster_kwargs is None or cluster_kwargs != {}:
        merged_kwargs = {"n_workers": 1, "autoclose": False}
        if cluster_kwargs is not None:
            merged_kwargs.update(cluster_kwargs)
        cluster_kwargs = merged_kwargs

    adapt_kwargs = {} if dask_adapt_kwargs is None else dask_adapt_kwargs

    self._set_flow_metadata(flow, instance_size=instance_size)
    flow.storage = self._get_storage()

    if not RUN_CONFIG_AVAILABLE:
        # Without run configs, fall back to an environment-based setup.
        flow.environment = self._get_environment(
            cluster_kwargs=cluster_kwargs, adapt_kwargs=adapt_kwargs
        )
        return flow

    flow.executor = DaskExecutor(
        cluster_class="dask_saturn.SaturnCluster",
        cluster_kwargs=cluster_kwargs,
        adapt_kwargs=adapt_kwargs,
    )
    flow.run_config = KubernetesRun(
        job_template=self._flow_run_job_spec,
        labels=self._saturn_flow_labels,
        image=self._saturn_image,
    )
    return flow
from prefect import task, Flow
from prefect.environments import DaskKubernetesEnvironment
from prefect.environments.storage import S3


@task
def get_value():
    """Return the example string consumed by the downstream task."""
    return "Example!"


@task
def output_value(value):
    """Print the value received from the upstream task."""
    print(value)


flow = Flow("dk8s-debug")

# Wire the tasks together with the imperative API: declare the upstream
# dependency, then bind the upstream result to the ``value`` argument.
output_value.set_upstream(get_value, flow=flow)
output_value.bind(value=get_value, flow=flow)

# Store the flow in S3, using the "AWS_CREDENTIALS" secret.
s3_storage = S3(bucket="my-prefect-flows", secrets=["AWS_CREDENTIALS"])
flow.storage = s3_storage

# Run on a Dask-on-Kubernetes environment using the given image.
k8s_environment = DaskKubernetesEnvironment(
    metadata={"image": "joshmeek18/flows:all_extras9"}
)
flow.environment = k8s_environment

flow.register(project_name="Demo")
# NOTE(review): `setup` and `teardown` both take `self`, and `teardown` reads
# `self.flow_run_id`, so they look like methods of a class whose header is not
# visible in this fragment (presumably `DaskReport`, which is passed as
# `plugin=` further down) -- confirm and keep them under that class.
def setup(self, worker=None):
    # No-op: the body is just `pass`.
    pass


def teardown(self, worker=None):
    # Imported inside the function body rather than at module level.
    from prefect import Client

    # The leading/trailing spaces are part of the message text; `{}` is
    # filled with the `worker` argument.
    msg = """ Lost communication with Dask worker: {} """.format(worker)
    # Write one ERROR-level log entry for the flow run identified by
    # `self.flow_run_id`, under the logger name "DaskWorkerPlugin".
    Client().write_run_logs([
        dict(
            flow_run_id=self.flow_run_id,
            name="DaskWorkerPlugin",
            message=msg,
            level="ERROR",
        )
    ])


# Task whose only work is to sleep for 60 seconds.
@task
def sleep_me():
    time.sleep(60)


# Build a flow containing only `sleep_me`, attach a LocalEnvironment whose
# DaskExecutor targets the scheduler address "localhost:8786", and register
# the flow under the "Demo" project.
flow = Flow("plugin-test", tasks=[sleep_me])
# NOTE(review): `plugin=DaskReport` passes the class itself, not an instance;
# whether DaskExecutor accepts a `plugin` keyword is not visible here -- confirm.
flow.environment = LocalEnvironment(
    executor=DaskExecutor(address="localhost:8786", plugin=DaskReport))
flow.register(project_name="Demo")