# Imports assumed from the surrounding module (Prefect 0.x layout, where
# `create_markdown` lives in `prefect.artifacts`)
from io import BytesIO

import matplotlib.pyplot as plt
from prefect.artifacts import create_markdown


def make_plot(df):
    """Make a plot of the star counts and post it as an artifact"""
    ax = df.plot.line(title="GitHub Stars")
    ax.set(xlabel="Date", ylabel="Stars")
    fil = BytesIO()
    plt.savefig(fil, format="svg")
    fig_body = fil.getvalue().decode("utf-8")
    create_markdown(fig_body)
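# Hypothetical usage sketch (not from the original source): build a small
# star-count series and post it. The DataFrame contents and dates below are
# illustrative placeholders only.
import pandas as pd

star_counts = pd.DataFrame(
    {"stars": [10, 25, 40, 80]},
    index=pd.date_range("2021-01-01", periods=4, freq="W"),
)
make_plot(star_counts)  # saves the line plot as SVG and posts it as an artifact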
def test_create_markdown(client, running_with_backend):
    with context(task_run_id="trid"):
        artifact_id = artifacts.create_markdown(markdown="markdown_here")

    assert artifact_id == "id"
    assert client.create_task_run_artifact.called
    assert client.create_task_run_artifact.call_args[1] == {
        "data": {"markdown": "markdown_here"},
        "kind": "markdown",
        "task_run_id": "trid",
    }
def make_markdown_artifact_dedent():
    mkdown = textwrap.dedent(
        """
        # This is markdown [apparently](https://prefect.io)

        Am I markdown?
        -----

        #### We'll find out
        """
    )
    artifact_id = artifacts.create_markdown(mkdown)
    print(artifact_id)
def test_old_create_markdown(client, running_with_backend):
    with context(task_run_id="trid"):
        with pytest.warns(
            UserWarning,
            match="has been moved to `prefect.backend.create_markdown_artifact`",
        ):
            artifact_id = artifacts.create_markdown(markdown="markdown_here")

    assert artifact_id == "id"
    assert client.create_task_run_artifact.called
    assert client.create_task_run_artifact.call_args[1] == {
        "data": {"markdown": "markdown_here"},
        "kind": "markdown",
        "task_run_id": "trid",
    }
def create_df_artifact(df):
    json_df = df.to_json()
    create_markdown(f"Transformed Dataframe:\n{json_df}")
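# Hypothetical usage sketch for create_df_artifact, assuming `create_markdown`
# is in scope (Prefect 0.x `prefect.artifacts`) and a backend is configured so
# the artifact is actually posted; the DataFrame is a placeholder.
import pandas as pd

transformed = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
create_df_artifact(transformed)  # posts "Transformed Dataframe:" plus the JSON body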
def test_create_markdown_not_using_backend(client):
    with context(task_run_id="trid"):
        artifact_id = artifacts.create_markdown(markdown="markdown_here")

    assert artifact_id is None
    assert not client.create_task_run_artifact.called
def run(
    self,
    checkpoint_name: str = None,
    context: "ge.DataContext" = None,
    assets_to_validate: list = None,
    batch_kwargs: dict = None,
    expectation_suite_name: str = None,
    context_root_dir: str = None,
    runtime_environment: Optional[dict] = None,
    run_name: str = None,
    run_info_at_end: bool = True,
    disable_markdown_artifact: bool = False,
    validation_operator: str = "action_list_operator",
    evaluation_parameters: Optional[dict] = None,
):
    """
    Task run method.

    Args:
        - checkpoint_name (str, optional): the name of the checkpoint; should match the
            filename of the checkpoint without .py
        - context (DataContext, optional): an in-memory GE DataContext object, e.g.
            `ge.data_context.DataContext()`. If not provided then `context_root_dir`
            will be used to look for one.
        - assets_to_validate (list, optional): a list of assets to validate when running
            the validation operator.
        - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when
            validating assets.
        - expectation_suite_name (str, optional): the name of an expectation suite to be
            used when validating assets.
        - context_root_dir (str, optional): the absolute or relative path to the
            directory holding your `great_expectations.yml`
        - runtime_environment (dict, optional): a dictionary of great expectations config
            key-value pairs to overwrite your config in `great_expectations.yml`
        - run_name (str, optional): the name of this Great Expectations validation run;
            defaults to the task slug
        - run_info_at_end (bool, optional): add run info to the end of the artifact
            generated by this task. Defaults to `True`.
        - disable_markdown_artifact (bool, optional): toggle the posting of a markdown
            artifact from this task. Defaults to `False`.
        - validation_operator (str, optional): configure the actions to be executed after
            running validation. Defaults to `action_list_operator`.
        - evaluation_parameters (Optional[dict], optional): the evaluation parameters to
            use when running validation. For more information, see
            [example](https://docs.prefect.io/api/latest/tasks/great_expectations.html#rungreatexpectationsvalidation)
            and
            [docs](https://docs.greatexpectations.io/en/latest/reference/core_concepts/evaluation_parameters.html).
    Raises:
        - `signals.FAIL` if the validation was not a success

    Returns:
        - result
            (`great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult`):
            The Great Expectations metadata returned from the validation
    """
    runtime_environment = runtime_environment or dict()

    # Load the context if it is not provided directly
    if not context:
        context = ge.DataContext(
            context_root_dir=context_root_dir,
            runtime_environment=runtime_environment,
        )

    # Check that exactly one of the mutually exclusive parameter groups was provided
    if (
        sum(
            bool(x)
            for x in [
                (expectation_suite_name and batch_kwargs),
                assets_to_validate,
                checkpoint_name,
            ]
        )
        != 1
    ):
        raise ValueError(
            "Exactly one of expectation_suite_name + batch_kwargs, assets_to_validate, "
            "or checkpoint_name is required to run validation."
        )

    # If assets are not provided directly through `assets_to_validate` they need to be
    # loaded: if a `checkpoint_name` is supplied, load the suite and batch_kwargs from
    # the checkpoint; otherwise get the batch from `batch_kwargs` and
    # `expectation_suite_name`
    if not assets_to_validate:
        assets_to_validate = []
        if checkpoint_name:
            ge_checkpoint = context.get_checkpoint(checkpoint_name)
            for batch in ge_checkpoint["batches"]:
                batch_kwargs = batch["batch_kwargs"]
                for suite_name in batch["expectation_suite_names"]:
                    suite = context.get_expectation_suite(suite_name)
                    batch = context.get_batch(batch_kwargs, suite)
                    assets_to_validate.append(batch)
            validation_operator = ge_checkpoint["validation_operator_name"]
        else:
            assets_to_validate.append(
                context.get_batch(batch_kwargs, expectation_suite_name)
            )

    # Run the validation operator
    results = context.run_validation_operator(
        validation_operator,
        assets_to_validate=assets_to_validate,
        run_id={"run_name": run_name or prefect.context.get("task_slug")},
        evaluation_parameters=evaluation_parameters,
    )

    # Generate a markdown artifact from the validation results; note that
    # `run_info_at_end` is passed through unchanged so the parameter's documented
    # default actually takes effect
    if not disable_markdown_artifact:
        validation_results_page_renderer = (
            ge.render.renderer.ValidationResultsPageRenderer(
                run_info_at_end=run_info_at_end
            )
        )
        rendered_document_content_list = (
            validation_results_page_renderer.render_validation_operator_result(
                validation_operator_result=results
            )
        )
        markdown_artifact = " ".join(
            ge.render.view.DefaultMarkdownPageView().render(
                rendered_document_content_list
            )
        )
        create_markdown(markdown_artifact)

    if results.success is False:
        raise signals.FAIL(result=results)

    return results
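# A hedged sketch of wiring this run method into a flow, assuming it belongs to
# Prefect's RunGreatExpectationsValidation task (the class is not shown above);
# the checkpoint name and context_root_dir are placeholders.
from prefect import Flow
from prefect.tasks.great_expectations import RunGreatExpectationsValidation

validation_task = RunGreatExpectationsValidation()

with Flow("ge-validation") as flow:
    validation_task(
        checkpoint_name="my_checkpoint",        # placeholder checkpoint name
        context_root_dir="great_expectations",  # directory with great_expectations.yml
    )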
def run(self, readme, ref):
    """Post the given readme as a markdown artifact and return the artifact id."""
    artifact_id = artifacts.create_markdown(readme)
    return artifact_id
def run(
    self,
    checkpoint_name: str = None,
    context: "ge.DataContext" = None,
    assets_to_validate: list = None,
    batch_kwargs: dict = None,
    expectation_suite_name: str = None,
    get_checkpoint_from_context: bool = False,
    context_root_dir: str = None,
    runtime_environment: Optional[dict] = None,
    run_name: str = None,
    run_info_at_end: bool = True,
    disable_markdown_artifact: bool = False,
):
    """
    Task run method.

    Args:
        - checkpoint_name (str, optional): the name of the checkpoint; should match the
            filename of the checkpoint without .py
        - context (DataContext, optional): an in-memory GE DataContext object, e.g.
            `ge.data_context.DataContext()`. If not provided then `context_root_dir`
            will be used to look for one.
        - assets_to_validate (list, optional): a list of assets to validate when running
            the validation operator. If not provided then `batch_kwargs` and
            `expectation_suite_name` will be used if a context is provided. Also, if not
            provided and `get_checkpoint_from_context` is `True` then the assets will be
            loaded from that context.
        - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when
            validating assets.
        - expectation_suite_name (str, optional): the name of an expectation suite to be
            used when validating assets.
        - get_checkpoint_from_context (bool, optional): get the checkpoint from the
            context. Defaults to `False`.
        - context_root_dir (str, optional): the absolute or relative path to the
            directory holding your `great_expectations.yml`
        - runtime_environment (dict, optional): a dictionary of great expectations config
            key-value pairs to overwrite your config in `great_expectations.yml`
        - run_name (str, optional): the name of this Great Expectations validation run;
            defaults to the task slug
        - run_info_at_end (bool, optional): add run info to the end of the artifact
            generated by this task. Defaults to `True`.
        - disable_markdown_artifact (bool, optional): toggle the posting of a markdown
            artifact from this task. Defaults to `False`.
    Raises:
        - `signals.FAIL` if the validation was not a success

    Returns:
        - result
            (`great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult`):
            The Great Expectations metadata returned from the validation
    """
    if checkpoint_name is None:
        raise ValueError("You must provide the checkpoint name.")

    runtime_environment = runtime_environment or dict()

    # Load the context if it is not provided directly
    if not context:
        context = ge.DataContext(
            context_root_dir=context_root_dir,
            runtime_environment=runtime_environment,
        )

    # If assets are not provided directly through `assets_to_validate` they need to be
    # loaded: if the checkpoint is being loaded from the context then load the suite and
    # batch from there; otherwise get the batch from `batch_kwargs` and
    # `expectation_suite_name`
    if not assets_to_validate:
        assets_to_validate = []
        if get_checkpoint_from_context:
            ge_checkpoint = context.get_checkpoint(checkpoint_name)
            for batch in ge_checkpoint["batches"]:
                batch_kwargs = batch["batch_kwargs"]
                for suite_name in batch["expectation_suite_names"]:
                    suite = context.get_expectation_suite(suite_name)
                    batch = context.get_batch(batch_kwargs, suite)
                    assets_to_validate.append(batch)
        else:
            assets_to_validate.append(
                context.get_batch(batch_kwargs, expectation_suite_name)
            )

    # Run the validation operator
    results = context.run_validation_operator(
        checkpoint_name or ge_checkpoint["validation_operator_name"],
        assets_to_validate=assets_to_validate,
        run_id={"run_name": run_name or prefect.context.get("task_slug")},
    )

    if results.success is False:
        raise signals.FAIL(result=results)

    # Generate a markdown artifact from the validation results; `run_info_at_end` is
    # passed through unchanged so the parameter's documented default takes effect
    if not disable_markdown_artifact:
        validation_results_page_renderer = (
            ge.render.renderer.ValidationResultsPageRenderer(
                run_info_at_end=run_info_at_end
            )
        )
        rendered_document_content_list = (
            validation_results_page_renderer.render_validation_operator_result(
                validation_operator_result=results
            )
        )
        markdown_artifact = " ".join(
            ge.render.view.DefaultMarkdownPageView().render(
                rendered_document_content_list
            )
        )
        create_markdown(markdown_artifact)

    return results
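# Hedged usage sketch for the older signature above; `GEValidationTask` is a
# placeholder for whatever task class defines this run method, and all names
# below are illustrative. checkpoint_name is always required in this version.
validation = GEValidationTask()

# Style 1: load the suite and batch kwargs from the checkpoint in the context
validation.run(
    checkpoint_name="my_checkpoint",
    get_checkpoint_from_context=True,
    context_root_dir="great_expectations",
)

# Style 2: build the batch explicitly from batch_kwargs and a suite name
validation.run(
    checkpoint_name="my_checkpoint",
    batch_kwargs={"path": "data/my_data.csv", "datasource": "my_datasource"},
    expectation_suite_name="my_suite",
)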