def run(
        self,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id=None,
        run_name=None,
        run_time=None,
        result_format=None,
        **kwargs,
    ) -> CheckpointResult:
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."

        run_time = run_time or datetime.now()
        runtime_configuration: dict = runtime_configuration or {}
        result_format: Optional[
            dict] = result_format or runtime_configuration.get("result_format")
        if result_format is None:
            result_format = {"result_format": "SUMMARY"}

        runtime_kwargs = {
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": batch_request,
            "action_list": action_list,
            "evaluation_parameters": evaluation_parameters,
            "runtime_configuration": runtime_configuration,
            "validations": validations,
            "profilers": profilers,
        }
        substituted_runtime_config: CheckpointConfig = self.get_substituted_config(
            runtime_kwargs=runtime_kwargs)
        run_name_template: Optional[
            str] = substituted_runtime_config.run_name_template
        validations: list = substituted_runtime_config.validations
        run_results = {}

        if run_name is None and run_name_template is not None:
            run_name: str = get_datetime_string_from_strftime_format(
                format_str=run_name_template, datetime_obj=run_time)

        run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

        for idx, validation_dict in enumerate(validations):
            try:
                substituted_validation_dict: dict = get_substituted_validation_dict(
                    substituted_runtime_config=substituted_runtime_config,
                    validation_dict=validation_dict,
                )
                batch_request: BatchRequest = substituted_validation_dict.get(
                    "batch_request")
                expectation_suite_name: str = substituted_validation_dict.get(
                    "expectation_suite_name")
                action_list: list = substituted_validation_dict.get(
                    "action_list")

                validator: Validator = self.data_context.get_validator(
                    batch_request=batch_request,
                    expectation_suite_name=expectation_suite_name,
                )
                action_list_validation_operator: ActionListValidationOperator = (
                    ActionListValidationOperator(
                        data_context=self.data_context,
                        action_list=action_list,
                        result_format=result_format,
                        name=f"{self.name}-checkpoint-validation[{idx}]",
                    ))
                val_op_run_result: ValidationOperatorResult = (
                    action_list_validation_operator.run(
                        assets_to_validate=[validator],
                        run_id=run_id,
                        evaluation_parameters=substituted_validation_dict.get(
                            "evaluation_parameters"),
                        result_format=result_format,
                    ))
                run_results.update(val_op_run_result.run_results)
            except CheckpointError as e:
                raise CheckpointError(
                    f"Exception occurred while running validation[{idx}] of checkpoint '{self.name}': {e.message}"
                )
        return CheckpointResult(run_id=run_id,
                                run_results=run_results,
                                checkpoint_config=self.config)
示例#2
0
    def run(
        self,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id: Optional[Union[str, RunIdentifier]] = None,
        run_name: Optional[str] = None,
        run_time: Optional[Union[str, datetime.datetime]] = None,
        result_format: Optional[Union[str, dict]] = None,
        expectation_suite_ge_cloud_id: Optional[str] = None,
    ) -> CheckpointResult:
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."

        run_time = run_time or datetime.datetime.now()
        runtime_configuration = runtime_configuration or {}
        result_format = result_format or runtime_configuration.get(
            "result_format")

        batch_request = get_batch_request_as_dict(batch_request=batch_request)
        validations = get_validations_with_batch_request_as_dict(
            validations=validations)

        runtime_kwargs: dict = {
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": batch_request or {},
            "action_list": action_list or [],
            "evaluation_parameters": evaluation_parameters or {},
            "runtime_configuration": runtime_configuration or {},
            "validations": validations or [],
            "profilers": profilers or [],
            "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
        }

        substituted_runtime_config: dict = self.get_substituted_config(
            runtime_kwargs=runtime_kwargs)

        run_name_template = substituted_runtime_config.get("run_name_template")

        batch_request = substituted_runtime_config.get("batch_request")
        validations = substituted_runtime_config.get("validations") or []

        if len(validations) == 0 and not batch_request:
            raise ge_exceptions.CheckpointError(
                f'Checkpoint "{self.name}" must contain either a batch_request or validations.'
            )

        if run_name is None and run_name_template is not None:
            run_name = get_datetime_string_from_strftime_format(
                format_str=run_name_template, datetime_obj=run_time)

        run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

        # Use AsyncExecutor to speed up I/O bound validations by running them in parallel with multithreading (if
        # concurrency is enabled in the data context configuration) -- please see the below arguments used to initialize
        # AsyncExecutor and the corresponding AsyncExecutor docstring for more details on when multiple threads are
        # used.
        with AsyncExecutor(self.data_context.concurrency,
                           max_workers=len(validations)) as async_executor:
            # noinspection PyUnresolvedReferences
            async_validation_operator_results: List[
                AsyncResult[ValidationOperatorResult]] = []
            if len(validations) > 0:
                for idx, validation_dict in enumerate(validations):
                    self._run_validation(
                        substituted_runtime_config=substituted_runtime_config,
                        async_validation_operator_results=
                        async_validation_operator_results,
                        async_executor=async_executor,
                        result_format=result_format,
                        run_id=run_id,
                        idx=idx,
                        validation_dict=validation_dict,
                    )
            else:
                self._run_validation(
                    substituted_runtime_config=substituted_runtime_config,
                    async_validation_operator_results=
                    async_validation_operator_results,
                    async_executor=async_executor,
                    result_format=result_format,
                    run_id=run_id,
                )

            run_results: dict = {}
            for async_validation_operator_result in async_validation_operator_results:
                run_results.update(
                    async_validation_operator_result.result().run_results)

        return CheckpointResult(
            run_id=run_id,
            run_results=run_results,
            checkpoint_config=self.config,
        )