    def _split_multi_column_metric_domain_kwargs(
        domain_kwargs: Dict,
        domain_type: MetricDomainTypes,
    ) -> SplitDomainKwargs:
        """Split domain_kwargs for multicolumn domain types into compute and accessor domain kwargs.

        Args:
            domain_kwargs: A dictionary of domain kwargs specifying which data to obtain
            domain_type: an Enum value indicating which metric domain should be used.

        Returns:
            compute_domain_kwargs and accessor_domain_kwargs split from domain_kwargs;
            their union is the input domain_kwargs.
        """
        assert (domain_type == MetricDomainTypes.MULTICOLUMN
                ), "This method only supports MetricDomainTypes.MULTICOLUMN"

        compute_domain_kwargs: Dict = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs: Dict = {}

        if "column_list" not in domain_kwargs:
            raise ge_exceptions.GreatExpectationsError(
                "column_list not found within domain_kwargs")

        column_list = compute_domain_kwargs.pop("column_list")

        if len(column_list) < 2:
            raise ge_exceptions.GreatExpectationsError(
                "column_list must contain at least 2 columns")

        accessor_domain_kwargs["column_list"] = column_list

        return SplitDomainKwargs(compute_domain_kwargs, accessor_domain_kwargs)

    def _split_column_pair_metric_domain_kwargs(
        domain_kwargs: Dict,
        domain_type: MetricDomainTypes,
    ) -> SplitDomainKwargs:
        """Split domain_kwargs for column pair domain types into compute and accessor domain kwargs.

        Args:
            domain_kwargs: A dictionary of domain kwargs specifying which data to obtain
            domain_type: an Enum value indicating which metric domain should be used.

        Returns:
            compute_domain_kwargs and accessor_domain_kwargs split from domain_kwargs;
            their union is the input domain_kwargs.
        """
        assert (domain_type == MetricDomainTypes.COLUMN_PAIR
                ), "This method only supports MetricDomainTypes.COLUMN_PAIR"

        compute_domain_kwargs: Dict = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs: Dict = {}

        if not ("column_A" in domain_kwargs and "column_B" in domain_kwargs):
            raise ge_exceptions.GreatExpectationsError(
                "column_A or column_B not found within domain_kwargs")

        accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
            "column_A")
        accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
            "column_B")

        return SplitDomainKwargs(compute_domain_kwargs, accessor_domain_kwargs)
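Both split helpers above follow the same contract: pop the identifying keys out of a deep copy of domain_kwargs, so the remaining compute kwargs still describe which data to fetch while the accessor kwargs identify the domain within it. A minimal standalone sketch of that contract, with a stand-in for the SplitDomainKwargs type (illustrative names, not the Great Expectations API):

import copy
from typing import Dict, NamedTuple, Sequence

class SplitDomainKwargs(NamedTuple):  # stand-in for the type constructed above
    compute: Dict
    accessor: Dict

def split_keys(domain_kwargs: Dict, keys: Sequence[str]) -> SplitDomainKwargs:
    compute = copy.deepcopy(domain_kwargs)  # never mutate the caller's dict
    accessor = {key: compute.pop(key) for key in keys}
    return SplitDomainKwargs(compute, accessor)

split = split_keys({"batch_id": "b1", "column_A": "x", "column_B": "y"},
                   keys=("column_A", "column_B"))
assert split.compute == {"batch_id": "b1"}
assert split.accessor == {"column_A": "x", "column_B": "y"}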
Example #3
def _add_spark_datasource(datasource_name: str, dataset: AbstractDataSet,
                          ge_context: DataContext) -> str:
    from great_expectations.datasource import SparkDFDatasource

    path = str(dataset._filepath.parent)

    if path.startswith("./"):
        path = path[2:]

    configuration = SparkDFDatasource.build_configuration(
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": os.path.join("..", path),
            }
        })

    configuration["class_name"] = "SparkDFDatasource"
    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        raise ge_exceptions.GreatExpectationsError(
            f"Invalid Datasource configuration: {errors}")

    ge_context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
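The path handling above strips a leading "./" and then re-bases the directory one level up, presumably because the batch kwargs generator resolves paths relative to the great_expectations/ directory. A sketch of that normalization in isolation (hypothetical paths, not the kedro integration itself):

import os

def normalize_base_directory(path: str) -> str:
    # Mirrors the logic above: drop a leading "./", then prepend ".." so the
    # path resolves relative to the great_expectations/ directory.
    if path.startswith("./"):
        path = path[2:]
    return os.path.join("..", path)

assert normalize_base_directory("./data/01_raw") == os.path.join("..", "data/01_raw")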
    def load_batch_data(self, batch_id: str, batch_data: Any) -> None:
        if isinstance(batch_data, pd.DataFrame):
            batch_data = PandasBatchData(self, batch_data)
        elif isinstance(batch_data, PandasBatchData):
            pass
        else:
            raise ge_exceptions.GreatExpectationsError(
                "PandasExecutionEngine requires batch data that is either a DataFrame or a PandasBatchData object"
            )
        super().load_batch_data(batch_id=batch_id, batch_data=batch_data)
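load_batch_data above normalizes raw input into the engine's batch-data wrapper before delegating to the parent class. The same guard pattern, reduced to stand-in types (not the actual GE classes):

import pandas as pd

class BatchDataWrapper:  # stand-in for PandasBatchData
    def __init__(self, dataframe: pd.DataFrame) -> None:
        self.dataframe = dataframe

def coerce_batch_data(batch_data):
    if isinstance(batch_data, pd.DataFrame):
        return BatchDataWrapper(batch_data)  # wrap a raw DataFrame
    if isinstance(batch_data, BatchDataWrapper):
        return batch_data  # already wrapped: pass through unchanged
    raise TypeError("expected a DataFrame or a BatchDataWrapper")

assert isinstance(coerce_batch_data(pd.DataFrame({"a": [1]})), BatchDataWrapper)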
    def add_column_row_condition(self,
                                 domain_kwargs,
                                 column_name=None,
                                 filter_null=True,
                                 filter_nan=False):
        """EXPERIMENTAL

        Add a row condition for filtering null values.

        Args:
            domain_kwargs: the domain kwargs to use as the base and to which to add the condition
            column_name: if provided, use this name to add the condition; otherwise, use the "column" key from domain_kwargs
            filter_null: if true, add a filter for null values
            filter_nan: if true, add a filter for NaN values
        """
        if filter_null is False and filter_nan is False:
            logger.warning(
                "add_column_row_condition called with no filter condition requested"
            )
            return domain_kwargs

        if filter_nan:
            raise ge_exceptions.GreatExpectationsError(
                "Base ExecutionEngine does not support adding nan condition filters"
            )

        if "row_condition" in domain_kwargs and domain_kwargs["row_condition"]:
            raise ge_exceptions.GreatExpectationsError(
                "ExecutionEngine does not support updating existing row_conditions."
            )

        new_domain_kwargs = copy.deepcopy(domain_kwargs)
        assert "column" in domain_kwargs or column_name is not None
        if column_name is not None:
            column = column_name
        else:
            column = domain_kwargs["column"]
        new_domain_kwargs["condition_parser"] = "great_expectations__experimental__"
        new_domain_kwargs["row_condition"] = f'col("{column}").notnull()'
        return new_domain_kwargs
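Stripped of the engine plumbing, the method above simply deep-copies the kwargs and attaches a notnull() condition under the experimental parser. A free-function sketch with a hypothetical input:

import copy

def with_notnull_condition(domain_kwargs: dict, column_name=None) -> dict:
    # Same transformation as the method above, for illustration only.
    new_kwargs = copy.deepcopy(domain_kwargs)
    column = column_name if column_name is not None else domain_kwargs["column"]
    new_kwargs["condition_parser"] = "great_expectations__experimental__"
    new_kwargs["row_condition"] = f'col("{column}").notnull()'
    return new_kwargs

result = with_notnull_condition({"column": "user_id"})
assert result["row_condition"] == 'col("user_id").notnull()'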
    def add_column_row_condition(self,
                                 domain_kwargs,
                                 column_name=None,
                                 filter_null=True,
                                 filter_nan=False):
        """EXPERIMENTAL

        Add a row condition for filtering null values.

        Args:
            domain_kwargs: the domain kwargs to use as the base and to which to add the condition
            column_name: if provided, use this name to add the condition; otherwise, use the "column" key from domain_kwargs
            filter_null: if true, add a filter for null values
            filter_nan: if true, add a filter for NaN values
        """
        if filter_null is False and filter_nan is False:
            logger.warning(
                "add_column_row_condition called with no filter condition requested"
            )
            return domain_kwargs

        if filter_nan:
            raise ge_exceptions.GreatExpectationsError(
                "Base ExecutionEngine does not support adding nan condition filters"
            )

        new_domain_kwargs = copy.deepcopy(domain_kwargs)
        assert (
            "column" in domain_kwargs or column_name is not None
        ), "No column provided: A column must be provided in domain_kwargs or in the column_name parameter"
        if column_name is not None:
            column = column_name
        else:
            column = domain_kwargs["column"]
        row_condition: RowCondition = RowCondition(
            condition=f'col("{column}").notnull()',
            condition_type=RowConditionParserType.GE,
        )
        new_domain_kwargs.setdefault("filter_conditions",
                                     []).append(row_condition)
        return new_domain_kwargs
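The key change from the previous version: instead of a single row_condition string that cannot be updated, each condition is appended to a "filter_conditions" list, so repeated calls accumulate. A sketch with a stand-in RowCondition type (the real one takes a RowConditionParserType enum; a plain string is used here to stay self-contained):

import copy
from typing import NamedTuple

class RowCondition(NamedTuple):  # stand-in for the GE type used above
    condition: str
    condition_type: str

def append_notnull_condition(domain_kwargs: dict, column: str) -> dict:
    new_kwargs = copy.deepcopy(domain_kwargs)
    condition = RowCondition(f'col("{column}").notnull()', "GE")
    new_kwargs.setdefault("filter_conditions", []).append(condition)
    return new_kwargs

kwargs = append_notnull_condition({"column": "a"}, "a")
kwargs = append_notnull_condition(kwargs, "b")
assert len(kwargs["filter_conditions"]) == 2  # conditions accumulate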
    def get_compute_domain(
        self,
        domain_kwargs: dict,
        domain_type: Union[str, "MetricDomainTypes"],
        accessor_keys: Optional[Iterable[str]] = None,
    ) -> Tuple[pd.DataFrame, dict, dict]:
        """Uses a given batch dictionary and domain kwargs (which include a row condition and a condition parser)
        to obtain and/or query a batch. Returns in the format of a Pandas DataFrame. If the domain is a single column,
        this is added to 'accessor domain kwargs' and used for later access

        Args:
            domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain
            domain_type (str or "MetricDomainTypes") - an Enum value indicating which metric domain the user would
            like to be using, or a corresponding string value representing it. String types include "identity", "column",
            "column_pair", "table" and "other". Enum types include capitalized versions of these from the class
            MetricDomainTypes.
            accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when describing
             the domain and simply transferred with their associated values into accessor_domain_kwargs.

        Returns:
            A tuple including:
              - a DataFrame (the data on which to compute)
              - a dictionary of compute_domain_kwargs, describing the DataFrame
              - a dictionary of accessor_domain_kwargs, describing any accessors needed to
                identify the domain within the compute domain
        """
        # Extracting value from enum if it is given for future computation
        domain_type = MetricDomainTypes(domain_type)

        batch_id = domain_kwargs.get("batch_id")
        if batch_id is None:
            # We allow no batch id specified if there is only one batch
            if self.active_batch_data_id is not None:
                data = self.active_batch_data.dataframe
            else:
                raise ge_exceptions.ValidationError(
                    "No batch is specified, but could not identify a loaded batch."
                )
        else:
            if batch_id in self.loaded_batch_data_dict:
                data = self.loaded_batch_data_dict[batch_id].dataframe
            else:
                raise ge_exceptions.ValidationError(
                    f"Unable to find batch with batch_id {batch_id}"
                )

        compute_domain_kwargs = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs = dict()
        table = domain_kwargs.get("table", None)
        if table:
            raise ValueError(
                "PandasExecutionEngine does not currently support multiple named tables."
            )

        # Filtering by row condition
        row_condition = domain_kwargs.get("row_condition", None)
        if row_condition:
            condition_parser = domain_kwargs.get("condition_parser", None)

            # Ensuring proper condition parser has been provided
            if condition_parser not in ["python", "pandas"]:
                raise ValueError(
                    "condition_parser is required when setting a row_condition,"
                    " and must be 'python' or 'pandas'"
                )
            else:
                # Querying row condition
                data = data.query(row_condition, parser=condition_parser).reset_index(
                    drop=True
                )

        # Warn the user that accessor keys will be ignored for any domain type other than "table"
        if (
            domain_type != MetricDomainTypes.TABLE
            and accessor_keys is not None
            and len(accessor_keys) > 0
        ):
            logger.warning(
                'Accessor keys ignored since Metric Domain Type is not "table"'
            )

        # If given a table (the default), transfer any accessor_keys (an optional parameter allowing us to
        # modify domain access) into accessor_domain_kwargs
        if domain_type == MetricDomainTypes.TABLE:
            if accessor_keys is not None and len(accessor_keys) > 0:
                for key in accessor_keys:
                    accessor_domain_kwargs[key] = compute_domain_kwargs.pop(key)
            if len(compute_domain_kwargs.keys()) > 0:
                for key in compute_domain_kwargs.keys():
                    # Warning user if kwarg not "normal"
                    if key not in [
                        "batch_id",
                        "table",
                        "row_condition",
                        "condition_parser",
                    ]:
                        logger.warning(
                            f"Unexpected key {key} found in domain_kwargs for domain type {domain_type.value}"
                        )
            return data, compute_domain_kwargs, accessor_domain_kwargs

        # If the user requested a column, check that one is provided
        elif domain_type == MetricDomainTypes.COLUMN:
            if "column" in compute_domain_kwargs:
                accessor_domain_kwargs["column"] = compute_domain_kwargs.pop("column")
            else:
                # If column not given
                raise ge_exceptions.GreatExpectationsError(
                    "Column not provided in compute_domain_kwargs"
                )

        # Else, if column pair values requested
        elif domain_type == MetricDomainTypes.COLUMN_PAIR:
            # Ensuring column_A and column_B parameters provided
            if (
                "column_A" in compute_domain_kwargs
                and "column_B" in compute_domain_kwargs
            ):
                accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
                    "column_A"
                )
                accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
                    "column_B"
                )
            else:
                raise ge_exceptions.GreatExpectationsError(
                    "column_A or column_B not found within compute_domain_kwargs"
                )

        # Else, if a multicolumn domain is requested, move the "columns" key into accessor kwargs
        elif domain_type == MetricDomainTypes.MULTICOLUMN:
            if "columns" in compute_domain_kwargs:
                accessor_domain_kwargs["columns"] = compute_domain_kwargs.pop("columns")

        # Filtering if identity
        elif domain_type == MetricDomainTypes.IDENTITY:

            # If we would like our data to become a single column
            if "column" in compute_domain_kwargs:
                data = pd.DataFrame(data[compute_domain_kwargs["column"]])

            # If we would like our data to now become a column pair
            elif ("column_A" in compute_domain_kwargs) and (
                "column_B" in compute_domain_kwargs
            ):

                # Dropping all not needed columns
                column_a, column_b = (
                    compute_domain_kwargs["column_A"],
                    compute_domain_kwargs["column_B"],
                )
                data = pd.DataFrame(
                    {column_a: data[column_a], column_b: data[column_b]}
                )

            else:
                # If we would like our data to become a multicolumn
                if "columns" in compute_domain_kwargs:
                    data = data[compute_domain_kwargs["columns"]]

        return data, compute_domain_kwargs, accessor_domain_kwargs
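The row-condition branch above is a thin wrapper around DataFrame.query with an explicit parser. The filtering step in isolation (plain pandas, no engine required):

import pandas as pd

data = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
# Equivalent to the query above with condition_parser="pandas":
filtered = data.query("a > 1", parser="pandas").reset_index(drop=True)
assert list(filtered["a"]) == [2, 3]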
Example #8
def _add_spark_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "spark"},
        success=True,
    )

    if not _verify_pyspark_dependent_modules():
        return None

    if passthrough_generator_only:
        datasource_name = "files_spark_datasource"

        # configuration = SparkDFDatasource.build_configuration(batch_kwargs_generators={
        #     "default": {
        #         "class_name": "PassthroughGenerator",
        #     }
        # }
        # )
        configuration = SparkDFDatasource.build_configuration()

    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        ).strip()
        if path.startswith("./"):
            path = path[2:]

        if path.endswith("/"):
            basenamepath = path[:-1]
        else:
            basenamepath = path

        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )

        configuration = SparkDFDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": os.path.join("..", path),
                }
            }
        )
        configuration["class_name"] = "SparkDFDatasource"
        configuration["module_name"] = "great_expectations.datasource"
        errors = DatasourceConfigSchema().validate(configuration)
        if len(errors) != 0:
            raise ge_exceptions.GreatExpectationsError(
                f"Invalid Datasource configuration: {errors}"
            )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), "  "),
        )
    )
    toolkit.confirm_proceed_or_exit()

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
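The datasource name above is derived by trimming a trailing slash and suffixing "__dir". The same steps in isolation (hypothetical path):

import os

path = "data/uploads/"
basenamepath = path[:-1] if path.endswith("/") else path
datasource_name = os.path.basename(basenamepath) + "__dir"
assert datasource_name == "uploads__dir"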
Example #9
def _add_sqlalchemy_datasource(context, prompt_for_datasource_name=True):

    msg_success_database = (
        "\n<green>Great Expectations connected to your database!</green>"
    )

    if not _verify_sqlalchemy_dependent_modules():
        return None

    db_choices = [str(x) for x in list(range(1, 1 + len(SupportedDatabases)))]
    selected_database = (
        int(
            click.prompt(
                msg_prompt_choose_database,
                type=click.Choice(db_choices),
                show_choices=False,
            )
        )
        - 1
    )  # don't show user a zero index list :)

    selected_database = list(SupportedDatabases)[selected_database]

    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "sqlalchemy", "db": selected_database.name},
        success=True,
    )

    datasource_name = "my_{}_db".format(selected_database.value.lower())
    if selected_database == SupportedDatabases.OTHER:
        datasource_name = "my_database"
    if prompt_for_datasource_name:
        datasource_name = click.prompt(
            msg_prompt_datasource_name, default=datasource_name
        )

    credentials = {}
    # Since we don't want to save the database credentials in the config file that will be
    # committed in the repo, we will use our Variable Substitution feature to store the credentials
    # in the credentials file (that will not be committed, since it is in the uncommitted directory)
    # with the datasource's name as the variable name.
    # The value of the datasource's "credentials" key in the config file (great_expectations.yml) will
    # be ${datasource name}.
    # Great Expectations will replace the ${datasource name} with the value from the credentials file in runtime.

    while True:
        cli_message(msg_db_config.format(datasource_name))

        if selected_database == SupportedDatabases.MYSQL:
            if not _verify_mysql_dependent_modules():
                return None

            credentials = _collect_mysql_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.POSTGRES:
            if not _verify_postgresql_dependent_modules():
                return None

            credentials = _collect_postgres_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.REDSHIFT:
            if not _verify_redshift_dependent_modules():
                return None

            credentials = _collect_redshift_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.SNOWFLAKE:
            if not _verify_snowflake_dependent_modules():
                return None

            credentials = _collect_snowflake_credentials(
                default_credentials=credentials
            )
        elif selected_database == SupportedDatabases.BIGQUERY:
            if not _verify_bigquery_dependent_modules():
                return None

            credentials = _collect_bigquery_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.OTHER:
            sqlalchemy_url = click.prompt(
                """What is the url/connection string for the sqlalchemy connection?
(reference: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls)
""",
                show_default=False,
            ).strip()
            credentials = {"url": sqlalchemy_url}

        context.save_config_variable(datasource_name, credentials)

        message = """
<red>Cannot connect to the database.</red>
  - Please check your environment and the configuration you provided.
  - Database Error: {0:s}"""
        try:
            cli_message(
                "<cyan>Attempting to connect to your database. This may take a moment...</cyan>"
            )

            configuration = SqlAlchemyDatasource.build_configuration(
                credentials="${" + datasource_name + "}"
            )

            configuration["class_name"] = "SqlAlchemyDatasource"
            configuration["module_name"] = "great_expectations.datasource"
            errors = DatasourceConfigSchema().validate(configuration)
            if len(errors) != 0:
                raise ge_exceptions.GreatExpectationsError(
                    f"Invalid Datasource configuration: {errors}"
                )

            cli_message(
                """
Great Expectations will now add a new Datasource '{0:s}' to your deployment, by adding this entry to your great_expectations.yml:

{1:s}
The credentials will be saved in uncommitted/config_variables.yml under the key '{0:s}'
""".format(
                    datasource_name,
                    textwrap.indent(
                        toolkit.yaml.dump({datasource_name: configuration}), "  "
                    ),
                )
            )

            toolkit.confirm_proceed_or_exit()
            context.add_datasource(name=datasource_name, **configuration)
            cli_message(msg_success_database)
            break
        except ModuleNotFoundError as de:
            cli_message(message.format(str(de)))
            return None

        except DatasourceInitializationError as de:
            cli_message(message.format(str(de)))
            if not click.confirm("Enter the credentials again?", default=True):
                context.add_datasource(
                    datasource_name,
                    initialize=False,
                    module_name="great_expectations.datasource",
                    class_name="SqlAlchemyDatasource",
                    data_asset_type={"class_name": "SqlAlchemyDataset"},
                    credentials="${" + datasource_name + "}",
                )
                # TODO this message about continuing may not be accurate
                cli_message(
                    """
We saved datasource {:s} in {:s} and the credentials you entered in {:s}.
Since we could not connect to the database, you can complete troubleshooting in the configuration files documented in the how-to guides here:
<blue>https://docs.greatexpectations.io/en/latest/guides/how_to_guides/configuring_datasources.html?utm_source=cli&utm_medium=init&utm_campaign={:s}#{:s}</blue> .

After you connect to the datasource, run great_expectations init to continue.

""".format(
                        datasource_name,
                        DataContext.GE_YML,
                        context.get_config()["config_variables_file_path"],
                        rtd_url_ge_version,
                        selected_database.value.lower(),
                    )
                )
                return None

    return datasource_name
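The credentials handling above relies on variable substitution: great_expectations.yml stores only the placeholder "${datasource_name}", while the real credentials live in the uncommitted config-variables file. A toy resolver showing the idea (not the GE implementation):

import re

config_variables = {"my_postgres_db": {"url": "postgresql://localhost/ge"}}
config_value = "${my_postgres_db}"  # what great_expectations.yml would contain

match = re.fullmatch(r"\$\{(.+)\}", config_value)
resolved = config_variables[match.group(1)] if match else config_value
assert resolved == {"url": "postgresql://localhost/ge"}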
Example #10
def _add_pandas_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "pandas"},
        success=True,
    )

    if passthrough_generator_only:
        datasource_name = "files_datasource"
        configuration = PandasDatasource.build_configuration()

    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        )

        if path.startswith("./"):
            path = path[2:]

        if path.endswith("/"):
            basenamepath = path[:-1]
        else:
            basenamepath = path

        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )

        configuration = PandasDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": os.path.join("..", path),
                }
            }
        )

        configuration["class_name"] = "PandasDatasource"
        configuration["module_name"] = "great_expectations.datasource"
        errors = DatasourceConfigSchema().validate(configuration)
        if len(errors) != 0:
            raise ge_exceptions.GreatExpectationsError(
                f"Invalid Datasource configuration: {errors}"
            )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), "  "),
        )
    )

    toolkit.confirm_proceed_or_exit(
        continuation_message="Okay, exiting now. To learn more about adding datasources, run great_expectations "
        "datasource --help or visit https://docs.greatexpectations.io/"
    )

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
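Each of the datasource helpers above validates the built configuration with DatasourceConfigSchema and raises when the returned error dict is non-empty. The validate-then-raise pattern with a toy marshmallow schema (a stand-in that keeps the sketch self-contained; DatasourceConfigSchema is assumed to behave like a marshmallow schema here):

from marshmallow import Schema, fields

class ToyDatasourceSchema(Schema):  # stand-in for DatasourceConfigSchema
    class_name = fields.Str(required=True)

errors = ToyDatasourceSchema().validate({})  # missing class_name
assert errors  # a non-empty dict of errors means the config is invalid
assert ToyDatasourceSchema().validate({"class_name": "PandasDatasource"}) == {}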
    def get_compute_domain(
        self,
        domain_kwargs: dict,
        domain_type: Union[str, MetricDomainTypes],
        accessor_keys: Optional[Iterable[str]] = None,
    ) -> Tuple[pd.DataFrame, dict, dict]:
        """
        Uses the given domain kwargs (which include row_condition, condition_parser, and ignore_row_if directives) to
        obtain and/or query a batch. Returns the result as a Pandas DataFrame. If the domain is a single column,
        the column is added to 'accessor domain kwargs' for later access.

        Args:
            domain_kwargs (dict) - A dictionary of domain kwargs specifying which data to obtain
            domain_type (str or MetricDomainTypes) - an Enum value indicating which metric domain should be used,
                or a corresponding string value. String types include "column", "column_pair", "table", and
                "other"; Enum types are the capitalized versions of these from the class MetricDomainTypes.
            accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when
                describing the domain, and simply transferred with their associated values into accessor_domain_kwargs.

        Returns:
            A tuple including:
              - a DataFrame (the data on which to compute)
              - a dictionary of compute_domain_kwargs, describing the DataFrame
              - a dictionary of accessor_domain_kwargs, describing any accessors needed to
                identify the domain within the compute domain
        """
        data = self.get_domain_records(domain_kwargs=domain_kwargs)
        # Extracting value from enum if it is given for future computation
        domain_type = MetricDomainTypes(domain_type)

        compute_domain_kwargs = copy.deepcopy(domain_kwargs)
        accessor_domain_kwargs = {}
        table = domain_kwargs.get("table", None)
        if table:
            raise ValueError(
                "PandasExecutionEngine does not currently support multiple named tables."
            )

        # Warn the user that accessor keys will be ignored for any domain type other than "table"
        if (domain_type != MetricDomainTypes.TABLE
                and accessor_keys is not None
                and len(list(accessor_keys)) > 0):
            logger.warning(
                'Accessor keys ignored since Metric Domain Type is not "table"'
            )

        # If given a table (the default), transfer any accessor_keys (an optional parameter allowing us to
        # modify domain access) into accessor_domain_kwargs
        if domain_type == MetricDomainTypes.TABLE:
            if accessor_keys is not None and len(list(accessor_keys)) > 0:
                for key in accessor_keys:
                    accessor_domain_kwargs[key] = compute_domain_kwargs.pop(
                        key)
            if len(compute_domain_kwargs.keys()) > 0:
                # Warn user if kwarg not "normal".
                unexpected_keys: set = set(
                    compute_domain_kwargs.keys()).difference({
                        "batch_id",
                        "table",
                        "row_condition",
                        "condition_parser",
                    })
                if len(unexpected_keys) > 0:
                    unexpected_keys_str: str = ", ".join(
                        map(lambda element: f'"{element}"', unexpected_keys))
                    logger.warning(
                        f'Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for domain type "{domain_type.value}".'
                    )
            return data, compute_domain_kwargs, accessor_domain_kwargs

        elif domain_type == MetricDomainTypes.COLUMN:
            if "column" not in compute_domain_kwargs:
                raise ge_exceptions.GreatExpectationsError(
                    "Column not provided in compute_domain_kwargs")

            accessor_domain_kwargs["column"] = compute_domain_kwargs.pop(
                "column")

        elif domain_type == MetricDomainTypes.COLUMN_PAIR:
            if not ("column_A" in domain_kwargs
                    and "column_B" in domain_kwargs):
                raise ge_exceptions.GreatExpectationsError(
                    "column_A or column_B not found within domain_kwargs")

            accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
                "column_A")
            accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
                "column_B")

        elif domain_type == MetricDomainTypes.MULTICOLUMN:
            if "column_list" not in domain_kwargs:
                raise ge_exceptions.GreatExpectationsError(
                    "column_list not found within domain_kwargs")

            column_list = compute_domain_kwargs.pop("column_list")

            if len(column_list) < 2:
                raise ge_exceptions.GreatExpectationsError(
                    "column_list must contain at least 2 columns")

            accessor_domain_kwargs["column_list"] = column_list

        return data, compute_domain_kwargs, accessor_domain_kwargs
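The unexpected-key warning in this newer version replaces the per-key loop of the older one with a single set difference against the "normal" table-domain keys. That check in isolation:

compute_domain_kwargs = {"batch_id": "b1", "row_condition": "a > 1", "foo": 1}
unexpected = set(compute_domain_kwargs).difference(
    {"batch_id", "table", "row_condition", "condition_parser"})
assert unexpected == {"foo"}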