# Example #1
def test_anonymize_simple_sqlalchemy_datasource():
    """A core SimpleSqlalchemyDatasource config is anonymized: name hashed,
    parent class reported verbatim, nested engine/connectors summarized."""
    name = "test_simple_sqlalchemy_datasource"
    # Plain string literal; the original used an f-string with no placeholders (lint F541).
    yaml_config = """
class_name: SimpleSqlalchemyDatasource
connection_string: sqlite:///some_db.db

introspection:
    whole_table_with_limits:
        sampling_method: _sample_using_limit
        sampling_kwargs:
            n: 10
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_simple_sqlalchemy_datasource(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_name": "3be0aacd79b32e22a41949bf607b3e80",
        "parent_class": "SimpleSqlalchemyDatasource",
        "anonymized_execution_engine": {
            "parent_class": "SqlAlchemyExecutionEngine"
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "d6b508db454c47ea40131b0a11415dd4",
                "parent_class": "InferredAssetSqlDataConnector",
            }
        ],
    }
# Example #2
def test_anonymize_custom_simple_sqlalchemy_datasource():
    """A user-defined SimpleSqlalchemyDatasource subclass is anonymized,
    including a hash of its custom class name."""
    datasource_name = "test_custom_simple_sqlalchemy_datasource"
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_simple_sqlalchemy_datasource_class
class_name: MyCustomSimpleSqlalchemyDatasource
connection_string: sqlite:///some_db.db
name: some_name
introspection:
    my_custom_datasource_name:
        data_asset_name_suffix: some_suffix
"""
    config: CommentedMap = yaml.load(yaml_config)
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    result = anonymizer.anonymize_simple_sqlalchemy_datasource(
        name=datasource_name, config=config
    )
    expected = {
        "anonymized_name": "d9e0c5f761c6ea5e54000f8c10a1049b",
        "parent_class": "SimpleSqlalchemyDatasource",
        # Custom subclass => its own class name is hashed as well.
        "anonymized_class": "aab66054e62007a9ac5afbcacedaf0d2",
        "anonymized_execution_engine": {
            "parent_class": "SqlAlchemyExecutionEngine"
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "82b8b59e076789ac1476b2b745ebc268",
                "parent_class": "InferredAssetSqlDataConnector",
            }
        ],
    }
    assert result == expected
    def __init__(self, data_context, data_context_id, usage_statistics_url):
        """Set up the usage-statistics handler: record context identifiers,
        start the background posting thread, build the per-payload anonymizers,
        and hook signal/atexit teardown.

        Args:
            data_context: the DataContext this handler reports for.
            data_context_id: id used as the anonymization salt.
            usage_statistics_url: endpoint that messages are POSTed to.
        """
        self._url = usage_statistics_url

        self._data_context_id = data_context_id
        self._data_context_instance_id = data_context.instance_id
        self._data_context = data_context
        self._ge_version = ge_version

        # Messages are queued and posted from a daemon thread so callers never block.
        self._message_queue = Queue()
        self._worker = threading.Thread(target=self._requests_worker,
                                        daemon=True)
        self._worker.start()
        # One anonymizer per payload family, all salted with the data context id.
        self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
        self._store_anonymizer = StoreAnonymizer(data_context_id)
        self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
            data_context_id)
        self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(
            data_context_id)
        self._batch_anonymizer = BatchAnonymizer(data_context_id)
        self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(
            data_context_id)
        # Save any previously-installed handlers so teardown can chain to them.
        try:
            self._sigterm_handler = signal.signal(signal.SIGTERM,
                                                  self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigterm_handler = None
        try:
            self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigint_handler = None

        atexit.register(self._close_worker)
# Example #4
def test_anonymize_datasource_info_v2_api_core_ge_class():
    """A core (non-custom) v2 API datasource yields only an anonymized name
    plus its parent class; nested generator config is not echoed back."""
    datasource_name = "test_pandas_datasource"
    config = {
        "name": datasource_name,
        "class_name": "PandasDatasource",
        "module_name": "great_expectations.datasource",
        "data_asset_type": {
            "module_name": "custom_pandas_dataset",
            "class_name": "CustomPandasDataset",
        },
        "batch_kwargs_generators": {
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "some_path",
            }
        },
    }
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    result = anonymizer.anonymize_datasource_info(
        name=datasource_name, config=config
    )
    assert result == {
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "PandasDatasource",
    }
# Example #5
def test_anonymize_datasource_info_v3_api_core_ge_class():
    """A core v3 API Datasource anonymizes its name, execution engine, and
    data connectors while reporting GE class names verbatim."""
    name = "test_pandas_datasource"
    # Plain string literal; the original used an f-string with no placeholders (lint F541).
    yaml_config = """
class_name: Datasource
module_name: great_expectations.datasource

execution_engine:
    class_name: PandasExecutionEngine
    module_name: great_expectations.execution_engine

data_connectors:
    my_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        module_name: great_expectations.datasource.data_connector
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_data_connectors": [
            {
                "anonymized_name": "42af601aeb8a03d76bf468a462cb62f6",
                "parent_class": "InferredAssetFilesystemDataConnector",
            }
        ],
        "anonymized_execution_engine": {
            "anonymized_name": "6b8f8c12352592a69083f958369c7151",
            "parent_class": "PandasExecutionEngine",
        },
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "Datasource",
    }
def add_datasource_usage_statistics(
        data_context: "DataContext",
        name: str,
        **kwargs  # noqa: F821
) -> dict:
    """Build the anonymized payload for the add-datasource usage-stats event.

    Returns an empty dict when usage statistics are disabled or when
    anonymization fails for any reason (best effort, never raises).
    """
    if not data_context._usage_statistics_handler:
        return {}
    # An AttributeError from the property is treated the same as a missing id.
    data_context_id = getattr(data_context, "data_context_id", None)

    from great_expectations.core.usage_statistics.anonymizers.datasource_anonymizer import (
        DatasourceAnonymizer, )

    aggregate_anonymizer = Anonymizer(salt=data_context_id)
    datasource_anonymizer = DatasourceAnonymizer(
        salt=data_context_id, aggregate_anonymizer=aggregate_anonymizer)

    # noinspection PyBroadException
    try:
        return datasource_anonymizer._anonymize_datasource_info(name, kwargs)
    except Exception as e:
        logger.debug(
            f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, add_datasource_usage_statistics: Unable to create add_datasource_usage_statistics payload field"
        )
        return {}
# Example #7
def test_is_custom_parent_class_recognized_v3_api_yes():
    """A custom v3 API datasource subclass resolves to its `Datasource` base."""
    custom_config = {
        "module_name":
        "tests.data_context.fixtures.plugins.my_custom_v3_api_datasource",
        "class_name": "MyCustomV3ApiDatasource",
    }
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    recognized = anonymizer.is_parent_class_recognized_v3_api(config=custom_config)
    assert recognized == "Datasource"
# Example #8
def test_is_parent_class_recognized_no():
    """Classes outside the GE datasource hierarchy are never recognized."""
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for class_name in (
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        recognized = anonymizer.is_parent_class_recognized(config=config)
        assert recognized != class_name
        assert recognized is None
def test_is_custom_parent_class_recognized_yes():
    """get_parent_class maps custom subclasses to their GE base class for
    both the v3 and v2 API fixture datasources."""
    v3_config = {
        "module_name":
        "tests.data_context.fixtures.plugins.my_custom_v3_api_datasource",
        "class_name": "MyCustomV3ApiDatasource",
    }
    assert DatasourceAnonymizer.get_parent_class(config=v3_config) == "Datasource"

    v2_config = {
        "module_name":
        "tests.data_context.fixtures.plugins.my_custom_v2_api_datasource",
        "class_name": "MyCustomV2ApiDatasource",
    }
    assert DatasourceAnonymizer.get_parent_class(config=v2_config) == "PandasDatasource"
def datasource_anonymizer() -> DatasourceAnonymizer:
    """Return a DatasourceAnonymizer whose aggregate anonymizer shares the same fixed salt."""
    # Standardize the salt so our tests are deterministic
    salt: str = "00000000-0000-0000-0000-00000000a004"
    aggregate_anonymizer: Anonymizer = Anonymizer(salt=salt)
    anonymizer: DatasourceAnonymizer = DatasourceAnonymizer(
        salt=salt, aggregate_anonymizer=aggregate_anonymizer)
    return anonymizer
# Example #11
def test_is_parent_class_recognized_v3_api_yes():
    """Every core v3 (batch request) API datasource class is recognized by
    the v3-specific check."""
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for class_name in (
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        recognized = anonymizer.is_parent_class_recognized_v3_api(config=config)
        assert recognized == class_name
# Example #12
def test_is_parent_class_recognized_v2_api_yes():
    """Every core v2 (batch kwargs) API datasource class is recognized by
    the v2-specific check."""
    anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for class_name in (
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        recognized = anonymizer.is_parent_class_recognized_v2_api(config=config)
        assert recognized == class_name
# Example #13
def add_datasource_usage_statistics(data_context, name, **kwargs):
    """Anonymize a datasource config for the add-datasource usage-stats event.

    Best effort: returns an empty payload rather than raising on any failure.
    """
    # An AttributeError from the property is treated the same as a missing id.
    data_context_id = getattr(data_context, "data_context_id", None)

    # Reuse the handler's anonymizer when available; otherwise build a fresh one.
    try:
        datasource_anonymizer = (
            data_context._usage_statistics_handler._datasource_anonymizer)
    except Exception:
        datasource_anonymizer = DatasourceAnonymizer(data_context_id)

    try:
        return datasource_anonymizer.anonymize_datasource_info(name, kwargs)
    except Exception:
        logger.debug(
            "add_datasource_usage_statistics: Unable to create add_datasource_usage_statistics payload field"
        )
        return {}
# Example #14
def test_anonymize_datasource_info_v2_api_custom_subclass():
    """
    What does this test and why?
    We should be able to discern the GE parent class for a custom type and construct
    a useful usage stats event message.
    Custom v2 API Datasources should continue to be supported.
    """
    name = "test_pandas_datasource"
    # Plain string literal; the original used an f-string with no placeholders (lint F541).
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_v2_api_datasource
class_name: MyCustomV2ApiDatasource
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_class": "c454ace824bf401ea42815c84d0f5717",
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "PandasDatasource",
    }
def test_get_parent_class_no():
    """get_parent_class returns None for classes with no GE datasource ancestry."""
    for class_name in (
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        resolved = DatasourceAnonymizer.get_parent_class(config=config)
        assert resolved != class_name
        assert resolved is None
# Example #16
def test_datasource_anonymizer():
    """Anonymized class hashes are salt-dependent: different salts produce
    different hashes for the same custom class, while the same anonymizer is
    deterministic."""
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    n1 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "PandasDatasource",
            "module_name": "great_expectations.datasource",
        },
    )
    assert n1 == {
        "anonymized_name": "04bf89e1fb7495b0904bbd5ae478fbe0",
        "parent_class": "PandasDatasource",
    }
    n2 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    # A second anonymizer with a random (default) salt must hash differently.
    datasource_anonymizer_2 = DatasourceAnonymizer()
    n3 = datasource_anonymizer_2.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    assert n2["parent_class"] == "PandasDatasource"
    assert n3["parent_class"] == "PandasDatasource"
    # Removed stray debug `print(n3)` left over from development.
    assert len(n3["anonymized_class"]) == 32  # md5 hex digest length
    assert n2["anonymized_class"] != n3["anonymized_class"]

    # Same anonymizer *does* produce the same result
    n4 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    assert n4["anonymized_class"] == n2["anonymized_class"]
def test_get_parent_class_v3_api_yes():
    """get_parent_class_v3_api recognizes every core v3 API datasource class."""
    for class_name in (
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        resolved = DatasourceAnonymizer.get_parent_class_v3_api(config=config)
        assert resolved == class_name
def test_get_parent_class_v2_api_yes():
    """get_parent_class_v2_api recognizes every core v2 API datasource class."""
    for class_name in (
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ):
        config = {
            "name": "test_datasource",
            "class_name": class_name,
            "module_name": "great_expectations.datasource",
        }
        resolved = DatasourceAnonymizer.get_parent_class_v2_api(config=config)
        assert resolved == class_name
def test_get_parent_class_v2_api_no():
    """get_parent_class_v2_api rejects v3 API datasources and classes with no
    GE datasource ancestry."""
    v3_batch_request_api_datasources = [
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ]
    # Fixed misspelled local: "datsource" -> "datasource".
    custom_non_datasource_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    parent_classes = v3_batch_request_api_datasources + custom_non_datasource_classes
    configs = [{
        "name": "test_datasource",
        "class_name": parent_class,
        "module_name": "great_expectations.datasource",
    } for parent_class in parent_classes]
    for config, class_name in zip(configs, parent_classes):
        parent_class = DatasourceAnonymizer.get_parent_class_v2_api(
            config=config)
        assert parent_class != class_name
        assert parent_class is None
def test_get_parent_class_v3_api_no():
    """get_parent_class_v3_api rejects v2 API datasources and classes with no
    GE datasource ancestry."""
    v2_batch_kwargs_api_datasources = [
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ]
    # Fixed misspelled local: "datsource" -> "datasource".
    custom_non_datasource_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    parent_classes = v2_batch_kwargs_api_datasources + custom_non_datasource_classes
    configs = [{
        "name": "test_datasource",
        "class_name": parent_class,
        "module_name": "great_expectations.datasource",
    } for parent_class in parent_classes]
    for config, class_name in zip(configs, parent_classes):
        parent_class = DatasourceAnonymizer.get_parent_class_v3_api(
            config=config)
        assert parent_class != class_name
        assert parent_class is None
def test_anonymize_datasource_info_v3_api_custom_subclass(
    datasource_anonymizer: DatasourceAnonymizer,
):
    """A custom v3 API Datasource subclass is anonymized with its own class
    hash plus anonymized execution engine and data connectors."""
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_v3_api_datasource
class_name: MyCustomV3ApiDatasource

execution_engine:
    class_name: PandasExecutionEngine
    module_name: great_expectations.execution_engine

data_connectors:
    my_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        module_name: great_expectations.datasource.data_connector
"""
    config: CommentedMap = yaml.load(yaml_config)
    result = datasource_anonymizer._anonymize_datasource_info(
        name="test_pandas_datasource", config=config
    )
    expected = {
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "anonymized_class": "ae74d1b58a67f5a944bb9cda16a62472",
        "parent_class": "Datasource",
        "anonymized_execution_engine": {
            "anonymized_name": "6b8f8c12352592a69083f958369c7151",
            "parent_class": "PandasExecutionEngine",
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "42af601aeb8a03d76bf468a462cb62f6",
                "parent_class": "InferredAssetFilesystemDataConnector",
            }
        ],
    }
    assert result == expected
# Example #22
def test_datasource_anonymizer():
    # NOTE(review): this example appears truncated — it only constructs the
    # anonymizer and asserts nothing; presumably the body was cut off at
    # extraction time. Confirm against the original test module.
    datasource_anonymizer = DatasourceAnonymizer()
class UsageStatisticsHandler(object):
    """Queues anonymized usage-statistics messages and POSTs them to a remote
    endpoint from a background daemon thread.

    emit() never blocks the caller and never raises; all network work happens
    on the worker thread, which is flushed via signal handlers and atexit.
    """

    def __init__(self, data_context, data_context_id, usage_statistics_url):
        """Record context identifiers, start the worker thread, build the
        per-payload anonymizers, and hook signal/atexit teardown."""
        self._url = usage_statistics_url

        self._data_context_id = data_context_id
        self._data_context_instance_id = data_context.instance_id
        self._data_context = data_context
        self._ge_version = ge_version

        # Messages are drained by a daemon thread so emit() never blocks.
        self._message_queue = Queue()
        self._worker = threading.Thread(target=self._requests_worker,
                                        daemon=True)
        self._worker.start()
        # One anonymizer per payload family, all salted with the data context id.
        self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
        self._store_anonymizer = StoreAnonymizer(data_context_id)
        self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
            data_context_id)
        self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(
            data_context_id)
        self._batch_anonymizer = BatchAnonymizer(data_context_id)
        self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(
            data_context_id)
        # Save the previously-installed handlers so _teardown can chain to them.
        self._sigterm_handler = signal.signal(signal.SIGTERM, self._teardown)
        self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
        atexit.register(self._close_worker)

    def _teardown(self, signum: int, frame):
        """Flush the queue, then delegate to the previously-installed handler."""
        self._close_worker()
        # BUGFIX: signal.signal may have returned SIG_DFL / SIG_IGN (plain
        # ints), which are not callable; only chain to real Python handlers.
        if signum == signal.SIGTERM and callable(self._sigterm_handler):
            self._sigterm_handler(signum, frame)
        if signum == signal.SIGINT and callable(self._sigint_handler):
            self._sigint_handler(signum, frame)

    def _close_worker(self):
        """Ask the worker to stop and wait for it to finish draining."""
        self._message_queue.put(STOP_SIGNAL)
        self._worker.join()

    def _requests_worker(self):
        """Daemon loop: POST each queued message until STOP_SIGNAL arrives."""
        session = requests.Session()
        while True:
            message = self._message_queue.get()
            if message == STOP_SIGNAL:
                self._message_queue.task_done()
                return
            try:
                res = session.post(self._url, json=message, timeout=2)
                logger.debug("Posted usage stats: message status " +
                             str(res.status_code))
                if res.status_code != 201:
                    # BUGFIX: the original passed json.dumps(...) as a lazy
                    # %-format argument to a format string with no placeholder,
                    # so the rejected payload was never actually logged.
                    logger.debug("Server rejected message: %s",
                                 json.dumps(message, indent=2))
            except requests.exceptions.Timeout:
                logger.debug("Timeout while sending usage stats message.")
            except Exception as e:
                logger.debug("Unexpected error posting message: " + str(e))
            finally:
                self._message_queue.task_done()

    def send_usage_message(self, event, event_payload=None, success=None):
        """send a usage statistics message."""
        # Usage statistics must never break the caller.
        try:
            message = {
                "event": event,
                "event_payload": event_payload or {},
                "success": success,
            }

            self.emit(message)
        except Exception:
            pass

    def build_init_payload(self):
        """Adds information that may be available only after full data context construction, but is useful to
        calculate only one time (for example, anonymization)."""
        expectation_suites = [
            self._data_context.get_expectation_suite(expectation_suite_name)
            for expectation_suite_name in
            self._data_context.list_expectation_suite_names()
        ]
        return {
            "platform.system":
            platform.system(),
            "platform.release":
            platform.release(),
            "version_info":
            str(sys.version_info),
            "anonymized_datasources": [
                self._datasource_anonymizer.anonymize_datasource_info(
                    datasource_name, datasource_config)
                for datasource_name, datasource_config in self._data_context.
                _project_config_with_variables_substituted.datasources.items()
            ],
            "anonymized_stores": [
                self._store_anonymizer.anonymize_store_info(
                    store_name, store_obj)
                for store_name, store_obj in self._data_context.stores.items()
            ],
            "anonymized_validation_operators": [
                self._validation_operator_anonymizer.
                anonymize_validation_operator_info(
                    validation_operator_name=validation_operator_name,
                    validation_operator_obj=validation_operator_obj)
                for validation_operator_name, validation_operator_obj in
                self._data_context.validation_operators.items()
            ],
            "anonymized_data_docs_sites": [
                self._data_docs_sites_anonymizer.anonymize_data_docs_site_info(
                    site_name=site_name, site_config=site_config)
                for site_name, site_config in
                self._data_context._project_config_with_variables_substituted.
                data_docs_sites.items()
            ],
            "anonymized_expectation_suites": [
                self._expectation_suite_anonymizer.
                anonymize_expectation_suite_info(expectation_suite)
                for expectation_suite in expectation_suites
            ]
        }

    def build_envelope(self, message):
        """Stamp schema version, timestamp, and context ids onto a message."""
        message["version"] = "1.0.0"
        message["event_time"] = datetime.datetime.utcnow().strftime(
            "%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
        message["data_context_id"] = self._data_context_id
        message["data_context_instance_id"] = self._data_context_instance_id
        message["ge_version"] = self._ge_version
        return message

    def validate_message(self, message, schema):
        """Return True iff the message conforms to the given JSON schema."""
        try:
            jsonschema.validate(message, schema=schema)
            return True
        except jsonschema.ValidationError as e:
            logger.debug("invalid message: " + str(e))
            return False

    def emit(self, message):
        """
        Emit a message.
        """
        try:
            if message["event"] == "data_context.__init__":
                message["event_payload"] = self.build_init_payload()
            message = self.build_envelope(message)
            if not self.validate_message(
                    message, schema=usage_statistics_record_schema):
                return

            self._message_queue.put(message)
        # noinspection PyBroadException
        except Exception as e:
            # We *always* tolerate *any* error in usage statistics
            logger.debug(e)
# Example #24
class UsageStatisticsHandler:
    """Queues anonymized usage-statistics messages and POSTs them to a remote
    endpoint from a background daemon thread.

    emit() never blocks the caller and never raises; all network work happens
    on the worker thread, which is flushed via signal handlers and atexit.
    """

    def __init__(
        self,
        data_context: "DataContext",  # noqa: F821
        data_context_id: str,
        usage_statistics_url: str,
    ):
        """Record context identifiers, start the worker thread, build the
        per-payload anonymizers, and hook signal/atexit teardown."""
        self._url = usage_statistics_url

        self._data_context_id = data_context_id
        self._data_context_instance_id = data_context.instance_id
        self._data_context = data_context
        self._ge_version = ge_version

        # Messages are drained by a daemon thread so emit() never blocks.
        self._message_queue = Queue()
        self._worker = threading.Thread(target=self._requests_worker,
                                        daemon=True)
        self._worker.start()
        # One anonymizer per payload family, all salted with the data context id.
        self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
        self._execution_engine_anonymizer = ExecutionEngineAnonymizer(
            data_context_id)
        self._store_anonymizer = StoreAnonymizer(data_context_id)
        self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
            data_context_id)
        self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(
            data_context_id)
        self._batch_request_anonymizer = BatchRequestAnonymizer(
            data_context_id)
        self._batch_anonymizer = BatchAnonymizer(data_context_id)
        self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(
            data_context_id)
        self._checkpoint_run_anonymizer = CheckpointRunAnonymizer(
            data_context_id)
        # Save the previously-installed handlers so _teardown can chain to them.
        try:
            self._sigterm_handler = signal.signal(signal.SIGTERM,
                                                  self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigterm_handler = None
        try:
            self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigint_handler = None

        atexit.register(self._close_worker)

    def _teardown(self, signum: int, frame: Optional[FrameType]) -> None:
        """Flush the queue, then delegate to the previously-installed handler."""
        self._close_worker()
        # BUGFIX: the saved handler may be signal.SIG_IGN (the truthy int 1),
        # which the old truthiness check let through to a TypeError; only
        # chain to handlers that are actually callable.
        if signum == signal.SIGTERM and callable(self._sigterm_handler):
            self._sigterm_handler(signum, frame)
        if signum == signal.SIGINT and callable(self._sigint_handler):
            self._sigint_handler(signum, frame)

    def _close_worker(self) -> None:
        """Ask the worker to stop and wait for it to finish draining."""
        self._message_queue.put(STOP_SIGNAL)
        self._worker.join()

    def _requests_worker(self) -> None:
        """Daemon loop: POST each queued message until STOP_SIGNAL arrives."""
        session = requests.Session()
        while True:
            message = self._message_queue.get()
            if message == STOP_SIGNAL:
                self._message_queue.task_done()
                return
            try:
                res = session.post(self._url, json=message, timeout=2)
                logger.debug("Posted usage stats: message status " +
                             str(res.status_code))
                if res.status_code != 201:
                    # BUGFIX: the original passed json.dumps(...) as a lazy
                    # %-format argument to a format string with no placeholder,
                    # so the rejected payload was never actually logged.
                    logger.debug("Server rejected message: %s",
                                 json.dumps(message, indent=2))
            except requests.exceptions.Timeout:
                logger.debug("Timeout while sending usage stats message.")
            except Exception as e:
                logger.debug("Unexpected error posting message: " + str(e))
            finally:
                self._message_queue.task_done()

    def build_init_payload(self) -> dict:
        """Adds information that may be available only after full data context construction, but is useful to
        calculate only one time (for example, anonymization)."""
        expectation_suites = [
            self._data_context.get_expectation_suite(expectation_suite_name)
            for expectation_suite_name in
            self._data_context.list_expectation_suite_names()
        ]
        return {
            "platform.system":
            platform.system(),
            "platform.release":
            platform.release(),
            "version_info":
            str(sys.version_info),
            "anonymized_datasources": [
                self._datasource_anonymizer.anonymize_datasource_info(
                    datasource_name, datasource_config)
                for datasource_name, datasource_config in self._data_context.
                project_config_with_variables_substituted.datasources.items()
            ],
            "anonymized_stores": [
                self._store_anonymizer.anonymize_store_info(
                    store_name, store_obj)
                for store_name, store_obj in self._data_context.stores.items()
            ],
            "anonymized_validation_operators": [
                self._validation_operator_anonymizer.
                anonymize_validation_operator_info(
                    validation_operator_name=validation_operator_name,
                    validation_operator_obj=validation_operator_obj,
                ) for validation_operator_name, validation_operator_obj in
                self._data_context.validation_operators.items()
            ],
            "anonymized_data_docs_sites": [
                self._data_docs_sites_anonymizer.anonymize_data_docs_site_info(
                    site_name=site_name, site_config=site_config)
                for site_name, site_config in
                self._data_context.project_config_with_variables_substituted.
                data_docs_sites.items()
            ],
            "anonymized_expectation_suites": [
                self._expectation_suite_anonymizer.
                anonymize_expectation_suite_info(expectation_suite)
                for expectation_suite in expectation_suites
            ],
        }

    def build_envelope(self, message: dict) -> dict:
        """Stamp schema version, timestamps, context ids, and any recorded
        event duration onto a message."""
        message["version"] = "1.0.0"
        message["ge_version"] = self._ge_version

        message["data_context_id"] = self._data_context_id
        message["data_context_instance_id"] = self._data_context_instance_id

        message["event_time"] = (datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z")

        # A profiling decorator may have stored "<event>_duration" on self.
        event_duration_property_name: str = f'{message["event"]}.duration'.replace(
            ".", "_")
        if hasattr(self, event_duration_property_name):
            delta_t: int = getattr(self, event_duration_property_name)
            message["event_duration"] = delta_t

        return message

    @staticmethod
    def validate_message(message: dict, schema: dict) -> bool:
        """Return True iff the message conforms to the given JSON schema."""
        try:
            jsonschema.validate(message, schema=schema)
            return True
        except jsonschema.ValidationError as e:
            logger.debug(
                f"{UsageStatsExceptionPrefix.INVALID_MESSAGE.value} invalid message: "
                + str(e))
            return False

    def send_usage_message(
        self,
        event: str,
        event_payload: Optional[dict] = None,
        success: Optional[bool] = None,
    ) -> None:
        """send a usage statistics message."""
        # noinspection PyBroadException
        try:
            message: dict = {
                "event": event,
                "event_payload": event_payload or {},
                "success": success,
            }
            self.emit(message)
        except Exception:
            pass

    def emit(self, message: dict) -> None:
        """
        Emit a message.
        """
        try:
            if message["event"] == "data_context.__init__":
                message["event_payload"] = self.build_init_payload()
            message = self.build_envelope(message=message)
            if not self.validate_message(
                    message, schema=anonymized_usage_statistics_record_schema):
                return
            self._message_queue.put(message)
        # noinspection PyBroadException
        except Exception as e:
            # We *always* tolerate *any* error in usage statistics
            log_message: str = (
                f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}"
            )
            logger.debug(log_message)