def test_anonymize_simple_sqlalchemy_datasource():
    name = "test_simple_sqlalchemy_datasource"
    yaml_config = """
class_name: SimpleSqlalchemyDatasource
connection_string: sqlite:///some_db.db
introspection:
    whole_table_with_limits:
        sampling_method: _sample_using_limit
        sampling_kwargs:
            n: 10
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_simple_sqlalchemy_datasource(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_name": "3be0aacd79b32e22a41949bf607b3e80",
        "parent_class": "SimpleSqlalchemyDatasource",
        "anonymized_execution_engine": {
            "parent_class": "SqlAlchemyExecutionEngine"
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "d6b508db454c47ea40131b0a11415dd4",
                "parent_class": "InferredAssetSqlDataConnector",
            }
        ],
    }
def test_anonymize_custom_simple_sqlalchemy_datasource():
    name = "test_custom_simple_sqlalchemy_datasource"
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_simple_sqlalchemy_datasource_class
class_name: MyCustomSimpleSqlalchemyDatasource
connection_string: sqlite:///some_db.db
name: some_name
introspection:
    my_custom_datasource_name:
        data_asset_name_suffix: some_suffix
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_simple_sqlalchemy_datasource(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_name": "d9e0c5f761c6ea5e54000f8c10a1049b",
        "parent_class": "SimpleSqlalchemyDatasource",
        "anonymized_class": "aab66054e62007a9ac5afbcacedaf0d2",
        "anonymized_execution_engine": {
            "parent_class": "SqlAlchemyExecutionEngine"
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "82b8b59e076789ac1476b2b745ebc268",
                "parent_class": "InferredAssetSqlDataConnector",
            }
        ],
    }
def __init__(self, data_context, data_context_id, usage_statistics_url):
    self._url = usage_statistics_url
    self._data_context_id = data_context_id
    self._data_context_instance_id = data_context.instance_id
    self._data_context = data_context
    self._ge_version = ge_version
    self._message_queue = Queue()
    self._worker = threading.Thread(target=self._requests_worker, daemon=True)
    self._worker.start()
    self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
    self._store_anonymizer = StoreAnonymizer(data_context_id)
    self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
        data_context_id
    )
    self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(data_context_id)
    self._batch_anonymizer = BatchAnonymizer(data_context_id)
    self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(data_context_id)
    try:
        self._sigterm_handler = signal.signal(signal.SIGTERM, self._teardown)
    except ValueError:
        # if we are not the main thread, we don't get to ask for signal handling.
        self._sigterm_handler = None
    try:
        self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
    except ValueError:
        # if we are not the main thread, we don't get to ask for signal handling.
        self._sigint_handler = None
    atexit.register(self._close_worker)
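# A self-contained sketch (illustrative, not from the source) of why the
# try/except ValueError above exists: signal.signal() raises ValueError when
# called from anything but the main thread, so a handler constructed on a
# worker thread must fall back to storing None for the previous handler.
import signal
import threading


def _try_register_sigterm():
    try:
        return signal.signal(signal.SIGTERM, lambda signum, frame: None)
    except ValueError:
        return None


_result = {}
_t = threading.Thread(target=lambda: _result.update(handler=_try_register_sigterm()))
_t.start()
_t.join()
assert _result["handler"] is None  # registration is refused off the main thread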
def test_anonymize_datasource_info_v2_api_core_ge_class():
    name = "test_pandas_datasource"
    config = {
        "name": name,
        "class_name": "PandasDatasource",
        "module_name": "great_expectations.datasource",
        "data_asset_type": {
            "module_name": "custom_pandas_dataset",
            "class_name": "CustomPandasDataset",
        },
        "batch_kwargs_generators": {
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "some_path",
            }
        },
    }
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "PandasDatasource",
    }
def test_anonymize_datasource_info_v3_api_core_ge_class():
    name = "test_pandas_datasource"
    yaml_config = """
class_name: Datasource
module_name: great_expectations.datasource

execution_engine:
    class_name: PandasExecutionEngine
    module_name: great_expectations.execution_engine

data_connectors:
    my_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        module_name: great_expectations.datasource.data_connector
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_data_connectors": [
            {
                "anonymized_name": "42af601aeb8a03d76bf468a462cb62f6",
                "parent_class": "InferredAssetFilesystemDataConnector",
            }
        ],
        "anonymized_execution_engine": {
            "anonymized_name": "6b8f8c12352592a69083f958369c7151",
            "parent_class": "PandasExecutionEngine",
        },
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "Datasource",
    }
def add_datasource_usage_statistics(
    data_context: "DataContext", name: str, **kwargs  # noqa: F821
) -> dict:
    if not data_context._usage_statistics_handler:
        return {}
    try:
        data_context_id = data_context.data_context_id
    except AttributeError:
        data_context_id = None

    from great_expectations.core.usage_statistics.anonymizers.datasource_anonymizer import (
        DatasourceAnonymizer,
    )

    aggregate_anonymizer = Anonymizer(salt=data_context_id)
    datasource_anonymizer = DatasourceAnonymizer(
        salt=data_context_id, aggregate_anonymizer=aggregate_anonymizer
    )

    payload = {}
    # noinspection PyBroadException
    try:
        payload = datasource_anonymizer._anonymize_datasource_info(name, kwargs)
    except Exception as e:
        logger.debug(
            f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, add_datasource_usage_statistics: Unable to create add_datasource_usage_statistics payload field"
        )
    return payload
def test_is_custom_parent_class_recognized_v3_api_yes():
    config = {
        "module_name": "tests.data_context.fixtures.plugins.my_custom_v3_api_datasource",
        "class_name": "MyCustomV3ApiDatasource",
    }
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    parent_class = datasource_anonymizer.is_parent_class_recognized_v3_api(
        config=config
    )
    assert parent_class == "Datasource"
def test_is_parent_class_recognized_no():
    parent_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for idx in range(len(configs)):
        parent_class = datasource_anonymizer.is_parent_class_recognized(
            config=configs[idx]
        )
        assert parent_class != parent_classes[idx]
        assert parent_class is None
def test_is_custom_parent_class_recognized_yes():
    config = {
        "module_name": "tests.data_context.fixtures.plugins.my_custom_v3_api_datasource",
        "class_name": "MyCustomV3ApiDatasource",
    }
    parent_class = DatasourceAnonymizer.get_parent_class(config=config)
    assert parent_class == "Datasource"

    config = {
        "module_name": "tests.data_context.fixtures.plugins.my_custom_v2_api_datasource",
        "class_name": "MyCustomV2ApiDatasource",
    }
    parent_class = DatasourceAnonymizer.get_parent_class(config=config)
    assert parent_class == "PandasDatasource"
# Assumed to be a pytest fixture: the decorator is not in the original snippet,
# but tests below receive datasource_anonymizer as a parameter.
@pytest.fixture
def datasource_anonymizer() -> DatasourceAnonymizer:
    # Standardize the salt so our tests are deterministic
    salt: str = "00000000-0000-0000-0000-00000000a004"
    aggregate_anonymizer: Anonymizer = Anonymizer(salt=salt)
    anonymizer: DatasourceAnonymizer = DatasourceAnonymizer(
        salt=salt, aggregate_anonymizer=aggregate_anonymizer
    )
    return anonymizer
def test_is_parent_class_recognized_v3_api_yes():
    v3_batch_request_api_datasources = [
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ]
    parent_classes = v3_batch_request_api_datasources
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for idx in range(len(configs)):
        parent_class = datasource_anonymizer.is_parent_class_recognized_v3_api(
            config=configs[idx]
        )
        assert parent_class == parent_classes[idx]
def test_is_parent_class_recognized_v2_api_yes():
    v2_batch_kwargs_api_datasources = [
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ]
    parent_classes = v2_batch_kwargs_api_datasources
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    for idx in range(len(configs)):
        parent_class = datasource_anonymizer.is_parent_class_recognized_v2_api(
            config=configs[idx]
        )
        assert parent_class == parent_classes[idx]
def add_datasource_usage_statistics(data_context, name, **kwargs):
    try:
        data_context_id = data_context.data_context_id
    except AttributeError:
        data_context_id = None
    try:
        datasource_anonymizer = (
            data_context._usage_statistics_handler._datasource_anonymizer
        )
    except Exception:
        datasource_anonymizer = DatasourceAnonymizer(data_context_id)
    payload = {}
    try:
        payload = datasource_anonymizer.anonymize_datasource_info(name, kwargs)
    except Exception:
        logger.debug(
            "add_datasource_usage_statistics: Unable to create add_datasource_usage_statistics payload field"
        )
    return payload
def test_anonymize_datasource_info_v2_api_custom_subclass():
    """
    What does this test and why?
    We should be able to discern the GE parent class for a custom type and
    construct a useful usage stats event message.
    Custom v2 API Datasources should continue to be supported.
    """
    name = "test_pandas_datasource"
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_v2_api_datasource
class_name: MyCustomV2ApiDatasource
"""
    config: CommentedMap = yaml.load(yaml_config)
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    anonymized_datasource = datasource_anonymizer.anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_class": "c454ace824bf401ea42815c84d0f5717",
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "parent_class": "PandasDatasource",
    }
def test_get_parent_class_no():
    parent_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    for idx in range(len(configs)):
        parent_class = DatasourceAnonymizer.get_parent_class(config=configs[idx])
        assert parent_class != parent_classes[idx]
        assert parent_class is None
def test_datasource_anonymizer():
    datasource_anonymizer = DatasourceAnonymizer(salt=CONSISTENT_SALT)
    n1 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "PandasDatasource",
            "module_name": "great_expectations.datasource",
        },
    )
    assert n1 == {
        "anonymized_name": "04bf89e1fb7495b0904bbd5ae478fbe0",
        "parent_class": "PandasDatasource",
    }
    n2 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    # A second anonymizer with a different (randomly generated) salt hashes the
    # same custom class name to a different value.
    datasource_anonymizer_2 = DatasourceAnonymizer()
    n3 = datasource_anonymizer_2.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    assert n2["parent_class"] == "PandasDatasource"
    assert n3["parent_class"] == "PandasDatasource"
    assert len(n3["anonymized_class"]) == 32
    assert n2["anonymized_class"] != n3["anonymized_class"]

    # Same anonymizer *does* produce the same result
    n4 = datasource_anonymizer.anonymize_datasource_info(
        name="test_datasource",
        config={
            "name": "test_datasource",
            "class_name": "CustomDatasource",
            "module_name": "tests.datasource.test_datasource_anonymizer",
        },
    )
    assert n4["anonymized_class"] == n2["anonymized_class"]
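# A minimal sketch (an assumption about the mechanism, not the library's
# verbatim code) of why a fixed salt makes the digests above deterministic: if
# the anonymizer computes a salted MD5 of the input, equal (salt, value) pairs
# always yield the same 32-character hex digest, while a fresh random salt
# yields a different one.
import hashlib


def _salted_md5(salt: str, value: str) -> str:
    # Hypothetical helper for illustration only.
    return hashlib.md5((salt + value).encode("utf-8")).hexdigest()


assert _salted_md5("fixed_salt", "CustomDatasource") == _salted_md5(
    "fixed_salt", "CustomDatasource"
)
assert len(_salted_md5("fixed_salt", "CustomDatasource")) == 32
assert _salted_md5("fixed_salt", "CustomDatasource") != _salted_md5(
    "other_salt", "CustomDatasource"
)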
def test_get_parent_class_v3_api_yes():
    v3_batch_request_api_datasources = [
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ]
    parent_classes = v3_batch_request_api_datasources
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    for idx in range(len(configs)):
        parent_class = DatasourceAnonymizer.get_parent_class_v3_api(
            config=configs[idx]
        )
        assert parent_class == parent_classes[idx]
def test_get_parent_class_v2_api_yes():
    v2_batch_kwargs_api_datasources = [
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ]
    parent_classes = v2_batch_kwargs_api_datasources
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    for idx in range(len(configs)):
        parent_class = DatasourceAnonymizer.get_parent_class_v2_api(
            config=configs[idx]
        )
        assert parent_class == parent_classes[idx]
def test_get_parent_class_v2_api_no():
    v3_batch_request_api_datasources = [
        "SimpleSqlalchemyDatasource",
        "Datasource",
        "BaseDatasource",
    ]
    custom_non_datasource_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    parent_classes = v3_batch_request_api_datasources + custom_non_datasource_classes
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    for idx in range(len(configs)):
        parent_class = DatasourceAnonymizer.get_parent_class_v2_api(
            config=configs[idx]
        )
        assert parent_class != parent_classes[idx]
        assert parent_class is None
def test_get_parent_class_v3_api_no():
    v2_batch_kwargs_api_datasources = [
        "PandasDatasource",
        "SqlAlchemyDatasource",
        "SparkDFDatasource",
        "LegacyDatasource",
    ]
    custom_non_datasource_classes = [
        "MyCustomNonDatasourceClass",
        "MyOtherCustomNonDatasourceClass",
    ]
    parent_classes = v2_batch_kwargs_api_datasources + custom_non_datasource_classes
    configs = [
        {
            "name": "test_datasource",
            "class_name": parent_class,
            "module_name": "great_expectations.datasource",
        }
        for parent_class in parent_classes
    ]
    for idx in range(len(configs)):
        parent_class = DatasourceAnonymizer.get_parent_class_v3_api(
            config=configs[idx]
        )
        assert parent_class != parent_classes[idx]
        assert parent_class is None
def test_anonymize_datasource_info_v3_api_custom_subclass(
    datasource_anonymizer: DatasourceAnonymizer,
):
    name = "test_pandas_datasource"
    yaml_config = """
module_name: tests.data_context.fixtures.plugins.my_custom_v3_api_datasource
class_name: MyCustomV3ApiDatasource

execution_engine:
    class_name: PandasExecutionEngine
    module_name: great_expectations.execution_engine

data_connectors:
    my_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        module_name: great_expectations.datasource.data_connector
"""
    config: CommentedMap = yaml.load(yaml_config)
    anonymized_datasource = datasource_anonymizer._anonymize_datasource_info(
        name=name, config=config
    )
    assert anonymized_datasource == {
        "anonymized_name": "2642802d79d90ce6d147b0f9f61c3569",
        "anonymized_class": "ae74d1b58a67f5a944bb9cda16a62472",
        "parent_class": "Datasource",
        "anonymized_execution_engine": {
            "anonymized_name": "6b8f8c12352592a69083f958369c7151",
            "parent_class": "PandasExecutionEngine",
        },
        "anonymized_data_connectors": [
            {
                "anonymized_name": "42af601aeb8a03d76bf468a462cb62f6",
                "parent_class": "InferredAssetFilesystemDataConnector",
            }
        ],
    }
class UsageStatisticsHandler(object):
    def __init__(self, data_context, data_context_id, usage_statistics_url):
        self._url = usage_statistics_url
        self._data_context_id = data_context_id
        self._data_context_instance_id = data_context.instance_id
        self._data_context = data_context
        self._ge_version = ge_version
        self._message_queue = Queue()
        self._worker = threading.Thread(target=self._requests_worker, daemon=True)
        self._worker.start()
        self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
        self._store_anonymizer = StoreAnonymizer(data_context_id)
        self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
            data_context_id
        )
        self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(data_context_id)
        self._batch_anonymizer = BatchAnonymizer(data_context_id)
        self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(
            data_context_id
        )
        self._sigterm_handler = signal.signal(signal.SIGTERM, self._teardown)
        self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
        atexit.register(self._close_worker)

    def _teardown(self, signum: int, frame):
        self._close_worker()
        if signum == signal.SIGTERM:
            self._sigterm_handler(signum, frame)
        if signum == signal.SIGINT:
            self._sigint_handler(signum, frame)

    def _close_worker(self):
        self._message_queue.put(STOP_SIGNAL)
        self._worker.join()

    def _requests_worker(self):
        session = requests.Session()
        while True:
            message = self._message_queue.get()
            if message == STOP_SIGNAL:
                self._message_queue.task_done()
                return
            try:
                res = session.post(self._url, json=message, timeout=2)
                logger.debug(
                    "Posted usage stats: message status " + str(res.status_code)
                )
                if res.status_code != 201:
                    logger.debug(
                        "Server rejected message: " + json.dumps(message, indent=2)
                    )
            except requests.exceptions.Timeout:
                logger.debug("Timeout while sending usage stats message.")
            except Exception as e:
                logger.debug("Unexpected error posting message: " + str(e))
            finally:
                self._message_queue.task_done()

    def send_usage_message(self, event, event_payload=None, success=None):
        """send a usage statistics message."""
        try:
            message = {
                "event": event,
                "event_payload": event_payload or {},
                "success": success,
            }
            self.emit(message)
        except Exception:
            pass

    def build_init_payload(self):
        """Adds information that may be available only after full data context construction,
        but is useful to calculate only one time (for example, anonymization)."""
        expectation_suites = [
            self._data_context.get_expectation_suite(expectation_suite_name)
            for expectation_suite_name in self._data_context.list_expectation_suite_names()
        ]
        return {
            "platform.system": platform.system(),
            "platform.release": platform.release(),
            "version_info": str(sys.version_info),
            "anonymized_datasources": [
                self._datasource_anonymizer.anonymize_datasource_info(
                    datasource_name, datasource_config
                )
                for datasource_name, datasource_config in self._data_context._project_config_with_variables_substituted.datasources.items()
            ],
            "anonymized_stores": [
                self._store_anonymizer.anonymize_store_info(store_name, store_obj)
                for store_name, store_obj in self._data_context.stores.items()
            ],
            "anonymized_validation_operators": [
                self._validation_operator_anonymizer.anonymize_validation_operator_info(
                    validation_operator_name=validation_operator_name,
                    validation_operator_obj=validation_operator_obj,
                )
                for validation_operator_name, validation_operator_obj in self._data_context.validation_operators.items()
            ],
            "anonymized_data_docs_sites": [
                self._data_docs_sites_anonymizer.anonymize_data_docs_site_info(
                    site_name=site_name, site_config=site_config
                )
                for site_name, site_config in self._data_context._project_config_with_variables_substituted.data_docs_sites.items()
            ],
            "anonymized_expectation_suites": [
                self._expectation_suite_anonymizer.anonymize_expectation_suite_info(
                    expectation_suite
                )
                for expectation_suite in expectation_suites
            ],
        }

    def build_envelope(self, message):
        message["version"] = "1.0.0"
        message["event_time"] = (
            datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
        )
        message["data_context_id"] = self._data_context_id
        message["data_context_instance_id"] = self._data_context_instance_id
        message["ge_version"] = self._ge_version
        return message

    def validate_message(self, message, schema):
        try:
            jsonschema.validate(message, schema=schema)
            return True
        except jsonschema.ValidationError as e:
            logger.debug("invalid message: " + str(e))
            return False

    def emit(self, message):
        """
        Emit a message.
        """
        try:
            if message["event"] == "data_context.__init__":
                message["event_payload"] = self.build_init_payload()
            message = self.build_envelope(message)
            if not self.validate_message(
                message, schema=usage_statistics_record_schema
            ):
                return
            self._message_queue.put(message)
        # noinspection PyBroadException
        except Exception as e:
            # We *always* tolerate *any* error in usage statistics
            logger.debug(e)
class UsageStatisticsHandler:
    def __init__(
        self,
        data_context: "DataContext",  # noqa: F821
        data_context_id: str,
        usage_statistics_url: str,
    ):
        self._url = usage_statistics_url
        self._data_context_id = data_context_id
        self._data_context_instance_id = data_context.instance_id
        self._data_context = data_context
        self._ge_version = ge_version
        self._message_queue = Queue()
        self._worker = threading.Thread(target=self._requests_worker, daemon=True)
        self._worker.start()
        self._datasource_anonymizer = DatasourceAnonymizer(data_context_id)
        self._execution_engine_anonymizer = ExecutionEngineAnonymizer(data_context_id)
        self._store_anonymizer = StoreAnonymizer(data_context_id)
        self._validation_operator_anonymizer = ValidationOperatorAnonymizer(
            data_context_id
        )
        self._data_docs_sites_anonymizer = DataDocsSiteAnonymizer(data_context_id)
        self._batch_request_anonymizer = BatchRequestAnonymizer(data_context_id)
        self._batch_anonymizer = BatchAnonymizer(data_context_id)
        self._expectation_suite_anonymizer = ExpectationSuiteAnonymizer(
            data_context_id
        )
        self._checkpoint_run_anonymizer = CheckpointRunAnonymizer(data_context_id)
        try:
            self._sigterm_handler = signal.signal(signal.SIGTERM, self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigterm_handler = None
        try:
            self._sigint_handler = signal.signal(signal.SIGINT, self._teardown)
        except ValueError:
            # if we are not the main thread, we don't get to ask for signal handling.
            self._sigint_handler = None

        atexit.register(self._close_worker)

    def _teardown(self, signum: int, frame: Optional[FrameType]) -> None:
        self._close_worker()
        if signum == signal.SIGTERM and self._sigterm_handler:
            self._sigterm_handler(signum, frame)
        if signum == signal.SIGINT and self._sigint_handler:
            self._sigint_handler(signum, frame)

    def _close_worker(self) -> None:
        self._message_queue.put(STOP_SIGNAL)
        self._worker.join()

    def _requests_worker(self) -> None:
        session = requests.Session()
        while True:
            message = self._message_queue.get()
            if message == STOP_SIGNAL:
                self._message_queue.task_done()
                return
            try:
                res = session.post(self._url, json=message, timeout=2)
                logger.debug(
                    "Posted usage stats: message status " + str(res.status_code)
                )
                if res.status_code != 201:
                    logger.debug(
                        "Server rejected message: " + json.dumps(message, indent=2)
                    )
            except requests.exceptions.Timeout:
                logger.debug("Timeout while sending usage stats message.")
            except Exception as e:
                logger.debug("Unexpected error posting message: " + str(e))
            finally:
                self._message_queue.task_done()

    def build_init_payload(self) -> dict:
        """Adds information that may be available only after full data context construction,
        but is useful to calculate only one time (for example, anonymization)."""
        expectation_suites = [
            self._data_context.get_expectation_suite(expectation_suite_name)
            for expectation_suite_name in self._data_context.list_expectation_suite_names()
        ]
        return {
            "platform.system": platform.system(),
            "platform.release": platform.release(),
            "version_info": str(sys.version_info),
            "anonymized_datasources": [
                self._datasource_anonymizer.anonymize_datasource_info(
                    datasource_name, datasource_config
                )
                for datasource_name, datasource_config in self._data_context.project_config_with_variables_substituted.datasources.items()
            ],
            "anonymized_stores": [
                self._store_anonymizer.anonymize_store_info(store_name, store_obj)
                for store_name, store_obj in self._data_context.stores.items()
            ],
            "anonymized_validation_operators": [
                self._validation_operator_anonymizer.anonymize_validation_operator_info(
                    validation_operator_name=validation_operator_name,
                    validation_operator_obj=validation_operator_obj,
                )
                for validation_operator_name, validation_operator_obj in self._data_context.validation_operators.items()
            ],
            "anonymized_data_docs_sites": [
                self._data_docs_sites_anonymizer.anonymize_data_docs_site_info(
                    site_name=site_name, site_config=site_config
                )
                for site_name, site_config in self._data_context.project_config_with_variables_substituted.data_docs_sites.items()
            ],
            "anonymized_expectation_suites": [
                self._expectation_suite_anonymizer.anonymize_expectation_suite_info(
                    expectation_suite
                )
                for expectation_suite in expectation_suites
            ],
        }

    def build_envelope(self, message: dict) -> dict:
        message["version"] = "1.0.0"
        message["ge_version"] = self._ge_version
        message["data_context_id"] = self._data_context_id
        message["data_context_instance_id"] = self._data_context_instance_id
        message["event_time"] = (
            datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y-%m-%dT%H:%M:%S.%f"
            )[:-3]
            + "Z"
        )

        event_duration_property_name: str = f'{message["event"]}.duration'.replace(
            ".", "_"
        )
        if hasattr(self, event_duration_property_name):
            delta_t: int = getattr(self, event_duration_property_name)
            message["event_duration"] = delta_t

        return message

    @staticmethod
    def validate_message(message: dict, schema: dict) -> bool:
        try:
            jsonschema.validate(message, schema=schema)
            return True
        except jsonschema.ValidationError as e:
            logger.debug(
                f"{UsageStatsExceptionPrefix.INVALID_MESSAGE.value} invalid message: "
                + str(e)
            )
            return False

    def send_usage_message(
        self,
        event: str,
        event_payload: Optional[dict] = None,
        success: Optional[bool] = None,
    ) -> None:
        """send a usage statistics message."""
        # noinspection PyBroadException
        try:
            message: dict = {
                "event": event,
                "event_payload": event_payload or {},
                "success": success,
            }
            self.emit(message)
        except Exception:
            pass

    def emit(self, message: dict) -> None:
        """
        Emit a message.
        """
        try:
            if message["event"] == "data_context.__init__":
                message["event_payload"] = self.build_init_payload()
            message = self.build_envelope(message=message)
            if not self.validate_message(
                message, schema=anonymized_usage_statistics_record_schema
            ):
                return
            self._message_queue.put(message)
        # noinspection PyBroadException
        except Exception as e:
            # We *always* tolerate *any* error in usage statistics
            log_message: str = (
                f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}"
            )
            logger.debug(log_message)
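# A self-contained sketch of the validate_message() pattern above: jsonschema
# validation converted into a bool instead of an exception. The schema below is
# a hypothetical stand-in for anonymized_usage_statistics_record_schema, which
# is not shown in this section.
import jsonschema


_example_schema = {
    "type": "object",
    "properties": {"event": {"type": "string"}},
    "required": ["event"],
}


def _example_validate(message: dict) -> bool:
    try:
        jsonschema.validate(message, schema=_example_schema)
        return True
    except jsonschema.ValidationError:
        return False


assert _example_validate({"event": "data_context.__init__"}) is True
assert _example_validate({"payload_only": True}) is False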