def __init__(self, serialization_type=None, root_directory=None):
    self.serialization_type = serialization_type
    self.root_directory = root_directory
    self.store_backend = instantiate_class_from_config(
        config={
            "module_name": "great_expectations.data_context.store",
            "class_name": "InMemoryStoreBackend",
            "separator": ".",
        },
        runtime_config={
            "root_directory": root_directory,
        },
        config_defaults={},
    )
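# A minimal, self-contained sketch of the call pattern shared by every snippet
# in this section: instantiate_class_from_config resolves module_name +
# class_name to a class and constructs it from the merged config, runtime, and
# default kwargs. The import path below is Great Expectations' usual location
# for this helper; note the call site above uses the older runtime_config
# spelling, while most snippets that follow use runtime_environment.
from great_expectations.data_context.util import instantiate_class_from_config

store_backend = instantiate_class_from_config(
    config={"class_name": "InMemoryStoreBackend"},
    runtime_environment={},
    config_defaults={"module_name": "great_expectations.data_context.store"},
)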
def __init__(self, column_section_renderer=None):
    if column_section_renderer is None:
        column_section_renderer = {
            "class_name": "ValidationResultsColumnSectionRenderer"
        }
    self._column_section_renderer = instantiate_class_from_config(
        config=column_section_renderer,
        runtime_config={},
        config_defaults={
            "module_name": column_section_renderer.get(
                "module_name",
                "great_expectations.render.renderer.column_section_renderer",
            )
        },
    )
def _build_generator_from_config(self, **kwargs):
    if "type" in kwargs:
        warnings.warn(
            "Using type to configure generators is now deprecated. "
            "Please use module_name and class_name instead."
        )
        type_ = kwargs.pop("type")
        generator_class = self._get_generator_class_from_type(type_)
        kwargs.update({"class_name": generator_class.__name__})
    generator = instantiate_class_from_config(
        config=kwargs,
        runtime_config={"datasource": self},
        config_defaults={
            "module_name": "great_expectations.datasource.generator"
        },
    )
    return generator
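# A hedged usage sketch for the deprecated path above; "datasource" and the
# generator kwargs are assumed placeholders, not taken from the source. Passing
# "type" emits the deprecation warning and is rewritten as class_name before
# instantiation.
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    generator = datasource._build_generator_from_config(  # assumed instance
        type="subdir_reader",  # deprecated spelling (assumed type value)
        base_directory="/data/csvs",  # assumed generator-specific kwarg
    )
assert any("deprecated" in str(w.message) for w in caught)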
def test_site_builder_with_custom_site_section_builders_config(tmp_path_factory):
    """Test that site builder can handle partially specified custom site_section_builders config"""
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    # fixture config swaps site section builder source stores and specifies
    # custom run_name_filters
    shutil.copy(
        file_relative_path(
            __file__,
            "../test_fixtures/great_expectations_custom_local_site_config.yml",
        ),
        str(os.path.join(project_dir, "great_expectations.yml")),
    )
    context = DataContext(context_root_dir=project_dir)
    local_site_config = context._project_config.data_docs_sites.get("local_site")

    module_name = "great_expectations.render.renderer.site_builder"
    site_builder = instantiate_class_from_config(
        config=local_site_config,
        runtime_environment={
            "data_context": context,
            "root_directory": context.root_directory,
            "site_name": "local_site",
        },
        config_defaults={"module_name": module_name},
    )
    site_section_builders = site_builder.site_section_builders

    expectations_site_section_builder = site_section_builders["expectations"]
    assert isinstance(expectations_site_section_builder.source_store, ValidationsStore)

    validations_site_section_builder = site_section_builders["validations"]
    assert isinstance(validations_site_section_builder.source_store, ExpectationsStore)
    assert validations_site_section_builder.run_name_filter == {
        "ne": "custom_validations_filter"
    }

    profiling_site_section_builder = site_section_builders["profiling"]
    assert isinstance(validations_site_section_builder.source_store, ExpectationsStore)
    assert profiling_site_section_builder.run_name_filter == {
        "eq": "custom_profiling_filter"
    }
def test_site_builder_usage_statistics_disabled(
    site_builder_data_context_with_html_store_titanic_random,
):
    context = site_builder_data_context_with_html_store_titanic_random
    context._project_config.anonymous_usage_statistics = {
        "enabled": False,
        "data_context_id": "f43d4897-385f-4366-82b0-1a8eda2bf79c",
    }
    data_context_id = context.anonymous_usage_statistics["data_context_id"]

    sites = (
        site_builder_data_context_with_html_store_titanic_random.project_config_with_variables_substituted.data_docs_sites
    )
    local_site_config = sites["local_site"]
    site_builder = instantiate_class_from_config(
        config=local_site_config,
        runtime_environment={
            "data_context": context,
            "root_directory": context.root_directory,
            "site_name": "local_site",
        },
        config_defaults={
            "module_name": "great_expectations.render.renderer.site_builder"
        },
    )
    site_builder_return_obj = site_builder.build()
    index_page_path = site_builder_return_obj[0]
    links_dict = site_builder_return_obj[1]
    expectation_suite_pages = [
        file_relative_path(index_page_path, expectation_suite_link_dict["filepath"])
        for expectation_suite_link_dict in links_dict["expectations_links"]
    ]
    profiling_results_pages = [
        file_relative_path(index_page_path, profiling_link_dict["filepath"])
        for profiling_link_dict in links_dict["profiling_links"]
    ]
    page_paths_to_check = (
        [index_page_path] + expectation_suite_pages + profiling_results_pages
    )
    expected_logo_url = "https://great-expectations-web-assets.s3.us-east-2.amazonaws.com/logo-long.png?d=20190924T231836.000000Z"
    for page_path in page_paths_to_check:
        # paths come back as file:// URLs; strip the 7-character scheme prefix
        with open(page_path[7:]) as f:
            page_contents = f.read()
            assert expected_logo_url in page_contents
            assert data_context_id not in page_contents
def in_memory_param_store(request, test_backends):
    # If we have a backend configuration but we do not have postgres configured, skip
    backend_config = request.param.get("store_backend", None)
    if backend_config:
        if (
            backend_config.get("credentials", {}).get("drivername", None)
            == "postgresql"
        ):
            if "postgresql" not in test_backends:
                pytest.skip("skipping fixture because postgresql not selected")
    return instantiate_class_from_config(
        config=request.param,
        config_defaults={
            "module_name": "great_expectations.data_context.store",
        },
        runtime_environment={},
    )
def _build_asset_from_config(self, config: dict):
    runtime_environment: dict = {"data_connector": self}
    config = assetConfigSchema.load(config)
    config = assetConfigSchema.dump(config)
    asset: Asset = instantiate_class_from_config(
        config=config,
        runtime_environment=runtime_environment,
        config_defaults={},
    )
    if not asset:
        raise ge_exceptions.ClassInstantiationError(
            module_name="great_expectations.datasource.data_connector.asset",
            package_name=None,
            class_name=config["class_name"],
        )
    return asset
def from_data_context(data_context):
    suite_edit_notebook_config: Optional[NotebookConfig] = None
    if data_context.notebooks and data_context.notebooks.get("suite_edit"):
        suite_edit_notebook_config = notebookConfigSchema.load(
            data_context.notebooks.get("suite_edit")
        )

    return instantiate_class_from_config(
        config=suite_edit_notebook_config.__dict__
        if suite_edit_notebook_config
        else {
            "module_name": "great_expectations.render.renderer.suite_edit_notebook_renderer",
            "class_name": "SuiteEditNotebookRenderer",
        },
        runtime_environment={},
        config_defaults={},
    )
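# A sketch (hypothetical module and class names) of the data_context.notebooks
# shape that routes from_data_context through the custom-renderer branch
# instead of the SuiteEditNotebookRenderer default.
notebooks_config = {
    "suite_edit": {
        "module_name": "my_plugins.notebook_renderers",  # hypothetical module
        "class_name": "MyNotebookRenderer",  # hypothetical class
    }
}
# With data_context.notebooks set to notebooks_config, the dict is validated by
# notebookConfigSchema and then handed to instantiate_class_from_config.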
def test_get_definition_list_from_batch_request_with_empty_args_raises_error(
    mock_gcs_conn, mock_list_keys, mock_emit, empty_data_context_stats_enabled
):
    my_data_connector_yaml = yaml.load(
        f"""
    class_name: ConfiguredAssetGCSDataConnector
    datasource_name: test_environment
    bucket_or_name: my_bucket
    prefix: ""
    assets:
        TestFiles:
    default_regex:
        pattern: (.+)_(.+)_(.+)\\.csv
        group_names:
            - name
            - timestamp
            - price
    """,
    )

    mock_list_keys.return_value = [
        "alex_20200809_1000.csv",
        "eugene_20200809_1500.csv",
        "james_20200811_1009.csv",
        "abe_20200809_1040.csv",
        "will_20200809_1002.csv",
        "james_20200713_1567.csv",
        "eugene_20201129_1900.csv",
        "will_20200810_1001.csv",
        "james_20200810_1003.csv",
        "alex_20200819_1300.csv",
    ]

    my_data_connector: ConfiguredAssetGCSDataConnector = instantiate_class_from_config(
        config=my_data_connector_yaml,
        runtime_environment={
            "name": "general_gcs_data_connector",
            "execution_engine": PandasExecutionEngine(),
        },
        config_defaults={
            "module_name": "great_expectations.datasource.data_connector"
        },
    )

    # Raises error in `FilePathDataConnector.get_batch_definition_list_from_batch_request()`
    # due to missing a `batch_request` arg
    with pytest.raises(TypeError):
        # noinspection PyArgumentList
        my_data_connector.get_batch_definition_list_from_batch_request()
def __init__(
    self,
    data_context,
    renderer,
    slack_webhook=None,
    slack_token=None,
    slack_channel=None,
    notify_on="all",
    notify_with=None,
):
    """Construct a SlackNotificationAction

    Args:
        data_context: the DataContext this action is registered with
        renderer: dictionary specifying the renderer used to generate a query
            consumable by the Slack API, for example:
            {
                "module_name": "great_expectations.render.renderer.slack_renderer",
                "class_name": "SlackRenderer",
            }
        slack_webhook: incoming Slack webhook to which to send notification
        slack_token: Slack bot token, used together with slack_channel as an
            alternative to slack_webhook
        slack_channel: Slack channel to post to, used together with slack_token
        notify_on: "all", "failure", "success" - specifies validation status
            that will trigger notification
        notify_with: *Optional* payload from other ValidationActions
    """
    super().__init__(data_context)
    self.renderer = instantiate_class_from_config(
        config=renderer,
        runtime_environment={},
        config_defaults={},
    )
    module_name = renderer["module_name"]
    if not self.renderer:
        raise ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            class_name=renderer["class_name"],
        )

    # Require either a webhook or a token + channel pair, but never all three.
    if not slack_token and slack_channel:
        assert slack_webhook
    if not slack_webhook:
        assert slack_token and slack_channel
    assert not (slack_webhook and slack_channel and slack_token)

    self.slack_webhook = slack_webhook
    self.slack_token = slack_token
    self.slack_channel = slack_channel
    self.notify_on = notify_on
    self.notify_with = notify_with
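# A construction sketch for the action above: the renderer dict mirrors the
# docstring example; the webhook URL and context are assumed placeholders.
action = SlackNotificationAction(
    data_context=context,  # assumed DataContext instance
    renderer={
        "module_name": "great_expectations.render.renderer.slack_renderer",
        "class_name": "SlackRenderer",
    },
    slack_webhook="https://hooks.slack.com/services/T000/B000/XXXX",  # placeholder
    notify_on="failure",
)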
def clean_up_tables_with_prefix(connection_string: str, table_prefix: str) -> List[str]:
    """Drop all tables starting with the provided table_prefix.

    Note: Uses the private method InferredAssetSqlDataConnector._introspect_db()
    to get the table names without duplicating code, but this should be
    refactored in the future to avoid relying on a private method.

    Args:
        connection_string: Used to connect to the database.
        table_prefix: First characters of the tables you want to remove.

    Returns:
        List of deleted tables.
    """
    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        connection_string=connection_string
    )
    data_connector = instantiate_class_from_config(
        config={
            "class_name": "InferredAssetSqlDataConnector",
            "name": "temp_data_connector",
        },
        runtime_environment={
            "execution_engine": execution_engine,
            "datasource_name": "temp_datasource",
        },
        config_defaults={"module_name": "great_expectations.datasource.data_connector"},
    )
    introspection_output = data_connector._introspect_db()

    tables_to_drop: List[str] = []
    tables_dropped: List[str] = []

    for table in introspection_output:
        if table["table_name"].startswith(table_prefix):
            tables_to_drop.append(table["table_name"])

    connection = execution_engine.engine.connect()
    for table_name in tables_to_drop:
        print(f"Dropping table {table_name}")
        connection.execute(f"DROP TABLE IF EXISTS {table_name}")
        tables_dropped.append(table_name)

    tables_skipped: List[str] = list(set(tables_to_drop) - set(tables_dropped))
    if len(tables_skipped) > 0:
        warnings.warn(f"Warning: Tables skipped: {tables_skipped}")

    return tables_dropped
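# Usage sketch with an assumed DSN: drop every table whose name starts with
# "tmp_" and report what was removed.
dropped = clean_up_tables_with_prefix(
    connection_string="postgresql+psycopg2://user:pass@localhost/test_db",  # assumed
    table_prefix="tmp_",
)
print(f"Dropped {len(dropped)} tables: {dropped}")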
def __init__(self, name="default", datasource=None, query_store_backend=None, queries=None): super().__init__(name=name, datasource=datasource) if (datasource and datasource.data_context and datasource.data_context.root_directory): root_directory = datasource.data_context.root_directory else: root_directory = None if query_store_backend is None: # We will choose a Tuple store if there is a configured DataContext with a root_directory, # and an InMemoryStore otherwise if root_directory: query_store_backend = { "class_name": "TupleFilesystemStoreBackend", "base_directory": os.path.join( datasource.data_context.root_directory, "datasources", datasource.name, "generators", name, ), "filepath_suffix": ".sql", } else: query_store_backend = {"class_name": "InMemoryStoreBackend"} module_name = "great_expectations.data_context.store" self._store_backend = instantiate_class_from_config( config=query_store_backend, runtime_environment={"root_directory": root_directory}, config_defaults={"module_name": module_name}, ) if not self._store_backend: raise ClassInstantiationError( module_name=module_name, package_name=None, class_name=query_store_backend["class_name"], ) if queries is not None: for query_name, query in queries.items(): self.add_query(data_asset_name=query_name, query=query)
def __init__(
    self,
    data_context,
    action_list,
    name,
    result_format={"result_format": "SUMMARY"},
):
    super().__init__()
    self.data_context = data_context
    self.name = name

    result_format = parse_result_format(result_format)
    assert result_format["result_format"] in [
        "BOOLEAN_ONLY",
        "BASIC",
        "SUMMARY",
        "COMPLETE",
    ]
    self.result_format = result_format

    self.action_list = action_list
    self.actions = OrderedDict()
    for action_config in action_list:
        assert isinstance(action_config, dict)
        # NOTE: Eugene: 2019-09-23: need a better way to validate an action config:
        if not set(action_config.keys()) == {"name", "action"}:
            raise KeyError(
                'Action config keys must be ("name", "action"). Instead got {}'.format(
                    action_config.keys()
                )
            )

        config = action_config["action"]
        module_name = "great_expectations.validation_operators"
        new_action = instantiate_class_from_config(
            config=config,
            runtime_environment={"data_context": self.data_context},
            config_defaults={"module_name": module_name},
        )
        if not new_action:
            raise ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=config["class_name"],
            )
        self.actions[action_config["name"]] = new_action
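# A sketch of the action_list shape the constructor above validates: each entry
# must carry exactly the keys "name" and "action"; the action class below is an
# assumption chosen to match the validation_operators default module.
action_list = [
    {
        "name": "store_validation_result",
        "action": {"class_name": "StoreValidationResultAction"},  # assumed class
    },
]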
def _build_batch_kwargs_generator(self, **kwargs):
    """Build a BatchKwargGenerator using the provided configuration and return
    the newly-built generator."""
    generator = instantiate_class_from_config(
        config=kwargs,
        runtime_environment={"datasource": self},
        config_defaults={
            "module_name": "great_expectations.datasource.batch_kwargs_generator"
        },
    )
    if not generator:
        raise ClassInstantiationError(
            module_name="great_expectations.datasource.batch_kwargs_generator",
            package_name=None,
            class_name=kwargs["class_name"],
        )
    return generator
def basic_pandas_datasource_v013(tmp_path_factory):
    base_directory: str = str(
        tmp_path_factory.mktemp(
            "basic_pandas_datasource_v013_filesystem_data_connector"
        )
    )
    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(
            f"""
class_name: Datasource

execution_engine:
    class_name: PandasExecutionEngine

data_connectors:
    test_runtime_data_connector:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        runtime_keys:
        - pipeline_stage_name
        - airflow_run_id

    my_filesystem_data_connector:
        class_name: ConfiguredAssetFilesystemDataConnector
        base_directory: {base_directory}
        # TODO: <Alex>Investigate: this potentially breaks the data_reference centric design.</Alex>
        glob_directive: "*.csv"
        # glob_directive: "*"

        assets:
            Titanic: {{}}

        default_regex:
            # TODO: <Alex>Investigate: this potentially breaks the data_reference centric design.</Alex>
            pattern: (.+)_(\\d+)\\.csv
            # pattern: (.+)_(\\d+)\\.[a-z][a-z][a-z]
            group_names:
            - letter
            - number
    """,
        ),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource
def init_parameter_builder(
    parameter_builder_config: Union["ParameterBuilderConfig", dict],  # noqa: F821
    data_context: Optional["BaseDataContext"] = None,  # noqa: F821
) -> "ParameterBuilder":  # noqa: F821
    if not isinstance(parameter_builder_config, dict):
        parameter_builder_config = parameter_builder_config.to_dict()

    parameter_builder: "ParameterBuilder" = instantiate_class_from_config(  # noqa: F821
        config=parameter_builder_config,
        runtime_environment={"data_context": data_context},
        config_defaults={
            "module_name": "great_expectations.rule_based_profiler.parameter_builder"
        },
    )
    return parameter_builder
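# Usage sketch; the builder class and metric are assumptions consistent with
# the rule_based_profiler.parameter_builder default module above.
parameter_builder = init_parameter_builder(
    parameter_builder_config={
        "class_name": "MetricMultiBatchParameterBuilder",  # assumed builder class
        "name": "my_parameter",
        "metric_name": "column.max",  # assumed metric
    },
    data_context=None,
)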
def __init__(self, column_section_renderer=None):
    if column_section_renderer is None:
        column_section_renderer = {
            "class_name": "ExpectationSuiteColumnSectionRenderer"
        }
    module_name = "great_expectations.render.renderer.column_section_renderer"
    self._column_section_renderer = instantiate_class_from_config(
        config=column_section_renderer,
        runtime_environment={},
        config_defaults={
            "module_name": column_section_renderer.get("module_name", module_name)
        },
    )
    if not self._column_section_renderer:
        raise ClassInstantiationError(
            # report the module that was actually used, not the whole config dict
            module_name=column_section_renderer.get("module_name", module_name),
            package_name=None,
            class_name=column_section_renderer["class_name"],
        )
def _build_asset_from_config(
    runtime_environment: "DataConnector", config: dict
) -> Asset:
    """Build Asset from configuration and return asset.

    Used by both ConfiguredAssetDataConnector and RuntimeDataConnector."""
    runtime_environment: dict = {"data_connector": runtime_environment}
    config = assetConfigSchema.load(config)
    config = assetConfigSchema.dump(config)
    asset: Asset = instantiate_class_from_config(
        config=config,
        runtime_environment=runtime_environment,
        config_defaults={},
    )
    if not asset:
        raise ge_exceptions.ClassInstantiationError(
            module_name="great_expectations.datasource.data_connector.asset",
            package_name=None,
            class_name=config["class_name"],
        )
    return asset
def _build_asset_from_config(self, name: str, config: dict):
    """Build an Asset using the provided configuration and return the newly-built Asset."""
    runtime_environment: dict = {"name": name, "data_connector": self}
    asset: Asset = instantiate_class_from_config(
        config=config,
        runtime_environment=runtime_environment,
        config_defaults={
            "module_name": "great_expectations.datasource.data_connector.asset",
            "class_name": "Asset",
        },
    )
    if not asset:
        raise ge_exceptions.ClassInstantiationError(
            module_name="great_expectations.datasource.data_connector.asset",
            package_name=None,
            class_name=config["class_name"],
        )
    return asset
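# A call sketch for the builder above; because config_defaults pins class_name
# to "Asset", the config only needs asset-specific fields. The connector
# instance and kwargs are assumed placeholders.
asset = data_connector._build_asset_from_config(  # assumed connector instance
    name="users",
    config={"base_directory": "data/users"},  # hypothetical Asset kwargs
)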
def get_profiler(
    data_context: "DataContext",  # noqa: F821
    profiler_store: ProfilerStore,
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
) -> "RuleBasedProfiler":
    assert bool(name) ^ bool(
        ge_cloud_id
    ), "Must provide either name or ge_cloud_id (but not both)"

    key: Union[GeCloudIdentifier, ConfigurationIdentifier]
    if ge_cloud_id:
        key = GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
    else:
        key = ConfigurationIdentifier(configuration_key=name)
    try:
        profiler_config: RuleBasedProfilerConfig = profiler_store.get(key=key)
    except ge_exceptions.InvalidKeyError as exc_ik:
        id_ = (
            key.configuration_key
            if isinstance(key, ConfigurationIdentifier)
            else key
        )
        raise ge_exceptions.ProfilerNotFoundError(
            message=f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
        )

    config = profiler_config.to_json_dict()
    if name:
        config.update({"name": name})
    config = filter_properties_dict(properties=config, clean_falsy=True)

    profiler = instantiate_class_from_config(
        config=config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.rule_based_profiler",
            "class_name": "RuleBasedProfiler",
        },
    )
    return profiler
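# Usage sketch of the XOR contract enforced by the assert above: pass exactly
# one of name or ge_cloud_id. The context and store objects are assumed.
profiler = get_profiler(
    data_context=context,  # assumed DataContext
    profiler_store=profiler_store,  # assumed ProfilerStore
    name="my_profiler",  # ge_cloud_id must then stay None
)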
def __init__(self, store_backend=None, runtime_environment=None):
    """Runtime environment may be necessary to instantiate store backend elements."""
    if store_backend is None:
        store_backend = {"class_name": "InMemoryStoreBackend"}

    logger.debug("Building store_backend.")
    module_name = "great_expectations.data_context.store"
    self._store_backend = instantiate_class_from_config(
        config=store_backend,
        runtime_environment=runtime_environment or {},
        config_defaults={"module_name": module_name},
    )
    if not self._store_backend:
        raise ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            # report the configured class name rather than the whole config dict
            class_name=store_backend["class_name"],
        )
    if not isinstance(self._store_backend, StoreBackend):
        raise DataContextError(
            "Invalid StoreBackend configuration: expected a StoreBackend instance."
        )
    self._use_fixed_length_key = self._store_backend.fixed_length_key
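# A sketch of a non-default backend config for the constructor above, reusing
# the TupleFilesystemStoreBackend shape shown elsewhere in this section; the
# paths are placeholders and "MyStore" stands in for a concrete Store subclass.
store = MyStore(
    store_backend={
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": "/tmp/ge_store",  # placeholder
        "filepath_suffix": ".json",
    },
    runtime_environment={"root_directory": "/tmp/ge_root"},  # placeholder
)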
def test_basic_instantiation_with_bigquery_creds_failure_pkey(sa, empty_data_context):
    context = empty_data_context
    try:
        my_data_source = instantiate_class_from_config(
            # private key is valid but useless
            config={
                "connection_string": "bigquery://project-1353/dataset",
                "credentials_info": {
                    "type": "service_account",
                    "project_id": "project-1353",
                    "private_key_id": "df87033061fd7c27dcc953e235fe099a7017f9c4",
                    "private_key": "bad_pkey",
                    "client_email": "*****@*****.**",
                    "client_id": "100945395817716260007",
                    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                    "token_uri": "https://oauth2.googleapis.com/token",
                    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
                    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/testme%40project-1353.iam.gserviceaccount.com",
                },
            },
            config_defaults={
                "module_name": "great_expectations.datasource",
                "class_name": "SimpleSqlalchemyDatasource",
            },
            runtime_environment={"name": "my_sql_datasource"},
        )
    except Exception:
        return

    raise Exception("BigQuery incorrectly passed with invalid private key")
def __init__(
    self,
    foo,
    fake_configurable,
    x,
    y=None,
    z=None,
):
    assert isinstance(foo, int)
    self.foo = foo
    self.x = x
    self.y = y
    self.z = z

    print(fake_configurable)

    # This code allows us to specify defaults for the child class
    self.fake_configurable_object = instantiate_class_from_config(
        config=fake_configurable,
        runtime_environment={
            "x": self.x,
            "y": self.y,
            "z": self.z,
        },
        config_defaults={"a": "default_value_for_a"},
    )
def _init_store_backend(self, store_backend_config, runtime_config):
    self.key_class = ExpectationSuiteIdentifier

    if store_backend_config["class_name"] == "FixedLengthTupleFilesystemStoreBackend":
        config_defaults = {
            "key_length": 4,
            "module_name": "great_expectations.data_context.store",
            "filepath_template": "{0}/{1}/{2}/{3}.json",
        }
    else:
        config_defaults = {
            "module_name": "great_expectations.data_context.store",
        }

    return instantiate_class_from_config(
        config=store_backend_config,
        runtime_config=runtime_config,
        config_defaults=config_defaults,
    )
def init_expectation_configuration_builder(
    expectation_configuration_builder_config: Union[
        "ExpectationConfigurationBuilder", dict  # noqa: F821
    ],
    data_context: Optional["BaseDataContext"] = None,  # noqa: F821
) -> "ExpectationConfigurationBuilder":  # noqa: F821
    if not isinstance(expectation_configuration_builder_config, dict):
        expectation_configuration_builder_config = (
            expectation_configuration_builder_config.to_dict()
        )

    expectation_configuration_builder: "ExpectationConfigurationBuilder" = instantiate_class_from_config(  # noqa: F821
        config=expectation_configuration_builder_config,
        runtime_environment={"data_context": data_context},
        config_defaults={
            "class_name": "DefaultExpectationConfigurationBuilder",
            "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
        },
    )
    return expectation_configuration_builder
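# Usage sketch: config_defaults supplies both class_name and module_name, so a
# minimal config can omit them; the expectation type and parameter reference
# below are assumptions.
expectation_configuration_builder = init_expectation_configuration_builder(
    expectation_configuration_builder_config={
        "expectation_type": "expect_column_values_to_not_be_null",  # assumed
        "column": "$domain.domain_kwargs.column",  # assumed parameter reference
    },
)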
def _build_data_connector_from_config(
    self,
    name: str,
    config: Dict[str, Any],
) -> DataConnector:
    """Build a DataConnector using the provided configuration and return the
    newly-built DataConnector."""
    new_data_connector: DataConnector = instantiate_class_from_config(
        config=config,
        runtime_environment={
            "name": name,
            "datasource_name": self.name,
            "execution_engine": self.execution_engine,
        },
        config_defaults={
            "module_name": "great_expectations.datasource.data_connector"
        },
    )
    new_data_connector.data_context_root_directory = (
        self._data_context_root_directory
    )

    self.data_connectors[name] = new_data_connector
    return new_data_connector
def __init__(self, data_context, action_list):
    self.data_context = data_context
    self.action_list = action_list
    self.actions = {}
    for action_config in action_list:
        assert isinstance(action_config, dict)
        # NOTE: Eugene: 2019-09-23: need a better way to validate an action config:
        if not set(action_config.keys()) == {"name", "action"}:
            raise KeyError(
                'Action config keys must be ("name", "action"). Instead got {}'.format(
                    action_config.keys()
                )
            )

        new_action = instantiate_class_from_config(
            config=action_config["action"],
            runtime_environment={
                "data_context": self.data_context,
            },
            config_defaults={
                "module_name": "great_expectations.validation_operators"
            },
        )
        self.actions[action_config["name"]] = new_action
def __init__(self, column_section_renderer=None):
    super().__init__()
    if column_section_renderer is None:
        column_section_renderer = {
            "class_name": "ValidationResultsColumnSectionRenderer"
        }
    module_name = "great_expectations.render.renderer.column_section_renderer"
    self._column_section_renderer = instantiate_class_from_config(
        config=column_section_renderer,
        runtime_environment={},
        config_defaults={
            "module_name": column_section_renderer.get("module_name", module_name)
        },
    )
    if not self._column_section_renderer:
        raise ClassInstantiationError(
            module_name=module_name,
            package_name=None,
            class_name=column_section_renderer["class_name"],
        )
def basic_spark_datasource(tmp_path_factory, spark_session):
    base_directory: str = str(
        tmp_path_factory.mktemp("basic_spark_datasource_v013_filesystem_data_connector")
    )
    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(
            f"""
class_name: Datasource

execution_engine:
    class_name: SparkDFExecutionEngine
    spark_config:
        spark.master: local[*]
        spark.executor.memory: 6g
        spark.driver.memory: 6g
        spark.ui.showConsoleProgress: false
        spark.sql.shuffle.partitions: 2
        spark.default.parallelism: 4

data_connectors:
    test_runtime_data_connector:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        runtime_keys:
        - pipeline_stage_name
        - airflow_run_id

    simple_filesystem_data_connector:
        class_name: InferredAssetFilesystemDataConnector
        base_directory: {base_directory}
        glob_directive: '*'
        default_regex:
            pattern: (.+)\\.csv
            group_names:
            - data_asset_name
    """,
        ),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource
def basic_datasource_with_runtime_data_connector():
    basic_datasource: Datasource = instantiate_class_from_config(
        yaml.load(
            f"""
class_name: Datasource

execution_engine:
    class_name: PandasExecutionEngine

data_connectors:
    test_runtime_data_connector:
        module_name: great_expectations.datasource.data_connector
        class_name: RuntimeDataConnector
        batch_identifiers:
        - pipeline_stage_name
        - airflow_run_id
        - custom_key_0
    """,
        ),
        runtime_environment={"name": "my_datasource"},
        config_defaults={"module_name": "great_expectations.datasource"},
    )
    return basic_datasource
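# A follow-up sketch: the fixture-built Datasource exposes its connectors via
# the data_connectors mapping (populated in _build_data_connector_from_config
# above); the lookup key matches the YAML connector name.
datasource = basic_datasource_with_runtime_data_connector()
runtime_connector = datasource.data_connectors["test_runtime_data_connector"]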