Python DataContext.add_datasource示例

def sanitize_yaml_and_save_datasource(
        context: DataContext,
        datasource_yaml: str,
        overwrite_existing: bool = False) -> None:
    """A convenience function used in notebooks to help users save secrets."""
    if not datasource_yaml:
        raise ValueError("Please verify the yaml and try again.")
    if not isinstance(datasource_yaml, str):
        raise TypeError("Please pass in a valid yaml string.")
    config = yaml.load(datasource_yaml)
    try:
        datasource_name = config.pop("name")
    except KeyError:
        raise ValueError("The datasource yaml is missing a `name` attribute.")
    if not overwrite_existing and check_if_datasource_name_exists(
            context=context, datasource_name=datasource_name):
        print(
            f'**WARNING** A Datasource named "{datasource_name}" already exists in this Data Context. The Datasource has *not* been saved. Please use a different name or set overwrite_existing=True if you want to overwrite!'
        )
        return
    if "credentials" in config.keys():
        credentials = config["credentials"]
        config["credentials"] = "${" + datasource_name + "}"
        context.save_config_variable(datasource_name, credentials)
    context.add_datasource(name=datasource_name, **config)

示例#2

显示文件

文件： test_suite.py 项目： trucklos/great_expectations

def test_suite_edit_with_batch_kwargs_unable_to_load_a_batch_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = empty_data_context.root_directory

    context = DataContext(project_dir)
    context.create_expectation_suite("foo")
    context.add_datasource("source", class_name="PandasDatasource")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = '{"table": "fake", "datasource": "source"}'
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo", "-d", project_dir, "--batch-kwargs", batch_kwargs],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "To continue editing this suite" not in stdout
    assert "Please check that your batch_kwargs are able to load a batch." in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)

示例#3

显示文件

def test_init_on_existing_project_with_multiple_datasources_exist_do_nothing(
    mock_webbrowser, caplog, initialized_project, filesystem_csv_2
):
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    context = DataContext(ge_dir)
    context.add_datasource(
        "another_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
    )

    assert len(context.list_datasources()) == 2

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli, ["init", "-d", project_dir], input="n\n", catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 0
    assert "Error: invalid input" not in stdout

    assert "Always know what to expect from your data" in stdout
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)

示例#4

显示文件

文件： test_datasource.py 项目： yjlee215/great_expectations

def test_get_batch_kwargs_for_specific_dataasset(empty_data_context, filesystem_csv):
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    base_directory = str(filesystem_csv)

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": base_directory,
            }
        },
    )

    batch = get_batch_kwargs(
        context,
        datasource_name=None,
        batch_kwargs_generator_name=None,
        data_asset_name="f1",
        additional_batch_kwargs={},
    )

    expected_batch = {
        "data_asset_name": "f1",
        "datasource": "wow_a_datasource",
        "path": os.path.join(filesystem_csv, "f1.csv"),
    }
    assert batch == expected_batch

示例#5

显示文件

文件： datasource.py 项目： anishshah97/kedro-great

def _add_spark_datasource(datasource_name: str, dataset: AbstractDataSet,
                          ge_context: DataContext) -> str:
    from great_expectations.datasource import SparkDFDatasource

    path = str(dataset._filepath.parent)

    if path.startswith("./"):
        path = path[2:]

    configuration = SparkDFDatasource.build_configuration(
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": os.path.join("..", path),
            }
        })

    configuration["class_name"] = "SparkDFDatasource"
    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {0:s}".format(errors))

    ge_context.add_datasource(name=datasource_name, **configuration)
    return datasource_name

示例#6

显示文件

文件： test_datasource_pandas.py 项目： rlshuhart/great_expectations

def test_cli_datasorce_list(caplog, empty_data_context, filesystem_csv_2):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["datasource", "list", "-d", project_root_dir],
                           catch_exceptions=False)

    stdout = result.output.strip()
    assert "[]" in stdout
    assert context.list_datasources() == []

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )

    assert context.list_datasources() == [{
        "name": "wow_a_datasource",
        "class_name": "PandasDatasource"
    }]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["datasource", "list", "-d", project_root_dir],
                           catch_exceptions=False)

    stdout = result.output.strip()
    if PY2:
        # deal with legacy python dictionary sorting
        assert ("'name': 'wow_a_datasource'"
                and "'class_name': 'PandasDatasource'" in stdout)
        assert len(stdout) >= 60 and len(stdout) <= 70
    else:
        assert (
            "[{'name': 'wow_a_datasource', 'class_name': 'PandasDatasource'}]"
            in stdout)
    assert_no_logging_messages_or_tracebacks(caplog, result)

示例#7

显示文件

def test_cli_datasource_list(caplog, empty_data_context, filesystem_csv_2):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )

    stdout = result.output.strip()
    assert "No Datasources found" in stdout
    assert context.list_datasources() == []

    base_directory = str(filesystem_csv_2)

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": base_directory,
            }
        },
    )

    datasources = context.list_datasources()

    assert datasources == [
        {
            "name": "wow_a_datasource",
            "class_name": "PandasDatasource",
            "data_asset_type": {"class_name": "PandasDataset",
                                "module_name": "great_expectations.dataset"},
            "batch_kwargs_generators": {"subdir_reader": {
                "base_directory": base_directory,
                "class_name": "SubdirReaderBatchKwargsGenerator"}},
            "module_name": "great_expectations.datasource",
        }
    ]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    expected_output = """
1 Datasource found:[0m
[0m
 - [36mname:[0m wow_a_datasource[0m
   [36mmodule_name:[0m great_expectations.datasource[0m
   [36mclass_name:[0m PandasDatasource[0m
   [36mbatch_kwargs_generators:[0m[0m
     [36msubdir_reader:[0m[0m
       [36mclass_name:[0m SubdirReaderBatchKwargsGenerator[0m
       [36mbase_directory:[0m {}[0m
   [36mdata_asset_type:[0m[0m
     [36mmodule_name:[0m great_expectations.dataset[0m
     [36mclass_name:[0m PandasDataset[0m""".format(base_directory).strip()
    stdout = result.output.strip()
    assert stdout == expected_output
    assert_no_logging_messages_or_tracebacks(caplog, result)