Пример #1
0
    def test_http_filesystem_no_versioning(self):
        """Creating a versioned ``TextDataSet`` on an HTTPS URL must raise.

        The expected failure is a ``DataSetError`` whose message states that
        HTTP(s) data sets do not support versioning.
        """
        url = "https://example.com/file.txt"
        expected_message = r"HTTP\(s\) DataSet doesn't support versioning\."

        with pytest.raises(DataSetError, match=expected_message):
            TextDataSet(filepath=url, version=Version(None, None))
Пример #2
0
    def test_protocol_usage(self, filepath, instance_type):
        """Check the filesystem type and protocol-free path used for ``filepath``.

        The data set's ``_fs`` must be an ``instance_type``, and its
        ``_filepath`` must be a ``PurePosixPath`` equal to ``filepath`` with
        the leading protocol prefix removed.
        """
        dataset = TextDataSet(filepath=filepath)
        assert isinstance(dataset._fs, instance_type)

        # Keep whatever follows the first protocol delimiter; a filepath that
        # has no delimiter is expected back unchanged.
        *_, expected_path = filepath.split(PROTOCOL_DELIMITER, 1)

        stored_path = dataset._filepath
        assert str(stored_path) == expected_path
        assert isinstance(stored_path, PurePosixPath)
Пример #3
0
    def test_version_str_repr(self, load_version, save_version):
        """Check what ``str()`` of a data set shows with and without a version.

        Both renderings must contain the file path, the class name and the
        word "protocol"; only the versioned rendering may mention the version.
        """
        filepath = "test.txt"
        unversioned = TextDataSet(filepath=filepath)
        versioned = TextDataSet(
            filepath=filepath, version=Version(load_version, save_version)
        )
        plain_text, versioned_text = str(unversioned), str(versioned)

        # The unversioned rendering shows the path and nothing about versions.
        assert filepath in plain_text
        assert "version" not in plain_text

        # The versioned rendering shows the path and the exact version repr.
        expected_version = (
            f"version=Version(load={load_version}, save='{save_version}')"
        )
        assert filepath in versioned_text
        assert expected_version in versioned_text

        # Pieces common to both renderings.
        for token in ("TextDataSet", "protocol"):
            for rendered in (versioned_text, plain_text):
                assert token in rendered
Пример #4
0
    def test_protocol_usage(self, filepath, instance_type):
        """Check the filesystem type and protocol-free path used for ``filepath``.

        The data set's ``_fs`` must be an ``instance_type``, and its
        ``_filepath`` must be a ``PurePosixPath`` whose string form equals
        ``filepath`` without its protocol prefix.
        """
        data_set = TextDataSet(filepath=filepath)
        assert isinstance(data_set._fs, instance_type)

        # _strip_protocol() doesn't strip the http(s) protocol, so remove the
        # scheme by hand for both "http" and "https". Split on the first
        # delimiter only, so a "://" appearing later in the URL (for example
        # inside a query string) stays part of the expected path.
        if data_set._protocol in ("http", "https"):
            path = filepath.split("://", 1)[-1]
        else:
            path = data_set._fs._strip_protocol(filepath)

        assert str(data_set._filepath) == path
        assert isinstance(data_set._filepath, PurePosixPath)
Пример #5
0
    def test_force_checkpoint_checkpoint_file_exists(self, forced_checkpoint,
                                                     expected_partitions,
                                                     mocked_csvs_in_s3):
        """Check which partitions are loaded from S3 when a checkpoint value
        is forced while a checkpoint file already exists."""
        # Create the checkpoint, then read it back to prove it exists.
        IncrementalDataSet(mocked_csvs_in_s3, DATASET).confirm()
        checkpoint_filename = IncrementalDataSet.DEFAULT_CHECKPOINT_FILENAME
        stored_checkpoint = TextDataSet(
            f"{mocked_csvs_in_s3}/{checkpoint_filename}"
        ).load()
        assert stored_checkpoint == "p04/data.csv"

        # Same location again, this time with the checkpoint value forced.
        forced = IncrementalDataSet(
            mocked_csvs_in_s3, DATASET, checkpoint=forced_checkpoint
        )
        assert forced._checkpoint.exists()
        assert forced.load().keys() == expected_partitions
Пример #6
0
    def before_pipeline_run(self, run_params: Dict[str, Any], pipeline,
                            catalog):
        """Hook implementation that registers catalog entries for the file
        named by the ``input`` extra parameter, e.g.:
            kedro run --params=input:iris_1.csv
            kedro run --params=input:iris_2.csv
            kedro run --params=input:iris_3.csv

        Raises ``KeyError`` if ``run_params`` has no ``extra_params`` entry or
        that entry has no ``input`` key.
        """
        filename = run_params["extra_params"]["input"]

        # Input: the raw CSV with the requested file name.
        catalog.add(
            "example_iris_data",
            CSVDataSet(filepath=f"data/01_raw/{filename}"),
        )

        # Output: a text report stored under the same file name.
        catalog.add(
            "example_reporting_data",
            TextDataSet(filepath=f"data/08_reporting/{filename}"),
        )
Пример #7
0
def versioned_txt_data_set(filepath_txt, load_version, save_version):
    """Return a ``TextDataSet`` for ``filepath_txt`` carrying a ``Version``
    built from ``load_version`` and ``save_version``."""
    version = Version(load_version, save_version)
    return TextDataSet(filepath=filepath_txt, version=version)
Пример #8
0
def txt_data_set(filepath_txt, fs_args):
    """Return a ``TextDataSet`` for ``filepath_txt`` configured with ``fs_args``."""
    options = {"filepath": filepath_txt, "fs_args": fs_args}
    return TextDataSet(**options)
Пример #9
0
 def test_catalog_release(self, mocker):
     """Releasing the data set must invalidate the filesystem cache once,
     for the data set's own file path."""
     target = "test.txt"
     # Patch the fsspec factory and keep the mock it returns; presumably this
     # is the filesystem object the data set ends up using.
     fake_fs = mocker.patch("fsspec.filesystem").return_value

     TextDataSet(filepath=target).release()

     fake_fs.invalidate_cache.assert_called_once_with(target)
Пример #10
0
from kedro.extras.datasets.text import TextDataSet

# Data sets keyed by the name they are referred to by; the single entry is a
# text data set stored at data/load/my_output.txt.
catalog_dict = {
    "my_output_dataset": TextDataSet(
        filepath="data/load/my_output.txt",
    ),
}