示例#1
0
    def test_exists(self, file_format, tmp_path, sample_spark_df):
        """Check that ``exists()`` is False before a save and True after it."""
        data_set = SparkDataSet(
            filepath=str(tmp_path / "test_data"), file_format=file_format
        )

        # Nothing has been saved at the target path yet.
        assert not data_set.exists()

        data_set.save(sample_spark_df)
        assert data_set.exists()
示例#2
0
def test_exists(file_format):
    """Check that ``exists()`` is False before a save and True after it.

    The data set targets a path inside a temporary directory, which is
    cleaned up when the ``with`` block exits.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        data_set = SparkDataSet(
            filepath=join(scratch_dir, "test_data"), file_format=file_format
        )
        sample_df = _get_sample_spark_data_frame().coalesce(1)

        # Nothing has been saved at the target path yet.
        assert not data_set.exists()

        data_set.save(sample_df)
        assert data_set.exists()
示例#3
0
    def test_exists_raises_error(self, mocker):
        """Check that an unexpected ``AnalysisException`` is not swallowed.

        ``exists()`` should raise all errors except for AnalysisExceptions
        clearly indicating a missing file.
        """
        data_set = SparkDataSet(filepath="")
        unexpected_error = AnalysisException("Other Exception", [])
        # Make the Spark session lookup fail with the unexpected error.
        mocker.patch.object(data_set, "_get_spark", side_effect=unexpected_error)

        with pytest.raises(DataSetError, match="Other Exception"):
            data_set.exists()
示例#4
0
def test_exists_raises_error(monkeypatch):
    """Check that an unexpected ``AnalysisException`` is not swallowed.

    ``exists()`` should raise all errors except for AnalysisExceptions
    clearly indicating a missing file.
    """
    data_set = SparkDataSet(filepath="")

    def _raise_analysis_error():
        # Stand-in for ``_get_spark`` that always fails.
        raise AnalysisException("Other Exception", [])

    monkeypatch.setattr(data_set, "_get_spark", _raise_analysis_error)

    with pytest.raises(DataSetError) as exc_info:
        data_set.exists()
    # The original message must be carried through in the raised error.
    assert "Other Exception" in str(exc_info.value)