示例#1
0
def test_data_set_describe():
    """Verify that `str(LambdaDataSet(...))` names each configured callable.

    The nested helpers are never invoked; only their names matter, because
    the expected strings below embed them verbatim.
    """
    def _dummy_load():
        pass  # pragma: no cover

    def _dummy_save():
        pass  # pragma: no cover

    def _dummy_exists():
        return False  # pragma: no cover

    def _dummy_release():
        pass  # pragma: no cover

    # One callable at a time: each is reported under its own keyword.
    load_only = str(LambdaDataSet(_dummy_load, None))
    assert (
        "LambdaDataSet(load=<tests.io.test_lambda_data_set._dummy_load>)"
        in load_only
    )

    save_only = str(LambdaDataSet(None, _dummy_save))
    assert (
        "LambdaDataSet(save=<tests.io.test_lambda_data_set._dummy_save>)"
        in save_only
    )

    exists_only = str(LambdaDataSet(None, None, _dummy_exists))
    assert (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>)"
        in exists_only
    )

    release_only = str(LambdaDataSet(None, None, None, _dummy_release))
    assert (
        "LambdaDataSet(release=<tests.io.test_lambda_data_set._dummy_release>)"
        in release_only
    )

    # Several callables at once: keys come out in alphabetical order and the
    # argument left as None (release) is omitted from the description.
    actual = str(LambdaDataSet(_dummy_load, _dummy_save, _dummy_exists, None))
    expected = (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>, "
        "load=<tests.io.test_lambda_data_set._dummy_load>, "
        "save=<tests.io.test_lambda_data_set._dummy_save>)")
    assert actual == expected
示例#2
0
 def test_exists_invocation(self, mocker):
     """`exists()` delegates to the supplied callable and returns its result."""
     exists_stub = mocker.Mock(return_value=True)
     outcome = LambdaDataSet(None, None, exists_stub).exists()

     # The callable must have been invoked exactly once, with no arguments.
     exists_stub.assert_called_once_with()
     assert outcome is True
示例#3
0
    def test_load_invocation(self, mocker):
        """`load()` delegates to the supplied callable and returns its result."""
        load_stub = mocker.Mock(return_value=42)
        loaded = LambdaDataSet(load_stub, None).load()

        # The callable must have been invoked exactly once, with no arguments.
        load_stub.assert_called_once_with()
        assert loaded == 42
示例#4
0
    def test_release_raises_error(self, mocker):
        """An exception raised by `release` surfaces as a `DataSetError`."""
        error_message = "File not found"
        failing_release = mocker.Mock(
            side_effect=FileNotFoundError(error_message))
        data_set = LambdaDataSet(None, None, None, failing_release)

        # The original message must still be present in the raised error.
        with pytest.raises(DataSetError, match=error_message):
            data_set.release()

        failing_release.assert_called_once_with()
示例#5
0
    def test_load_raises_error(self):
        """An exception raised by the load callable surfaces as a `DataSetError`."""
        expected_text = "Internal load exception message"

        def internal_load():
            raise FileNotFoundError(expected_text)

        failing_set = LambdaDataSet(internal_load, None)

        # The original message must still be present in the raised error.
        with pytest.raises(DataSetError, match=expected_text):
            failing_set.load()
示例#6
0
    def test_result_saved_not_returned(self, saving_result_pipeline):
        """Outputs registered in the catalog are not returned by the runner.

        Both `ds` and `dsX` are catalog entries, so `run` is expected to
        return an empty dict.
        """
        def _load():
            return 0

        def _save(arg):
            assert arg == 0

        data_sets = {
            name: LambdaDataSet(load=_load, save=_save)
            for name in ("ds", "dsX")
        }
        catalog = DataCatalog(data_sets)

        runner = SequentialRunner()
        assert runner.run(saving_result_pipeline, catalog) == {}
示例#7
0
def csv_dataset_same_file(context):
    """Attach to `context` a CSV data set that reads and writes the same file.

    The lambdas look up the paths on `context` when they are called, not when
    the data set is created.
    """
    # Read and write paths both point at the freshly created sample file.
    context.read_csv_path = context.write_csv_path = create_sample_csv()

    same_file_data_set = LambdaDataSet(
        load=lambda: pd.read_csv(context.read_csv_path),
        save=lambda df: df.to_csv(context.write_csv_path, index=False),
    )
    context.csv_data_set = same_file_data_set
示例#8
0
def define_dataset_with_load_save(context):
    """Attach to `context` a CSV data set with separate read and write paths.

    The lambdas look up the paths on `context` when they are called, not when
    the data set is created.
    """
    source_path = create_sample_csv()
    context.read_csv_path = source_path
    target_path = create_temp_csv()
    context.write_csv_path = target_path

    load_and_save_data_set = LambdaDataSet(
        load=lambda: pd.read_csv(context.read_csv_path),
        save=lambda df: df.to_csv(context.write_csv_path),
    )
    context.csv_data_set = load_and_save_data_set
示例#9
0
 def test_release_not_callable(self):
     """Passing a non-callable `release` is rejected at construction time."""
     expected_message = (
         r"`release` function for LambdaDataSet must be a Callable\. "
         r"Object of type `str` provided instead\."
     )
     not_a_callable = "release"
     with pytest.raises(DataSetError, match=expected_message):
         LambdaDataSet(None, None, None, not_a_callable)
示例#10
0
 def test_load_not_callable(self):
     """Passing a non-callable `load` is rejected at construction time."""
     expected_message = (
         r"`load` function for LambdaDataSet must be a Callable\. "
         r"Object of type `str` provided instead\."
     )
     not_a_callable = "load"
     with pytest.raises(DataSetError, match=expected_message):
         LambdaDataSet(not_a_callable, None)
示例#11
0
def test_data_set_describe():
    """Verify that `str(LambdaDataSet(...))` names each configured callable.

    The nested helpers are never invoked; only their names matter, because
    the expected strings below embed them verbatim.
    """
    def _dummy_load():
        pass  # pragma: no cover

    def _dummy_save():
        pass  # pragma: no cover

    def _dummy_exists():
        return False  # pragma: no cover

    # One callable at a time: each is reported under its own keyword.
    load_only = str(LambdaDataSet(_dummy_load, None))
    assert (
        "LambdaDataSet(load=<tests.io.test_lambda_data_set._dummy_load>)"
        in load_only
    )

    save_only = str(LambdaDataSet(None, _dummy_save))
    assert (
        "LambdaDataSet(save=<tests.io.test_lambda_data_set._dummy_save>)"
        in save_only
    )

    exists_only = str(LambdaDataSet(None, None, _dummy_exists))
    assert (
        "LambdaDataSet(exists=<tests.io.test_lambda_data_set._dummy_exists>)"
        in exists_only
    )
def _make_catalog(
    existent=None, non_existent=None, no_exists_method=None, feed_dict=None
):
    """Build a `DataCatalog` populated with `LambdaDataSet` entries.

    Args:
        existent: Names registered with an `exists` callable returning True.
        non_existent: Names registered with an `exists` callable returning
            False.
        no_exists_method: Names registered without any `exists` callable.
        feed_dict: Forwarded unchanged to the `DataCatalog` constructor.

    Returns:
        The populated catalog.
    """
    # Treat an omitted group as an empty one.
    if existent is None:
        existent = []
    if non_existent is None:
        non_existent = []
    if no_exists_method is None:
        no_exists_method = []

    catalog = DataCatalog(feed_dict=feed_dict)

    for name in existent:
        present = LambdaDataSet(None, None, lambda: True)
        catalog.add(name, present)

    for name in non_existent:
        absent = LambdaDataSet(None, None, lambda: False)
        catalog.add(name, absent)

    # These entries are created without an `exists` callable at all.
    for name in no_exists_method:
        undetermined = LambdaDataSet(None, None)
        catalog.add(name, undetermined)

    return catalog
示例#13
0
    def test_exists_not_implemented(self, caplog):
        """`catalog.exists` warns and returns False without an `exists` callable."""
        data_sets = {"test": LambdaDataSet(None, None)}
        catalog = DataCatalog(data_sets=data_sets)

        result = catalog.exists("test")

        # The first captured record is expected to be the warning.
        first_record = caplog.records[0]
        assert first_record.levelname == "WARNING"

        expected_text = (
            "`exists()` not implemented for `LambdaDataSet`. "
            "Assuming output does not exist."
        )
        assert expected_text in first_record.message
        assert result is False
示例#14
0
    def test_data_set_not_serializable(self, is_async, fan_out_fan_in):
        """Check that `ParallelRunner` rejects a data set it cannot serialize.

        Data set A cannot be serialized because `_load` and `_save` are
        nested functions rather than being defined in global scope. The run
        is expected to fail with an `AttributeError` whose message contains
        the literal text `['A']`.
        """
        def _load():
            return 0  # pragma: no cover

        def _save(arg):
            assert arg == 0  # pragma: no cover

        # Data set A cannot be serialized
        catalog = DataCatalog({"A": LambdaDataSet(load=_load, save=_save)})

        pipeline = Pipeline([fan_out_fan_in])
        # `match` is treated as a regular expression. Unescaped, "['A']" is a
        # character class matching any single `'` or `A`, which would accept
        # almost any message. Escaping the brackets pins the literal text.
        with pytest.raises(AttributeError, match=r"\['A'\]"):
            ParallelRunner(is_async=is_async).run(pipeline, catalog)
示例#15
0
 def test_save_undefined(self):
     """Saving fails with `DataSetError` when no save callable was given."""
     expected_message = "Cannot save to data set"
     with pytest.raises(DataSetError, match=expected_message):
         data_set = LambdaDataSet(None, None)
         data_set.save(42)
示例#16
0
 def test_load_undefined(self):
     """Loading fails with `DataSetError` when no load callable was given."""
     expected_message = "Cannot load data set"
     with pytest.raises(DataSetError, match=expected_message):
         data_set = LambdaDataSet(None, None)
         data_set.load()
示例#17
0
def test_run(mocker):  # pylint: disable=too-many-locals
    """Check how `AirflowRunner.run` turns a three-node pipeline into operators.

    `create_task` and `PythonOperator` are patched so that every node maps to
    a known stand-in task and every task to a known stand-in operator. The
    test then verifies the `create_task` calls, the operator constructor
    calls (including per-task extra arguments) and the upstream wiring.
    """
    # The nodes form a chain: () -> "a" -> "b" -> ()
    first_node = Node(lambda: None, [], "a")
    middle_node = Node(lambda a: None, ["a"], "b")
    last_node = Node(lambda b: None, ["b"], [])

    # Each node is turned into a stand-in task by the patched create_task
    first_task, middle_task, last_task = Mock(), Mock(), Mock()
    task_for_node = {
        first_node: first_task,
        middle_node: middle_task,
        last_node: last_task,
    }
    create_task = mocker.patch(
        "kedro_airflow.runner.AirflowRunner.create_task")
    create_task.side_effect = lambda node, catalog: task_for_node[node]

    # Each task is turned into a stand-in operator by the patched constructor
    first_op, middle_op, last_op = Mock(), Mock(), Mock()
    op_for_task = {
        first_task: first_op,
        middle_task: middle_op,
        last_task: last_op,
    }
    operator = mocker.patch("kedro_airflow.runner.PythonOperator")
    operator.side_effect = (
        lambda python_callable, **kwargs: op_for_task[python_callable]
    )

    def operator_arguments(task_id):
        # Extra operator keyword arguments, keyed by task id; the middle
        # task deliberately has none.
        overrides = {
            "lambda-none-a": {"retries": 1},
            "lambda-b-none": {"retries": 2},
        }
        return overrides.get(task_id, {})

    # Run the conversion; the nodes are passed to Pipeline out of chain order
    dag = Mock()
    pipeline = Pipeline([first_node, last_node, middle_node])
    catalog = DataCatalog({
        "a": LambdaDataSet(load=None, save=None),
        "b": LambdaDataSet(load=None, save=None),
    })
    runner = AirflowRunner(dag, None, operator_arguments)
    runner.run(pipeline, catalog)

    # Every node must have been handed to create_task together with the catalog
    expected_task_calls = [
        call(first_node, catalog),
        call(middle_node, catalog),
        call(last_node, catalog),
    ]
    create_task.assert_has_calls(expected_task_calls, any_order=True)

    # Every task must have been wrapped in an operator with the right arguments
    expected_operator_calls = [
        call(
            dag=dag,
            provide_context=True,
            python_callable=first_task,
            task_id="lambda-none-a",
            retries=1,
        ),
        call(
            dag=dag,
            provide_context=True,
            python_callable=middle_task,
            task_id="lambda-a-b",
        ),
        call(
            dag=dag,
            provide_context=True,
            python_callable=last_task,
            task_id="lambda-b-none",
            retries=2,
        ),
    ]
    operator.assert_has_calls(expected_operator_calls, any_order=True)

    # Check the dependency hookup: each operator follows its predecessor
    first_op.set_upstream.assert_not_called()
    middle_op.set_upstream.assert_called_once_with(first_op)
    last_op.set_upstream.assert_called_once_with(middle_op)
示例#18
0
def data_set_with_no_save(context):
    """Attach to `context` a data set with neither a load nor a save callable."""
    empty_data_set = LambdaDataSet(load=None, save=None)
    context.csv_data_set = empty_data_set
示例#19
0
 def test_exists_not_implemented(self):
     """Without an `exists` callable the data set has no `exists` attribute."""
     bare_data_set = LambdaDataSet(None, None)
     has_exists = hasattr(bare_data_set, "exists")
     assert not has_exists
示例#20
0
def mocked_data_set(mocked_save):
    """Return a `LambdaDataSet` with no load callable and `mocked_save` as save."""
    save_only_data_set = LambdaDataSet(None, mocked_save)
    return save_only_data_set
示例#21
0
def prepare_missing_csv(context):
    """Attach to `context` a load-only data set reading a fixed CSV path.

    NOTE(review): judging by the names, the path is presumably expected not
    to exist — confirm against the step that consumes this data set.
    """
    missing_path = "/var/missing_csv_file.csv"
    load_only_data_set = LambdaDataSet(
        load=lambda: pd.read_csv(missing_path),
        save=None,
    )
    context.csv_data_set = load_only_data_set
示例#22
0
 def one_in_two_out(arg):
     """Return two `LambdaDataSet` objects sharing one load and one save mock.

     `arg` is accepted but not used. `mocker` is taken from the enclosing
     scope, which is not visible here.
     """
     load = mocker.Mock(return_value=42)
     save = mocker.Mock()
     # Two distinct data sets, both backed by the same pair of mocks.
     return [LambdaDataSet(load, save) for _ in range(2)]
示例#23
0
 def test_release_not_implemented(self):
     """Calling `release` without a release callable must not raise."""
     LambdaDataSet(None, None).release()
示例#24
0
 def test_release_invocation(self, mocker):
     """`release()` delegates to the supplied callable exactly once."""
     release_stub = mocker.Mock()
     LambdaDataSet(None, None, None, release_stub).release()

     # The callable must have been invoked with no arguments.
     release_stub.assert_called_once_with()
示例#25
0
 def test_exists_not_implemented(self):
     """Without an `exists` callable, `exists()` returns a falsy value."""
     bare_data_set = LambdaDataSet(None, None)
     reported = bare_data_set.exists()
     assert not reported
示例#26
0
def mocked_dataset(mocker):
    """Return a `LambdaDataSet` backed by mocks.

    The load mock returns 42; the save mock has no configured behaviour.
    """
    load_stub = mocker.Mock(return_value=42)
    save_stub = mocker.Mock()
    data_set = LambdaDataSet(load_stub, save_stub)
    return data_set