Example #1
def test_create_task():
    def func(a, b):
        return a + b

    # the catalog returned by process_context supplies the node's two inputs
    orig_catalog = Mock()
    catalog = orig_catalog.shallow_copy()
    catalog.load.side_effect = [1, 2]
    process_context = Mock(return_value=catalog)
    node = Node(func, ["ds_a", "ds_b"], "ds_c")

    task = AirflowRunner(None, process_context,
                         None).create_task(node, orig_catalog)
    task(param=123)
    # the task should run the context hook once and save the node's output
    process_context.assert_called_once_with(catalog, param=123)
    catalog.save.assert_called_once_with("ds_c", 3)
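
Example #1 and the other test snippets below omit their module-level imports. A minimal header they appear to assume is sketched here; the exact module paths are a guess based on the pre-0.16 Kedro API implied by the DataSet class names used in the snippets:

# Assumed imports for the test examples; module paths are a best guess.
from unittest.mock import ANY, Mock, call

import pytest
from kedro.io import DataCatalog, LambdaDataSet, MemoryDataSet
from kedro.pipeline import Pipeline
from kedro.pipeline.node import Node

from kedro_airflow.runner import AirflowRunner

The mocker argument seen in Examples #2 and #5 is the fixture supplied by the pytest-mock plugin rather than an import.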
Example #2
def test_operator_arguments(mocker):
    # The Nodes
    first_node = Node(lambda: None, [], "a")
    last_node = Node(lambda: None, [], "b")

    # the nodes get turned into tasks and then into operators by the runner
    operator = mocker.patch("kedro_airflow.runner.PythonOperator")

    def operator_arguments(task_id):
        args = {"lambda-none-a": {"retries": 1}}
        return args.get(task_id, {})

    # actually call the runner to do the conversion
    dag = Mock()
    pipeline = Pipeline([first_node, last_node])
    catalog = DataCatalog({"a": None, "b": None})
    AirflowRunner(dag, None, operator_arguments).run(pipeline, catalog)

    # check the operator constructor calls
    operator.assert_has_calls(
        [
            call(
                dag=dag,
                provide_context=True,
                python_callable=ANY,
                task_id="lambda-none-a",
                retries=1,
            ),
            call(
                dag=dag,
                provide_context=True,
                python_callable=ANY,
                task_id="lambda-none-b",
            ),
        ],
        any_order=True,
    )
Example #3
    for key in ["dag", "conf", "macros", "task", "task_instance", "ti", "var"]:
        del airflow_context[key]  # drop unpicklable things
    data_catalog.add_feed_dict({"airflow_context": airflow_context},
                               replace=True)

    # or add just the ones you need into Kedro parameters
    parameters = data_catalog.load("parameters")
    parameters["airflow_ds"] = airflow_context["ds"]
    data_catalog.save("parameters", parameters)

    return data_catalog


# Construct a DAG and then call into Kedro to have the operators constructed
dag = DAG(slugify("kedro-airflow-mushrooms"),
          default_args=default_args,
          schedule_interval=timedelta(days=1),
          catchup=False)

_context = load_context(project_path)
data_catalog = _context.catalog
pipeline = _context.pipeline

runner = AirflowRunner(
    dag=dag,
    process_context=process_context,
    operator_arguments=operator_specific_arguments,
)

runner.run(pipeline, data_catalog)
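
Example #3 relies on names defined elsewhere in the DAG file: default_args, project_path, operator_specific_arguments, and the opening of the process_context function. As a rough, hypothetical sketch (modelled on the operator_arguments callables in Examples #2 and #5 and on standard Airflow default_args usage), the missing dict and callable could look like:

# Hypothetical sketch; the task id and argument values are made up for illustration.
from datetime import datetime, timedelta

default_args = {
    "owner": "airflow",
    "start_date": datetime(2019, 1, 1),
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}


def operator_specific_arguments(task_id):
    # Map task ids to extra keyword arguments for the generated operators,
    # in the same style as operator_arguments in Examples #2 and #5.
    args = {"train-model": {"retries": 2}}
    return args.get(task_id, {})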
Example #4
    # (excerpt: tail of the process_context function from the project template;
    # kwargs is the Airflow context passed in by the runner)
    data_catalog.add_feed_dict({"airflow_context": kwargs}, replace=True)

    return data_catalog


# Construct a DAG and then call into Kedro to have the operators constructed
dag = DAG(
    slugify("{{ project_name }}"),
    default_args=default_args,
    schedule_interval=timedelta(days=1),
)

{% if context_compatibility_mode %}
config = get_project_context('get_config')(project_path)
data_catalog = get_project_context('create_catalog')(config)
pipeline = get_project_context('create_pipeline')()
{%- else %}
_context = load_context(project_path)
data_catalog = _context.catalog
pipeline = _context.pipeline
{%- endif %}

runner = AirflowRunner(
    dag=dag,
    process_context=process_context,
    operator_arguments=operator_specific_arguments,
)

runner.run(pipeline, data_catalog)
Example #5
def test_run(mocker):  # pylint: disable=too-many-locals
    # The Nodes
    first_node = Node(lambda: None, [], "a")
    middle_node = Node(lambda a: None, ["a"], "b")
    last_node = Node(lambda b: None, ["b"], [])

    # get turned into tasks by create_task
    first_task = Mock()
    middle_task = Mock()
    last_task = Mock()
    create_task = mocker.patch(
        "kedro_airflow.runner.AirflowRunner.create_task")
    create_task.side_effect = lambda node, catalog: {
        first_node: first_task,
        middle_node: middle_task,
        last_node: last_task,
    }[node]

    # and tasks get turned into operators by the runner
    first_op = Mock()
    middle_op = Mock()
    last_op = Mock()
    operator = mocker.patch("kedro_airflow.runner.PythonOperator")
    operator.side_effect = lambda python_callable, **kwargs: {
        first_task: first_op,
        middle_task: middle_op,
        last_task: last_op,
    }[python_callable]

    def operator_arguments(task_id):
        args = {
            "lambda-none-a": {"retries": 1},
            "lambda-b-none": {"retries": 2},
        }
        return args.get(task_id, {})

    # actually call the runner to do the conversion
    dag = Mock()
    pipeline = Pipeline([first_node, last_node, middle_node])
    catalog = DataCatalog({
        "a": LambdaDataSet(load=None, save=None),
        "b": LambdaDataSet(load=None, save=None),
    })
    AirflowRunner(dag, None, operator_arguments).run(pipeline, catalog)

    # check the create task calls
    create_task.assert_has_calls(
        [
            call(first_node, catalog),
            call(middle_node, catalog),
            call(last_node, catalog),
        ],
        any_order=True,
    )

    # check the operator constructor calls
    operator.assert_has_calls(
        [
            call(
                dag=dag,
                provide_context=True,
                python_callable=first_task,
                task_id="lambda-none-a",
                retries=1,
            ),
            call(
                dag=dag,
                provide_context=True,
                python_callable=middle_task,
                task_id="lambda-a-b",
            ),
            call(
                dag=dag,
                provide_context=True,
                python_callable=last_task,
                task_id="lambda-b-none",
                retries=2,
            ),
        ],
        any_order=True,
    )

    # check the dependency hookup
    first_op.set_upstream.assert_not_called()
    middle_op.set_upstream.assert_called_once_with(first_op)
    last_op.set_upstream.assert_called_once_with(middle_op)
Example #6
def test_create_default_data_set():
    with pytest.raises(ValueError, match="testing"):
        AirflowRunner(None, None, None).create_default_data_set("testing", 0)
Example #7
def test_no_memory_datasets():
    pipeline = Pipeline([Node(lambda: None, [], "fred")])
    catalog = DataCatalog({"fred": MemoryDataSet()})
    with pytest.raises(ValueError, match="memory data sets: 'fred'"):
        AirflowRunner(None, None, {}).run(pipeline, catalog)
Example #8
def test_no_default_datasets():
    pipeline = Pipeline([Node(lambda: None, [], "fred")])
    catalog = DataCatalog()
    with pytest.raises(ValueError, match="'fred' is not registered"):
        AirflowRunner(None, None, {}).run(pipeline, catalog)