def test_run_batch_verify_in_spark_failed(dag, mocker):
    op = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark_failed",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_spark=[
            MockedResponse(
                200,
                json_body=[
                    {
                        "jobId": 1,
                        "status": "SUCCEEDED"
                    },
                    {
                        "jobId": 2,
                        "status": "FAILED"
                    },
                ],
            )
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated failed Spark job, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>")
    # spill_logs=True, and Operator had the batch_id by the time error occured.
    spill_logs_spy.assert_called_once()
示例#2
0
def test_run_batch_no_appid(dag, mocker):
    op = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_no_appid",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_get=[MockedResponse(200, json_body={"state": "success", "appId": None})],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated null Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=True, and Operator had the batch_id by the time error occured.
    spill_logs_spy.assert_called_once()

    mock_livy_batch_responses(
        mocker,
        mock_get=[
            MockedResponse(200, json_body={"state": "success", "noAppId": "here"})
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated no key for Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    assert spill_logs_spy.call_count == 2
def test_run_batch_logs_greater_than_page_size(dag, mocker):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_greater_than_page_size",
        dag=dag,
    )
    fetch_log_page_spy = mocker.spy(op, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=321)
    op.execute({})
    assert fetch_log_page_spy.call_count == 4
def test_run_batch_logs_one_page_size(dag, mocker):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_one_page_size",
        dag=dag,
    )
    fetch_log_page_spy = mocker.spy(op, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=100)
    op.execute({})
    fetch_log_page_spy.assert_called_once()
def test_run_batch_verify_in_spark(dag, mocker):
    op = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark",
        dag=dag,
    )
    spark_checker_spy = mocker.spy(op, "check_spark_app_status")
    mock_livy_batch_responses(mocker)
    op.execute({})
    spark_checker_spy.assert_called_once()
def test_run_batch_logs_missing_attrs_in_json(dag, mocker):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_missing_attrs_in_json",
        dag=dag,
    )
    mock_livy_batch_responses(mocker,
                              log_override_response='{"id": 1, "from": 2}')
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated missing attributes when calling /logs , "
          f"got the expected exception:\n<{ae.value}>")
def test_run_batch_logs_malformed_json(dag, mocker):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_greater_than_page_size",
        dag=dag,
    )
    mock_livy_batch_responses(mocker,
                              log_override_response='{"invalid":json]}')
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated malformed response when calling /logs , "
          f"got the expected exception:\n<{ae.value}>")
def test_run_batch_verify_in_spark_garbled(dag, mocker):
    op = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark_garbled",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_spark=[MockedResponse(200, json_body={"unparseable": "obj"})],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated garbled output from Spark REST API, "
          f"got the expected exception:\n<{ae.value}>")
    spill_logs_spy.assert_called_once()
def test_run_batch_successfully(dag, mocker):
    op = LivyBatchOperator(spill_logs=False,
                           task_id="test_run_batch_successfully",
                           dag=dag)
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    submit_batch_spy = mocker.spy(op, "submit_batch")
    mock_livy_batch_responses(mocker)
    op.execute({})

    submit_batch_spy.assert_called_once()
    # spill_logs is False and batch completed successfully, so we don't expect logs.
    spill_logs_spy.assert_not_called()
    op.spill_logs = True
    op.execute({})

    # We set spill_logs to True this time, therefore expecting logs.
    spill_logs_spy.assert_called_once()
def test_run_batch_error_before_batch_created(dag, mocker):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_error_before_batch_created",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mocker.patch.object(
        BaseHook,
        "_get_connections_from_db",
        return_value=[Connection(host="HOST", port=123)],
    )
    with raises(requests.exceptions.ConnectionError) as ae:
        op.execute({})
    print(f"\n\nNo response from server was mocked, "
          f"got the expected exception:\n<{ae.value}>")
    # Even though we set spill_logs to True, Operator doesn't have a batch_id yet.
    spill_logs_spy.assert_not_called()
def test_run_batch_verify_in_yarn_garbled_response(dag, mocker):
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[
            MockedResponse(200, body="<!DOCTYPE html><html>notjson</html>")
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated garbled output from YARN REST API, "
          f"got the expected exception:\n<{ae.value}>")
    spill_logs_spy.assert_called_once()
示例#12
0
def test_run_batch_verify_in_yarn_failed(dag, mocker):
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        task_id="test_run_batch_verify_in_yarn_failed",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[MockedResponse(200, json_body={"app": {"finalStatus": "NOTGOOD"}})],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated failed status from YARN REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    spill_logs_spy.assert_called_once()
def test_run_batch_error_during_status_probing(dag, mocker, code):
    op = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_error_during_status_probing",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_get=[MockedResponse(code, body=f"Response from server:{code}")])
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated {code} response from server during batch status probing , "
        f"got the expected exception:\n<{ae.value}>")
    # spill_logs=True, and Operator had the batch_id by the time error occured.
    spill_logs_spy.assert_called_once()
    op.spill_logs = False
    with raises(AirflowException):
        op.execute({})
    # spill_logs=False, but error occured and Operator had the batch_id.
    assert spill_logs_spy.call_count == 2