def test_run_batch_verify_in_spark_failed(dag, mocker):
    """Expect ``execute`` to fail when the Spark REST API reports a FAILED job.

    The mocked Spark response lists one SUCCEEDED and one FAILED job.
    """
    operator = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark_failed",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")
    spark_jobs = [
        {"jobId": 1, "status": "SUCCEEDED"},
        {"jobId": 2, "status": "FAILED"},
    ]
    mock_livy_batch_responses(
        mocker,
        mock_spark=[MockedResponse(200, json_body=spark_jobs)],
    )

    with raises(AirflowException) as exc_info:
        operator.execute({})

    print(
        "\n\nImitated failed Spark job, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    # spill_logs is False here, yet the logs are still expected to be spilled
    # exactly once after the failure.
    log_spy.assert_called_once()
def test_submit_batch_malformed_json(dag, mocker):
    """Expect ``AirflowBadRequest`` when the submit response body is broken JSON."""
    operator = LivyBatchOperator(task_id="test_submit_batch_malformed_json", dag=dag)
    # The body is cut off on purpose: the outer object is never closed.
    broken_response = mock_http_calls(201, content=b'{"id":{}')
    mocker.patch.object(HttpHook, "get_conn", return_value=broken_response)

    with raises(AirflowBadRequest) as exc_info:
        operator.submit_batch()

    print(
        "\n\nImitated malformed JSON response when submitting a batch, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
def test_submit_batch_string_id(dag, mocker):
    """Expect ``AirflowException`` when the returned batch ``id`` is a string."""
    operator = LivyBatchOperator(task_id="test_submit_batch_string_id", dag=dag)
    string_id_response = mock_http_calls(
        201,
        content=b'{"id":"unexpectedly, a string!"}',
    )
    mocker.patch.object(HttpHook, "get_conn", return_value=string_id_response)

    with raises(AirflowException) as exc_info:
        operator.submit_batch()

    print(
        "\n\nImitated server returning a string for a batch ID, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
def test_run_batch_logs_greater_than_page_size(dag, mocker):
    """Check that 321 mocked log lines are fetched in four ``fetch_log_page`` calls."""
    operator = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_greater_than_page_size",
        dag=dag,
    )
    page_spy = mocker.spy(operator, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=321)

    operator.execute({})

    # Presumably three full pages plus one partial page - the page size itself
    # is not visible in this test.
    assert page_spy.call_count == 4
def test_run_batch_logs_one_page_size(dag, mocker):
    """Check that 100 mocked log lines need a single ``fetch_log_page`` call."""
    operator = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_one_page_size",
        dag=dag,
    )
    page_spy = mocker.spy(operator, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=100)

    operator.execute({})

    page_spy.assert_called_once()
def test_submit_batch_bad_response_codes(dag, mocker, code):
    """Expect ``AirflowException`` when batch submission gets HTTP status ``code``.

    ``code`` is presumably supplied by a parametrization that is not visible
    in this block.
    """
    operator = LivyBatchOperator(
        task_id=f"test_submit_batch_bad_response_codes_{code}",
        dag=dag,
    )
    error_response = mock_http_calls(
        code,
        content=b"Error content",
        reason="Good reason",
    )
    mocker.patch.object(HttpHook, "get_conn", return_value=error_response)

    with raises(AirflowException) as exc_info:
        operator.submit_batch()

    print(
        f"\n\nImitated the {code} error response when submitting a batch, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
def test_run_batch_verify_in_spark(dag, mocker):
    """Check that ``verify_in="spark"`` triggers one Spark status check."""
    operator = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark",
        dag=dag,
    )
    status_check_spy = mocker.spy(operator, "check_spark_app_status")
    mock_livy_batch_responses(mocker)

    operator.execute({})

    status_check_spy.assert_called_once()
def test_run_batch_logs_malformed_json(dag, mocker):
    """Expect ``AirflowException`` when the logs endpoint returns invalid JSON.

    The mocked log response is overridden with a string that is not
    parseable JSON, and logs are requested via ``spill_logs=True``.
    """
    op = LivyBatchOperator(
        spill_logs=True,
        # Use this test's own name: the previous value was copy-pasted from
        # test_run_batch_logs_greater_than_page_size and duplicated its id.
        task_id="test_run_batch_logs_malformed_json",
        dag=dag,
    )
    mock_livy_batch_responses(mocker, log_override_response='{"invalid":json]}')

    with raises(AirflowException) as ae:
        op.execute({})

    print(
        "\n\nImitated malformed response when calling /logs , "
        f"got the expected exception:\n<{ae.value}>"
    )
def test_run_batch_logs_missing_attrs_in_json(dag, mocker):
    """Expect ``AirflowException`` when the logs response lacks expected keys.

    The overridden log response is valid JSON holding only ``id`` and ``from``.
    """
    operator = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_logs_missing_attrs_in_json",
        dag=dag,
    )
    incomplete_payload = '{"id": 1, "from": 2}'
    mock_livy_batch_responses(mocker, log_override_response=incomplete_payload)

    with raises(AirflowException) as exc_info:
        operator.execute({})

    print(
        "\n\nImitated missing attributes when calling /logs , "
        f"got the expected exception:\n<{exc_info.value}>"
    )
def test_jinja(dag):
    """Check that ``name`` and ``arguments`` are rendered as Jinja templates."""
    operator = LivyBatchOperator(
        name="test_jinja_{{ run_id }}",
        arguments=[
            "{{ run_id|replace(':', '-') }}",
            "prefix {{ custom_param }} postfix",
        ],
        task_id="test_jinja",
        dag=dag,
    )
    template_context = {"run_id": "hello:world", "custom_param": "custom value"}

    operator.render_template_fields(template_context)

    assert operator.name == "test_jinja_hello:world"
    assert operator.arguments[0] == "hello-world"
    assert operator.arguments[1] == "prefix custom value postfix"
def test_run_batch_verify_in_spark_garbled(dag, mocker):
    """Expect ``AirflowException`` when the Spark REST API body has an unexpected shape.

    The mocked Spark response is a JSON object with a single unrelated key.
    """
    operator = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_verify_in_spark_garbled",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")
    garbled_response = MockedResponse(200, json_body={"unparseable": "obj"})
    mock_livy_batch_responses(mocker, mock_spark=[garbled_response])

    with raises(AirflowException) as exc_info:
        operator.execute({})

    print(
        "\n\nImitated garbled output from Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    # Logs are expected to be spilled on failure even though spill_logs is False.
    log_spy.assert_called_once()
def test_run_batch_error_before_batch_created(dag, mocker):
    """Check that no logs are spilled when the failure precedes batch creation.

    Only the connection lookup is patched; no server response is mocked, so
    ``execute`` is expected to raise ``requests.exceptions.ConnectionError``.
    """
    operator = LivyBatchOperator(
        spill_logs=True,
        task_id="test_run_batch_error_before_batch_created",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")
    fake_connections = [Connection(host="HOST", port=123)]
    mocker.patch.object(
        BaseHook,
        "_get_connections_from_db",
        return_value=fake_connections,
    )

    with raises(requests.exceptions.ConnectionError) as exc_info:
        operator.execute({})

    print(
        "\n\nNo response from server was mocked, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    # spill_logs is True, but the Operator has no batch_id yet, so there is
    # nothing to fetch logs for.
    log_spy.assert_not_called()
def test_invalid_verification(dag):
    """Expect the constructor to reject an unknown ``verify_in`` value."""
    operator_kwargs = {
        "task_id": "test_invalid_verification",
        "verify_in": "invalid",
        "dag": dag,
    }

    with raises(AirflowException) as exc_info:
        LivyBatchOperator(**operator_kwargs)

    print(
        "\n\nCreated a batch operator with invalid veification method, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
def test_run_batch_verify_in_yarn_failed(dag, mocker):
    """Expect ``AirflowException`` when YARN reports a bad ``finalStatus``.

    The mocked YARN response carries ``finalStatus`` set to ``NOTGOOD``.
    """
    operator = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        task_id="test_run_batch_verify_in_yarn_failed",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")
    yarn_payload = {"app": {"finalStatus": "NOTGOOD"}}
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[MockedResponse(200, json_body=yarn_payload)],
    )

    with raises(AirflowException) as exc_info:
        operator.execute({})

    print(
        "\n\nImitated failed status from YARN REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    # Logs are expected to be spilled on failure even though spill_logs is False.
    log_spy.assert_called_once()
def test_run_batch_verify_in_yarn_garbled_response(dag, mocker):
    """Expect ``AirflowException`` when the YARN REST API returns non-JSON output.

    The mocked YARN response body is an HTML document rather than JSON.
    """
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        # Use this test's own name: the previous value was copy-pasted from
        # test_run_batch_verify_in_spark and duplicated its id.
        task_id="test_run_batch_verify_in_yarn_garbled_response",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[
            MockedResponse(200, body="<!DOCTYPE html><html>notjson</html>")
        ],
    )

    with raises(AirflowException) as ae:
        op.execute({})

    print(
        "\n\nImitated garbled output from YARN REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # Logs are expected to be spilled on failure even though spill_logs is False.
    spill_logs_spy.assert_called_once()
def test_run_batch_no_appid(dag, mocker):
    """Expect ``AirflowException`` when the batch status carries no usable ``appId``.

    Two scenarios run on the same operator: ``appId`` present but null, then
    the ``appId`` key missing altogether. Logs are expected to be spilled
    after each failure.
    """
    operator = LivyBatchOperator(
        verify_in="spark",
        spill_logs=False,
        task_id="test_run_batch_no_appid",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")

    # Scenario 1: the key exists, but its value is null.
    null_app_id = MockedResponse(200, json_body={"state": "success", "appId": None})
    mock_livy_batch_responses(mocker, mock_get=[null_app_id])
    with raises(AirflowException) as exc_info:
        operator.execute({})
    print(
        "\n\nImitated null Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    # spill_logs is False, yet one spill is expected after the failure.
    log_spy.assert_called_once()

    # Scenario 2: the key is absent from the response.
    missing_app_id = MockedResponse(
        200,
        json_body={"state": "success", "noAppId": "here"},
    )
    mock_livy_batch_responses(mocker, mock_get=[missing_app_id])
    with raises(AirflowException) as exc_info:
        operator.execute({})
    print(
        "\n\nImitated no key for Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    assert log_spy.call_count == 2
def test_run_batch_successfully(dag, mocker):
    """Check the happy path, with and without log spilling.

    The first run has ``spill_logs=False`` and expects no log spill; the
    second run flips the flag on the same operator and expects one spill.
    """
    operator = LivyBatchOperator(
        spill_logs=False,
        task_id="test_run_batch_successfully",
        dag=dag,
    )
    log_spy = mocker.spy(operator, "spill_batch_logs")
    submit_spy = mocker.spy(operator, "submit_batch")
    mock_livy_batch_responses(mocker)

    operator.execute({})
    submit_spy.assert_called_once()
    # The batch succeeded and spill_logs is off, so no logs are expected.
    log_spy.assert_not_called()

    operator.spill_logs = True
    operator.execute({})
    # With spill_logs switched on, the successful run should spill logs once.
    log_spy.assert_called_once()
def test_run_batch_error_during_status_probing(dag, mocker, code):
    """Expect a failure and a log spill when status probing gets HTTP ``code``.

    ``code`` is the status returned by the mocked status request; it is
    presumably supplied by a parametrization that is not visible in this
    block. The operator runs twice, with ``spill_logs`` on and then off, and
    logs are expected to be spilled after both failures.
    """
    op = LivyBatchOperator(
        spill_logs=True,
        # Suffix the id with the code, as test_submit_batch_bad_response_codes
        # does, so every parametrized case gets a unique task_id.
        task_id=f"test_run_batch_error_during_status_probing_{code}",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_get=[MockedResponse(code, body=f"Response from server:{code}")],
    )

    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated {code} response from server during batch status probing , "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=True, and the Operator had the batch_id by the time the
    # error occurred.
    spill_logs_spy.assert_called_once()

    op.spill_logs = False
    with raises(AirflowException):
        op.execute({})
    # spill_logs=False, but an error occurred and the Operator had the
    # batch_id, so logs are spilled again.
    assert spill_logs_spy.call_count == 2
def test_submit_batch_params(dag, mocker):
    """Check that constructor parameters reach the operator and the submit payload.

    ``HttpHook.run`` is patched to return a canned ``{"id": 1}`` response.
    The test then verifies the operator-level settings are stored unchanged
    and that the JSON passed to the hook matches the expected payload.

    Raises:
        AssertionError: if no JSON payload can be found in the hook call, or
            if the payload differs from the expected one.
    """
    http_conn_id_yarn = "http_conn_id_yarn"
    http_conn_id_spark = "http_conn_id_spark"
    http_conn_id_livy = "http_conn_id_livy"
    timeout_minutes = 4
    poll_period_sec = 5
    verify_in = "yarn"
    op = LivyBatchOperator(
        file="file",
        proxy_user="******",
        class_name="class_name",
        arguments=["arg1", "arg2"],
        jars=["jar1", "jar2"],
        py_files=["py_file1", "py_file2"],
        files=["file1", "file2"],
        driver_memory="driver_memory",
        driver_cores=1,
        executor_memory="executor_memory",
        executor_cores=2,
        num_executors=3,
        archives=["archive1", "archive2"],
        queue="queue",
        name="name",
        conf={"key1": "val1", "key2": 2},
        timeout_minutes=timeout_minutes,
        poll_period_sec=poll_period_sec,
        verify_in=verify_in,
        http_conn_id_livy=http_conn_id_livy,
        http_conn_id_spark=http_conn_id_spark,
        http_conn_id_yarn=http_conn_id_yarn,
        task_id="test_submit_batch_params",
        dag=dag,
    )
    mock_response = Response()
    mock_response._content = b'{"id": 1}'
    patched_hook = mocker.patch.object(HttpHook, "run", return_value=mock_response)

    op.submit_batch()

    # Settings that are not part of the payload must be kept on the operator.
    assert op.timeout_minutes == timeout_minutes
    assert op.poll_period_sec == poll_period_sec
    assert op.verify_in == verify_in
    assert op.http_conn_id_livy == http_conn_id_livy
    assert op.http_conn_id_spark == http_conn_id_spark
    assert op.http_conn_id_yarn == http_conn_id_yarn

    # NOTE: "args" is listed in reverse order relative to ``arguments`` above;
    # the comparison below still matches because it uses ignore_order=True.
    expected_json = json.loads(
        """{
          "proxyUser": "******",
          "file": "file",
          "className": "class_name",
          "args": [
            "arg2",
            "arg1"
          ],
          "pyFiles": [
            "py_file1",
            "py_file2"
          ],
          "jars": [
            "jar1",
            "jar2"
          ],
          "files": [
            "file1",
            "file2"
          ],
          "driverMemory": "driver_memory",
          "driverCores": 1,
          "executorMemory": "executor_memory",
          "executorCores": 2,
          "numExecutors": 3,
          "archives": [
            "archive1",
            "archive2"
          ],
          "name": "name",
          "queue": "queue",
          "conf": {
            "key1": "val1",
            "key2": 2
          }
        }"""
    )
    actual_args, actual_kwargs = patched_hook._call_matcher(patched_hook.call_args)
    actual_json = find_json_in_args(actual_args, actual_kwargs)
    if actual_json is None:
        # The message is a single-line literal again; it had been split across
        # a physical line break, which left the string unterminated.
        raise AssertionError(
            f"Can not find JSON in HttpHook args.\n"
            f"Args:\n{actual_args}\n"
            f"KWArgs (JSON should be under 'data' key):\n{actual_kwargs}"
        )

    diff = DeepDiff(actual_json, expected_json, ignore_order=True)
    if diff:
        # Show the mismatch before the assertion fails.
        print(f"\nDifference:\n{json.dumps(diff, indent=2)}")
    assert not diff
def test_submit_batch_get_id(dag, mocker):
    """Check that ``submit_batch`` stores the numeric ``id`` from the response."""
    operator = LivyBatchOperator(task_id="test_submit_batch_get_id", dag=dag)
    created_response = mock_http_calls(201, content=b'{"id": 123}')
    mocker.patch.object(HttpHook, "get_conn", return_value=created_response)

    operator.submit_batch()

    assert operator.batch_id == 123