def test_it_handles_old_version_delete_failures(
    mock_handle,
    mock_delete,
    mock_s3,
    mock_delete_versions,
    mock_save,
    message_stub,
):
    """Expect a failed old-version cleanup to be reported to the error handler.

    ``mock_delete_versions`` is made to raise ``DeleteOldVersionsError`` and the
    handler mock must receive the "Unable to delete previous versions" message.
    """
    # Arrange: open() hands back the same mock, whose __enter__ yields a file
    # object that carries a version id.
    source_file = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_file
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.return_value = "new_version123"
    mock_delete_versions.side_effect = DeleteOldVersionsError(
        errors=["access denied"]
    )
    message = message_stub(
        RoleArn="arn:aws:iam:account_id:role/rolename",
        DeleteOldVersions=True,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_handle.assert_called_with(
        ANY, ANY, "Unable to delete previous versions: access denied"
    )
def test_happy_path_when_queue_not_empty_for_compressed_json(
    mock_save,
    mock_emit,
    mock_delete,
    mock_s3,
    mock_session,
    mock_verify_integrity,
    message_stub,
):
    """Check the successful flow for a gzip-suffixed JSON object.

    Verifies the arguments passed to the open, delete, save, session and
    integrity-check mocks, and the type of the buffer handed to ``mock_save``.
    """
    # Arrange
    expected_column = {"Column": "customer_id", "MatchIds": ["12345", "23456"]}
    source_file = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_file
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.return_value = "new_version123"
    message = message_stub(Object="s3://bucket/path/basic.json.gz", Format="json")

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.json.gz", "rb")
    mock_delete.assert_called_with(source_file, [expected_column], "json", True)
    mock_save.assert_called_with(
        ANY, ANY, ANY, "bucket", "path/basic.json.gz", "abc123"
    )
    mock_emit.assert_called()
    mock_session.assert_called_with(None)
    mock_verify_integrity.assert_called_with(
        ANY, "bucket", "path/basic.json.gz", "abc123", "new_version123"
    )
    # Third positional argument of the last save call is the output buffer.
    saved_buffer = mock_save.call_args[0][2]
    assert saved_buffer.read
    # must be BufferReader for zero-copy
    assert isinstance(saved_buffer, pa.BufferReader)
def test_it_provides_logs_for_failed_version_integrity_check_and_performs_rollback(
    mock_error_handler,
    mock_delete,
    mock_load,
    mock_verify_integrity,
    rollback_mock,
    message_stub,
):
    """Expect an integrity-check failure to be logged and a rollback requested.

    ``mock_verify_integrity`` raises ``IntegrityCheckFailedError``; the test then
    checks the handler message and the arguments given to ``rollback_mock``.
    """
    # Arrange
    mock_load.return_value = MagicMock(num_row_groups=1)
    integrity_failure = IntegrityCheckFailedError(
        "Some error", MagicMock(), "bucket", "path/basic.parquet", "new_version"
    )
    mock_verify_integrity.side_effect = integrity_failure
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_verify_integrity.assert_called()
    mock_error_handler.assert_called_with(
        ANY, ANY, "Object version integrity check failed: Some error"
    )
    rollback_mock.assert_called_with(
        ANY, "bucket", "path/basic.parquet", "new_version", on_error=ANY
    )
def test_happy_path_when_queue_not_empty(
    mock_save,
    mock_emit,
    mock_delete,
    mock_s3,
    mock_load,
    mock_session,
    mock_verify_integrity,
    message_stub,
):
    """Check the successful flow for a parquet object.

    Verifies the arguments passed to the open, delete, save, session and
    integrity-check mocks, and the type of the buffer handed to ``mock_save``.
    """
    # Arrange
    expected_column = {"Column": "customer_id", "MatchIds": ["12345", "23456"]}
    loaded_parquet = MagicMock(num_row_groups=1)
    mock_load.return_value = loaded_parquet
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = MagicMock(version_id="abc123")
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.return_value = "new_version123"
    message = message_stub(Object="s3://bucket/path/basic.parquet")

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.parquet", "rb")
    mock_delete.assert_called_with(loaded_parquet, [expected_column])
    mock_save.assert_called_with(
        ANY, ANY, ANY, "bucket", "path/basic.parquet", "abc123"
    )
    mock_emit.assert_called()
    mock_session.assert_called_with(None)
    mock_verify_integrity.assert_called_with(
        ANY, "bucket", "path/basic.parquet", "abc123", "new_version123"
    )
    # Third positional argument of the last save call is the output buffer.
    saved_buffer = mock_save.call_args[0][2]
    assert saved_buffer.read
    # must be BufferReader for zero-copy
    assert isinstance(saved_buffer, pa.BufferReader)
def test_it_handles_s3_permission_issues(mock_error_handler, mock_s3, message_stub):
    """Expect a ``ClientError`` raised while opening the object to be reported.

    ``mock_s3.open`` raises a botocore ``ClientError`` for ``GetObject``; the
    message passed to the error handler must start with ``"ClientError:"``.
    """
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = ClientError({}, "GetObject")

    # Act
    execute("https://queue/url", message_stub(), "receipt_handle")

    # Assert
    # Guard first: call_args is None when the handler was never invoked, and
    # indexing it would raise an unrelated TypeError instead of a clear
    # assertion failure.
    mock_error_handler.assert_called()
    msg = mock_error_handler.call_args[0][2]
    assert msg.startswith("ClientError:")
def test_it_handles_io_errors(mock_error_handler, mock_s3, message_stub):
    """Expect an ``IOError`` from ``mock_s3.open`` to be reported to the handler."""
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = IOError("an error")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unable to retrieve object: an error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_arrow_exceptions(mock_error_handler, mock_delete, message_stub):
    """Expect an ``ArrowException`` from ``mock_delete`` to be reported.

    The handler mock must receive the "Apache Arrow processing error" message.
    """
    # NOTE(review): this file contains a second function with this exact name.
    # If both live in the same module, the later definition replaces this one
    # and only one of them is collected — confirm and rename as appropriate.

    # Arrange
    mock_delete.side_effect = ArrowException("FAIL")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Apache Arrow processing error: FAIL"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_file_too_big(mock_error_handler, mock_s3, message_stub):
    """Expect a ``MemoryError`` from ``mock_s3.open`` to be reported to the handler."""
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = MemoryError("Too big")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Insufficient memory to work on object: Too big"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_generic_error(mock_error_handler, mock_s3, message_stub):
    """Expect a ``RuntimeError`` from ``mock_s3.open`` to be reported as unknown."""
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = RuntimeError("Some Error")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unknown error during message processing: Some Error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_unversioned_buckets(
    mock_error_handler, mock_s3, mock_versioning, message_stub
):
    """Expect a ``ValueError`` from the versioning check to mark the message unprocessable.

    Also checks that ``mock_versioning`` was called with the bucket name.
    """
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_versioning.side_effect = ValueError("Versioning validation Error")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unprocessable message: Versioning validation Error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
    mock_versioning.assert_called_with(ANY, "bucket")
def test_it_handles_missing_col_exceptions(
    mock_build_matches, mock_error_handler, mock_delete, message_stub
):
    """Expect a ``KeyError`` from ``mock_delete`` to be reported to the handler.

    The expected message contains ``'FAIL'`` with quotes.
    """
    # NOTE(review): mock_build_matches is never referenced in the body;
    # presumably it is injected positionally and must stay in the signature —
    # confirm.

    # Arrange
    mock_delete.side_effect = KeyError("FAIL")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Apache Arrow processing error: 'FAIL'"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_arrow_exceptions(
    mock_error_handler, mock_delete, mock_load, message_stub
):
    """Expect an ``ArrowException`` from ``mock_delete`` to be reported.

    The handler mock must receive the "Parquet processing error" message.
    """
    # NOTE(review): this file contains an earlier function with this exact
    # name. If both live in the same module, this definition replaces the
    # earlier one and only one of them is collected — confirm and rename as
    # appropriate.

    # Arrange
    mock_load.return_value = MagicMock(num_row_groups=1)
    mock_delete.side_effect = ArrowException("FAIL")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Parquet processing error: FAIL"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_provides_logs_for_acl_fail(
    mock_save, mock_error_handler, mock_delete, message_stub
):
    """Expect a ``PutObjectAcl`` ``ClientError`` from ``mock_save`` to be logged.

    The handler message must carry the client error text followed by the note
    about the WRITE ACL not being restored.
    """
    # Arrange
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.side_effect = ClientError({}, "PutObjectAcl")
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_save.assert_called()
    expected_message = (
        "ClientError: An error occurred (Unknown) when calling the PutObjectAcl "
        "operation: Unknown. Redacted object uploaded successfully but unable to "
        "restore WRITE ACL"
    )
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_assumes_role(mock_delete, mock_s3, mock_session, message_stub):
    """Expect the ``RoleArn`` from the message to be passed to ``mock_session``."""
    # Arrange
    role_arn = "arn:aws:iam:account_id:role/rolename"
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = MagicMock(version_id="abc123")
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub(
        RoleArn=role_arn,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_session.assert_called_with(role_arn)
def test_it_provides_logs_for_get_latest_version_fail(
    mock_error_handler, mock_delete, mock_verify_integrity, message_stub
):
    """Expect a ``ListObjectVersions`` failure during verification to be logged.

    ``mock_verify_integrity`` raises the error built by
    ``get_list_object_versions_error()``; the handler message must carry that
    error text followed by the note about the integrity check.
    """
    # Arrange
    mock_verify_integrity.side_effect = get_list_object_versions_error()
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_verify_integrity.assert_called()
    expected_message = (
        "ClientError: An error occurred (InvalidArgument) when calling the "
        "ListObjectVersions operation: Invalid version id specified. Could "
        "not verify redacted object version integrity"
    )
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_removes_old_versions(
    mock_delete, mock_s3, mock_delete_versions, mock_save, message_stub
):
    """Expect old-version deletion to be invoked with the newly saved version id.

    The message sets ``DeleteOldVersions=True`` and ``mock_save`` returns the
    version id that must appear as the last argument to ``mock_delete_versions``.
    """
    # Arrange
    new_version_id = "new_version123"
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = MagicMock(version_id="abc123")
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.return_value = new_version_id
    message = message_stub(
        RoleArn="arn:aws:iam:account_id:role/rolename",
        DeleteOldVersions=True,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_delete_versions.assert_called_with(ANY, ANY, ANY, new_version_id)
def test_it_handles_no_deletions(
    mock_handle, mock_save, mock_emit, mock_delete, mock_s3, message_stub
):
    """Expect no save or emit, and an error report, when zero rows are deleted.

    ``mock_delete`` reports ``{"DeletedRows": 0}``; the test then checks that
    the object was opened, that ``mock_save`` and ``mock_emit`` were never
    called, and that the handler received the "no rows required deletion"
    message.
    """
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_delete.return_value = pa.BufferOutputStream(), {"DeletedRows": 0}

    # Act
    execute(
        "https://queue/url",
        message_stub(Object="s3://bucket/path/basic.parquet"),
        "receipt_handle",
    )

    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.parquet", "rb")
    mock_save.assert_not_called()
    mock_emit.assert_not_called()
    mock_handle.assert_called_with(
        ANY,
        ANY,
        "Unprocessable message: The object s3://bucket/path/basic.parquet "
        "was processed successfully but no rows required deletion",
    )
def test_it_provides_logs_for_failed_rollback_generic_error(
    mock_error_handler, mock_delete, mock_verify_integrity, message_stub
):
    """Expect two handler calls when rollback fails with a generic exception.

    The client mock attached to the ``IntegrityCheckFailedError`` raises a plain
    ``Exception`` from ``delete_object``. The handler must first receive the
    integrity failure and then the rollback failure with the
    ``"ObjectRollbackFailed"`` and ``False`` extra arguments.
    """
    # Arrange
    failing_client = MagicMock()
    failing_client.delete_object.side_effect = Exception("error!!")
    mock_verify_integrity.side_effect = IntegrityCheckFailedError(
        "Some error", failing_client, "bucket", "test/basic.parquet", "new_version"
    )
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_verify_integrity.assert_called()
    expected_calls = [
        call(ANY, ANY, "Object version integrity check failed: Some error"),
        call(
            ANY,
            ANY,
            "Unknown error: error!!. Version rollback caused by version integrity conflict failed",
            "ObjectRollbackFailed",
            False,
        ),
    ]
    assert mock_error_handler.call_args_list == expected_calls
def test_it_provides_logs_for_failed_rollback_client_error(
    mock_error_handler, mock_delete, mock_load, mock_verify_integrity, message_stub
):
    """Expect two handler calls when rollback fails with a ``ClientError``.

    The client mock attached to the ``IntegrityCheckFailedError`` raises a
    ``DeleteObject`` ``ClientError``. The handler must first receive the
    integrity failure and then the rollback failure with the
    ``"ObjectRollbackFailed"`` and ``False`` extra arguments.
    """
    # Arrange
    mock_load.return_value = MagicMock(num_row_groups=1)
    failing_client = MagicMock()
    failing_client.delete_object.side_effect = ClientError({}, "DeleteObject")
    mock_verify_integrity.side_effect = IntegrityCheckFailedError(
        "Some error", failing_client, "bucket", "test/basic.parquet", "new_version"
    )
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub()

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_verify_integrity.assert_called()
    expected_calls = [
        call(ANY, ANY, "Object version integrity check failed: Some error"),
        call(
            ANY,
            ANY,
            "ClientError: An error occurred (Unknown) when calling the DeleteObject operation: Unknown. "
            "Version rollback caused by version integrity conflict failed",
            "ObjectRollbackFailed",
            False,
        ),
    ]
    assert mock_error_handler.call_args_list == expected_calls
def test_it_validates_messages_with_invalid_body(mock_error_handler):
    """Expect the error handler to be called for a body that is not JSON."""
    # Arrange
    malformed_body = "NOT JSON"

    # Act
    execute("https://queue/url", malformed_body, "receipt_handle")

    # Assert
    mock_error_handler.assert_called()
def test_it_validates_messages_with_missing_keys(mock_error_handler):
    """Expect the error handler to be called for an empty JSON object body."""
    # Arrange
    empty_body = "{}"

    # Act
    execute("https://queue/url", empty_body, "receipt_handle")

    # Assert
    mock_error_handler.assert_called()