def test_pop_unbalanced_queues(self):
    """Pop six failure messages when only one of the two failure queues has any.

    The fake ``receive_messages`` yields failure messages only for queues
    whose name contains ``"1"``. The test asserts that six
    ``FailureMessage`` objects come back, that ``receive_messages`` was
    called three times, and that the ``messages_per_page`` values requested
    across both queues add up to ``4 + 4 + 6``.
    """
    client = QueuedIngestClient("some-cluster")

    def messages_for(queue_name, messages_per_page=1):
        # Only the queue with "1" in its name ever has messages.
        if "1" in queue_name:
            return [mock_message(success=False) for _ in range(0, messages_per_page)]
        return []

    fake_receive = fake_receive_factory(messages_for)

    patch_success_qs = mock.patch.object(client._resource_manager, "get_successful_ingestions_queues")
    patch_failed_qs = mock.patch.object(client._resource_manager, "get_failed_ingestions_queues")
    patch_receive = mock.patch.object(
        QueueClient,
        "receive_messages",
        autospec=True,
        side_effect=fake_receive,
    )
    patch_delete = mock.patch.object(QueueClient, "delete_message", return_value=None)

    with patch_success_qs, patch_failed_qs as mocked_get_failed_qs, patch_receive as q_receive_mock, patch_delete:
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        status_queues = KustoIngestStatusQueues(client)
        popped_failures = status_queues.failure.pop(6)

        assert len(popped_failures) == 6
        for message in popped_failures:
            assert isinstance(message, FailureMessage)

        assert q_receive_mock.call_count == 3

        # Sum the page sizes requested from each queue, keyed by queue name.
        requested = {}
        for recorded_call in q_receive_mock.call_args_list:
            name = recorded_call[0][0].queue_name
            requested[name] = requested.get(name, 0) + recorded_call[1]["messages_per_page"]

        total_requested = requested[fake_failed_queue2.object_name] + requested[fake_failed_queue1.object_name]
        assert total_requested == (4 + 4 + 6)
def getStatusQueue(client):
    """Build the ingestion status queues for ``client`` on a best-effort basis.

    Args:
        client: The ingest client handed to ``KustoIngestStatusQueues``.

    Returns:
        The ``KustoIngestStatusQueues`` instance, or ``None`` when its
        construction raised; the failure is logged rather than propagated.
    """
    try:
        statusQueue = KustoIngestStatusQueues(client)
    except Exception as e:
        # Deliberate best-effort: callers receive None instead of an exception.
        # Lazy %-args keep the message text unchanged; exc_info keeps the
        # traceback that the plain string interpolation used to discard.
        logging.error("Error initializing status queue:%s", e, exc_info=True)
        return None

    logging.info("Initialized status queue successfully.")
    return statusQueue
def test_isempty(self):
    """Check ``is_empty()`` on a populated success queue and an empty failure queue.

    The fake ``peek_messages`` returns success messages only for queues whose
    name contains ``"qs"``. The test asserts the success side is not empty,
    the failure side is empty, and that both recorded peeks were made with
    ``max_messages == 2``.
    """
    client = QueuedIngestClient("some-cluster")

    def messages_for(queue_name, num_messages=1):
        # Only the success queue ("qs" in its name) has anything to peek at.
        if "qs" in queue_name:
            return [mock_message(success=True) for _ in range(0, num_messages)]
        return []

    fake_peek = fake_peek_factory(messages_for)

    patch_success_qs = mock.patch.object(client._resource_manager, "get_successful_ingestions_queues")
    patch_failed_qs = mock.patch.object(client._resource_manager, "get_failed_ingestions_queues")
    patch_peek = mock.patch.object(QueueClient, "peek_messages", autospec=True, side_effect=fake_peek)

    with patch_success_qs as mocked_get_success_qs, patch_failed_qs as mocked_get_failed_qs, patch_peek as q_mock:
        fake_failed_queue = _ResourceUri(
            "mocked_storage_account1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )
        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue]

        status_queues = KustoIngestStatusQueues(client)

        assert status_queues.success.is_empty() is False
        assert status_queues.failure.is_empty() is True

        assert q_mock.call_count == 2
        first_call = q_mock.call_args_list[0]
        second_call = q_mock.call_args_list[1]
        assert first_call[1]["max_messages"] == 2
        assert second_call[1]["max_messages"] == 2
def test_isempty(self, mocked_q_peek_messages):
    """Check ``is_empty()`` against a mocked ``peek_messages``.

    The mock returns an empty list for the failure queue and ``num_messages``
    blank ``QueueMessage`` objects for any other queue. The test asserts the
    success side is not empty, the failure side is empty, and that each queue
    was peeked exactly once with ``num_messages == 2``, success queue first.

    Args:
        mocked_q_peek_messages: Mock standing in for the queue service's
            peek call (injected by a decorator outside this block).
    """
    client = KustoIngestClient("some-cluster")
    with mock.patch.object(
        client._resource_manager, "get_successful_ingestions_queues"
    ) as mocked_get_success_qs, mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs:
        fake_failed_queue = _ResourceUri("mocked_storage_account1", "queue", "mocked_qf_name", "mocked_sas")
        fake_success_queue = _ResourceUri("mocked_storage_account2", "queue", "mocked_qs_name", "mocked_sas")

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue]

        mocked_q_peek_messages.side_effect = (
            lambda queue_name, num_messages=1: []
            if queue_name == fake_failed_queue.object_name
            else [QueueMessage() for _ in range(0, num_messages)]
        )

        qs = KustoIngestStatusQueues(client)

        # Identity checks against the boolean singletons rather than
        # ``== True`` / ``== False``, so a merely truthy or falsy return
        # value cannot satisfy the assertion.
        assert qs.success.is_empty() is False
        assert qs.failure.is_empty() is True

        assert mocked_q_peek_messages.call_count == 2
        success_call, failure_call = mocked_q_peek_messages.call_args_list
        assert success_call[0][0] == fake_success_queue.object_name
        assert success_call[1]["num_messages"] == 2
        assert failure_call[0][0] == fake_failed_queue.object_name
        assert failure_call[1]["num_messages"] == 2
jsonPath="$.xtime", cslDataType="timespan")) mappings.append( JsonColumnMapping(columnName="xtextWithNulls", jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping(columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic")) return mappings client = KustoClient("https://toshetah.kusto.windows.net") ingest_client = KustoIngestClient("https://ingest-toshetah.kusto.windows.net") ingest_status_q = KustoIngestStatusQueues(ingest_client) client.execute("PythonTest", ".drop table Deft ifexists") @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings(), reportLevel=ReportLevel.FailuresAndSuccesses, ) csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest",
# ingest a whole folder.
import os

path = "folder/path"
# os.listdir() yields bare entry names, so each one is joined back onto the
# folder before being handed to the client. A plain loop is used because the
# calls are made for their side effect, not to build a list.
for file_name in os.listdir(path):
    client.ingest_from_file(os.path.join(path, file_name), ingestion_properties=ingestion_props)

##################################################################
##                        INGESTION STATUS                      ##
##################################################################

# if status updates are required, something like this can be done
import pprint
import time
from azure.kusto.ingest.status import KustoIngestStatusQueues

qs = KustoIngestStatusQueues(client)

MAX_BACKOFF = 180

backoff = 1
while True:
    ################### NOTICE ####################
    # in order to get success status updates,
    # make sure ingestion properties set the
    # reportLevel=ReportLevel.FailuresAndSuccesses.
    if qs.success.is_empty() and qs.failure.is_empty():
        # Report the interval that is about to be slept, then double it
        # (capped at MAX_BACKOFF) for the next empty poll.
        print("No new messages. backing off for {} seconds".format(backoff))
        time.sleep(backoff)
        backoff = min(backoff * 2, MAX_BACKOFF)
        continue
def update_ADX_ingest_status(tc):
    """Drain the ADX ingestion status queues and record every reported result.

    Polls the success and failure status queues with an exponential back-off
    (1, 2, 4 seconds) and returns once both queues are still empty when the
    back-off reaches its cap. For each success message the result is traced,
    written through ``update_COSMOS_status`` and the source blob is moved
    under ``PROCESSED_TELEMETRY_FOLDER``. For each failure message the result
    is traced and written through ``update_COSMOS_status``.

    Args:
        tc: Telemetry client; ``track_trace``, ``track_event`` and ``flush``
            are called on it, and it is passed on to the helper functions.
    """
    kcsb_ingest = KustoConnectionStringBuilder.with_aad_device_authentication(DATA_INGESTION_URI)
    kcsb_ingest.authority_id = APP_AAD_TENANT_ID
    qs = KustoIngestStatusQueues(KustoIngestClient(kcsb_ingest))

    # Short tag (tail of a fresh UUID string) that marks this run's log lines.
    run_id = (str(uuid.uuid4()))[31:].upper()

    # Destination prefix for processed blobs, with exactly one separator added
    # when the configured folder does not already end in "/".
    processed_prefix = PROCESSED_TELEMETRY_FOLDER
    if not processed_prefix.endswith('/'):
        processed_prefix = processed_prefix + '/'

    max_backoff = 8
    backoff = 1
    total_queue_success_messages = 0
    # Created on first use and then reused, instead of once per message.
    telemetry_block_blob_service = None

    while True:
        ################### NOTICE ####################
        # in order to get success status updates,
        # make sure ingestion properties set the
        # reportLevel=ReportLevel.FailuresAndSuccesses.
        if qs.success.is_empty() and qs.failure.is_empty():
            time.sleep(backoff)
            if backoff == 1 and total_queue_success_messages != 0:
                print("{} RUN_ID:{} Processed {} message in this batch ".format(
                    LOG_MESSAGE_HEADER, run_id, total_queue_success_messages))
            backoff = min(backoff * 2, max_backoff)
            if backoff < max_backoff:
                continue
            # The back-off hit its cap with both queues still empty: stop.
            break

        # Messages are available again, so restart the back-off sequence.
        backoff = 1

        # Normalise to lists so a None result is handled the same way as an
        # empty batch everywhere below (the original only guarded some uses).
        success_messages = qs.success.pop(15) or []
        failure_messages = qs.failure.pop(15) or []
        total_success = len(success_messages)
        total_failure = len(failure_messages)

        if success_messages:
            tc.track_trace("{} Get {} success ingest messages ".format(
                LOG_MESSAGE_HEADER, str(total_success)))
        if failure_messages:
            tc.track_trace("{} Get {} failure ingest messages ".format(
                LOG_MESSAGE_HEADER, str(total_failure)))
        tc.flush()

        total_queue_success_messages += total_success

        for count_success, smsg in enumerate(success_messages, start=1):
            file_path = get_file_path(smsg.IngestionSourcePath)
            container_name = get_container_name(smsg.IngestionSourcePath)
            # Looked up once per message and reused below.
            # NOTE(review): assumes get_vm_uuid_from_filename is side-effect free - confirm.
            vm_uuid = get_vm_uuid_from_filename(file_path)

            log_msg = "{} SUCCESS TO INGEST TO ADX <{}> -[{}/{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, file path: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_success),
                str(total_success), str(total_queue_success_messages),
                smsg.SucceededOn, vm_uuid, smsg.IngestionSourceId, file_path)
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME,
                {
                    'MESSAGE': 'SUCCESS TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': smsg.IngestionSourceId
                },
                {})
            tc.flush()

            update_COSMOS_status(COSMOS_CLIENT, file_path, smsg.SucceededOn,
                                 SUCCESS_STATUS, str(smsg), vm_uuid,
                                 smsg.IngestionSourceId, container_name, tc,
                                 count_success, run_id)

            if telemetry_block_blob_service is None:
                telemetry_block_blob_service = BlockBlobService(
                    account_name=SOURCE_TELEMETRY_BLOB_ACCOUNT,
                    account_key=SOURCE_TELEMETRY_FILE_BLOB_KEY)

            # The blob stays in the same container; only its path changes.
            target_file_path = processed_prefix + file_path
            move_processed_file(telemetry_block_blob_service, container_name,
                                file_path, container_name, target_file_path, tc)

            tc.track_trace(
                '{} DONE ADX INGESTION PROCESS <{}> -[{}/{}/{}], File Moved to processed folder {} , vm_uuid: {}, file path: {}'
                .format(LOG_MESSAGE_HEADER, run_id, str(count_success),
                        str(total_success), str(total_queue_success_messages),
                        target_file_path, vm_uuid, file_path))
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME,
                {
                    'MESSAGE': 'DONE ADX INGESTION PROCESS',
                    'moved_file_path': target_file_path,
                    'source_file_path': file_path
                },
                {})
            tc.flush()

            print("{} IngestionSourcePath: {}".format(
                LOG_MESSAGE_HEADER, smsg.IngestionSourcePath))
            print(smsg.SucceededOn)

        for count_failure, fmsg in enumerate(failure_messages, start=1):
            container_name = get_container_name(fmsg.IngestionSourcePath)
            file_path = get_file_path(fmsg.IngestionSourcePath)
            vm_uuid = get_vm_uuid_from_filename(file_path)

            log_msg = "{} FAILED TO INGEST TO ADX <{}> -[{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, container:{}, file path: {}, message: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_failure),
                str(total_failure), fmsg.FailedOn, vm_uuid,
                fmsg.IngestionSourceId, container_name, file_path, str(fmsg))
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_FAILURE_EVENT_NAME,
                {
                    'MESSAGE': 'FAILED TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': fmsg.IngestionSourceId
                },
                {})
            tc.flush()

            update_COSMOS_status(COSMOS_CLIENT, file_path, fmsg.FailedOn,
                                 FAILURE_STATUS, str(fmsg), vm_uuid,
                                 fmsg.IngestionSourceId, container_name, tc,
                                 count_failure, run_id)
"""Queued blob '{FILE_NAME}' ({FILE_SIZE} bytes) for ingestion into ADX table '{DESTINATION_TABLE}'""" .format(FILE_NAME=file_name, FILE_SIZE=file_size, DESTINATION_TABLE=destination_table)) # query = """{} | count""".format(destination_table) # response = kusto_client.execute_query(kusto_database, query) # count_query_df = dataframe_from_result_table(response.primary_results[0]) # print(count_query_df) #break #NOTE: uncomment this to check the status message logs qs = KustoIngestStatusQueues(ingestion_client) MAX_BACKOFF = 180 backoff = 1 while True: ################### NOTICE #################### # in order to get success status updates, # make sure ingestion properties set the # reportLevel=ReportLevel.FailuresAndSuccesses. if qs.success.is_empty() and qs.failure.is_empty(): time.sleep(backoff) backoff = min(backoff * 2, MAX_BACKOFF) print("No new messages. Backing off for {} seconds".format(backoff)) continue
def test_init(self):
    """Check that each status queue is bound to its matching message class."""
    status_queues = KustoIngestStatusQueues(QueuedIngestClient("some-cluster"))

    assert status_queues.success.message_cls == SuccessMessage
    assert status_queues.failure.message_cls == FailureMessage
def test_pop(self):
    """Pop from one success queue and two failure queues.

    The fake ``receive_messages`` produces success messages for queues whose
    name contains ``"qs"`` and failure messages otherwise. The test asserts
    the number and type of popped messages, that ``receive_messages`` ran
    three times, that ``delete_message`` ran once per popped message, and the
    ``messages_per_page`` value requested from each queue.
    """
    client = QueuedIngestClient("some-cluster")

    def messages_for(queue_name, num_messages=1):
        batch = []
        for _ in range(0, num_messages):
            if "qs" in queue_name:
                batch.append(mock_message(success=True))
            else:
                batch.append(mock_message(success=False))
        return batch

    fake_receive = fake_receive_factory(messages_for)

    patch_success_qs = mock.patch.object(client._resource_manager, "get_successful_ingestions_queues")
    patch_failed_qs = mock.patch.object(client._resource_manager, "get_failed_ingestions_queues")
    patch_receive = mock.patch.object(
        QueueClient,
        "receive_messages",
        autospec=True,
        side_effect=fake_receive,
    )
    patch_delete = mock.patch.object(QueueClient, "delete_message", return_value=None)

    with patch_success_qs as mocked_get_success_qs, patch_failed_qs as mocked_get_failed_qs, \
            patch_receive as q_receive_mock, patch_delete as q_del_mock:
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        status_queues = KustoIngestStatusQueues(client)

        popped_successes = status_queues.success.pop()
        popped_failures = status_queues.failure.pop(6)

        assert len(popped_successes) == 1
        assert len(popped_failures) == 6

        for message in popped_failures:
            assert isinstance(message, FailureMessage)

        for message in popped_successes:
            assert isinstance(message, SuccessMessage)

        assert q_receive_mock.call_count == 3
        assert q_del_mock.call_count == len(popped_successes) + len(popped_failures)

        recorded = q_receive_mock.call_args_list
        # The first recorded call belongs to the success pop.
        assert recorded[0][1]["messages_per_page"] == 2

        # The second and third calls are the failure queues, keyed by name
        # because their relative order is not asserted.
        requested = {}
        requested[recorded[1][0][0].queue_name] = recorded[1][1]["messages_per_page"]
        requested[recorded[2][0][0].queue_name] = recorded[2][1]["messages_per_page"]

        assert requested[fake_failed_queue2.object_name] == 4
        assert requested[fake_failed_queue1.object_name] == 4
def test_peek(self):
    """Peek at one success queue and two failure queues.

    The fake ``peek_messages`` produces success messages for queues whose
    name contains ``"qs"`` and failure messages otherwise. The test asserts
    the number and type of peeked messages, that ``peek_messages`` ran three
    times, and the total ``max_messages`` requested from each queue.
    """
    client = QueuedIngestClient("some-cluster")

    def messages_for(queue_name, num_messages=1):
        batch = []
        for _ in range(0, num_messages):
            if "qs" in queue_name:
                batch.append(mock_message(success=True))
            else:
                batch.append(mock_message(success=False))
        return batch

    fake_peek = fake_peek_factory(messages_for)

    patch_success_qs = mock.patch.object(client._resource_manager, "get_successful_ingestions_queues")
    patch_failed_qs = mock.patch.object(client._resource_manager, "get_failed_ingestions_queues")
    patch_peek = mock.patch.object(QueueClient, "peek_messages", autospec=True, side_effect=fake_peek)

    with patch_success_qs as mocked_get_success_qs, patch_failed_qs as mocked_get_failed_qs, patch_peek as q_mock:
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        status_queues = KustoIngestStatusQueues(client)

        peeked_successes = status_queues.success.peek()
        peeked_failures = status_queues.failure.peek(6)

        assert len(peeked_successes) == 1

        for message in peeked_failures:
            assert isinstance(message, FailureMessage) is True

        for message in peeked_successes:
            assert isinstance(message, SuccessMessage) is True

        assert len(peeked_failures) == 6

        assert len(QueueClient.peek_messages.call_args_list) == 3

        # Sum the max_messages requested from each queue, keyed by queue name.
        requested = {}
        for recorded_call in q_mock.call_args_list:
            name = recorded_call[0][0].queue_name
            requested[name] = requested.get(name, 0) + recorded_call[1]["max_messages"]

        assert requested[fake_failed_queue2.object_name] == 4
        assert requested[fake_failed_queue1.object_name] == 4
        assert requested[fake_success_queue.object_name] == 2
def test_peek(self, mocked_q_peek_messages):
    """Peek at one success queue and two failure queues via a mocked peek call.

    The mock returns success payloads for the success queue and failure
    payloads for any other queue. The test asserts the number and type of
    peeked messages, that the mock was called three times, and the total
    ``num_messages`` requested from each queue.

    Args:
        mocked_q_peek_messages: Mock standing in for the queue service's
            peek call (injected by a decorator outside this block).
    """
    client = KustoIngestClient("some-cluster")
    with mock.patch.object(
        client._resource_manager, "get_successful_ingestions_queues"
    ) as mocked_get_success_qs, mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs:
        fake_failed_queue1 = _ResourceUri("mocked_storage_account_f1", "queue", "mocked_qf_1_name", "mocked_sas")
        fake_failed_queue2 = _ResourceUri("mocked_storage_account_f2", "queue", "mocked_qf_2_name", "mocked_sas")
        fake_success_queue = _ResourceUri("mocked_storage_account2", "queue", "mocked_qs_name", "mocked_sas")

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        def mock_message(success):
            """Build a ``QueueMessage`` whose content is a base64-encoded JSON status payload."""
            m = QueueMessage()
            m.id = uuid4()
            m.insertion_time = time.time()
            m.expiration_time = None
            m.dequeue_count = None

            # Fields shared by both payload kinds; the outcome-specific ones
            # are appended after them.
            content = {
                "OperationId": str(m.id),
                "Database": "db1",
                "Table": "table1",
                "IngestionSourceId": str(m.id),
                "IngestionSourcePath": "blob/path",
                "RootActivityId": "1",
            }
            if success:
                content["SucceededOn"] = time.time()
            else:
                content.update(
                    {
                        "FailedOn": time.time(),
                        "Details": "",
                        "ErrorCode": "1",
                        "FailureStatus": "",
                        "OriginatesFromUpdatePolicy": "",
                        "ShouldRetry": False,
                    }
                )

            m.content = str(base64.b64encode(json.dumps(content).encode("utf-8")).decode("utf-8"))
            m.pop_receipt = None
            m.time_next_visible = None
            return m

        mocked_q_peek_messages.side_effect = lambda queue_name, num_messages=1: [
            mock_message(success=(queue_name == fake_success_queue.object_name))
            for _ in range(0, num_messages)
        ]

        qs = KustoIngestStatusQueues(client)

        peek_success_actual = qs.success.peek()
        peek_failure_actual = qs.failure.peek(6)

        assert len(peek_success_actual) == 1

        # isinstance() already returns a bool, so it is asserted directly
        # instead of being compared with ``== True``.
        for m in peek_failure_actual:
            assert isinstance(m, FailureMessage)

        for m in peek_success_actual:
            assert isinstance(m, SuccessMessage)

        assert len(peek_failure_actual) == 6

        assert len(mocked_q_peek_messages.call_args_list) == 3

        # Sum the num_messages requested from each queue, keyed by queue name.
        actual = {}
        for call_args in mocked_q_peek_messages.call_args_list:
            queue_name = call_args[0][0]
            actual[queue_name] = actual.get(queue_name, 0) + call_args[1]["num_messages"]

        assert actual[fake_failed_queue2.object_name] == 4
        assert actual[fake_failed_queue1.object_name] == 4
        assert actual[fake_success_queue.object_name] == 2