Example #1
def ingest_to_ADX(filepath, filesize):
    KCSB_INGEST = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    KCSB_INGEST.authority_id = AAD_TENANT_ID

    KCSB_ENGINE = KustoConnectionStringBuilder.with_aad_device_authentication(
        URI)
    KCSB_ENGINE.authority_id = AAD_TENANT_ID

    INGESTION_CLIENT = KustoIngestClient(KCSB_INGEST)
    INGESTION_PROPERTIES = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        additionalProperties={'ignoreFirstRecord': 'true'},
        reportLevel=ReportLevel.FailuresAndSuccesses)
    BLOB_PATH = "https://" + SOURCE_CSV_BLOB_ACCOUNT + ".blob.core.windows.net/" + SOURCE_CSV_CONTAINER + "/" + filepath + SOURCE_CSV_BLOB_TOKEN

    BLOB_DESCRIPTOR = BlobDescriptor(
        BLOB_PATH, filesize)  # filesize is the raw size of the data in bytes
    INGESTION_CLIENT.ingest_from_blob(
        BLOB_DESCRIPTOR, ingestion_properties=INGESTION_PROPERTIES)

    print('Done queuing up ingestion with Azure Data Explorer ' + filepath)
Example #2
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name, blob_account, file_size, tc,vm_uuid,deploy_uuid,config_uuid):
    ingest_source_id=str(uuid.uuid4())
    KCSB_INGEST = KustoConnectionStringBuilder.with_aad_device_authentication(DATA_INGESTION_URI)
    KCSB_INGEST.authority_id = APP_AAD_TENANT_ID
    INGESTION_CLIENT = KustoIngestClient(KCSB_INGEST)
    ing_map=[JsonColumnMapping("vm_uuid", "$.vm_uuid", "string"),
             JsonColumnMapping("deploy_uuid", "$.deployment_description[0].deploy_uuid", "string"),
             JsonColumnMapping("config_uuid", "$.vm_configuration[0].config_uuid", "string"),
             JsonColumnMapping("rawdata", "$", "dynamic")]
        
    INGESTION_PROPERTIES = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.JSON,
        ingestionMapping=ing_map,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        flushImmediately=IS_FLUSH_IMMEDIATELY)

    print("Database {} Tabele {}".format(DATABASE,DESTINATION_TABLE))
    
    BLOB_PATH = "https://" + blob_account + ".blob.core.windows.net/" + container_name + "/" + filepath + CLEAN_FILE_TOKEN

    print (BLOB_PATH,' ',str(file_size), ingest_source_id)
    BLOB_DESCRIPTOR = BlobDescriptor(BLOB_PATH, file_size, ingest_source_id)  # file_size is the raw size of the data in bytes
    INGESTION_CLIENT.ingest_from_blob(BLOB_DESCRIPTOR,ingestion_properties=INGESTION_PROPERTIES)
    tc.context.properties["ingest_source_id"]=ingest_source_id

    min_datatime=0
    max_datatime=0
    total_records=1

    doc_id=save_COSMOS_log(vm_uuid,deploy_uuid,config_uuid,filepath,min_datatime,max_datatime, total_records,ingest_source_id,blob_account,container_name, tc)

    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME,
                   {'FILE_PATH': filepath, 'DOC_ID': doc_id, 'SOURCE_ID': ingest_source_id},
                   {'TOTAL_RECORDS': total_records, 'FILE_SIZE': file_size,
                    'MIN_DATETIME': min_datatime, 'MAX_DATETIME': max_datatime})
    log_msg="{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(LOG_MESSAGE_HEADER,filepath,ingest_source_id)
    print(log_msg)
    tc.track_trace(log_msg)
    tc.flush()
Example #3
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid,
                                          mock_put_message_in_queue,
                                          mock_create_blob_from_path):
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json",
        )

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   dataFormat=DataFormat.csv)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(
            df, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        assert put_message_in_queue_mock_kwargs[
            "queue_name"] == "readyforaggregation-secured"
        queued_message = base64.b64decode(
            put_message_in_queue_mock_kwargs["content"].encode(
                "utf-8")).decode("utf-8")
        queued_message_json = json.loads(queued_message)
        # mock_create_blob_from_stream
        assert (
            queued_message_json["BlobPath"] ==
            "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_100_64.csv.gz?sas"
        )
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] == False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] == False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] == True

        create_blob_from_path_mock_kwargs = mock_create_blob_from_path.call_args_list[
            0][1]
        import tempfile

        assert create_blob_from_path_mock_kwargs[
            "container_name"] == "tempstorage"
        assert create_blob_from_path_mock_kwargs["file_path"] == os.path.join(
            tempfile.gettempdir(), "df_100_64.csv.gz")
        assert (create_blob_from_path_mock_kwargs["blob_name"] ==
                "database__table__1111-111111-111111-1111__df_100_64.csv.gz")
    def test_sanity_ingest_from_file(self, mock_uuid,
                                     mock_put_message_in_queue,
                                     mock_create_blob_from_stream, mock_aad):
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json")

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   dataFormat=DataFormat.CSV)

        # ensure test can work when executed from within directories
        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = []
        for path_part in path_parts:
            if path_part not in current_dir:
                missing_path_parts.append(path_part)

        file_path = os.path.join(current_dir, *missing_path_parts)

        ingest_client.ingest_from_file(
            file_path, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        assert put_message_in_queue_mock_kwargs[
            "queue_name"] == "readyforaggregation-secured"
        queued_message = base64.b64decode(
            put_message_in_queue_mock_kwargs["content"].encode(
                "utf-8")).decode("utf-8")
        queued_message_json = json.loads(queued_message)
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/" "database__table__1111-111111-111111-1111__dataset.csv.gz?sas"
        # mock_create_blob_from_stream
        assert queued_message_json["BlobPath"] == expected_url
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] == False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] == False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] == True

        create_blob_from_stream_mock_kwargs = mock_create_blob_from_stream.call_args_list[
            0][1]

        assert create_blob_from_stream_mock_kwargs[
            "container_name"] == "tempstorage"
        assert type(
            create_blob_from_stream_mock_kwargs["stream"]) == io.BytesIO
        assert create_blob_from_stream_mock_kwargs[
            "blob_name"] == "database__table__1111-111111-111111-1111__dataset.csv.gz"
Example #5
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name,
                  blob_account, tc):
    ingest_source_id = str(uuid.uuid4())
    #file_size=BlockBlobService.get_blob_properties(telemetry_block_blob_service,container_name,filepath).properties.content_length
    #print (filepath+" File Size "+str(file_size))

    KCSB_INGEST = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    KCSB_INGEST.authority_id = APP_AAD_TENANT_ID

    vm_uuid, config_uuid, deploy_uuid, file_size, min_datatime, max_datatime, total_records = get_uuids_from_csv(
        telemetry_block_blob_service, container_name, filepath)
    dropByTag = vm_uuid + '_' + config_uuid + '_' + deploy_uuid

    INGESTION_CLIENT = KustoIngestClient(KCSB_INGEST)
    INGESTION_PROPERTIES = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        additionalProperties={
            'ignoreFirstRecord': 'true',
            'reportMethod': 'QueueAndTable'
        },
        reportLevel=ReportLevel.FailuresAndSuccesses,
        dropByTags=[dropByTag],
        flushImmediately=IS_FLUSH_IMMEDIATELY)

    BLOB_PATH = "https://" + SOURCE_OSMETRICS_BLOB_ACCOUNT + ".blob.core.windows.net/" + SOURCE_OSMETRICS_CONTAINER + "/" + filepath + SOURCE_OSMETRICS_FILE_TOKEN
    #print (BLOB_PATH,' ',str(file_size))
    BLOB_DESCRIPTOR = BlobDescriptor(
        BLOB_PATH, file_size,
        ingest_source_id)  # file_size is the raw size of the data in bytes

    INGESTION_CLIENT.ingest_from_blob(
        BLOB_DESCRIPTOR, ingestion_properties=INGESTION_PROPERTIES)

    tc.context.properties["ingest_source_id"] = str(ingest_source_id)

    doc_id = save_COSMOS_log(vm_uuid, deploy_uuid, config_uuid, filepath,
                             min_datatime, max_datatime, total_records,
                             ingest_source_id, blob_account, container_name,
                             tc)

    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": ingest_source_id
    }, {
        'TOTAL_RECORDS': total_records,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': min_datatime,
        'MAX_DATETIME': max_datatime
    })
    log_msg = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(
        LOG_MESSAGE_HEADER, filepath, ingest_source_id)
    print(log_msg)
    tc.track_trace(log_msg)
    tc.flush()
Example #6
    def __init__(self):
        self.ingest_client = KustoIngestClient(
            credentials.kusto_ppe_ingest_connection,
            client_id=credentials.kusto_application_id,
            client_secret=credentials.kusto_application_key)
        self.properties = IngestionProperties(database="BingAdsUCM",
                                              table="PerfIcMAlertEvent",
                                              dataFormat=DataFormat.csv)
        self.log_buffer_file = "kusto_log_buffer.csv"
Example #7
def Ingest(Tag):
    # setting
    AUTHORITY_ID = "6babcaad-604b-40ac-a9d7-9fd97c0b779f"
    INGESTCLUSTER = "https://ingest-cgadataout.kusto.windows.net"
    KUSTOCLUSTER = "https://cgadataout.kusto.windows.net"
    DATABASE = "DevRelWorkArea"

    # Create table
    KCSB_DATA = KustoConnectionStringBuilder.with_aad_device_authentication(
        KUSTOCLUSTER)
    DESTINATION_TABLE = "RepoContributors"
    DESTINATION_TABLE_COLUMN_MAPPING = "RepoContributors_CSV_Mapping"

    KUSTO_CLIENT = KustoClient(KCSB_DATA)
    DROP_TABLE_IF_EXIST = ".drop table RepoContributors ifexists"
    RESPONSE = KUSTO_CLIENT.execute_mgmt(DATABASE, DROP_TABLE_IF_EXIST)

    CREATE_TABLE_COMMAND = ".create table RepoContributors (Article: string, Contributors: int64, Data: string)"
    RESPONSE = KUSTO_CLIENT.execute_mgmt(DATABASE, CREATE_TABLE_COMMAND)

    print("RepoContributors table is created")

    # Create mapping

    CREATE_MAPPING_COMMAND = """.create table RepoContributors ingestion csv mapping 'RepoContributors_CSV_Mapping' '[{"Name": "Article","datatype": "string","Ordinal": 0},{"Name": "Contributors","datatype": "int64","Ordinal": 1},{"Name": "Data","datatype": "string","Ordinal": 2}]'"""
    RESPONSE = KUSTO_CLIENT.execute_mgmt(DATABASE, CREATE_MAPPING_COMMAND)

    print("mapping is created")

    # Ingest

    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    ingestion_props = IngestionProperties(
        database="DevRelWorkArea",
        table="RepoContributors",
        dataFormat=DataFormat.CSV,
        ingestByTags=[Tag],
        dropByTags=[Tag],
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        additionalProperties={'ignoreFirstRecord': 'true'})

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
        INGESTCLUSTER)
    client = KustoIngestClient(kcsb)

    # ingest from file
    file_descriptor = FileDescriptor(
        r"D:\test\Results\log_data_merge\merge_microsoftdocs_sql-docs-pr.txt",
        3333)  # 3333 is the raw size of the data in bytes.
    client.ingest_from_file(file_descriptor,
                            ingestion_properties=ingestion_props)
    # if status updates are required, something like this can be done
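    # A minimal sketch (commented out), assuming `import time` and that
    # KustoIngestStatusQueues is imported; success notifications only arrive if
    # reportLevel=ReportLevel.FailuresAndSuccesses is set above:
    # status_queues = KustoIngestStatusQueues(client)
    # while status_queues.success.is_empty() and status_queues.failure.is_empty():
    #     time.sleep(1)
    # print(status_queues.success.pop(10))
    # print(status_queues.failure.pop(10))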

    return 1
    def test_sanity_ingest_from_file(self, mock_uuid,
                                     mock_put_message_in_queue,
                                     mock_upload_blob_from_stream, mock_aad):
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json")

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=DataFormat.CSV)

        # ensure test can work when executed from within directories
        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = []
        for path_part in path_parts:
            if path_part not in current_dir:
                missing_path_parts.append(path_part)

        file_path = os.path.join(current_dir, *missing_path_parts)

        ingest_client.ingest_from_file(
            file_path, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        queued_message_json = json.loads(
            put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__dataset.csv.gz?"
        # mock_upload_blob_from_stream
        # not checking the query string because it can change order, just checking it's there
        assert queued_message_json["BlobPath"].startswith(expected_url) is True
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        assert type(upload_blob_kwargs["data"]) == io.BytesIO
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid,
                                          mock_put_message_in_queue,
                                          mock_upload_blob_from_stream):
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json")

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=DataFormat.CSV)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(
            df, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        queued_message_json = json.loads(
            put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_64.csv.gz?".format(
            id(df))
        # mock_upload_blob_from_stream
        # not checking the query string because it can change order, just checking it's there
        assert queued_message_json["BlobPath"].startswith(expected_url) is True
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        assert type(upload_blob_kwargs["data"]) == io.BufferedReader
def authenticate_to_kusto_ingress(cluster):
    """Authenticate and return kusto connection client"""
    kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
        cluster, CLIENT_ID, CLIENT_SECRET, AUTHORITY_ID)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    kusto_client = KustoIngestClient(kcsb)
    return kusto_client
Example #11
def getKustoClient(kcsb):
    client = None
    try:
        client = KustoIngestClient(kcsb)
    except Exception as e:
        logging.error("Could not initialize Kusto Client:%s"%e)
    return client
Example #12
    def test_pop_unbalanced_queues(self):
        client = KustoIngestClient("some-cluster")

        fake_receive = fake_receive_factory(
            lambda queue_name, messages_per_page=1:
            [mock_message(success=False) for _ in range(0, messages_per_page)]
            if "1" in queue_name else [])
        with mock.patch.object(
                client._resource_manager,
                "get_successful_ingestions_queues"), mock.patch.object(
                    client._resource_manager, "get_failed_ingestions_queues"
                ) as mocked_get_failed_qs, mock.patch.object(
                    QueueClient,
                    "receive_messages",
                    autospec=True,
                    side_effect=fake_receive,
                ) as q_receive_mock, mock.patch.object(QueueClient,
                                                       "delete_message",
                                                       return_value=None):

            fake_failed_queue1 = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            fake_failed_queue2 = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_failed_qs.return_value = [
                fake_failed_queue1, fake_failed_queue2
            ]

            qs = KustoIngestStatusQueues(client)

            get_failure_actual = qs.failure.pop(6)

            assert len(get_failure_actual) == 6

            for m in get_failure_actual:
                assert isinstance(m, FailureMessage)

            assert q_receive_mock.call_count == 3

            actual = {}

            for call_args in q_receive_mock.call_args_list:
                actual[call_args[0][0].queue_name] = actual.get(
                    call_args[0][0].queue_name,
                    0) + call_args[1]["messages_per_page"]

            assert actual[fake_failed_queue2.object_name] + actual[
                fake_failed_queue1.object_name] == (4 + 4 + 6)
Example #13
def get_kusto_client() -> KustoIngestClient:
    cluster = "https://ingest-" + os.environ['KUSTO_CLUSTER'] + ".kusto.windows.net"
    username = os.environ['KUSTO_USERNAME']
    password = os.environ['KUSTO_PASSWORD']
    authority_id = os.environ['KUSTO_TENANT_ID']

    kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(cluster, username, password, authority_id)

    return KustoIngestClient(kcsb)
    def test_sanity_ingest(self, mock_post, mock_aad, mock_block_blob,
                           mock_queue):
        """Test simple ingest"""

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")

        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   dataFormat=DataFormat.csv)

        file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")

        ingest_client.ingest_from_multiple_files(
            [file_path],
            delete_sources_on_success=False,
            ingestion_properties=ingestion_properties)
Example #15
def initialize_kusto_client():
    """initialize kusto client
    """
    global KUSTO_INGESTION_CLIENT
    if not KUSTO_INGESTION_CLIENT:
        kcsb_ingest = KustoConnectionStringBuilder.with_aad_application_key_authentication( \
            INGESTION_SERVER_URI, APP_CLIENT_ID, APP_CLIENT_SECRETS, APP_AAD_TENANT_ID)
        KUSTO_INGESTION_CLIENT = KustoIngestClient(kcsb_ingest)
        logging.info(f"{LOG_MESSAGE_HEADER} Build KUSTO_INGESTION_CLIENT")
    else:
        logging.info(f"{LOG_MESSAGE_HEADER} KUSTO_INGESTION_CLIENT exist")
Example #16
    def __init__(self, db_name: str):
        """Initialize a Kusto report DB connector.

        Args:
            db_name: The Kusto database to connect to.
        """
        self.db_name = db_name

        ingest_cluster = os.getenv("TEST_REPORT_INGEST_KUSTO_CLUSTER")
        tenant_id = os.getenv("TEST_REPORT_AAD_TENANT_ID")
        service_id = os.getenv("TEST_REPORT_AAD_CLIENT_ID")
        service_key = os.getenv("TEST_REPORT_AAD_CLIENT_KEY")

        if not ingest_cluster or not tenant_id or not service_id or not service_key:
            raise RuntimeError("Could not load Kusto Credentials from environment")

        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(ingest_cluster,
                                                                                    service_id,
                                                                                    service_key,
                                                                                    tenant_id)
        self._ingestion_client = KustoIngestClient(kcsb)

        """
            Kusto performance depends on the work load of cluster, to improve the high availability of test result data service 
            by hosting a backup cluster, which is optional. 
        """
        ingest_cluster = os.getenv("TEST_REPORT_INGEST_KUSTO_CLUSTER_BACKUP")
        tenant_id = os.getenv("TEST_REPORT_AAD_TENANT_ID_BACKUP")
        service_id = os.getenv("TEST_REPORT_AAD_CLIENT_ID_BACKUP")
        service_key = os.getenv("TEST_REPORT_AAD_CLIENT_KEY_BACKUP")

        if not ingest_cluster or not tenant_id or not service_id or not service_key:
            print("Could not load backup Kusto Credentials from environment")
            self._ingestion_client_backup = None
        else:
            kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(ingest_cluster,
                                                                                        service_id,
                                                                                        service_key,
                                                                                        tenant_id)
            self._ingestion_client_backup = KustoIngestClient(kcsb)
    def test_ingest_from_file_wrong_endpoint(self):
        responses.add_callback(
            responses.POST, "https://somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_error_callback, content_type="application/json"
        )

        ingest_client = KustoIngestClient("https://somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = []
        for path_part in path_parts:
            if path_part not in current_dir:
                missing_path_parts.append(path_part)

        file_path = os.path.join(current_dir, *missing_path_parts)

        with self.assertRaises(KustoInvalidEndpointError) as ex:
            ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)
        self.assertEqual(
            ex.exception.args[0],
            "You are using 'DataManagement' client type, but the provided endpoint is of ServiceType 'Engine'. Initialize the client with the appropriate endpoint URI: 'https://ingest-somecluster.kusto.windows.net'",
            "Expected exception was not raised",
        )
Example #18
    def setup_class(cls):
        # DM CS can be composed from engine CS
        cls.engine_cs = os.environ.get("ENGINE_CONNECTION_STRING")
        cls.dm_cs = os.environ.get(
            "DM_CONNECTION_STRING") or cls.engine_cs.replace(
                "//", "//ingest-")
        cls.app_id = os.environ.get("APP_ID")
        cls.app_key = os.environ.get("APP_KEY")
        cls.auth_id = os.environ.get("AUTH_ID")
        cls.test_db = os.environ.get("TEST_DATABASE")

        if not all([
                cls.engine_cs, cls.dm_cs, cls.app_id, cls.app_key, cls.auth_id,
                cls.test_db
        ]):
            raise unittest.SkipTest("E2E environment is missing")

        # Init clients
        python_version = "_".join([str(v) for v in sys.version_info[:3]])
        cls.test_table = "python_test_{0}_{1}_{2}".format(
            python_version, str(int(time.time())), random.randint(1, 100000))
        cls.client = KustoClient(cls.engine_kcsb_from_env())
        cls.ingest_client = KustoIngestClient(cls.dm_kcsb_from_env())
        cls.streaming_ingest_client = KustoStreamingIngestClient(
            cls.engine_kcsb_from_env())

        cls.input_folder_path = cls.get_file_path()

        cls.csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv")
        cls.tsv_file_path = os.path.join(cls.input_folder_path, "dataset.tsv")
        cls.zipped_csv_file_path = os.path.join(cls.input_folder_path,
                                                "dataset.csv.gz")
        cls.json_file_path = os.path.join(cls.input_folder_path,
                                          "dataset.json")
        cls.zipped_json_file_path = os.path.join(cls.input_folder_path,
                                                 "dataset.jsonz.gz")

        cls.current_count = 0

        cls.client.execute(
            cls.test_db,
            ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)"
            .format(cls.test_table),
        )
        cls.client.execute(
            cls.test_db,
            ".create table {0} ingestion json mapping 'JsonMapping' {1}".
            format(cls.test_table, cls.test_table_json_mapping_reference()))
Example #19
    def test_isempty(self):
        client = KustoIngestClient("some-cluster")

        fake_peek = fake_peek_factory(lambda queue_name, num_messages=1: [
            mock_message(success=True) for _ in range(0, num_messages)
        ] if "qs" in queue_name else [])
        with mock.patch.object(client._resource_manager,
                               "get_successful_ingestions_queues"
                               ) as mocked_get_success_qs, mock.patch.object(
                                   client._resource_manager,
                                   "get_failed_ingestions_queues"
                               ) as mocked_get_failed_qs, mock.patch.object(
                                   QueueClient,
                                   "peek_messages",
                                   autospec=True,
                                   side_effect=fake_peek) as q_mock:
            fake_failed_queue = _ResourceUri(
                "mocked_storage_account1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_name",
                ENDPOINT_SUFFIX,
            )
            fake_success_queue = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [fake_failed_queue]

            qs = KustoIngestStatusQueues(client)

            assert qs.success.is_empty() is False
            assert qs.failure.is_empty() is True

            assert q_mock.call_count == 2
            assert q_mock.call_args_list[0][1]["max_messages"] == 2
            assert q_mock.call_args_list[1][1]["max_messages"] == 2
Example #20
    def __init__(self, db_name: str):
        """Initialize a Kusto report DB connector.

        Args:
            db_name: The Kusto database to connect to.
        """
        self.db_name = db_name

        ingest_cluster = os.getenv("TEST_REPORT_INGEST_KUSTO_CLUSTER")
        tenant_id = os.getenv("TEST_REPORT_AAD_TENANT_ID")
        service_id = os.getenv("TEST_REPORT_AAD_CLIENT_ID")
        service_key = os.getenv("TEST_REPORT_AAD_CLIENT_KEY")

        if not ingest_cluster or not tenant_id or not service_id or not service_key:
            raise RuntimeError(
                "Could not load Kusto Credentials from environment")

        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            ingest_cluster, service_id, service_key, tenant_id)
        self._ingestion_client = KustoIngestClient(kcsb)
    def test_isempty(self, mocked_q_peek_messages):
        client = KustoIngestClient("some-cluster")
        with mock.patch.object(
                client._resource_manager, "get_successful_ingestions_queues"
        ) as mocked_get_success_qs, mock.patch.object(
                client._resource_manager,
                "get_failed_ingestions_queues") as mocked_get_failed_qs:

            fake_failed_queue = _ResourceUri("mocked_storage_account1",
                                             "queue", "mocked_qf_name",
                                             "mocked_sas")
            fake_success_queue = _ResourceUri("mocked_storage_account2",
                                              "queue", "mocked_qs_name",
                                              "mocked_sas")

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [fake_failed_queue]

            mocked_q_peek_messages.side_effect = (
                lambda queue_name, num_messages=1: []
                if queue_name == fake_failed_queue.object_name else
                [QueueMessage() for _ in range(0, num_messages)])

            qs = KustoIngestStatusQueues(client)

            assert qs.success.is_empty() == False
            assert qs.failure.is_empty() == True

            assert mocked_q_peek_messages.call_count == 2
            assert mocked_q_peek_messages.call_args_list[0][0][
                0] == fake_success_queue.object_name
            assert mocked_q_peek_messages.call_args_list[0][1][
                "num_messages"] == 2

            assert mocked_q_peek_messages.call_args_list[1][0][
                0] == fake_failed_queue.object_name
            assert mocked_q_peek_messages.call_args_list[1][1][
                "num_messages"] == 2
Example #22
kcsb = KustoConnectionStringBuilder.with_aad_application_certificate_authentication(
    cluster, client_id, PEM, thumbprint, authority_id
)

# In case you want to authenticate with AAD username and password
username = "******"
password = "******"
kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(cluster, username, password, authority_id)

# In case you want to authenticate with AAD device code.
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized.
# It is highly recommended to create one instance and use it for all of your queries.
kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

# The authentication method will be taken from the chosen KustoConnectionStringBuilder.
client = KustoIngestClient(kcsb)

# there are more options for authenticating - see azure-kusto-data samples

##################################################################
##                        INGESTION                             ##
##################################################################

# there are a lot of useful properties, make sure to go over docs and check them out
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    dataFormat=DataFormat.CSV,
    # in case status updates for success are also required
    # reportLevel=ReportLevel.FailuresAndSuccesses,
    # in case a mapping is required
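    # a minimal sketch, assuming a pre-created mapping on the target table;
    # "{mapping_name}" is a placeholder, not a real mapping name
    # mappingReference="{mapping_name}",
)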
Example #23
            JsonColumnMapping(columnName="xtextWithNulls",
                              jsonPath="$.xtextWithNulls",
                              cslDataType="string"))
        mappings.append(
            JsonColumnMapping(columnName="xdynamicWithNulls",
                              jsonPath="$.xdynamicWithNulls",
                              cslDataType="dynamic"))
        return mappings


engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://toshetah.kusto.windows.net")
dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://ingest-toshetah.kusto.windows.net")
client = KustoClient(engine_kcsb)
ingest_client = KustoIngestClient(dm_kcsb)
ingest_status_q = KustoIngestStatusQueues(ingest_client)
client.execute("PythonTest", ".drop table Deft ifexists")


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    csv_ingest_props = IngestionProperties(
        "PythonTest",
        "Deft",
        dataFormat=DataFormat.csv,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")
Example #24
               COLUMN_MAPPING_NAME=column_mapping_name,
               COLUMN_MAPPING=json.dumps(obj.get('COLUMN_MAPPING')))

    # Drop table if exists, then create.
    kusto_client.execute_mgmt(kusto_database, drop_table_if_exists_command)
    kusto_client.execute_mgmt(kusto_database, create_table_command)

    #NOTE: this may be backwards...
    try:
        response = kusto_client.execute_mgmt(kusto_database,
                                             check_for_mapping_command)
    except:  # Because check_for_mapping_command should throw error if mapping already exists
        response = kusto_client.execute_mgmt(kusto_database,
                                             create_mapping_command)

    ingestion_client = KustoIngestClient(kcsb_ingest)

    # All ingestion properties: https://docs.microsoft.com/en-us/azure/kusto/management/data-ingestion/#ingestion-properties
    ingestion_props = IngestionProperties(
        reportLevel=reportLevel,
        database=kusto_database,
        table=destination_table,
        dataFormat=DataFormat.csv,
        mappingReference=column_mapping_name,
        additionalProperties={'ignoreFirstRecord': 'true'})
    blobProps = BlockBlobService.get_blob_properties(blob_service, container,
                                                     file_name).properties
    file_size = blobProps.content_length
    blob_descriptor = BlobDescriptor(
        blob_path, file_size)  # Raw size of the data in bytes
Example #25
from branch.path_app_branch import *


def authenticate_kusto(kusto_cluster):
    tenant_id = '72f988bf-86f1-41af-91ab-2d7cd011db47'
    KCSB = KustoConnectionStringBuilder.with_aad_device_authentication(
        kusto_cluster)
    KCSB.authority_id = tenant_id
    return KustoClient(KCSB), KCSB


# Query Kusto
cga_cluster = 'https://cgadataout.kusto.windows.net'
ingest_cluster = "https://ingest-cgadataout.kusto.windows.net"
cga_client = authenticate_kusto(cga_cluster)[0]
ingest_client = KustoIngestClient(authenticate_kusto(ingest_cluster)[1])
ls = [cga_client, ingest_client]


def Ingest(Tag):
    ingestion_props = IngestionProperties(
        database="DevRelWorkArea",
        table="RepoContributors",
        dataFormat=DataFormat.CSV,
        ingestByTags=[Tag],
        dropByTags=[Tag],
        mappingReference="RepoContributors_CSV_Mapping",
        reportLevel=ReportLevel.FailuresAndSuccesses,
        additionalProperties={'ignoreFirstRecord': 'true'})

    # Modify the local path here to load the data source
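    # a minimal sketch, reusing the local export path from Example #7 above;
    # assumes FileDescriptor is imported from azure.kusto.ingest
    file_descriptor = FileDescriptor(
        r"D:\test\Results\log_data_merge\merge_microsoftdocs_sql-docs-pr.txt",
        3333)  # 3333 is the raw size of the data in bytes
    ingest_client.ingest_from_file(file_descriptor,
                                   ingestion_properties=ingestion_props)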
Example #26
    FileDescriptor,
    BlobDescriptor,
    DataFormat,
    ReportLevel,
)

# there are a lot of useful properties, make sure to go over docs and check them out
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    dataFormat=DataFormat.csv,
    # in case status updates for success are also required
    # reportLevel=ReportLevel.FailuresAndSuccesses,
)
client = KustoIngestClient(
    KustoConnectionStringBuilder.with_aad_device_authentication(
        "https://ingest-{cluster_name}.kusto.windows.net"))

# there are more options for authenticating - see azure-kusto-data samples

##################################################################
##                        INGESTION                             ##
##################################################################

# ingest from file
file_descriptor = FileDescriptor(
    "{filename}.csv", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)
client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props)

# ingest from blob
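# a minimal sketch, assuming a SAS-authorized blob URL; the braced placeholders
# follow the same convention as the file example above
blob_descriptor = BlobDescriptor(
    "https://{storage_account}.blob.core.windows.net/{container}/{filename}.csv{sas_token}",
    10)  # 10 is the raw size of the data in bytes
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)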
Example #27
def update_ADX_ingest_status(tc):

    KCSB_INGEST = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    KCSB_INGEST.authority_id = APP_AAD_TENANT_ID
    INGESTION_CLIENT = KustoIngestClient(KCSB_INGEST)
    qs = KustoIngestStatusQueues(INGESTION_CLIENT)

    run_id = (str(uuid.uuid4()))[31:].upper()
    MAX_BACKOFF = 8
    backoff = 1

    total_queue_success_messages = 0
    while True:
        ################### NOTICE ####################
        # in order to get success status updates,
        # make sure ingestion properties set the
        # reportLevel=ReportLevel.FailuresAndSuccesses.
        if qs.success.is_empty() and qs.failure.is_empty():
            time.sleep(backoff)

            if backoff == 1 and total_queue_success_messages != 0:
                print(
                    "{} RUN_ID:{}  Processed {} message in this batch ".format(
                        LOG_MESSAGE_HEADER, run_id,
                        total_queue_success_messages))

            backoff = min(backoff * 2, MAX_BACKOFF)
            if (backoff < MAX_BACKOFF):
                #print("{} No new messages. backing off for {} seconds".format(LOG_MESSAGE_HEADER,backoff))
                continue
            if (backoff == MAX_BACKOFF):
                #print("{} Reach max waiting time {}, exit.".format(LOG_MESSAGE_HEADER,backoff))
                break

        backoff = 1

        success_messages = qs.success.pop(15)
        failure_messages = qs.failure.pop(15)

        total_success = 0
        total_failure = 0
        if success_messages is not None:
            if (len(success_messages) > 0):
                tc.track_trace("{} Get {} success ingest messages ".format(
                    LOG_MESSAGE_HEADER, str(len(success_messages))))
                total_success = len(success_messages)
        if failure_messages is not None:
            if (len(failure_messages) > 0):
                tc.track_trace("{} Get {} failure  ingest messages ".format(
                    LOG_MESSAGE_HEADER, str(len(failure_messages))))
                total_failure = len(failure_messages)
        tc.flush()
        total_queue_success_messages += len(success_messages)
        count_success = 0
        count_failure = 0
        for smsg in success_messages:
            file_path = get_file_path(smsg.IngestionSourcePath)
            container_name = get_container_name(smsg.IngestionSourcePath)
            count_success += 1
            log_msg = "{} SUCCESS TO INGEST TO ADX <{}> -[{}/{}/{}] , Time: {}, vm_uuid: {}, source_id:{},  file path: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_success),
                str(total_success),
                str(total_queue_success_messages), smsg.SucceededOn,
                get_vm_uuid_from_filename(file_path), smsg.IngestionSourceId,
                file_path)
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'SUCCESS TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': smsg.IngestionSourceId
                }, {})
            tc.flush()
            update_COSMOS_status(COSMOS_CLIENT, file_path, smsg.SucceededOn,
                                 SUCCESS_STATUS, str(smsg),
                                 get_vm_uuid_from_filename(file_path),
                                 smsg.IngestionSourceId, container_name, tc,
                                 count_success, run_id)

            telemetry_block_blob_service = BlockBlobService(
                account_name=SOURCE_TELEMETRY_BLOB_ACCOUNT,
                account_key=SOURCE_TELEMETRY_FILE_BLOB_KEY)

            target_file_path = ''
            if (PROCESSED_TELEMETRY_FOLDER.endswith('/')):
                target_file_path = PROCESSED_TELEMETRY_FOLDER + file_path
            else:
                target_file_path = PROCESSED_TELEMETRY_FOLDER + '/' + file_path

            move_processed_file(telemetry_block_blob_service, container_name,
                                file_path, container_name, target_file_path,
                                tc)
            tc.track_trace(
                '{} DONE ADX INGESTION PROCESS <{}> -[{}/{}/{}], File Moved to processed folder {} , vm_uuid: {}, file path: {}'
                .format(LOG_MESSAGE_HEADER, run_id, str(count_success),
                        str(total_success),
                        str(total_queue_success_messages), target_file_path,
                        get_vm_uuid_from_filename(file_path), file_path))
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'DONE ADX INGESTION PROCESS',
                    'moved_file_path': target_file_path,
                    'source_file_path': file_path
                }, {})
            tc.flush()
            #smsgjson=json.loads(smsg)
            #print (smsgjson['IngestionSourcePath'])
            #print (smsgjson['SucceededOn'])
            print("{} IngestionSourcePath: {}".format(
                LOG_MESSAGE_HEADER, smsg.IngestionSourcePath))
            print(smsg.SucceededOn)
        for fmsg in failure_messages:
            container_name = get_container_name(fmsg.IngestionSourcePath)
            file_path = get_file_path(fmsg.IngestionSourcePath)
            count_failure += 1
            log_msg = "{} FAILED TO INGEST TO ADX <{}> -[{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, container:{},  file path: {}, message: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_failure),
                str(total_failure), fmsg.FailedOn,
                get_vm_uuid_from_filename(file_path), fmsg.IngestionSourceId,
                container_name, file_path, str(fmsg))
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_FAILURE_EVENT_NAME, {
                    'MESSAGE': 'FAILED TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': fmsg.IngestionSourceId
                }, {})
            tc.flush()
            update_COSMOS_status(COSMOS_CLIENT, file_path, fmsg.FailedOn,
                                 FAILURE_STATUS, str(fmsg),
                                 get_vm_uuid_from_filename(file_path),
                                 fmsg.IngestionSourceId, container_name, tc,
                                 count_failure, run_id)
Example #28
def main():
    
    # Kusto cluster inputs
    data = os.environ["INPUT_DATA"]
    tenantId = os.environ["INPUT_TENANTID"]
    databaseName = os.environ["INPUT_DATABASE"]
    clusterName = os.environ["INPUT_CLUSTERNAME"]
    region = os.environ["INPUT_CLUSTERREGION"]
    clientId = os.environ["INPUT_CLIENTID"]
    clientSecret = os.environ["INPUT_CLIENTSECRET"]
    destinationTable = os.environ["INPUT_TABLE"]
    mapping = os.environ['INPUT_MAPPING']

    try:
        print(data)
        # file creation 

        fileName = "sample.json"
        filePath = os.path.join(os.environ["GITHUB_WORKSPACE"], fileName)

        deploymentData = {}
        deploymentData["Timestamp"] = str(datetime.now())
        deploymentData["DeploymentDetails"] = data

        with open(filePath, "w") as targetFile:
            json.dump(deploymentData, targetFile)

        # cluster client connection and auth

        httpsPrefix = "https://"
        suffixKustoUri = "kusto.windows.net:443/"
        clusterIngestUri = "{0}ingest-{1}.{2}.{3}".format(httpsPrefix, clusterName, region, suffixKustoUri)

        kcsb_ingest = KustoConnectionStringBuilder.with_aad_application_key_authentication(
                       clusterIngestUri, clientId, clientSecret, tenantId)

        print(mapping)

        # Cluster ingestion parameters
        ingestionClient = KustoIngestClient(kcsb_ingest)
        ingestionProperties = IngestionProperties(
            database=databaseName,
            table=destinationTable,
            dataFormat=DataFormat.JSON,
            ingestion_mapping_reference=mapping,
            report_level=ReportLevel.FailuresAndSuccesses)
        fileDescriptor = FileDescriptor(filePath, 1000)

        print('Payload to dump')
        with open(filePath, "r") as targetFile:
            parsed = json.load(targetFile)
            print(json.dumps(parsed, indent=2, sort_keys=True))

        ingestionClient.ingest_from_file(fileDescriptor, ingestion_properties=ingestionProperties)

        print('Queued up ingestion with Azure Data Explorer')

        # Remove the temporary file
        os.remove(filePath)
        """
        # Repeated pinging to wait for success/failure message
        qs = KustoIngestStatusQueues(ingestionClient)

        # Interval to ping
        MAX_BACKOFF = 5
        backoff = 1
        while True:
            if qs.success.is_empty() and qs.failure.is_empty():
                time.sleep(backoff)
                backoff = min(backoff * 2, MAX_BACKOFF)
                print("No new messages. backing off for {} seconds".format(backoff))
                continue

            backoff = 1

            success_messages = qs.success.pop(10)
            failure_messages = qs.failure.pop(10)

            pprint.pprint("SUCCESS : {}".format(success_messages))
            pprint.pprint("FAILURE : {}".format(failure_messages))
            break
        """
    except Exception:
        # re-raise the original exception rather than wrapping it, preserving its type and traceback
        raise
Example #29
            JsonColumnMapping(columnName="xtime",
                              jsonPath="$.xtime",
                              cslDataType="timespan"))
        mappings.append(
            JsonColumnMapping(columnName="xtextWithNulls",
                              jsonPath="$.xtextWithNulls",
                              cslDataType="string"))
        mappings.append(
            JsonColumnMapping(columnName="xdynamicWithNulls",
                              jsonPath="$.xdynamicWithNulls",
                              cslDataType="dynamic"))
        return mappings


client = KustoClient("https://toshetah.kusto.windows.net")
ingest_client = KustoIngestClient("https://ingest-toshetah.kusto.windows.net")
ingest_status_q = KustoIngestStatusQueues(ingest_client)
client.execute("PythonTest", ".drop table Deft ifexists")


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    csv_ingest_props = IngestionProperties(
        "PythonTest",
        "Deft",
        dataFormat=DataFormat.csv,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")
Example #30
    IngestionProperties,
    FileDescriptor,
    BlobDescriptor,
    DataFormat,
    ReportLevel,
)

# there are a lot of useful properties, make sure to go over docs and check them out
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    dataFormat=DataFormat.csv,
    # in case status updates for success are also required
    # reportLevel=ReportLevel.FailuresAndSuccesses,
)
client = KustoIngestClient("https://ingest-{cluster_name}.kusto.windows.net")

# there are more options for authenticating - see azure-kusto-data samples

##################################################################
##                        INGESTION                             ##
##################################################################


# ingest from file
file_descriptor = FileDescriptor("{filename}.csv", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)
client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props)


# ingest from blob
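# a minimal sketch, assuming a SAS-authorized blob URL and `import uuid`; the optional
# source_id makes the ingestion easy to correlate in the status queues later
blob_descriptor = BlobDescriptor(
    "https://{storage_account}.blob.core.windows.net/{container}/{filename}.csv{sas_token}",
    10,  # 10 is the raw size of the data in bytes
    str(uuid.uuid4()))
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)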