Example #1
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid,
                                          mock_put_message_in_queue,
                                          mock_create_blob_from_path):
        """Ingest a two-row DataFrame and verify both the queued ingestion
        message and the temp-blob upload made via ``create_blob_from_path``.

        The mock arguments are injected by patch decorators on the enclosing
        class/test and pin pid/time/uuid so the generated blob name is stable.
        """
        import tempfile

        # Stub the management endpoint the ingest client queries for
        # ingestion resources (queues / storage accounts).
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json",
        )

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   dataFormat=DataFormat.csv)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(
            df, ingestion_properties=ingestion_properties)

        # Exactly one message must be posted to the aggregation queue.
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        assert put_message_in_queue_mock_kwargs[
            "queue_name"] == "readyforaggregation-secured"
        # The queue payload is base64-encoded JSON; decode it before parsing.
        queued_message = base64.b64decode(
            put_message_in_queue_mock_kwargs["content"].encode(
                "utf-8")).decode("utf-8")
        queued_message_json = json.loads(queued_message)
        assert (
            queued_message_json["BlobPath"] ==
            "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_100_64.csv.gz?sas"
        )
        assert queued_message_json["DatabaseName"] == "database"
        # `is False` / `is True` per PEP 8 for singleton comparisons.
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        # Verify the blob upload: the DataFrame is written as a gzipped CSV
        # into the system temp dir and uploaded to the temp-storage container.
        create_blob_from_path_mock_kwargs = mock_create_blob_from_path.call_args_list[
            0][1]

        assert create_blob_from_path_mock_kwargs[
            "container_name"] == "tempstorage"
        assert create_blob_from_path_mock_kwargs["file_path"] == os.path.join(
            tempfile.gettempdir(), "df_100_64.csv.gz")
        assert (create_blob_from_path_mock_kwargs["blob_name"] ==
                "database__table__1111-111111-111111-1111__df_100_64.csv.gz")
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid,
                                          mock_put_message_in_queue,
                                          mock_upload_blob_from_stream):
        """Ingest a two-row DataFrame and verify the queued ingestion message
        and the stream upload made via ``upload_blob_from_stream`` (newer SDK).

        The mock arguments are injected by patch decorators on the enclosing
        class/test and pin pid/time/uuid so the generated blob name is stable.
        """
        # Stub the management endpoint the ingest client queries for
        # ingestion resources (queues / storage accounts).
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=request_callback,
            content_type="application/json")

        ingest_client = KustoIngestClient(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=DataFormat.CSV)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(
            df, ingestion_properties=ingestion_properties)

        # Exactly one message must be posted to the aggregation queue.
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[
            0][1]

        queued_message_json = json.loads(
            put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_64.csv.gz?".format(
            id(df))
        # The SAS query string's parameter order can vary, so only check the
        # prefix and that a non-empty query string follows it.
        assert queued_message_json["BlobPath"].startswith(expected_url)
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        # isinstance rather than exact type comparison (PEP 8 / idiomatic).
        assert isinstance(upload_blob_kwargs["data"], io.BufferedReader)
Example #3
client.ingest_from_file("(unknown).csv", ingestion_properties=ingestion_props)


# ingest from blob
blob_descriptor = BlobDescriptor("https://{path_to_blob}.csv.gz?sas", 10)  # 10 is the raw size of the data in bytes.
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]

df = pandas.DataFrame(data=rows, columns=fields)

client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)

# ingest a whole folder.
import os

path = "folder/path"
# os.listdir returns bare file names, not paths, so each name must be joined
# with the folder path; a plain loop is clearer than a comprehension used
# only for its side effects.
for file_name in os.listdir(path):
    client.ingest_from_file(os.path.join(path, file_name),
                            ingestion_properties=ingestion_props)

##################################################################
##                        INGESTION STATUS                      ##
##################################################################

# if status updates are required, something like this can be done
import pprint
import time
from azure.kusto.ingest.status import KustoIngestStatusQueues