def test_quick_query_iter_records_with_json_serialization_setting(self, resource_group, location, storage_account,
                                                                      storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = DelimitedJsonDialect(delimiter=';')

        resp = blob_client.query_blob(
            "SELECT name from BlobStorage",
            on_error=on_error,
            blob_format=input_format,
            output_format=output_format)
        listdata = list(resp.records())

        self.assertEqual(len(errors), 0)
        self.assertEqual(resp._size, len(data))
        self.assertEqual(listdata, [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b''])
        self._teardown(bsc)

    def test_quick_query_readall_with_fatal_error_ignore(self, resource_group, location, storage_account,
                                                         storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = blob_client.query_blob(
            "SELECT * from BlobStorage",
            blob_format=input_format,
            output_format=output_format)
        query_result = resp.readall()
        self._teardown(bsc)

# Example 3

    def test_quick_query_iter_records_with_fatal_error_ignore(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        # data3 is defined here but is never added to the uploaded payload below
        data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \
                b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \
                b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \
                b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \
                b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = blob_client.query_blob("SELECT * from BlobStorage",
                                      blob_format=input_format,
                                      output_format=output_format)

        for record in resp.records():
            print(record)
        self._teardown(bsc)

# Example 4

    def test_quick_query_with_only_input_json_serialization_setting(self, storage_account_name, storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account_name, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        data = data1 + data2 + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        errors = []
        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = None

        resp = blob_client.query_blob(
            "SELECT name from BlobStorage",
            on_error=on_error,
            blob_format=input_format,
            output_format=output_format)
        query_result = resp.readall()

        self.assertEqual(len(errors), 0)
        self.assertEqual(resp._size, len(data))
        self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
        self._teardown(bsc)

# Example 5

def query(a_query=store_conn['query_sql'],
          a_blob_url=store_conn['file_csv'],
          a_sas_key=store_conn['access_key']):
    """ Helper to query json and/or csv files on a Blob/Datalake """
    result_set = []
    start = time.perf_counter()
    # Get the file extension/type
    a_file_name, a_file_type = get_ext(a_blob_url)

    # Build the client from the full blob URL with the SAS token appended
    blob_client = BlobClient.from_blob_url(blob_url=a_blob_url + a_sas_key)

    if a_file_type == '.csv':
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedTextDialect(has_header=True),
            encoding='utf-8')
    elif a_file_type == '.json':
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedJsonDialect(delimiter=' '),
            encoding='utf-8',
            output_format=DelimitedJsonDialect(delimiter='\n'))
    elif a_file_type == '.parquet':
        qa_reader = None
        print("We'll do something about this")
    else:
        qa_reader = None
        print(f"Sorry, can't query a {a_file_type} file type")

    end = time.perf_counter()
    # Elapsed seconds are measured before the records are read, so this is only a rough figure
    print(f"Time taken to get results {end - start} seconds")

    if qa_reader is None:
        print("No result found. Sorry human, better luck nextime ¯\_(ツ)_/¯")
    else:
        for row in qa_reader.records():
            if row:
                result_set.append(row)

    return result_set
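
# Hypothetical usage sketch for the helper above; the query, blob URL and SAS token
# below are placeholders rather than values from this sample, so substitute your own:
#
#     rows = query("SELECT * FROM BlobStorage",
#                  "https://<account>.blob.core.windows.net/data/sample.csv",
#                  "?sv=...&sig=...")
#     for row in rows:
#         print(row)
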
def main():
    try:
        CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']

    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    blob_service_client = BlobServiceClient.from_connection_string(
        CONNECTION_STRING)
    container_name = "quickquerycontainer"
    container_client = blob_service_client.get_container_client(container_name)
    try:
        container_client.create_container()
    except Exception:
        # the container may already exist
        pass
    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    blob_client = blob_service_client.get_blob_client(container_name,
                                                      "csvfile")
    with open("./sample-blobs/quick_query.csv", "rb") as stream:
        blob_client.upload_blob(stream, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from BlobStorage"
    input_format = DelimitedTextDialect(delimiter=',',
                                        quotechar='"',
                                        lineterminator='\n',
                                        escapechar="",
                                        has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = blob_client.query_blob(query_expression,
                                    on_error=on_error,
                                    blob_format=input_format,
                                    output_format=output_format)
    content = reader.readall()
    # [END query]
    print(content)

    container_client.delete_container()
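

# Assumed entry point so the sample can be run directly; main() is otherwise never invoked
if __name__ == "__main__":
    main()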