    def test_quick_query_iter_records_with_nonfatal_error_ignore(self, resource_group, location, storage_account,
                                                                 storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='$',
            escapechar='\\',
        )
        resp = blob_client.query_blob(
            "SELECT RepoPath from BlobStorage",
            blob_format=input_format,
            output_format=output_format)
        data = list(resp.records())
        self.assertEqual(resp._size, len(CSV_DATA))
        self.assertEqual(len(data), 32)
        self._teardown(bsc)
    def test_quick_query_iter_records_with_serialization_setting(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(delimiter=',',
                                            quotechar='"',
                                            lineterminator='\n',
                                            escapechar='',
                                            has_header=False)
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='%',
                                             escapechar='\\')

        reader = blob_client.query_blob("SELECT * from BlobStorage",
                                        blob_format=input_format,
                                        output_format=output_format)
        data = []
        for record in reader.records():
            if record:
                data.append(record)

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(reader._size, reader._bytes_processed)
        self.assertEqual(len(data), 33)
        self._teardown(bsc)
    def test_quick_query_iter_output_records_excluding_headers(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(has_header=True)
        output_format = DelimitedTextDialect(has_header=False)
        reader = blob_client.query_blob("SELECT * from BlobStorage",
                                        blob_format=input_format,
                                        output_format=output_format)
        read_records = reader.records()

        # Assert first line does not include header
        data = next(read_records)
        self.assertEqual(
            data,
            b'App Configuration,azure-data-appconfiguration,1,appconfiguration,FALSE'
        )

        for record in read_records:
            data += record

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(reader._size, reader._bytes_processed)
        self.assertEqual(data, CSV_DATA.replace(b'\r\n', b'')[44:])
        self._teardown(bsc)

    def test_quick_query_readall_with_fatal_error_ignore(self, resource_group, location, storage_account,
                                                         storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = blob_client.query_blob(
            "SELECT * from BlobStorage",
            blob_format=input_format,
            output_format=output_format)
        query_result = resp.readall()
        self._teardown(bsc)
    def test_quick_query_iter_records_with_fatal_error_ignore(
            self, resource_group, location, storage_account,
            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(self.account_url(storage_account, "blob"),
                                credential=storage_account_key)
        self._setup(bsc)

        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data3 = b'{version:0,begin:1601-01-01T00:00:00.000Z,intervalSecs:3600,status:Finalized,config:' \
                b'{version:0,configVersionEtag:0x8d75ef460eb1a12,numShards:1,recordsFormat:avro,formatSchemaVersion:3,' \
                b'shardDistFnVersion:1},chunkFilePaths:[$blobchangefeed/log/00/1601/01/01/0000/],storageDiagnostics:' \
                b'{version:0,lastModifiedTime:2019-11-01T17:53:18.861Z,' \
                b'data:{aid:d305317d-a006-0042-00dd-902bbb06fc56}}}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(delimiter=';',
                                             quotechar="'",
                                             lineterminator='.',
                                             escapechar='\\')
        resp = blob_client.query_blob("SELECT * from BlobStorage",
                                      blob_format=input_format,
                                      output_format=output_format)

        for record in resp.records():
            print(record)
        self._teardown(bsc)
    def test_quick_query_iter_output_records_including_headers(self, storage_account_name, storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account_name, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        input_format = DelimitedTextDialect(has_header=True)
        reader = blob_client.query_blob("SELECT * from BlobStorage", blob_format=input_format)
        read_records = reader.records()

        # Assert the first record is the header row
        data = next(read_records)
        self.assertEqual(data, b'Service,Package,Version,RepoPath,MissingDocs')

        for record in read_records:
            data += record

        self.assertEqual(len(reader), len(CSV_DATA))
        self.assertEqual(reader._size, reader._bytes_processed)
        self.assertEqual(data, CSV_DATA.replace(b'\r\n', b''))
        self._teardown(bsc)

    def test_quick_query_readall_with_nonfatal_error_handler(self, resource_group, location, storage_account,
                                                             storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=True
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\',
        )
        resp = blob_client.query_blob(
            "SELECT RepoPath from BlobStorage",
            blob_format=input_format,
            output_format=output_format,
            on_error=on_error)
        query_result = resp.readall()

        # the error is because that line only has one column
        self.assertEqual(len(errors), 1)
        self.assertEqual(resp._size, len(CSV_DATA))
        self.assertTrue(len(query_result) > 0)
        self._teardown(bsc)

    def test_quick_query_readall_with_serialization_setting(self, resource_group, location, storage_account,
                                                            storage_account_key):
        # Arrange
        bsc = BlobServiceClient(
            self.account_url(storage_account, "blob"),
            credential=storage_account_key)
        self._setup(bsc)

        # upload the csv file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(CSV_DATA, overwrite=True)

        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedTextDialect(
            delimiter=',',
            quotechar='"',
            lineterminator='\n',
            escapechar='',
            has_header=False
        )
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )
        resp = blob_client.query_blob(
            "SELECT * from BlobStorage",
            on_error=on_error,
            blob_format=input_format,
            output_format=output_format)
        query_result = resp.readall()

        self.assertEqual(len(errors), 0)
        self.assertEqual(resp._size, len(CSV_DATA))
        self.assertEqual(query_result, CONVERTED_CSV_DATA)
        self._teardown(bsc)


def main():
    try:
        CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']

    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    blob_service_client = BlobServiceClient.from_connection_string(
        CONNECTION_STRING)
    container_name = "quickquerycontainer"
    container_client = blob_service_client.get_container_client(container_name)
    try:
        container_client.create_container()
    except Exception:
        # the container may already exist; ignore and reuse it
        pass
    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    blob_client = blob_service_client.get_blob_client(container_name,
                                                      "csvfile")
    with open("./sample-blobs/quick_query.csv", "rb") as stream:
        blob_client.upload_blob(stream, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from BlobStorage"
    input_format = DelimitedTextDialect(delimiter=',',
                                        quotechar='"',
                                        lineterminator='\n',
                                        escapechar="",
                                        has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = blob_client.query_blob(query_expression,
                                    on_error=on_error,
                                    blob_format=input_format,
                                    output_format=output_format)
    content = reader.readall()
    # [END query]
    print(content)
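
    # `content` is JSON-lines output (one JSON object per record, as requested
    # via DelimitedJsonDialect(delimiter='\n')). A minimal sketch, assuming
    # readall() returned bytes here, for loading each record as a dict:
    import json
    rows = [json.loads(line) for line in content.splitlines() if line.strip()]
    print(rows)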

    container_client.delete_container()


def query_a_csv_blob(a_query, a_blob_url):
    blob_client = BlobClient.from_blob_url(blob_url=a_blob_url)
    qa_reader = blob_client.query_blob(
        a_query,
        blob_format=DelimitedTextDialect(has_header=True),
        encoding='utf-8')
    return csv.reader(qa_reader.records())
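

# A hedged usage sketch for the helper above: the blob URL (which must carry a
# SAS token or point to a public blob, since BlobClient.from_blob_url gets no
# credential) and the column name are hypothetical.
def demo_query_a_csv_blob():
    url = "https://myaccount.blob.core.windows.net/data/people.csv?<sas-token>"
    for row in query_a_csv_blob("SELECT Name FROM BlobStorage", url):
        print(row)  # each row is a list of fields parsed by csv.reader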


# In the future, refactor to a class
# class nodbdb_client(Resource):
#     def get(self):
#         return "'cause 'tis a storage"

#     def query_a_blob(self, a_query, a_blob_url):
#         return 0

#     def insert_a_blob(self, a_blob, a_path):
#         return 0


def query(a_query=store_conn['query_sql'],
          a_blob_url=store_conn['file_csv'],
          a_sas_key=store_conn['access_key']):
    """ Helper to query json and/or csv files on a Blob/Datalake """
    result_set = []
    start = time.perf_counter()
    # Get the file extension/type
    a_file_name, a_file_type = get_ext(a_blob_url)

    blob_client = BlobClient.from_blob_url(blob_url=a_blob_url + a_sas_key)

    if a_file_type == '.csv':
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedTextDialect(has_header=True),
            encoding='utf-8')
    elif a_file_type == '.json':
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedJsonDialect(delimiter=' '),
            encoding='utf-8',
            output_format=DelimitedJsonDialect(delimiter='\n'))
    elif a_file_type == '.parquet':
        qa_reader = None
        print("We'll do something about this")
    else:
        qa_reader = None
        print(f"Sorry, can't query a {a_file_type} file type")

    end = time.perf_counter()
    # Show (sarcastic voice) *usefully accurate* elapsed seconds and return records
    print(f"Time taken to get results {end - start} seconds")

    if qa_reader is None:
        print("No result found. Sorry human, better luck nextime ¯\_(ツ)_/¯")
    else:
        for row in qa_reader.records():
            if row:
                result_set.append(row)

    return result_set
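

# A hedged usage sketch: it relies on store_conn supplying real defaults for
# the query, blob URL, and SAS key; the record-count print is illustrative.
if __name__ == "__main__":
    records = query()
    print(f"{len(records)} records returned")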