class StoragePageBlobTest(StorageTestCase):

    def setUp(self):
        super(StoragePageBlobTest, self).setUp()
        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        credential = self._get_shared_key_credential()
        self.bs = BlobServiceClient(
            url,
            credential=credential,
            connection_data_block_size=4 * 1024,
            max_page_size=4 * 1024)
        self.config = self.bs._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bs.create_container(self.container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bs.delete_container(self.container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except Exception:
                pass

        return super(StoragePageBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.bs.get_blob_client(
            self.container_name,
            self.get_resource_name(TEST_BLOB_PREFIX))

    def _create_blob(self, length=512):
        blob = self._get_blob_reference()
        blob.create_page_blob(size=length)
        return blob

    def _wait_for_async_copy(self, blob):
        count = 0
        props = blob.get_blob_properties()
        while props.copy.status == 'pending':
            count = count + 1
            if count > 10:
                self.fail('Timed out waiting for async copy to complete.')
            self.sleep(6)
            props = blob.get_blob_properties()
        return props

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        blob = self.bs.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    def assertRangeEqual(self, container_name, blob_name, expected_data, start_range, end_range):
        blob = self.bs.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob(offset=start_range, length=end_range)
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for page blobs --------------------------------------------

    @record
    def test_create_blob(self):
        # Arrange
        blob = self._get_blob_reference()

        # Act
        resp = blob.create_page_blob(1024)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertTrue(blob.get_blob_properties())

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        blob = self._get_blob_reference()
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        resp = blob.create_page_blob(512, metadata=metadata)

        # Assert
        md = blob.get_blob_properties()
        self.assertDictEqual(md.metadata, metadata)

    @record
    def test_put_page_with_lease_id(self):
        # Arrange
        blob = self._create_blob()
        lease = blob.acquire_lease()

        # Act
        data = self.get_random_bytes(512)
        blob.upload_page(data, 0, 511, lease=lease)

        # Assert
        content = blob.download_blob(lease=lease)
        self.assertEqual(b"".join(list(content)), data)

    @record
    def test_update_page(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = self.get_random_bytes(512)
        resp = blob.upload_page(data, 0, 511)

        # Assert
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))
        self.assertIsNotNone(resp.get('blob_sequence_number'))
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    @record
    def test_create_8tb_blob(self):
        # Arrange
        blob = self._get_blob_reference()

        # Act
        resp = blob.create_page_blob(EIGHT_TB)
        props = 
blob.get_blob_properties() page_ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsInstance(props, BlobProperties) self.assertEqual(props.size, EIGHT_TB) self.assertEqual(0, len(page_ranges)) @record def test_create_larger_than_8tb_blob_fail(self): # Arrange blob = self._get_blob_reference() # Act with self.assertRaises(HttpResponseError): blob.create_page_blob(EIGHT_TB + 1) @record def test_update_8tb_blob_page(self): # Arrange blob = self._get_blob_reference() blob.create_page_blob(EIGHT_TB) # Act data = self.get_random_bytes(512) start_range = EIGHT_TB - 512 end_range = EIGHT_TB - 1 resp = blob.upload_page(data, start_range, end_range) props = blob.get_blob_properties() page_ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) self.assertRangeEqual(self.container_name, blob.blob_name, data, start_range, end_range) self.assertEqual(props.size, EIGHT_TB) self.assertEqual(1, len(page_ranges)) self.assertEqual(page_ranges[0]['start'], start_range) self.assertEqual(page_ranges[0]['end'], end_range) @record def test_update_page_with_md5(self): # Arrange blob = self._create_blob() # Act data = self.get_random_bytes(512) resp = blob.upload_page(data, 0, 511, validate_content=True) # Assert @record def test_clear_page(self): # Arrange blob = self._create_blob() # Act resp = blob.clear_page(0, 511) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) self.assertBlobEqual(self.container_name, blob.blob_name, b'\x00' * 512) @record def test_put_page_if_sequence_number_lt_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence + 1) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_lt_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence) # Assert @record def test_update_page_if_sequence_number_lte_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_lte_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence - 1) # Assert @record def test_update_page_if_sequence_number_eq_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence) # Assert 
self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_eq_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence - 1) # Assert @record def test_update_page_unicode(self): # Arrange blob = self._create_blob() # Act data = u'abcdefghijklmnop' * 32 resp = blob.upload_page(data, 0, 511) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) @record def test_get_page_ranges_no_pages(self): # Arrange blob = self._create_blob() # Act ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(ranges) self.assertIsInstance(ranges, list) self.assertEqual(len(ranges), 0) @record def test_get_page_ranges_2_pages(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(512) resp1 = blob.upload_page(data, 0, 511) resp2 = blob.upload_page(data, 1024, 1535) # Act ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(ranges) self.assertIsInstance(ranges, list) self.assertEqual(len(ranges), 2) self.assertEqual(ranges[0]['start'], 0) self.assertEqual(ranges[0]['end'], 511) self.assertEqual(ranges[1]['start'], 1024) self.assertEqual(ranges[1]['end'], 1535) @record def test_get_page_ranges_diff(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(1536) snapshot1 = blob.create_snapshot() blob.upload_page(data, 0, 1535) snapshot2 = blob.create_snapshot() blob.clear_page(512, 1023) # Act ranges1, cleared1 = blob.get_page_ranges(previous_snapshot_diff=snapshot1) ranges2, cleared2 = blob.get_page_ranges(previous_snapshot_diff=snapshot2['snapshot']) # Assert self.assertIsNotNone(ranges1) self.assertIsInstance(ranges1, list) self.assertEqual(len(ranges1), 2) self.assertIsInstance(cleared1, list) self.assertEqual(len(cleared1), 1) self.assertEqual(ranges1[0]['start'], 0) self.assertEqual(ranges1[0]['end'], 511) self.assertEqual(cleared1[0]['start'], 512) self.assertEqual(cleared1[0]['end'], 1023) self.assertEqual(ranges1[1]['start'], 1024) self.assertEqual(ranges1[1]['end'], 1535) self.assertIsNotNone(ranges2) self.assertIsInstance(ranges2, list) self.assertEqual(len(ranges2), 0) self.assertIsInstance(cleared2, list) self.assertEqual(len(cleared2), 1) self.assertEqual(cleared2[0]['start'], 512) self.assertEqual(cleared2[0]['end'], 1023) @record def test_update_page_fail(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(512) resp1 = blob.upload_page(data, 0, 511) # Act try: blob.upload_page(data, 1024, 1536) except ValueError as e: self.assertEqual(str(e), 'end_range must be an integer that aligns with 512 page size') return # Assert raise Exception('Page range validation failed to throw on failure case') @record def test_resize_blob(self): # Arrange blob = self._create_blob(1024) # Act resp = blob.resize_blob(512) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) props = blob.get_blob_properties() self.assertIsInstance(props, BlobProperties) self.assertEqual(props.size, 512) @record def test_set_sequence_number_blob(self): # Arrange blob = self._create_blob() # Act resp = blob.set_sequence_number(SequenceNumberAction.Update, 6) #Assert self.assertIsNotNone(resp.get('etag')) 
self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) props = blob.get_blob_properties() self.assertIsInstance(props, BlobProperties) self.assertEqual(props.page_blob_sequence_number, 6) @record def test_create_page_blob_with_no_overwrite(self): # Arrange blob = self._get_blob_reference() data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob( data1, overwrite=True, blob_type=BlobType.PageBlob, metadata={'BlobData': 'Data1'}) with self.assertRaises(ResourceExistsError): blob.upload_blob( data2, overwrite=False, blob_type=BlobType.PageBlob, metadata={'BlobData': 'Data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data1) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assertEqual(props.metadata, {'BlobData': 'Data1'}) self.assertEqual(props.size, LARGE_BLOB_SIZE) self.assertEqual(props.blob_type, BlobType.PageBlob) @record def test_create_page_blob_with_overwrite(self): # Arrange blob = self._get_blob_reference() data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob( data1, overwrite=True, blob_type=BlobType.PageBlob, metadata={'BlobData': 'Data1'}) update_resp = blob.upload_blob( data2, overwrite=True, blob_type=BlobType.PageBlob, metadata={'BlobData': 'Data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data2) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.metadata, {'BlobData': 'Data2'}) self.assertEqual(props.size, LARGE_BLOB_SIZE + 512) self.assertEqual(props.blob_type, BlobType.PageBlob) def test_create_blob_from_bytes(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act create_resp = blob.upload_blob(data, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def test_create_blob_from_0_bytes(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(0) # Act create_resp = blob.upload_blob(data, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def test_create_blob_from_bytes_with_progress_first(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) create_resp = blob.upload_blob( 
data, blob_type=BlobType.PageBlob, raw_response_hook=callback) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assert_upload_progress(LARGE_BLOB_SIZE, self.config.max_page_size, progress) def test_create_blob_from_bytes_with_index(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) index = 1024 # Act blob.upload_blob(data[index:], blob_type=BlobType.PageBlob) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[1024:]) @record def test_create_blob_from_bytes_with_index_and_count(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) index = 512 count = 1024 # Act create_resp = blob.upload_blob(data[index:], length=count, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[index:index + count]) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def test_create_blob_from_path(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_input.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def test_create_blob_from_path_with_progress(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, blob_type=BlobType.PageBlob, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) self.assert_upload_progress(len(data), self.config.max_page_size, progress) def test_create_blob_from_stream(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def 
test_create_blob_from_stream_with_empty_pages(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange # data is almost all empty (0s) except two ranges blob = self._get_blob_reference() data = bytearray(LARGE_BLOB_SIZE) data[512: 1024] = self.get_random_bytes(512) data[8192: 8196] = self.get_random_bytes(4) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob) props = blob.get_blob_properties() # Assert # the uploader should have skipped the empty ranges self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) page_ranges, cleared = list(blob.get_page_ranges()) self.assertEqual(len(page_ranges), 2) self.assertEqual(page_ranges[0]['start'], 0) self.assertEqual(page_ranges[0]['end'], 4095) self.assertEqual(page_ranges[1]['start'], 8192) self.assertEqual(page_ranges[1]['end'], 12287) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) def test_create_blob_from_stream_non_seekable(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) with open(FILE_PATH, 'rb') as stream: non_seekable_file = StoragePageBlobTest.NonSeekableFile(stream) blob.upload_blob( non_seekable_file, length=blob_size, max_connections=1, blob_type=BlobType.PageBlob) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) def test_create_blob_from_stream_with_progress(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) blob_size = len(data) with open(FILE_PATH, 'rb') as stream: blob.upload_blob( stream, length=blob_size, blob_type=BlobType.PageBlob, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) self.assert_upload_progress(len(data), self.config.max_page_size, progress) def test_create_blob_from_stream_truncated(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 512 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, blob_type=BlobType.PageBlob) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) def test_create_blob_from_stream_with_progress_truncated(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: 
stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) blob_size = len(data) - 512 with open(FILE_PATH, 'rb') as stream: blob.upload_blob( stream, length=blob_size, blob_type=BlobType.PageBlob, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data[:blob_size]) self.assert_upload_progress(blob_size, self.config.max_page_size, progress) @record def test_create_blob_with_md5_small(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) # Act blob.upload_blob(data, validate_content=True, blob_type=BlobType.PageBlob) # Assert def test_create_blob_with_md5_large(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data, validate_content=True, blob_type=BlobType.PageBlob) # Assert def test_incremental_copy_blob(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange source_blob = self._create_blob(2048) data = self.get_random_bytes(512) resp1 = source_blob.upload_page(data, 0, 511) resp2 = source_blob.upload_page(data, 1024, 1535) source_snapshot_blob = source_blob.create_snapshot() snapshot_blob = BlobClient( source_blob.url, credential=source_blob.credential, snapshot=source_snapshot_blob) sas_token = snapshot_blob.generate_shared_access_signature( permission=BlobPermissions.READ, expiry=datetime.utcnow() + timedelta(hours=1), ) sas_blob = BlobClient(snapshot_blob.url, credential=sas_token) # Act dest_blob = self.bs.get_blob_client(self.container_name, 'dest_blob') copy = dest_blob.start_copy_from_url(sas_blob.url, incremental_copy=True) # Assert self.assertIsNotNone(copy) self.assertIsNotNone(copy['copy_id']) self.assertEqual(copy['copy_status'], 'pending') copy_blob = self._wait_for_async_copy(dest_blob) self.assertEqual(copy_blob.copy.status, 'success') self.assertIsNotNone(copy_blob.copy.destination_snapshot) # strip off protocol self.assertTrue(copy_blob.copy.source.endswith(sas_blob.url[5:])) @record def test_blob_tier_on_create(self): url = self._get_premium_account_url() credential = self._get_premium_shared_key_credential() pbs = BlobServiceClient(url, credential=credential) try: container_name = self.get_resource_name('utpremiumcontainer') container = pbs.get_container_client(container_name) if not self.is_playback(): container.create_container() # test create_blob API blob = self._get_blob_reference() pblob = pbs.get_blob_client(container_name, blob.blob_name) pblob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P4) props = pblob.get_blob_properties() self.assertEqual(props.blob_tier, PremiumPageBlobTier.P4) self.assertFalse(props.blob_tier_inferred) # test create_blob_from_bytes API blob2 = self._get_blob_reference() pblob2 = pbs.get_blob_client(container_name, blob2.blob_name) byte_data = self.get_random_bytes(1024) pblob2.upload_blob( byte_data, premium_page_blob_tier=PremiumPageBlobTier.P6, blob_type=BlobType.PageBlob) props2 = pblob2.get_blob_properties() self.assertEqual(props2.blob_tier, PremiumPageBlobTier.P6) self.assertFalse(props2.blob_tier_inferred) # test create_blob_from_path API blob3 = self._get_blob_reference() pblob3 = 
pbs.get_blob_client(container_name, blob3.blob_name) with open(FILE_PATH, 'wb') as stream: stream.write(byte_data) with open(FILE_PATH, 'rb') as stream: pblob3.upload_blob( stream, blob_type=BlobType.PageBlob, premium_page_blob_tier=PremiumPageBlobTier.P10) props3 = pblob3.get_blob_properties() self.assertEqual(props3.blob_tier, PremiumPageBlobTier.P10) self.assertFalse(props3.blob_tier_inferred) finally: container.delete_container() @record def test_blob_tier_set_tier_api(self): url = self._get_premium_account_url() credential = self._get_premium_shared_key_credential() pbs = BlobServiceClient(url, credential=credential) try: container_name = self.get_resource_name('utpremiumcontainer') container = pbs.get_container_client(container_name) if not self.is_playback(): try: container.create_container() except ResourceExistsError: pass blob = self._get_blob_reference() pblob = pbs.get_blob_client(container_name, blob.blob_name) pblob.create_page_blob(1024) blob_ref = pblob.get_blob_properties() self.assertEqual(PremiumPageBlobTier.P10, blob_ref.blob_tier) self.assertIsNotNone(blob_ref.blob_tier) self.assertTrue(blob_ref.blob_tier_inferred) pcontainer = pbs.get_container_client(container_name) blobs = list(pcontainer.list_blobs()) # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob.blob_name) pblob.set_premium_page_blob_tier(PremiumPageBlobTier.P50) blob_ref2 = pblob.get_blob_properties() self.assertEqual(PremiumPageBlobTier.P50, blob_ref2.blob_tier) self.assertFalse(blob_ref2.blob_tier_inferred) blobs = list(pcontainer.list_blobs()) # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob.blob_name) self.assertEqual(blobs[0].blob_tier, PremiumPageBlobTier.P50) self.assertFalse(blobs[0].blob_tier_inferred) finally: container.delete_container() @record def test_blob_tier_copy_blob(self): url = self._get_premium_account_url() credential = self._get_premium_shared_key_credential() pbs = BlobServiceClient(url, credential=credential) try: container_name = self.get_resource_name('utpremiumcontainer') container = pbs.get_container_client(container_name) if not self.is_playback(): try: container.create_container() except ResourceExistsError: pass # Arrange source_blob = pbs.get_blob_client( container_name, self.get_resource_name(TEST_BLOB_PREFIX)) source_blob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10) # Act source_blob_url = '{0}/{1}/{2}'.format( self._get_premium_account_url(), container_name, source_blob.blob_name) copy_blob = pbs.get_blob_client(container_name, 'blob1copy') copy = copy_blob.start_copy_from_url(source_blob_url, premium_page_blob_tier=PremiumPageBlobTier.P30) # Assert self.assertIsNotNone(copy) self.assertEqual(copy['copy_status'], 'success') self.assertIsNotNone(copy['copy_id']) copy_ref = copy_blob.get_blob_properties() self.assertEqual(copy_ref.blob_tier, PremiumPageBlobTier.P30) source_blob2 = pbs.get_blob_client( container_name, self.get_resource_name(TEST_BLOB_PREFIX)) source_blob2.create_page_blob(1024) source_blob2_url = '{0}/{1}/{2}'.format( self._get_premium_account_url(), source_blob2.container_name, source_blob2.blob_name) copy_blob2 = pbs.get_blob_client(container_name, 'blob2copy') copy2 = copy_blob2.start_copy_from_url(source_blob2_url, premium_page_blob_tier=PremiumPageBlobTier.P60) self.assertIsNotNone(copy2) self.assertEqual(copy2['copy_status'], 
'success') self.assertIsNotNone(copy2['copy_id']) copy_ref2 = copy_blob2.get_blob_properties() self.assertEqual(copy_ref2.blob_tier, PremiumPageBlobTier.P60) self.assertFalse(copy_ref2.blob_tier_inferred) copy_blob3 = pbs.get_blob_client(container_name, 'blob3copy') copy3 = copy_blob3.start_copy_from_url(source_blob2_url) self.assertIsNotNone(copy3) self.assertEqual(copy3['copy_status'], 'success') self.assertIsNotNone(copy3['copy_id']) copy_ref3 = copy_blob3.get_blob_properties() self.assertEqual(copy_ref3.blob_tier, PremiumPageBlobTier.P10) self.assertTrue(copy_ref3.blob_tier_inferred) finally: container.delete_container()
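# A brief illustration of the 512-byte page alignment rule the tests above
# exercise: every upload_page/clear_page range must start on a 512-byte
# boundary and end at (a multiple of 512) - 1. This is a minimal sketch with
# a hypothetical helper (is_valid_page_range is not part of the SDK or of
# the test suite above):

PAGE_SIZE = 512

def is_valid_page_range(start_range, end_range):
    # Valid starts: 0, 512, 1024, ...; valid ends: 511, 1023, 1535, ...
    return start_range % PAGE_SIZE == 0 and (end_range + 1) % PAGE_SIZE == 0

assert is_valid_page_range(0, 511)          # first page, as in test_update_page
assert is_valid_page_range(1024, 1535)      # as in test_get_page_ranges_2_pages
assert not is_valid_page_range(1024, 1536)  # the case test_update_page_fail expects to reject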
class AzureStorage(BaseStorage): def __init__( self, context, azure_container, storage_path, azure_account_name, azure_account_key=None, sas_token=None, connection_string=None, ): super(AzureStorage, self).__init__() self._context = context self._storage_path = storage_path.lstrip("/") self._azure_account_name = azure_account_name self._azure_account_key = azure_account_key self._azure_sas_token = sas_token self._azure_container = azure_container self._azure_connection_string = connection_string self._blob_service_client = BlobServiceClient( AZURE_STORAGE_URL_STRING.format(self._azure_account_name), credential=self._azure_account_key, ) # https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs api_version = self._blob_service_client.api_version api_version_dt = datetime.strptime(api_version, "%Y-%m-%d") if api_version_dt < _API_VERSION_LIMITS["2016-05-31"][0]: self._max_block_size = _API_VERSION_LIMITS["2016-05-31"][1] elif api_version_dt <= _API_VERSION_LIMITS["2019-07-07"][0]: self._max_block_size = _API_VERSION_LIMITS["2019-07-07"][1] elif api_version_dt >= _API_VERSION_LIMITS["2019-12-12"][0]: self._max_block_size = _API_VERSION_LIMITS["2019-12-12"][1] else: raise Exception("Unknown Azure api version %s" % api_version) def _blob_name_from_path(self, object_path): if ".." in object_path: raise Exception("Relative paths are not allowed; found %s" % object_path) return os.path.join(self._storage_path, object_path).rstrip("/") def _upload_blob_path_from_uuid(self, uuid): return self._blob_name_from_path( self._upload_blob_name_from_uuid(uuid)) def _upload_blob_name_from_uuid(self, uuid): return "uploads/{0}".format(uuid) def _blob(self, blob_name): return self._blob_service_client.get_blob_client( self._azure_container, blob_name) @property def _container(self): return self._blob_service_client.get_container_client( self._azure_container) def get_direct_download_url(self, object_path, request_ip=None, expires_in=60, requires_cors=False, head=False): blob_name = self._blob_name_from_path(object_path) try: sas_token = generate_blob_sas( self._azure_account_name, self._azure_container, blob_name, account_key=self._azure_account_key, permission=ContainerSasPermissions.from_string("r"), expiry=datetime.utcnow() + timedelta(seconds=expires_in), ) blob_url = "{}?{}".format(self._blob(blob_name).url, sas_token) except AzureError: logger.exception( "Exception when trying to get direct download for path %s", object_path) raise IOError("Exception when trying to get direct download") return blob_url def validate(self, client): super(AzureStorage, self).validate(client) def get_content(self, path): blob_name = self._blob_name_from_path(path) try: blob_stream = self._blob(blob_name).download_blob() except AzureError: logger.exception("Exception when trying to get path %s", path) raise IOError("Exception when trying to get path") return blob_stream.content_as_bytes() def put_content(self, path, content): blob_name = self._blob_name_from_path(path) try: self._blob(blob_name).upload_blob(content, blob_type=BlobType.BlockBlob) except AzureError: logger.exception("Exception when trying to put path %s", path) raise IOError("Exception when trying to put path") def stream_read(self, path): with self.stream_read_file(path) as f: while True: buf = f.read(self.buffer_size) if not buf: break yield buf def stream_read_file(self, path): blob_name = self._blob_name_from_path(path) try: output_stream = io.BytesIO() 
self._blob(blob_name).download_blob().download_to_stream( output_stream) output_stream.seek(0) except AzureError: logger.exception( "Exception when trying to stream_file_read path %s", path) raise IOError("Exception when trying to stream_file_read path") return output_stream def stream_write(self, path, fp, content_type=None, content_encoding=None): blob_name = self._blob_name_from_path(path) content_settings = ContentSettings( content_type=content_type, content_encoding=content_encoding, ) try: self._blob(blob_name).upload_blob( fp, content_settings=content_settings) except AzureError as ae: logger.exception("Exception when trying to stream_write path %s", path) raise IOError("Exception when trying to stream_write path", ae) def exists(self, path): blob_name = self._blob_name_from_path(path) try: self._blob(blob_name).get_blob_properties() except ResourceNotFoundError: return False except AzureError: logger.exception("Exception when trying to check exists path %s", path) raise IOError("Exception when trying to check exists path") return True def remove(self, path): blob_name = self._blob_name_from_path(path) try: self._blob(blob_name).delete_blob() except AzureError: logger.exception("Exception when trying to remove path %s", path) raise IOError("Exception when trying to remove path") def get_checksum(self, path): blob_name = self._blob_name_from_path(path) try: blob_properties = self._blob(blob_name).get_blob_properties() except AzureError: logger.exception( "Exception when trying to get_checksum for path %s", path) raise IOError("Exception when trying to get_checksum path") return blob_properties.etag def initiate_chunked_upload(self): random_uuid = str(uuid.uuid4()) metadata = { _BLOCKS_KEY: [], _CONTENT_TYPE_KEY: None, } return random_uuid, metadata def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None): if length == 0: return 0, storage_metadata, None upload_blob_path = self._upload_blob_path_from_uuid(uuid) new_metadata = copy.deepcopy(storage_metadata) total_bytes_written = 0 while True: current_length = length - total_bytes_written max_length = (min(current_length, self._max_block_size) if length != READ_UNTIL_END else self._max_block_size) if max_length <= 0: break limited = LimitingStream(in_fp, max_length, seekable=False) # Note: Azure fails if a zero-length block is uploaded, so we read all the data here, # and, if there is none, terminate early. block_data = b"" for chunk in iter(lambda: limited.read(31457280), b""): block_data += chunk if len(block_data) == 0: break block_index = len(new_metadata[_BLOCKS_KEY]) block_id = format(block_index, "05") new_metadata[_BLOCKS_KEY].append(block_id) try: self._blob(upload_blob_path).stage_block(block_id, block_data, validate_content=True) except AzureError as ae: logger.exception( "Exception when trying to stream_upload_chunk block %s for %s", block_id, uuid) return total_bytes_written, new_metadata, ae bytes_written = len(block_data) total_bytes_written += bytes_written if bytes_written == 0 or bytes_written < max_length: break if content_type is not None: new_metadata[_CONTENT_TYPE_KEY] = content_type return total_bytes_written, new_metadata, None def complete_chunked_upload(self, uuid, final_path, storage_metadata): """ Complete the chunked upload and store the final results in the path indicated. Returns nothing. """ # Commit the blob's blocks. 
        upload_blob_name = self._upload_blob_name_from_uuid(uuid)  # upload/<uuid>
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)  # storage/path/upload/<uuid>

        block_list = [BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]]

        try:
            if storage_metadata[_CONTENT_TYPE_KEY] is not None:
                content_settings = ContentSettings(
                    content_type=storage_metadata[_CONTENT_TYPE_KEY])
                self._blob(upload_blob_path).commit_block_list(
                    block_list, content_settings=content_settings)
            else:
                self._blob(upload_blob_path).commit_block_list(block_list)
        except AzureError:
            logger.exception(
                "Exception when trying to put block list for path %s from upload %s",
                final_path,
                uuid,
            )
            raise IOError("Exception when trying to put block list")

        # Copy the blob to its final location.
        copy_source_url = self.get_direct_download_url(upload_blob_name, expires_in=300)

        try:
            final_blob_name = self._blob_name_from_path(final_path)
            self._blob(final_blob_name).start_copy_from_url(copy_source_url)
        except AzureError:
            logger.exception(
                "Exception when trying to copy uploaded blob %s to path %s", uuid, final_path)
            raise IOError("Exception when trying to copy uploaded blob")

        self._await_copy(final_blob_name)

        # Delete the original blob.
        logger.debug("Deleting chunked upload %s at path %s", uuid, upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except AzureError:
            logger.exception("Exception when trying to delete uploaded blob %s", uuid)
            raise IOError("Exception when trying to delete uploaded blob")

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """
        Cancel the chunked upload and clean up any outstanding partially uploaded data.
        Returns nothing.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug("Canceling chunked upload %s at path %s", uuid, upload_blob_path)
        try:
            self._blob(upload_blob_path).delete_blob()
        except ResourceNotFoundError:
            pass

    def _await_copy(self, blob_name):
        # Poll for copy completion.
        blob = self._blob(blob_name)
        copy_prop = blob.get_blob_properties().copy

        count = 0
        while copy_prop.status == "pending":
            props = blob.get_blob_properties()
            copy_prop = props.copy
            if copy_prop.status == "success":
                return

            if copy_prop.status == "failed" or copy_prop.status == "aborted":
                raise IOError(
                    "Copy of blob %s failed with status %s" % (blob_name, copy_prop.status))

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError("Timed out waiting for copy to complete")

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        if self.__class__ == destination.__class__:
            logger.debug(
                "Starting copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            copy_source_url = self.get_direct_download_url(path)
            blob_name = destination._blob_name_from_path(path)
            dest_blob = destination._blob(blob_name)

            dest_blob.start_copy_from_url(copy_source_url)
            destination._await_copy(blob_name)
            logger.debug(
                "Finished copying file from Azure %s to Azure %s via an Azure copy",
                self._azure_container,
                destination._azure_container,
            )
            return

        # Fallback to a slower, default copy.
logger.debug( "Copying file from Azure container %s to %s via a streamed copy", self._azure_container, destination, ) with self.stream_read_file(path) as fp: destination.stream_write(path, fp) def setup(self): # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services cors = [ CorsRule( allowed_origins="*", allowed_methods=["GET", "PUT"], max_age_in_seconds=3000, exposed_headers=["x-ms-meta-*"], allowed_headers=[ "x-ms-meta-data*", "x-ms-meta-target*", "x-ms-meta-abc", "Content-Type", ], ) ] self._blob_service_client.set_service_properties(cors=cors)
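# A minimal usage sketch of the chunked-upload flow implemented by the class
# above, assuming a concrete AzureStorage instance; upload_in_chunks and its
# arguments are hypothetical names, not part of the driver:
import io

def upload_in_chunks(storage, data, final_path, chunk_size=1024 * 1024):
    # 1. initiate_chunked_upload hands back an upload id plus the block
    #    metadata that stream_upload_chunk threads through each call.
    upload_id, metadata = storage.initiate_chunked_upload()
    fp = io.BytesIO(data)
    offset = 0
    while offset < len(data):
        length = min(chunk_size, len(data) - offset)
        # 2. each call stages one or more blocks and records their zero-padded
        #    ids in the metadata; a non-None error means the chunk failed.
        written, metadata, err = storage.stream_upload_chunk(
            upload_id, offset, length, fp, metadata)
        if err is not None:
            storage.cancel_chunked_upload(upload_id, metadata)
            raise err
        offset += written
    # 3. complete_chunked_upload commits the block list, copies the blob to
    #    final_path, and deletes the temporary upload blob.
    storage.complete_chunked_upload(upload_id, final_path, metadata)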
class StorageAppendBlobTest(StorageTestCase):

    def setUp(self):
        super(StorageAppendBlobTest, self).setUp()
        url = self._get_account_url()
        credential = self._get_shared_key_credential()
        self.bsc = BlobServiceClient(url, credential=credential, max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.source_container_name = self.get_resource_name('utcontainersource')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)
            self.bsc.create_container(self.source_container_name)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                pass
            try:
                self.bsc.delete_container(self.source_container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except Exception:
                pass

        return super(StorageAppendBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob()
        return blob

    def _create_source_blob(self, data):
        blob_client = self.bsc.get_blob_client(
            self.source_container_name, self.get_resource_name(TEST_BLOB_PREFIX))
        blob_client.create_append_blob()
        blob_client.append_block(data)
        return blob_client

    def assertBlobEqual(self, blob, expected_data):
        stream = blob.download_blob()
        actual_data = b"".join(list(stream))
        self.assertEqual(actual_data, expected_data)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    # --Test cases for append blobs --------------------------------------------

    @record
    def test_create_blob(self):
        # Arrange
        blob_name = self._get_blob_reference()

        # Act
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        create_resp = blob.create_append_blob()

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_with_lease_id(self):
        # Arrange
        blob = self._create_blob()

        # Act
        lease = blob.acquire_lease()
        create_resp = blob.create_append_blob(lease=lease)

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.create_append_blob(metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @record
    def test_append_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.append_block(u'block {0}'.format(i).encode('utf-8'))
            self.assertEqual(int(resp['blob_append_offset']), 7 * i)
            self.assertEqual(resp['blob_committed_block_count'], i + 1)
            self.assertIsNotNone(resp['etag'])
            self.assertIsNotNone(resp['last_modified'])

        # Assert
        self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4')

    @record
    def test_append_block_unicode(self):
        # Arrange
        blob = 
self._create_blob() # Act resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16') self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_append_block_with_md5(self): # Arrange blob = self._create_blob() # Act resp = blob.append_block(b'block', validate_content=True) self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_append_block_from_url(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act: make append block from url calls split = 4 * 1024 resp = destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_offset=0, source_length=split) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) resp = destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_offset=split, source_length=LARGE_BLOB_SIZE - split) self.assertEqual(resp.get('blob_append_offset'), str(4 * 1024)) self.assertEqual(resp.get('blob_committed_block_count'), 2) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties( ) self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Missing start range shouldn't pass the validation with self.assertRaises(ValueError): destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_length=LARGE_BLOB_SIZE) @record def test_append_block_from_url_and_validate_content_md5(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) src_md5 = StorageContentValidation.get_content_md5(source_blob_data) sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls with correct md5 resp = destination_blob_client.append_block_from_url( source_blob_client.url + '?' 
+ sas, source_content_md5=src_md5) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties( ) self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) # Act part 2: put block from url with wrong md5 with self.assertRaises(HttpResponseError): destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_content_md5=StorageContentValidation.get_content_md5( b"POTATO")) @record def test_append_block_from_url_with_source_if_modified(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_modified_since=source_blob_properties.get( 'last_modified') - timedelta(hours=15)) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties( ) self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceNotFoundError): destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_modified_since=source_blob_properties.get( 'last_modified')) @record def test_append_block_from_url_with_source_if_unmodified(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client.append_block_from_url( source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_unmodified_since=source_blob_properties.get( 'last_modified')) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties( ) self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceModifiedError): destination_blob_client \ .append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, if_unmodified_since=source_blob_properties.get('last_modified') - timedelta( hours=15)) @record def test_append_block_from_url_with_source_if_match(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client. \ append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_match=source_blob_properties.get('etag')) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties( ) self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceNotFoundError): destination_blob_client.append_block_from_url( source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_match='0x111111111111111') @record def test_append_block_from_url_with_source_if_none_match(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client. \ append_block_from_url(source_blob_client.url + '?' 
            + sas, source_offset=0, source_length=LARGE_BLOB_SIZE,
            source_if_none_match='0x111111111111111')
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceNotFoundError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                source_if_none_match=source_blob_properties.get('etag'))

    @record
    def test_append_block_from_url_with_if_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_name = self._get_blob_reference()
        destination_blob_client = self.bsc.get_blob_client(
            self.container_name, destination_blob_name)
        destination_blob_properties_on_creation = destination_blob_client.create_append_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            if_match=destination_blob_properties_on_creation.get('etag'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                if_match='0x111111111111111')

    @record
    def test_append_block_from_url_with_if_none_match(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            if_none_match='0x111111111111111')
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                if_none_match=destination_blob_properties.get('etag'))

    @record
    def test_append_block_from_url_with_maxsize_condition(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            maxsize_condition=LARGE_BLOB_SIZE + 1)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                maxsize_condition=LARGE_BLOB_SIZE + 1)

    @record
    def test_append_block_from_url_with_appendpos_condition(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            appendpos_condition=0)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                appendpos_condition=0)

    @record
    def test_append_block_from_url_with_if_modified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_properties = source_blob_client.get_blob_properties()
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            if_modified_since=source_properties.get('last_modified') - timedelta(minutes=15))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                if_modified_since=destination_blob_properties.get('last_modified'))

    @record
    def test_append_block_from_url_with_if_unmodified(self):
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_properties = source_blob_client.append_block(source_blob_data)
        sas = source_blob_client.generate_shared_access_signature(
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1))

        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client.append_block_from_url(
            source_blob_client.url + '?' + sas,
            source_offset=0, source_length=LARGE_BLOB_SIZE,
            if_unmodified_since=source_properties.get('last_modified'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(
                source_blob_client.url + '?' + sas,
                source_offset=0, source_length=LARGE_BLOB_SIZE,
                if_unmodified_since=source_properties.get('last_modified') - timedelta(minutes=15))

    @record
    def test_create_append_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1, overwrite=True, blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2, overwrite=False, blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})
        props = blob.get_blob_properties()

        # Assert
        appended_data = data1 + data2
        self.assertBlobEqual(blob, appended_data)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512)

    @record
    def test_create_append_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1, overwrite=True, blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2, overwrite=True, blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_append_blob_from_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp['last_modified'])

    @record
    def test_append_blob_from_0_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b''
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        # appending nothing should not make any network call
        self.assertIsNone(append_resp.get('etag'))
        self.assertIsNone(append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress(self):
        # Arrange
        blob = self._create_blob()
        data = b'abcdefghijklmnopqrstuvwxyz'

        # Act
        progress = []

        def progress_gen(upload):
            progress.append((0, len(upload)))
            yield upload

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_with_index(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:])

    @record
    def test_append_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:8])

    @record
    def test_append_blob_from_bytes_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def progress_gen(upload):
            n = self.config.max_block_size
            total = len(upload)
            current = 0
            while upload:
                progress.append((current, total))
                yield upload[:n]
                current += len(upload[:n])
                upload = upload[n:]

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 33
        blob_size = len(data) - 66

        # Act
        blob.upload_blob(data[index:], length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[index:index + blob_size])

    @record
    def test_append_blob_from_path_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_path_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def progress_gen(upload):
            n = self.config.max_block_size
            total = LARGE_BLOB_SIZE
            current = 0
            while upload:
                chunk = upload.read(n)
                if not chunk:
                    break
                progress.append((current, total))
                yield chunk
                current += len(chunk)

        with open(FILE_PATH, 'rb') as stream:
            upload_data = progress_gen(stream)
            blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_stream_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_known_size(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_size = len(data) - 66

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_with_multiple_appends(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream1:
            stream1.write(data)
        with open(FILE_PATH, 'wb') as stream2:
            stream2.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream1:
            blob.upload_blob(stream1, blob_type=BlobType.AppendBlob)
        with open(FILE_PATH, 'rb') as stream2:
            blob.upload_blob(stream2, blob_type=BlobType.AppendBlob)

        # Assert
        data = data * 2
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_chunked_upload_with_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    def test_append_blob_from_stream_chunked_upload_with_count_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text_with_encoding(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def progress_gen(upload):
            progress.append((0, len(data)))
            yield upload

        upload_data = progress_gen(text)
        blob.upload_blob(upload_data, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_text_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, encoded_data)

    @record
    def test_append_blob_with_md5(self):
        # Arrange
        blob = self._create_blob()
        data = b'hello world'

        # Act
        blob.append_block(data, validate_content=True)
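
# For quick reference, the append-from-URL flow exercised by the conditional
# tests above can be condensed into a standalone sketch. This is an
# illustration only, not part of the test suite: the account URL, key, and
# container/blob names are placeholders, and it assumes the same preview-era
# client surface used in these tests (client-side
# generate_shared_access_signature and append_block_from_url).
#
#     from datetime import datetime, timedelta
#     from azure.storage.blob import BlobServiceClient, BlobSasPermissions, BlobType
#
#     service = BlobServiceClient("https://<account>.blob.core.windows.net", credential="<key>")
#     source = service.get_blob_client("sourcecontainer", "source-blob")
#     destination = service.get_blob_client("destcontainer", "dest-blob")
#
#     data = b'x' * (4 * 1024)
#     source.upload_blob(data, blob_type=BlobType.AppendBlob)
#
#     # Read-only SAS so the service can fetch the source blob server-side.
#     sas = source.generate_shared_access_signature(
#         permission=BlobSasPermissions(read=True),
#         expiry=datetime.utcnow() + timedelta(hours=1))
#
#     destination.create_append_blob()
#     resp = destination.append_block_from_url(
#         source.url + '?' + sas,
#         source_offset=0,
#         source_length=len(data),
#         maxsize_condition=len(data) + 1)  # fail if the result would exceed this size
#
# The response dictionary carries 'blob_append_offset' and
# 'blob_committed_block_count', which is what the tests assert on.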
class StorageGetBlobTest(StorageTestCase):
    def setUp(self):
        super(StorageGetBlobTest, self).setUp()
        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url, credential=credential,
            max_single_get_size=1024,
            max_chunk_get_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()

        self.byte_blob = self.get_resource_name('byteblob')
        self.byte_data = self.get_random_bytes(64 * 1024 + 5)
        if not self.is_playback():
            blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
            blob.upload_blob(self.byte_data)

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except:
                pass
        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except:
                pass
        return super(StorageGetBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        return self.get_resource_name(TEST_BLOB_PREFIX)

    class NonSeekableFile(object):
        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

        def seekable(self):
            return False

    # -- Get test cases for blobs ----------------------------------------------
    @record
    def test_unicode_get_blob_unicode_data(self):
        # Arrange
        blob_data = u'hello world啊齄丂狛狜'.encode('utf-8')
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertIsInstance(content.properties, BlobProperties)
        self.assertEqual(content.readall(), blob_data)

    @record
    def test_unicode_get_blob_binary_data(self):
        # Arrange
        base64_data = 'AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wABAgMEBQYHCAkKCwwNDg8QERITFBUWFxgZGhscHR4fICEiIyQlJicoKSorLC0uLzAxMjM0NTY3ODk6Ozw9Pj9AQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2BhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ent8fX5/gIGCg4SFhoeIiYqLjI2Oj5CRkpOUlZaXmJmam5ydnp+goaKjpKWmp6ipqqusra6vsLGys7S1tre4ubq7vL2+v8DBwsPExcbHyMnKy8zNzs/Q0dLT1NXW19jZ2tvc3d7f4OHi4+Tl5ufo6err7O3u7/Dx8vP09fb3+Pn6+/z9/v8AAQIDBAUGBwgJCgsMDQ4PEBESExQVFhcYGRobHB0eHyAhIiMkJSYnKCkqKywtLi8wMTIzNDU2Nzg5Ojs8PT4/QEFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaW1xdXl9gYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4CBgoOEhYaHiImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrrK2ur7CxsrO0tba3uLm6u7y9vr/AwcLDxMXGx8jJysvMzc7P0NHS09TV1tfY2drb3N3e3+Dh4uPk5ebn6Onq6+zt7u/w8fLz9PX29/j5+vv8/f7/AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/w=='
        binary_data = base64.b64decode(base64_data)

        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(binary_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertIsInstance(content.properties, BlobProperties)
        self.assertEqual(content.readall(), binary_data)

    @record
    def test_get_blob_no_content(self):
        # Arrange
        blob_data = b''
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(blob_data, content.readall())
        self.assertEqual(0, content.properties.size)

    def test_get_blob_to_bytes(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_ranged_get_blob_to_bytes_with_single_byte(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(offset=0, length=1).readall()

        # Assert
        self.assertEqual(1, len(content))
        self.assertEqual(self.byte_data[0], content[0])

        # Act
        content = blob.download_blob(offset=5, length=1).readall()

        # Assert
        self.assertEqual(1, len(content))
        self.assertEqual(self.byte_data[5], content[0])

    @record
    def test_ranged_get_blob_to_bytes_with_zero_byte(self):
        blob_data = b''
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        # the get request should fail in this case since the blob is empty and yet there is a range specified
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob(offset=0, length=5)
        self.assertEqual(StorageErrorCode.invalid_range, e.exception.error_code)

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob(offset=3, length=5)
        self.assertEqual(StorageErrorCode.invalid_range, e.exception.error_code)

    @record
    def test_ranged_get_blob_with_missing_start_range(self):
        blob_data = b'foobar'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        # the get request should fail fast in this case since start_range is missing while end_range is specified
        with self.assertRaises(ValueError):
            blob.download_blob(length=3)

    def test_get_blob_to_bytes_snapshot(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        snapshot_ref = blob.create_snapshot()
        snapshot = self.bsc.get_blob_client(self.container_name, self.byte_blob, snapshot=snapshot_ref)

        blob.upload_blob(self.byte_data, overwrite=True)  # Modify the blob so the Etag no longer matches

        # Act
        content = snapshot.download_blob(max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_get_blob_to_bytes_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback, max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)
        self.assert_download_progress(
            len(self.byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_bytes_non_parallel(self):
        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback, max_concurrency=1).readall()

        # Assert
        self.assertEqual(self.byte_data, content)
        self.assert_download_progress(
            len(self.byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_bytes_small(self):
        # Arrange
        blob_data = self.get_random_bytes(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(blob_data, content)
        self.assert_download_progress(
            len(blob_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    def test_get_blob_to_stream(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_to_stream_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)
        self.assert_download_progress(
            len(self.byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_stream_non_parallel(self):
        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback, max_concurrency=1)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)
        self.assert_download_progress(
            len(self.byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_stream_small(self):
        # Arrange
        blob_data = self.get_random_bytes(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 1024)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data, actual)
        self.assert_download_progress(
            len(blob_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    def test_ranged_get_blob_to_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        end_range = self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1, length=end_range - 1, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, end_range - 1)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:end_range], actual)

    def test_ranged_get_blob_to_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        progress = []
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        start_range = 3
        end_range = self.config.max_single_get_size + 1024
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(
                offset=start_range, length=end_range,
                raw_response_hook=callback, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, end_range)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[start_range:end_range + start_range], actual)
        self.assert_download_progress(
            end_range, self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_ranged_get_blob_to_path_small(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1, length=4, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 4)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:5], actual)

    @record
    def test_ranged_get_blob_to_path_non_parallel(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1, length=3, max_concurrency=1)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, 3)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data[1:4], actual)

    @record
    def test_ranged_get_blob_to_path_invalid_range_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_size = self.config.max_single_get_size + 1
        blob_data = self.get_random_bytes(blob_size)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        end_range = 2 * self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1, length=end_range, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, blob_size)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data[1:blob_size], actual)

    @record
    def test_ranged_get_blob_to_path_invalid_range_non_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_size = 1024
        blob_data = self.get_random_bytes(blob_size)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        # Act
        end_range = 2 * self.config.max_single_get_size
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(offset=1, length=end_range, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, blob_size)
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(blob_data[1:blob_size], actual)

    def test_get_blob_to_text(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        text_blob = self.get_resource_name('textblob')
        text_data = self.get_random_text_data(self.config.max_single_get_size + 1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        # Act
        stream = blob.download_blob(max_concurrency=2, encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)

    def test_get_blob_to_text_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        text_blob = self.get_resource_name('textblob')
        text_data = self.get_random_text_data(self.config.max_single_get_size + 1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback, max_concurrency=2, encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)
        self.assert_download_progress(
            len(text_data.encode('utf-8')), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_text_non_parallel(self):
        # Arrange
        text_blob = self._get_blob_reference()
        text_data = self.get_random_text_data(self.config.max_single_get_size + 1)
        blob = self.bsc.get_blob_client(self.container_name, text_blob)
        blob.upload_blob(text_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback, max_concurrency=1, encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(text_data, content)
        self.assert_download_progress(
            len(text_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_text_small(self):
        # Arrange
        blob_data = self.get_random_text_data(1024)
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(blob_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        stream = blob.download_blob(raw_response_hook=callback, encoding='UTF-8')
        content = stream.readall()

        # Assert
        self.assertEqual(blob_data, content)
        self.assert_download_progress(
            len(blob_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_to_text_with_encoding(self):
        # Arrange
        text = u'hello 啊齄丂狛狜 world'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(text, encoding='utf-16')

        # Act
        stream = blob.download_blob(encoding='UTF-16')
        content = stream.readall()

        # Assert
        self.assertEqual(text, content)

    @record
    def test_get_blob_to_text_with_encoding_and_progress(self):
        # Arrange
        text = u'hello 啊齄丂狛狜 world'
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(text, encoding='utf-16')

        # Act
        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        stream = blob.download_blob(raw_response_hook=callback, encoding='UTF-16')
        content = stream.readall()

        # Assert
        self.assertEqual(text, content)
        self.assert_download_progress(
            len(text.encode('utf-8')), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_non_seekable(self):
        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream)
            downloader = blob.download_blob(max_concurrency=1)
            read_bytes = downloader.readinto(non_seekable_stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_non_seekable_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream)

            with self.assertRaises(ValueError):
                downloader = blob.download_blob(max_concurrency=2)
                properties = downloader.readinto(non_seekable_stream)

    @record
    def test_get_blob_to_stream_exact_get_size(self):
        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(self.config.max_single_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(raw_response_hook=callback, max_concurrency=2)
            properties = downloader.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(byte_data, actual)
        self.assert_download_progress(
            len(byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    @record
    def test_get_blob_exact_get_size(self):
        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(self.config.max_single_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(byte_data, content)
        self.assert_download_progress(
            len(byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    def test_get_blob_exact_chunk_size(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        byte_data = self.get_random_bytes(
            self.config.max_single_get_size + self.config.max_chunk_get_size)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(byte_data)

        progress = []

        def callback(response):
            current = response.context['download_stream_current']
            total = response.context['data_stream_total']
            progress.append((current, total))

        # Act
        content = blob.download_blob(raw_response_hook=callback).readall()

        # Assert
        self.assertEqual(byte_data, content)
        self.assert_download_progress(
            len(byte_data), self.config.max_chunk_get_size,
            self.config.max_single_get_size, progress)

    def test_get_blob_to_stream_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(validate_content=True, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(read_bytes, len(self.byte_data))
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(self.byte_data, actual)

    def test_get_blob_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)

        # Act
        content = blob.download_blob(validate_content=True, max_concurrency=2).readall()

        # Assert
        self.assertEqual(self.byte_data, content)

    def test_get_blob_range_to_stream_with_overall_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = b'MDAwMDAwMDA='
        blob.set_http_headers(props.content_settings)

        # Act
        with open(FILE_PATH, 'wb') as stream:
            downloader = blob.download_blob(
                offset=0, length=1024, validate_content=True, max_concurrency=2)
            read_bytes = downloader.readinto(stream)

        # Assert
        self.assertEqual(downloader.size, 1024)
        self.assertEqual(read_bytes, 1024)
        self.assertEqual(b'MDAwMDAwMDA=', downloader.properties.content_settings.content_md5)

    def test_get_blob_range_with_overall_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        content = blob.download_blob(offset=0, length=1024, validate_content=True)

        # Arrange
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = b'MDAwMDAwMDA='
        blob.set_http_headers(props.content_settings)

        # Act
        content = blob.download_blob(offset=0, length=1024, validate_content=True)

        # Assert
        self.assertEqual(content.properties.size, 1024)
        self.assertEqual(b'MDAwMDAwMDA=', content.properties.content_settings.content_md5)

    def test_get_blob_range_with_range_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        blob = self.bsc.get_blob_client(self.container_name, self.byte_blob)
        content = blob.download_blob(offset=0, length=1024, validate_content=True)

        # Arrange
        props = blob.get_blob_properties()
        props.content_settings.content_md5 = None
        blob.set_http_headers(props.content_settings)

        # Act
        content = blob.download_blob(offset=0, length=1024, validate_content=True)

        # Assert
        self.assertIsNotNone(content.properties.content_settings.content_type)
        self.assertIsNone(content.properties.content_settings.content_md5)
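
# The download tests above all share one progress-reporting pattern: a
# raw_response_hook that reads running totals off the pipeline response
# context, which assert_download_progress then inspects. A minimal
# standalone sketch of that pattern (account URL, key, and names are
# placeholders; not part of the test suite):
#
#     from azure.storage.blob import BlobServiceClient
#
#     service = BlobServiceClient(
#         "https://<account>.blob.core.windows.net", credential="<key>",
#         max_single_get_size=1024, max_chunk_get_size=1024)
#     blob = service.get_blob_client("mycontainer", "myblob")
#
#     progress = []
#
#     def on_response(response):
#         # During chunked downloads the pipeline exposes how many bytes of
#         # the stream have arrived so far and the total expected size.
#         progress.append((response.context['download_stream_current'],
#                          response.context['data_stream_total']))
#
#     content = blob.download_blob(raw_response_hook=on_response,
#                                  max_concurrency=2).readall()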
class StorageBlobRetryTest(StorageTestCase):
    def setUp(self):
        super(StorageBlobRetryTest, self).setUp()
        url = self._get_account_url()
        credential = self._get_shared_key_credential()
        retry = ExponentialRetry(initial_backoff=1, increment_base=2, retry_total=3)
        self.bs = BlobServiceClient(url, credential=credential, retry_policy=retry)
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            try:
                self.bs.create_container(self.container_name)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bs.delete_container(self.container_name)
            except HttpResponseError:
                pass
        return super(StorageBlobRetryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    class NonSeekableStream(object):
        def __init__(self, wrapped_stream):
            self.wrapped_stream = wrapped_stream

        def write(self, data):
            self.wrapped_stream.write(data)

        def read(self, count):
            return self.wrapped_stream.read(count)

        def seek(self, *args, **kwargs):
            raise UnsupportedOperation("boom!")

        def tell(self):
            return self.wrapped_stream.tell()

    @record
    def test_retry_put_block_with_seekable_stream(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = BytesIO(data)

        # rig the response so that it fails for a single time
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)
        blob.stage_block(1, data_stream, raw_response_hook=responder.override_first_status)

        # Assert
        _, uncommitted_blocks = blob.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=responder.override_first_status)
        self.assertEqual(len(uncommitted_blocks), 1)
        self.assertEqual(uncommitted_blocks[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        blob.commit_block_list(['1'], raw_response_hook=responder.override_first_status)

        # Assert
        content = blob.download_blob().readall()
        self.assertEqual(content, data)

    @record
    def test_retry_put_block_with_non_seekable_stream(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = self.NonSeekableStream(BytesIO(data))

        # rig the response so that it fails for a single time
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)
        # Note: put_block transforms non-seekable streams into byte arrays
        # before handing it off to the executor
        blob.stage_block(1, data_stream, raw_response_hook=responder.override_first_status)

        # Assert
        _, uncommitted_blocks = blob.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=responder.override_first_status)
        self.assertEqual(len(uncommitted_blocks), 1)
        self.assertEqual(uncommitted_blocks[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        blob.commit_block_list(['1'], raw_response_hook=responder.override_first_status)

        # Assert
        content = blob.download_blob().readall()
        self.assertEqual(content, data)

    @record
    def test_retry_put_block_with_non_seekable_stream_fail(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self.get_resource_name('blob')
        data = self.get_random_bytes(PUT_BLOCK_SIZE)
        data_stream = self.NonSeekableStream(BytesIO(data))

        # rig the response so that it fails for a single time
        responder = ResponseCallback(status=201, new_status=408)

        # Act
        blob = self.bs.get_blob_client(self.container_name, blob_name)
        with self.assertRaises(HttpResponseError) as error:
            blob.stage_block(1, data_stream, length=PUT_BLOCK_SIZE,
                             raw_response_hook=responder.override_first_status)

        # Assert
        self.assertEqual(error.exception.response.status_code, 408)
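
# The retry tests above lean on a ResponseCallback helper from the shared
# test base, which is not shown in this file. A plausible reconstruction is
# sketched below so the raw_response_hook usage is easier to follow; treat
# it as an assumption about the helper's shape, not the actual implementation:
#
#     class ResponseCallback(object):
#         def __init__(self, status=None, new_status=None):
#             self.status = status
#             self.new_status = new_status
#             self.first = True
#
#         def override_first_status(self, response):
#             # Rewrite only the first response whose status matches `status`,
#             # so the retry policy observes a single transient failure
#             # (201 -> 408 in these tests) and then sees the retry succeed.
#             if self.first and response.http_response.status_code == self.status:
#                 response.http_response.status_code = self.new_status
#                 self.first = False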
# create an output Asset
print("Creating output asset " + out_asset_name)
# From SDK
# create_or_update(resource_group_name, account_name, asset_name, parameters, custom_headers=None, raw=False, **operation_config)
outputAsset = client.assets.create_or_update(
    resource_group_name, account_name, out_asset_name, output_asset)

### Use the Storage SDK to upload the video ###
print("Uploading the file " + source_file)

# From SDK
# BlobServiceClient(account_url, credential=None, **kwargs)
blob_service_client = BlobServiceClient(
    account_url=storage_blob_url, credential=storage_account_key)

# From SDK
# get_blob_client(container, blob, snapshot=None)
blob_client = blob_service_client.get_blob_client(in_container, source_file)

# Upload the video to storage as a block blob
with open(source_file, "rb") as data:
    # From SDK
    # upload_blob(data, blob_type=<BlobType.BlockBlob: 'BlockBlob'>, length=None, metadata=None, **kwargs)
    blob_client.upload_blob(data, blob_type="BlockBlob")

### Create a Transform ###
transform_name = 'MyTrans' + str(thisRandom)

# From SDK
# TransformOutput(*, preset, on_error=None, relative_priority=None, **kwargs) -> None
transform_output = TransformOutput(
    preset=BuiltInStandardEncoderPreset(preset_name="AdaptiveStreaming"))

print("Creating transform " + transform_name)
# From SDK
# create_or_update(resource_group_name, account_name, transform_name, outputs, description=None, custom_headers=None, raw=False, **operation_config)
class StorageBlockBlobTest(StorageTestCase): def _setup(self, name, key): # test chunking functionality by reducing the size of each chunk, # otherwise the tests would take too long to execute self.bsc = BlobServiceClient(self.account_url(name, "blob"), credential=key, connection_data_block_size=4 * 1024, max_single_put_size=32 * 1024, max_block_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') if self.is_live: self.bsc.create_container(self.container_name) def _teardown(self, FILE_PATH): if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass #--Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(b'') return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bsc.get_blob_client(container_name, blob_name) actual_data = blob.download_blob() self.assertEqual(actual_data.readall(), expected_data) class NonSeekableFile(object): def __init__(self, wrapped_file): self.wrapped_file = wrapped_file def write(self, data): self.wrapped_file.write(data) def read(self, count): return self.wrapped_file.read(count) #--Test cases for block blobs -------------------------------------------- @GlobalStorageAccountPreparer() def test_put_block(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act for i in range(5): resp = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8')) self.assertIsNone(resp) # Assert @GlobalStorageAccountPreparer() def test_put_block_unicode(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act resp = blob.stage_block('1', u'啊齄丂狛狜') self.assertIsNone(resp) # Assert @GlobalStorageAccountPreparer() def test_put_block_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act blob.stage_block(1, b'block', validate_content=True) # Assert @GlobalStorageAccountPreparer() def test_put_block_list(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = [ BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3') ] put_block_list_resp = blob.commit_block_list(block_list) # Assert content = blob.download_blob() self.assertEqual(content.readall(), b'AAABBBCCC') self.assertEqual(content.properties.etag, put_block_list_resp.get('etag')) self.assertEqual(content.properties.last_modified, put_block_list_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_put_block_list_invalid_block_id(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act try: block_list = [ 
BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='4') ] blob.commit_block_list(block_list) self.fail() except HttpResponseError as e: self.assertGreaterEqual( str(e).find('specified block list is invalid'), 0) # Assert @GlobalStorageAccountPreparer() def test_put_block_list_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = [ BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3') ] blob.commit_block_list(block_list, validate_content=True) # Assert @GlobalStorageAccountPreparer() def test_put_block_list_with_blob_tier_specified(self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob_client = self.bsc.get_blob_client(self.container_name, blob_name) blob_client.stage_block('1', b'AAA') blob_client.stage_block('2', b'BBB') blob_client.stage_block('3', b'CCC') blob_tier = StandardBlobTier.Cool # Act block_list = [ BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3') ] blob_client.commit_block_list(block_list, standard_blob_tier=blob_tier) # Assert blob_properties = blob_client.get_blob_properties() self.assertEqual(blob_properties.blob_tier, blob_tier) @GlobalStorageAccountPreparer() def test_get_block_list_no_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act block_list = blob.get_block_list('all') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 0) @GlobalStorageAccountPreparer() def test_get_block_list_uncommitted_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') # Act block_list = blob.get_block_list('uncommitted') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 3) self.assertEqual(len(block_list[0]), 0) self.assertEqual(block_list[1][0].id, '1') self.assertEqual(block_list[1][0].size, 3) self.assertEqual(block_list[1][1].id, '2') self.assertEqual(block_list[1][1].size, 3) self.assertEqual(block_list[1][2].id, '3') self.assertEqual(block_list[1][2].size, 3) @GlobalStorageAccountPreparer() def test_get_block_list_committed_blocks(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.stage_block('1', b'AAA') blob.stage_block('2', b'BBB') blob.stage_block('3', b'CCC') block_list = [ BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3') ] blob.commit_block_list(block_list) # Act block_list = blob.get_block_list('committed') # Assert self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 3) 
self.assertEqual(block_list[0][0].id, '1') self.assertEqual(block_list[0][0].size, 3) self.assertEqual(block_list[0][1].id, '2') self.assertEqual(block_list[0][1].size, 3) self.assertEqual(block_list[0][2].id, '3') self.assertEqual(block_list[0][2].size, 3) @GlobalStorageAccountPreparer() def test_create_small_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = b'hello world' data2 = b'hello second world' # Act create_resp = blob.upload_blob(data1, overwrite=True) with self.assertRaises(ResourceExistsError): blob.upload_blob(data2, overwrite=False) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data1) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) @GlobalStorageAccountPreparer() def test_create_small_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = b'hello world' data2 = b'hello second world' # Act create_resp = blob.upload_blob(data1, overwrite=True) update_resp = blob.upload_blob(data2, overwrite=True) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data2) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) @GlobalStorageAccountPreparer() def test_create_large_block_blob_with_no_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE) # Act create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'}) with self.assertRaises(ResourceExistsError): blob.upload_blob(data2, overwrite=False, metadata={'blobdata': 'data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data1) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) self.assertEqual(props.metadata, {'blobdata': 'data1'}) self.assertEqual(props.size, LARGE_BLOB_SIZE) @GlobalStorageAccountPreparer() def test_create_large_block_blob_with_overwrite(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob(data1, overwrite=True, metadata={'blobdata': 'data1'}) update_resp = blob.upload_blob(data2, overwrite=True, metadata={'blobdata': 'data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data2) self.assertEqual(props.etag, 
update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.blob_type, BlobType.BlockBlob) self.assertEqual(props.metadata, {'blobdata': 'data2'}) self.assertEqual(props.size, LARGE_BLOB_SIZE + 512) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_single_put(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' # Act create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_blob_from_0_bytes(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'' # Act create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_from_bytes_blob_unicode(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act data = u'hello world' create_resp = blob.upload_blob(data) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data.encode('utf-8')) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_bytes_blob_with_lease_id(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) lease = blob.acquire_lease() # Act create_resp = blob.upload_blob(data, lease=lease) # Assert output = blob.download_blob(lease=lease) self.assertEqual(output.readall(), data) self.assertEqual(output.properties.etag, create_resp.get('etag')) self.assertEqual(output.properties.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_metadata(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random
order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) metadata = {'hello': 'world', 'number': '42'} # Act blob.upload_blob(data, metadata=metadata) # Assert md = blob.get_blob_properties().metadata self.assertDictEqual(md, metadata) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') blob.upload_blob(data, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_progress(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) create_resp = blob.upload_blob(data, raw_response_hook=callback) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data[3:]) # Assert self.assertEqual(data[3:], blob.download_blob().readall()) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index_and_count( self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data[3:], length=5) # Assert self.assertEqual(data[3:8], blob.download_blob().readall()) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_index_and_count_and_properties( self, resource_group, location, storage_account, storage_account_key): 
self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') blob.upload_blob(data[3:], length=5, content_settings=content_settings) # Assert self.assertEqual(data[3:8], blob.download_blob().readall()) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_non_parallel(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @GlobalStorageAccountPreparer() def test_create_blob_from_bytes_with_blob_tier_specified( self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob_client = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' blob_tier = StandardBlobTier.Cool # Act blob_client.upload_blob(data, standard_blob_tier=blob_tier) blob_properties = blob_client.get_blob_properties() # Assert self.assertEqual(blob_properties.blob_tier, blob_tier) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_path(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'create_blob_from_input.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def test_create_blob_from_path_non_parallel(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(100) FILE_PATH = 'create_blob_from_path_non_par.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: create_resp = blob.upload_blob(stream, length=100, max_concurrency=1) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def 
test_upload_blob_from_path_non_parallel_with_standard_blob_tier( self, resource_group, location, storage_account, storage_account_key): # Arrange self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(100) FILE_PATH = '_path_non_parallel_with_standard_blob.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) blob_tier = StandardBlobTier.Cool # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=100, max_concurrency=1, standard_blob_tier=blob_tier) props = blob.get_blob_properties() # Assert self.assertEqual(props.blob_tier, blob_tier) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_path_with_progress(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'create_blob_from_path_with_progr.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_path_with_properties(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_path_with_properties.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_stream_chunked_up.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') 
as stream: create_resp = blob.upload_blob(stream) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_frm_stream_nonseek_chunk_upld_knwn_size( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) blob_size = len(data) - 66 FILE_PATH = 'stream_nonseek_chunk_upld_knwn_size.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, length=blob_size, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_stream_nonseek_chunk_upld_unkwn_size( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'stream_nonseek_chunk_upld.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_with_progress_chunked_upload( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'stream_with_progress_chunked.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload_with_count( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'chunked_upload_with_count.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: resp = blob.upload_blob(stream, length=blob_size) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_from_stream_chunk_upload_with_cntandrops( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'from_stream_chunk_upload_with_cntandrops.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chnked_upload_with_properties( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'chnked_upload_with_properti.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_stream_chunked_upload_with_properties( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live # Arrange self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) FILE_PATH = 'blob_from_stream_chunked_upload.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: stream.write(data) blob_tier = StandardBlobTier.Cool # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 
'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2, standard_blob_tier=blob_tier) properties = blob.get_blob_properties() # Assert self.assertEqual(properties.blob_tier, blob_tier) self._teardown(FILE_PATH) @GlobalStorageAccountPreparer() def test_create_blob_from_text(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-8') # Act create_resp = blob.upload_blob(text) props = blob.get_blob_properties() # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assertEqual(props.etag, create_resp.get('etag')) self.assertEqual(props.last_modified, create_resp.get('last_modified')) @GlobalStorageAccountPreparer() def test_create_blob_from_text_with_encoding(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act blob.upload_blob(text, encoding='utf-16') # Assert self.assertBlobEqual(self.container_name, blob_name, data) @GlobalStorageAccountPreparer() def test_create_blob_from_text_with_encoding_and_progress( self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_from_text_chunked_upload(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_text_data(LARGE_BLOB_SIZE) encoded_data = data.encode('utf-8') # Act blob.upload_blob(data) # Assert self.assertBlobEqual(self.container_name, blob_name, encoded_data) @GlobalStorageAccountPreparer() def test_create_blob_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = b'hello world' # Act blob.upload_blob(data, validate_content=True) # Assert @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_blob_with_md5_chunked(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act blob.upload_blob(data, validate_content=True)
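# The two validate_content tests above rely on the SDK attaching a transactional
# Content-MD5 to each upload request. A minimal sketch of the equivalent manual
# digest, assuming only the standard library (an illustration of what
# validate_content=True automates, not the SDK's internals):
import hashlib

def transactional_md5(chunk):
    # Hash the exact bytes of a single request body, as the service verifies it.
    return hashlib.md5(chunk).hexdigest()

assert transactional_md5(b'hello world') == '5eb63bbbe01eeed093cb22bb8f5acdc3'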
conn_str="DefaultEndpointsProtocol=https;AccountName=ebhdevstorage" "001;AccountKey=QYSKZ1suXASpD3Cy67U7pkFHOOWPB0Jtl4MEOFF+CNn" "PDp72j4uDVEv9p5X7HTvpafiJpbakvBsyWiSHEqFDOQ==;EndpointSuff" "ix=core.windows.net", container_name="dailyinsightsmailing") blob_service_client = BlobServiceClient( account_url="https://ebhdevstorage001.blob.core.windows.net/", credential= "QYSKZ1suXASpD3Cy67U7pkFHOOWPB0Jtl4MEOFF+CNnPDp72j4uDVEv9p5X7HTvpafi" "JpbakvBsyWiSHEqFDOQ==") data_list = [] blob_list = container.list_blobs() for blob in blob_list: if date_string in blob.name: name = blob.name blob_client = blob_service_client.get_blob_client( blob=name, container="dailyinsightsmailing") stream = blob_client.download_blob().content_as_text() stream = stream.split("}}}") stream = stream[:-1] for ele in stream: ele = ele + "}}}" data_list.append(json.loads(ele)) no_of_clicks = 0 no_of_searches = 0 id_list = [] os_list = [] city_list = [] rank_list = [] tag_list = [] for ele in data_list: if ele['event'][0]['name'] == 'Click':
class AzureBlobClient(Client): """Client class for Azure Blob Storage which handles authentication with Azure for [`AzureBlobPath`](../azblobpath/) instances. See documentation for the [`__init__` method][cloudpathlib.azure.azblobclient.AzureBlobClient.__init__] for detailed authentication options. """ def __init__( self, account_url: Optional[str] = None, credential: Optional[Any] = None, connection_string: Optional[str] = None, blob_service_client: Optional["BlobServiceClient"] = None, local_cache_dir: Optional[Union[str, os.PathLike]] = None, ): """Class constructor. Sets up a [`BlobServiceClient`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python). Supports the following authentication methods of `BlobServiceClient`. - Environment variable `AZURE_STORAGE_CONNECTION_STRING` containing a connection string with account credentials. See [Azure Storage SDK documentation]( https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal). - Account URL via `account_url`, authenticated either with an embedded SAS token, or with credentials passed to `credential`. - Connection string via `connection_string`, authenticated either with an embedded SAS token or with credentials passed to `credential`. - Instantiated and already authenticated [`BlobServiceClient`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python). If multiple methods are used, priority order is reverse of list above (later in list takes priority). If no methods are used, a [`MissingCredentialsError`][cloudpathlib.exceptions.MissingCredentialsError] exception will be raised. Args: account_url (Optional[str]): The URL to the blob storage account, optionally authenticated with a SAS token. See documentation for [`BlobServiceClient`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python). credential (Optional[Any]): Credentials with which to authenticate. Can be used with `account_url` or `connection_string`, but is unnecessary if the other already has a SAS token. See documentation for [`BlobServiceClient`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python) or [`BlobServiceClient.from_connection_string`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python#from-connection-string-conn-str--credential-none----kwargs-). connection_string (Optional[str]): A connection string to an Azure Storage account. See [Azure Storage SDK documentation]( https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal). blob_service_client (Optional[BlobServiceClient]): Instantiated [`BlobServiceClient`]( https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python). local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache for downloaded files. If None, will use a temporary directory. 
""" if connection_string is None: connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING", None) if blob_service_client is not None: self.service_client = blob_service_client elif connection_string is not None: self.service_client = BlobServiceClient.from_connection_string( conn_str=connection_string, credential=credential) elif account_url is not None: self.service_client = BlobServiceClient(account_url=account_url, credential=credential) else: raise MissingCredentialsError( "AzureBlobClient does not support anonymous instantiation. " "Credentials are required; see docs for options.") super().__init__(local_cache_dir=local_cache_dir) def _get_metadata(self, cloud_path: AzureBlobPath) -> Dict[str, Any]: blob = self.service_client.get_blob_client( container=cloud_path.container, blob=cloud_path.blob) properties = blob.get_blob_properties() return properties def _download_file(self, cloud_path: AzureBlobPath, local_path: Union[str, os.PathLike]) -> Path: blob = self.service_client.get_blob_client( container=cloud_path.container, blob=cloud_path.blob) download_stream = blob.download_blob() local_path = Path(local_path) local_path.parent.mkdir(exist_ok=True, parents=True) local_path.write_bytes(download_stream.readall()) return local_path def _is_file_or_dir(self, cloud_path: AzureBlobPath) -> Optional[str]: # short-circuit the root-level container if not cloud_path.blob: return "dir" try: self._get_metadata(cloud_path) return "file" except ResourceNotFoundError: prefix = cloud_path.blob if prefix and not prefix.endswith("/"): prefix += "/" # not a file, see if it is a directory container_client = self.service_client.get_container_client( cloud_path.container) try: next(container_client.list_blobs(name_starts_with=prefix)) return "dir" except StopIteration: return None def _exists(self, cloud_path: AzureBlobPath) -> bool: return self._is_file_or_dir(cloud_path) in ["file", "dir"] def _list_dir(self, cloud_path: AzureBlobPath, recursive: bool = False) -> Iterable[AzureBlobPath]: container_client = self.service_client.get_container_client( cloud_path.container) prefix = cloud_path.blob if prefix and not prefix.endswith("/"): prefix += "/" yielded_dirs = set() # NOTE: Not recursive may be slower than necessary since it just filters # the recursive implementation for o in container_client.list_blobs(name_starts_with=prefix): # get directory from this path for parent in PurePosixPath(o.name[len(prefix):]).parents: # if we haven't surfaced thei directory already if parent not in yielded_dirs and str(parent) != ".": # skip if not recursive and this is beyond our depth if not recursive and "/" in str(parent): continue yield self.CloudPath( f"az://{cloud_path.container}/{prefix}{parent}") yielded_dirs.add(parent) # skip file if not recursive and this is beyond our depth if not recursive and "/" in o.name[len(prefix):]: continue yield self.CloudPath(f"az://{cloud_path.container}/{o.name}") def _move_file(self, src: AzureBlobPath, dst: AzureBlobPath, remove_src: bool = True) -> AzureBlobPath: # just a touch, so "REPLACE" metadata if src == dst: blob_client = self.service_client.get_blob_client( container=src.container, blob=src.blob) blob_client.set_blob_metadata(metadata=dict( last_modified=str(datetime.utcnow().timestamp()))) else: target = self.service_client.get_blob_client( container=dst.container, blob=dst.blob) source = self.service_client.get_blob_client( container=src.container, blob=src.blob) target.start_copy_from_url(source.url) if remove_src: self._remove(src) return dst def 
_remove(self, cloud_path: AzureBlobPath) -> None: if self._is_file_or_dir(cloud_path) == "dir": blobs = [ b.blob for b in self._list_dir(cloud_path, recursive=True) ] container_client = self.service_client.get_container_client( cloud_path.container) container_client.delete_blobs(*blobs) elif self._is_file_or_dir(cloud_path) == "file": blob = self.service_client.get_blob_client( container=cloud_path.container, blob=cloud_path.blob) blob.delete_blob() def _upload_file(self, local_path: Union[str, os.PathLike], cloud_path: AzureBlobPath) -> AzureBlobPath: blob = self.service_client.get_blob_client( container=cloud_path.container, blob=cloud_path.blob) blob.upload_blob(Path(local_path).read_bytes(), overwrite=True) return cloud_path
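# A short usage sketch for the client above; the connection-string placeholder
# and "my-container" path are hypothetical, and the calls mirror the private
# helpers defined in this class plus the CloudPath dispatch it already uses.
client = AzureBlobClient(connection_string="<AZURE_STORAGE_CONNECTION_STRING>")
blob_path = client.CloudPath("az://my-container/reports/summary.txt")
client._upload_file("summary.txt", blob_path)  # push a local file to the blob
local_copy = client._download_file(blob_path, "summary_copy.txt")  # and back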
class AzureBlobStorage: """Class for interacting with Azure Blob Storage.""" def __init__(self, abs_name: str, connect: bool = False): """Initialize connector for Azure Python SDK.""" self.connected = False self.abs_site = f"{abs_name}.blob.core.windows.net" self.credentials: Optional[AzCredentials] = None self.abs_client: Optional[BlobServiceClient] = None if connect: self.connect() def connect( self, auth_methods: Optional[List] = None, silent: bool = False, ): """Authenticate with the SDK.""" self.credentials = az_connect(auth_methods=auth_methods, silent=silent) if not self.credentials: raise CloudError("Could not obtain credentials.") self.abs_client = BlobServiceClient(self.abs_site, self.credentials.modern) if not self.abs_client: raise CloudError("Could not create a Blob Storage client.") self.connected = True def containers(self) -> pd.DataFrame: """Return containers in the Azure Blob Storage Account.""" try: container_list = self.abs_client.list_containers() # type:ignore except ServiceRequestError as err: raise CloudError( "Unable to connect; check the Azure Blob Storage account name." ) from err if container_list: containers_df = _parse_returned_items( container_list, remove_list=["lease", "encryption_scope"]) else: containers_df = None return containers_df def create_container(self, container_name: str, **kwargs) -> pd.DataFrame: """ Create a new container within the Azure Blob Storage account. Parameters ---------- container_name : str The name for the new container. Additional container parameters can be passed as kwargs Returns ------- pd.DataFrame Details of the created container. """ try: new_container = self.abs_client.create_container( # type: ignore container_name, **kwargs) # type:ignore except ResourceExistsError as err: raise CloudError( f"Container {container_name} already exists.") from err properties = new_container.get_container_properties() container_df = _parse_returned_items([properties], ["encryption_scope", "lease"]) return container_df def blobs(self, container_name: str) -> Optional[pd.DataFrame]: """ Get a list of blobs in a container. Parameters ---------- container_name : str The name of the container to get blobs from. Returns ------- pd.DataFrame Details of the blobs. """ container_client = self.abs_client.get_container_client( container_name) # type: ignore blobs = list(container_client.list_blobs()) return _parse_returned_items(blobs) if blobs else None def upload_to_blob(self, blob: Any, container_name: str, blob_name: str, overwrite: bool = True): """ Upload a blob of data. Parameters ---------- blob : Any The data to upload. container_name : str The name of the container to upload the blob to. blob_name : str The name to give the blob. overwrite : bool, optional Whether or not you want to overwrite the blob if it exists, by default True. """ try: blob_client = self.abs_client.get_blob_client( # type:ignore container=container_name, blob=blob_name) upload = blob_client.upload_blob(blob, overwrite=overwrite) except ResourceNotFoundError as err: raise CloudError( "Unknown container, check container name or create it first." ) from err if not upload["error_code"]: print("Upload complete") else: raise CloudError( f"There was a problem uploading the blob: {upload['error_code']}" ) return True def get_blob(self, container_name: str, blob_name: str) -> bytes: """ Get a blob from the Azure Blob Storage account. Parameters ---------- container_name : str The name of the container that holds the blob. blob_name : str The name of the blob to download. 
Returns ------- bytes The content of the blob in bytes. """ blob_client = self.abs_client.get_blob_client( # type: ignore container=container_name, blob=blob_name) if blob_client.exists(): data_stream = blob_client.download_blob() data = data_stream.content_as_bytes() else: raise CloudError( f"The blob {blob_name} does not exist in {container_name}") return data def delete_blob(self, container_name: str, blob_name: str) -> bool: """ Delete a blob from the Azure Blob Storage account. Parameters ---------- container_name : str The container name that has the blob. blob_name : str The name of the blob to delete. Note deleting a blob also deletes associated snapshots. Returns ------- bool True if blob successfully deleted """ blob_client = self.abs_client.get_blob_client( # type: ignore container=container_name, blob=blob_name) if blob_client.exists(): blob_client.delete_blob(delete_snapshots="include") else: raise CloudError( f"The blob {blob_name} does not exist in {container_name}") return True def get_sas_token( self, container_name: str, blob_name: str, end: Optional[datetime.datetime] = None, permission: str = "r", ) -> str: """ Generate a shared access signature (SAS) token for a blob. Parameters ---------- container_name : str The name of the Azure Blob Storage container that holds the blob. blob_name : str The name of the blob to generate the SAS token for. end : datetime.datetime, optional The datetime the SAS token should expire, by default this is 7 days from now. permission : str, optional The permissions to give the SAS token, by default 'r' for read. Returns ------- str A URI of the blob with SAS token. """ start = datetime.datetime.now() if not end: end = start + datetime.timedelta(days=7) key = self.abs_client.get_user_delegation_key(start, end) # type: ignore abs_name = self.abs_client.account_name # type: ignore sast = generate_blob_sas( abs_name, container_name, blob_name, user_delegation_key=key, permission=permission, expiry=end, start=start, ) full_path = f"https://{abs_name}.blob.core.windows.net/{container_name}/{blob_name}?{sast}" return full_path
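# A usage sketch for AzureBlobStorage under the assumption that az_connect can
# resolve credentials from the environment; the account, container, and blob
# names here are hypothetical.
abs_store = AzureBlobStorage("mystorageaccount", connect=True)
abs_store.create_container("uploads")
abs_store.upload_to_blob(b"some bytes", "uploads", "sample.bin")
payload = abs_store.get_blob("uploads", "sample.bin")
sas_uri = abs_store.get_sas_token("uploads", "sample.bin")  # read-only, 7 days
abs_store.delete_blob("uploads", "sample.bin")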
class StorageCPKTest(StorageTestCase): def setUp(self): super(StorageCPKTest, self).setUp() url = self._get_account_url() # test chunking functionality by reducing the size of each chunk, # otherwise the tests would take too long to execute self.bsc = BlobServiceClient( url, credential=self.settings.STORAGE_ACCOUNT_KEY, connection_data_block_size=1024, max_single_put_size=1024, min_large_block_upload_threshold=1024, max_block_size=1024, max_page_size=1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') # prep some test data so that they can be used in upload tests self.byte_data = self.get_random_bytes(64 * 1024) if not self.is_playback(): self.bsc.create_container(self.container_name) def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass return super(StorageCPKTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name("cpk") def _create_block_blob(self, blob_name=None, data=None, cpk=None, max_concurrency=1): blob_name = blob_name if blob_name else self._get_blob_reference() blob_client = self.bsc.get_blob_client(self.container_name, blob_name) data = data if data else b'' resp = blob_client.upload_blob(data, cpk=cpk, max_concurrency=max_concurrency) return blob_client, resp def _create_append_blob(self, cpk=None): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.create_append_blob(cpk=cpk) return blob def _create_page_blob(self, cpk=None): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.create_page_blob(1024 * 1024, cpk=cpk) return blob # -- Test cases for APIs supporting CPK ---------------------------------------------- @record def test_put_block_and_put_block_list(self): # Arrange blob_client, _ = self._create_block_blob() blob_client.stage_block('1', b'AAA', cpk=TEST_ENCRYPTION_KEY) blob_client.stage_block('2', b'BBB', cpk=TEST_ENCRYPTION_KEY) blob_client.stage_block('3', b'CCC', cpk=TEST_ENCRYPTION_KEY) # Act block_list = [ BlobBlock(block_id='1'), BlobBlock(block_id='2'), BlobBlock(block_id='3') ] put_block_list_resp = blob_client.commit_block_list( block_list, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(put_block_list_resp['etag']) self.assertIsNotNone(put_block_list_resp['last_modified']) self.assertTrue(put_block_list_resp['request_server_encrypted']) self.assertEqual(put_block_list_resp['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), b'AAABBBCCC') self.assertEqual(blob.properties.etag, put_block_list_resp['etag']) self.assertEqual(blob.properties.last_modified, put_block_list_resp['last_modified']) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) def test_create_block_blob_with_chunks(self): # parallel operation if TestMode.need_recording_file(self.test_mode): return # Arrange # to force the in-memory chunks to be used self.config.use_byte_buffer = True # Act # create_blob_from_bytes forces the in-memory chunks to be used blob_client, upload_response = self._create_block_blob( data=self.byte_data, 
cpk=TEST_ENCRYPTION_KEY, max_concurrency=2) # Assert self.assertIsNotNone(upload_response['etag']) self.assertIsNotNone(upload_response['last_modified']) self.assertTrue(upload_response['request_server_encrypted']) self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.etag, upload_response['etag']) self.assertEqual(blob.properties.last_modified, upload_response['last_modified']) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) def test_create_block_blob_with_sub_streams(self): # problem with the recording framework can only run live if TestMode.need_recording_file(self.test_mode): return # Act # create_blob_from_bytes forces the in-memory chunks to be used blob_client, upload_response = self._create_block_blob( data=self.byte_data, cpk=TEST_ENCRYPTION_KEY, max_concurrency=2) # Assert self.assertIsNotNone(upload_response['etag']) self.assertIsNotNone(upload_response['last_modified']) self.assertTrue(upload_response['request_server_encrypted']) self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.etag, upload_response['etag']) self.assertEqual(blob.properties.last_modified, upload_response['last_modified']) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_create_block_blob_with_single_chunk(self): # Act data = b'AAABBBCCC' # create_blob_from_bytes forces the in-memory chunks to be used blob_client, upload_response = self._create_block_blob( data=data, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(upload_response['etag']) self.assertIsNotNone(upload_response['last_modified']) self.assertTrue(upload_response['request_server_encrypted']) self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), data) self.assertEqual(blob.properties.etag, upload_response['etag']) self.assertEqual(blob.properties.last_modified, upload_response['last_modified']) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_put_block_from_url_and_commit_with_cpk(self): # Arrange # create source blob and get source blob url source_blob_name = self.get_resource_name("sourceblob") self.config.use_byte_buffer = True # Make sure using chunk upload, then we can record the request source_blob_client, _ = self._create_block_blob( blob_name=source_blob_name, data=self.byte_data) source_blob_sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + 
timedelta(hours=1)) source_blob_url = source_blob_client.url + "?" + source_blob_sas # create destination blob self.config.use_byte_buffer = False destination_blob_client, _ = self._create_block_blob( cpk=TEST_ENCRYPTION_KEY) # Act part 1: make put block from url calls destination_blob_client.stage_block_from_url( block_id=1, source_url=source_blob_url, source_offset=0, source_length=4 * 1024, cpk=TEST_ENCRYPTION_KEY) destination_blob_client.stage_block_from_url( block_id=2, source_url=source_blob_url, source_offset=4 * 1024, source_length=4 * 1024, cpk=TEST_ENCRYPTION_KEY) # Assert blocks committed, uncommitted = destination_blob_client.get_block_list('all') self.assertEqual(len(uncommitted), 2) self.assertEqual(len(committed), 0) # commit the blocks without cpk should fail block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2')] with self.assertRaises(HttpResponseError): destination_blob_client.commit_block_list(block_list) # Act commit the blocks with cpk should succeed put_block_list_resp = destination_blob_client.commit_block_list( block_list, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(put_block_list_resp['etag']) self.assertIsNotNone(put_block_list_resp['last_modified']) self.assertTrue(put_block_list_resp['request_server_encrypted']) self.assertEqual(put_block_list_resp['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data[0:8 * 1024]) self.assertEqual(blob.properties.etag, put_block_list_resp['etag']) self.assertEqual(blob.properties.last_modified, put_block_list_resp['last_modified']) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_append_block(self): # Arrange blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY) # Act for content in [b'AAA', b'BBB', b'CCC']: append_blob_prop = blob_client.append_block( content, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(append_blob_prop['etag']) self.assertIsNotNone(append_blob_prop['last_modified']) self.assertTrue(append_blob_prop['request_server_encrypted']) self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), b'AAABBBCCC') self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_append_block_from_url(self): # Arrange source_blob_name = self.get_resource_name("sourceblob") self.config.use_byte_buffer = True # chunk upload source_blob_client, _ = self._create_block_blob( blob_name=source_blob_name, data=self.byte_data) source_blob_sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1)) source_blob_url = source_blob_client.url + "?" 
+ source_blob_sas self.config.use_byte_buffer = False destination_blob_client = self._create_append_blob( cpk=TEST_ENCRYPTION_KEY) # Act append_blob_prop = destination_blob_client.append_block_from_url( source_blob_url, source_offset=0, source_length=4 * 1024, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(append_blob_prop['etag']) self.assertIsNotNone(append_blob_prop['last_modified']) # TODO: verify that the swagger is correct, header wasn't added for the response # self.assertTrue(append_blob_prop['request_server_encrypted']) self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): destination_blob_client.download_blob() # Act get the blob content blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data[0:4 * 1024]) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_create_append_blob_with_chunks(self): # Arrange blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY) # Act append_blob_prop = blob_client.upload_blob( self.byte_data, blob_type=BlobType.AppendBlob, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(append_blob_prop['etag']) self.assertIsNotNone(append_blob_prop['last_modified']) self.assertTrue(append_blob_prop['request_server_encrypted']) self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_update_page(self): # Arrange blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY) # Act page_blob_prop = blob_client.upload_page(self.byte_data, offset=0, length=len(self.byte_data), cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(page_blob_prop['etag']) self.assertIsNotNone(page_blob_prop['last_modified']) self.assertTrue(page_blob_prop['request_server_encrypted']) self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob( offset=0, length=len(self.byte_data), cpk=TEST_ENCRYPTION_KEY, ) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) @record def test_update_page_from_url(self): # Arrange source_blob_name = self.get_resource_name("sourceblob") self.config.use_byte_buffer = True # Make sure using chunk upload, then we can record the request source_blob_client, _ = self._create_block_blob( blob_name=source_blob_name, data=self.byte_data) source_blob_sas = source_blob_client.generate_shared_access_signature( permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1)) source_blob_url = source_blob_client.url + "?" 
+ source_blob_sas self.config.use_byte_buffer = False blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY) # Act page_blob_prop = blob_client.upload_pages_from_url( source_blob_url, offset=0, length=len(self.byte_data), source_offset=0, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(page_blob_prop['etag']) self.assertIsNotNone(page_blob_prop['last_modified']) self.assertTrue(page_blob_prop['request_server_encrypted']) # TODO: FIX SWAGGER # self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob( offset=0, length=len(self.byte_data), cpk=TEST_ENCRYPTION_KEY, ) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) def test_create_page_blob_with_chunks(self): if TestMode.need_recording_file(self.test_mode): return # Act blob_client = self.bsc.get_blob_client(self.container_name, self._get_blob_reference()) page_blob_prop = blob_client.upload_blob(self.byte_data, blob_type=BlobType.PageBlob, max_concurrency=2, cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(page_blob_prop['etag']) self.assertIsNotNone(page_blob_prop['last_modified']) self.assertTrue(page_blob_prop['request_server_encrypted']) self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash) # Act get the blob content without cpk should fail with self.assertRaises(HttpResponseError): blob_client.download_blob() # Act get the blob content blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # Assert content was retrieved with the cpk self.assertEqual(blob.content_as_bytes(), self.byte_data) self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) # TODO: verify why clear page works without providing cpk # @record # def test_clear_page(self): # # Arrange # blob_client = self.bsc.get_blob_client(self.container_name, self._get_blob_reference()) # data = self.get_random_bytes(1024) # blob_client.upload_blob(data, blob_type=BlobType.PageBlob, cpk=TEST_ENCRYPTION_KEY) # # # Act # blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY) # self.assertEquals(blob.content_as_bytes(), data) # # # with self.assertRaises(HttpResponseError): # # blob_client.clear_page(0, 511) # # resp = blob_client.clear_page(0, 511, cpk=TEST_ENCRYPTION_KEY) # blob = blob_client.download_blob(0, 511, cpk=TEST_ENCRYPTION_KEY) # # # Assert # self.assertIsNotNone(resp.get('etag')) # self.assertIsNotNone(resp.get('last_modified')) # self.assertIsNotNone(resp.get('blob_sequence_number')) # self.assertEquals(blob.content_as_bytes(), b'\x00' * 512) # # blob = blob_client.download_blob(512, 1023, cpk=TEST_ENCRYPTION_KEY) # self.assertEquals(blob.content_as_bytes(), data[512:]) @record def test_get_set_blob_metadata(self): # Arrange blob_client, _ = self._create_block_blob(data=b'AAABBBCCC', cpk=TEST_ENCRYPTION_KEY) # Act without the encryption key should fail with self.assertRaises(HttpResponseError): blob_client.get_blob_properties() # Act blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY) # Assert self.assertTrue(blob_props.server_encrypted) self.assertEqual(blob_props.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash) # Act set blob properties metadata = {'hello': 'world', 'number': '42', 'UP': 'UPval'} with 
self.assertRaises(HttpResponseError): blob_client.set_blob_metadata(metadata=metadata) blob_client.set_blob_metadata(metadata=metadata, cpk=TEST_ENCRYPTION_KEY) # Assert blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY) md = blob_props.metadata self.assertEqual(3, len(md)) self.assertEqual(md['hello'], 'world') self.assertEqual(md['number'], '42') self.assertEqual(md['UP'], 'UPval') self.assertFalse('up' in md) @record def test_snapshot_blob(self): # Arrange blob_client, _ = self._create_block_blob(data=b'AAABBBCCC', cpk=TEST_ENCRYPTION_KEY) # Act without cpk should not work with self.assertRaises(HttpResponseError): blob_client.create_snapshot() # Act with cpk should work blob_snapshot = blob_client.create_snapshot(cpk=TEST_ENCRYPTION_KEY) # Assert self.assertIsNotNone(blob_snapshot)
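# TEST_ENCRYPTION_KEY is a fixture defined outside this snippet. A plausible
# construction, assuming azure.storage.blob's CustomerProvidedEncryptionKey
# model (the raw key below is illustrative, not the suite's real value):
import base64
import hashlib
from azure.storage.blob import CustomerProvidedEncryptionKey

raw_key = b'\x00' * 32  # 256-bit AES key; prefer os.urandom(32) in practice
TEST_ENCRYPTION_KEY = CustomerProvidedEncryptionKey(
    key_value=base64.b64encode(raw_key).decode('utf-8'),
    key_hash=base64.b64encode(hashlib.sha256(raw_key).digest()).decode('utf-8'))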
# Class to hold API functions
class get_flightdata():

    def states_all(self):
        url = "https://opensky-network.org/api/states/all"
        response = requests.get(url).json()
        return response

# JSON response needs to be parsed and inserted into a table
json_data = get_flightdata().states_all()
flights = []
timepulled = json_data['time']  # epoch timestamp of this snapshot
for i in json_data['states']:
    if i[1].startswith('JBU'):  # keep only JetBlue (JBU) callsigns
        flights.append(i)

# Create a pandas data table from the response
headers = ['icao24', 'callsign', 'origin_country', 'time_position', 'last_contact', 'longitude', 'latitude', 'baro_altitude', 'on_ground', 'velocity', 'true_track', 'vertical_rate', 'sensors', 'geo_altitude', 'squawk', 'spi', 'position_source']
data = pd.DataFrame(flights, columns=headers)

# Connect to the Azure blob storage account and upload the table as CSV.
# The account key is read from the environment (the variable name here is
# illustrative and requires `import os`); a key should never be hardcoded in source.
credential = os.environ["OPENSKYSTORAGE_ACCOUNT_KEY"]
service = BlobServiceClient(account_url="https://openskystorage.blob.core.windows.net/", credential=credential)
theblob = service.get_blob_client(container="statesall", blob=str(time.time()))
theblob.upload_blob(data.to_csv(index=False))
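# Optional round-trip check (not part of the original script): read the CSV we
# just uploaded back into a DataFrame, assuming the same `service` client and
# `theblob` reference from above.
import io

downloaded = theblob.download_blob().readall()
df_check = pd.read_csv(io.BytesIO(downloaded))
assert len(df_check) == len(data)  # same number of JBU state vectors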
class StorageLargestBlockBlobTest(StorageTestCase): def _setup(self, storage_account, key, additional_policies=None, min_large_block_upload_threshold=1 * 1024 * 1024, max_single_put_size=32 * 1024): self.bsc = BlobServiceClient( self.account_url(storage_account, "blob"), credential=key, max_single_put_size=max_single_put_size, max_block_size=LARGEST_BLOCK_SIZE, min_large_block_upload_threshold=min_large_block_upload_threshold, _additional_pipeline_policies=additional_policies) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') self.container_name = self.container_name + str(uuid.uuid4()) if self.is_live: self.bsc.create_container(self.container_name) def _teardown(self, file_name): if path.isfile(file_name): try: remove(file_name) except: pass # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(b'') return blob # --Test cases for block blobs -------------------------------------------- @pytest.mark.live_test_only @pytest.mark.skip(reason="This takes really long time") @GlobalStorageAccountPreparer() def test_put_block_bytes_largest(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act data = urandom(LARGEST_BLOCK_SIZE) blockId = str(uuid.uuid4()).encode('utf-8') resp = blob.stage_block(blockId, data, length=LARGEST_BLOCK_SIZE) blob.commit_block_list([BlobBlock(blockId)]) block_list = blob.get_block_list() # Assert self.assertIsNotNone(resp) assert 'content_md5' in resp assert 'content_crc64' in resp assert 'request_id' in resp self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 1) self.assertEqual(block_list[0][0].size, LARGEST_BLOCK_SIZE) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_bytes_largest_without_network(self, resource_group, location, storage_account, storage_account_key): payload_dropping_policy = PayloadDroppingPolicy() credential_policy = _format_shared_key_credential( storage_account.name, storage_account_key) self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy]) blob = self._create_blob() # Act data = urandom(LARGEST_BLOCK_SIZE) blockId = str(uuid.uuid4()).encode('utf-8') resp = blob.stage_block(blockId, data, length=LARGEST_BLOCK_SIZE) blob.commit_block_list([BlobBlock(blockId)]) block_list = blob.get_block_list() # Assert self.assertIsNotNone(resp) assert 'content_md5' in resp assert 'content_crc64' in resp assert 'request_id' in resp self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 1) self.assertEqual(payload_dropping_policy.put_block_counter, 1) self.assertEqual(payload_dropping_policy.put_block_sizes[0], LARGEST_BLOCK_SIZE) @pytest.mark.live_test_only @pytest.mark.skip(reason="This takes really long time") @GlobalStorageAccountPreparer() def test_put_block_stream_largest(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob = self._create_blob() # Act stream = LargeStream(LARGEST_BLOCK_SIZE) blockId = str(uuid.uuid4()) requestId = str(uuid.uuid4()) resp = 
blob.stage_block(blockId, stream, length=LARGEST_BLOCK_SIZE, client_request_id=requestId) blob.commit_block_list([BlobBlock(blockId)]) block_list = blob.get_block_list() # Assert self.assertIsNotNone(resp) assert 'content_md5' in resp assert 'content_crc64' in resp assert 'request_id' in resp self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 1) self.assertEqual(block_list[0][0].size, LARGEST_BLOCK_SIZE) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_stream_largest_without_network(self, resource_group, location, storage_account, storage_account_key): payload_dropping_policy = PayloadDroppingPolicy() credential_policy = _format_shared_key_credential( storage_account.name, storage_account_key) self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy]) blob = self._create_blob() # Act stream = LargeStream(LARGEST_BLOCK_SIZE) blockId = str(uuid.uuid4()) requestId = str(uuid.uuid4()) resp = blob.stage_block(blockId, stream, length=LARGEST_BLOCK_SIZE, client_request_id=requestId) blob.commit_block_list([BlobBlock(blockId)]) block_list = blob.get_block_list() # Assert self.assertIsNotNone(resp) assert 'content_md5' in resp assert 'content_crc64' in resp assert 'request_id' in resp self.assertIsNotNone(block_list) self.assertEqual(len(block_list), 2) self.assertEqual(len(block_list[1]), 0) self.assertEqual(len(block_list[0]), 1) self.assertEqual(payload_dropping_policy.put_block_counter, 1) self.assertEqual(payload_dropping_policy.put_block_sizes[0], LARGEST_BLOCK_SIZE) @pytest.mark.live_test_only @pytest.mark.skip(reason="This takes really long time") @GlobalStorageAccountPreparer() def test_create_largest_blob_from_path(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) FILE_PATH = 'largest_blob_from_path.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: largeStream = LargeStream(LARGEST_BLOCK_SIZE, 100 * 1024 * 1024) chunk = largeStream.read() while chunk: stream.write(chunk) chunk = largeStream.read() # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_largest_blob_from_path_without_network( self, resource_group, location, storage_account, storage_account_key): payload_dropping_policy = PayloadDroppingPolicy() credential_policy = _format_shared_key_credential( storage_account.name, storage_account_key) self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy]) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) FILE_PATH = 'largest_blob_from_path.temp.{}.dat'.format( str(uuid.uuid4())) with open(FILE_PATH, 'wb') as stream: largeStream = LargeStream(LARGEST_BLOCK_SIZE, 100 * 1024 * 1024) chunk = largeStream.read() while chunk: stream.write(chunk) chunk = largeStream.read() # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self._teardown(FILE_PATH) self.assertEqual(payload_dropping_policy.put_block_counter, 1) self.assertEqual(payload_dropping_policy.put_block_sizes[0], LARGEST_BLOCK_SIZE) @pytest.mark.skip(reason="This takes really long time") 
@pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_largest_blob_from_stream_without_network( self, resource_group, location, storage_account, storage_account_key): payload_dropping_policy = PayloadDroppingPolicy() credential_policy = _format_shared_key_credential( storage_account.name, storage_account_key) self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy]) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) number_of_blocks = 50000 stream = LargeStream(LARGEST_BLOCK_SIZE * number_of_blocks) # Act blob.upload_blob(stream, max_concurrency=1) # Assert self.assertEqual(payload_dropping_policy.put_block_counter, number_of_blocks) self.assertEqual(payload_dropping_policy.put_block_sizes[0], LARGEST_BLOCK_SIZE) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_largest_blob_from_stream_single_upload_without_network( self, resource_group, location, storage_account, storage_account_key): payload_dropping_policy = PayloadDroppingPolicy() credential_policy = _format_shared_key_credential( storage_account.name, storage_account_key) self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy], max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE + 1) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) stream = LargeStream(LARGEST_SINGLE_UPLOAD_SIZE) # Act blob.upload_blob(stream, length=LARGEST_SINGLE_UPLOAD_SIZE, max_concurrency=1) # Assert self.assertEqual(payload_dropping_policy.put_block_counter, 0) self.assertEqual(payload_dropping_policy.put_blob_counter, 1)
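# ---------------------------------------------------------------------------
# The largest-block tests above depend on three helpers defined elsewhere in
# the test package: LargeStream, PayloadDroppingPolicy and
# _format_shared_key_credential. Minimal sketches follow, inferred from how
# the tests use them; the behavioral details are assumptions, not the
# package's actual implementations.
from azure.core.pipeline.policies import SansIOHTTPPolicy
from azure.storage.blob._shared.authentication import SharedKeyCredentialPolicy


class LargeStream:
    """Read-only stream that synthesizes `length` bytes lazily, so multi-GB
    payloads never have to fit in memory at once."""

    def __init__(self, length, initial_buffer_length=1024 * 1024):
        self._remaining = length
        self._buffer_length = initial_buffer_length

    def read(self, size=None):
        if self._remaining <= 0:
            return b''
        n = min(self._remaining, self._buffer_length if size is None else size)
        self._remaining -= n
        return b'a' * n


def _body_size(body):
    # bytes-like payloads know their length; streams are measured by seeking
    if isinstance(body, (bytes, bytearray)):
        return len(body)
    current = body.tell()
    body.seek(0, 2)
    size = body.tell() - current
    body.seek(current)
    return size


class PayloadDroppingPolicy(SansIOHTTPPolicy):
    """Counts Put Block / Put Blob calls and replaces their bodies with a tiny
    placeholder, so the "without_network" tests never transfer the real bytes."""

    def __init__(self):
        self.put_block_counter = 0
        self.put_block_sizes = []
        self.put_blob_counter = 0

    def on_request(self, request):
        http_request = request.http_request
        if http_request.method != 'PUT' or http_request.body is None:
            return
        if 'comp=block' in http_request.url:
            self.put_block_counter += 1
            self.put_block_sizes.append(_body_size(http_request.body))
        elif 'comp=' not in http_request.url:
            self.put_blob_counter += 1
        http_request.body = b'dropped'  # must run before the signing policy
        http_request.headers['Content-Length'] = str(len(http_request.body))


def _format_shared_key_credential(account_name, account_key):
    # wraps the SDK's shared-key signing policy so it can be appended to the
    # pipeline *after* the payload has been dropped, keeping the signature valid
    return SharedKeyCredentialPolicy(account_name=account_name, account_key=account_key)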
class StorageLargeBlockBlobTest(StorageTestCase): def _setup(self, name, key): # test chunking functionality by reducing the threshold # for chunking and the size of each chunk, otherwise # the tests would take too long to execute self.bsc = BlobServiceClient(self.account_url(name, "blob"), credential=key, max_single_put_size=32 * 1024, max_block_size=2 * 1024 * 1024, min_large_block_upload_threshold=1 * 1024 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') if self.is_live: self.bsc.create_container(self.container_name) def _teardown(self, file_name): if path.isfile(file_name): try: remove(file_name) except: pass # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(b'') return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bsc.get_blob_client(container_name, blob_name) actual_data = blob.download_blob() self.assertEqual(b"".join(list(actual_data.chunks())), expected_data) # --Test cases for block blobs -------------------------------------------- @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_bytes_large(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act for i in range(5): resp = blob.stage_block('block {0}'.format(i).encode('utf-8'), urandom(LARGE_BLOCK_SIZE)) self.assertIsNone(resp) # Assert @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_bytes_large_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act for i in range(5): resp = blob.stage_block('block {0}'.format(i).encode('utf-8'), urandom(LARGE_BLOCK_SIZE), validate_content=True) self.assertIsNone(resp) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_stream_large(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act for i in range(5): stream = BytesIO(bytearray(LARGE_BLOCK_SIZE)) resp = blob.stage_block( 'block {0}'.format(i).encode('utf-8'), stream, length=LARGE_BLOCK_SIZE) self.assertIsNone(resp) # Assert @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_put_block_stream_large_with_md5(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob = self._create_blob() # Act for i in range(5): stream = BytesIO(bytearray(LARGE_BLOCK_SIZE)) resp = blob.stage_block( 'block {0}'.format(i).encode('utf-8'), stream, length=LARGE_BLOCK_SIZE, validate_content=True) self.assertIsNone(resp) # Assert @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_path(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'large_blob_from_path.temp.dat' with open(FILE_PATH, 'wb')
as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_path_with_md5(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = "blob_from_path_with_md5.temp.dat" with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, validate_content=True, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_path_non_parallel(self, resource_group, location, storage_account, storage_account_key): self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(self.get_random_bytes(100)) FILE_PATH = "blob_from_path_non_parallel.temp.dat" with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_path_with_progress(self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = "blob_from_path_with_progress.temp.dat" with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_path_with_properties( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'blob_from_path_with_properties.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = 
blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_stream_chunked_upload( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'blob_from_stream_chunked_upload.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_creat_lrgblob_frm_stream_w_progress_chnkd_upload( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'stream_w_progress_chnkd_upload.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_create_large_blob_from_stream_chunked_upload_with_count( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'chunked_upload_with_count.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_creat_lrgblob_frm_strm_chnkd_uplod_w_count_n_props( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'plod_w_count_n_props.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') blob_size = len(data) - 301 with 
open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH) @pytest.mark.live_test_only @GlobalStorageAccountPreparer() def test_creat_lrg_blob_frm_stream_chnked_upload_w_props( self, resource_group, location, storage_account, storage_account_key): # parallel tests introduce random order of requests, can only run live self._setup(storage_account.name, storage_account_key) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(urandom(LARGE_BLOB_SIZE)) FILE_PATH = 'creat_lrg_blob.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) self._teardown(FILE_PATH)
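# ---------------------------------------------------------------------------
# The progress tests above call self.assert_upload_progress from the shared
# StorageTestCase base, which is not shown here. A rough sketch of the
# invariants such a check can enforce, inferred from the (current, total)
# tuples the callbacks record; the real implementation may differ:
def assert_upload_progress_sketch(size, max_chunk_size, progress):
    assert progress, "expected at least one progress callback"
    # every callback reports the full upload size as the total ...
    assert all(total == size for _, total in progress)
    # ... and never claims to have sent more than that
    assert all(current <= size for current, _ in progress)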
def test_adfv2_dataflows_adlsgen2_delete_piicolumns(pytestconfig):
    # https://docs.microsoft.com/en-us/samples/azure-samples/data-lake-analytics-python-auth-options/authenticating-your-python-application-against-azure-active-directory/
    # access_token = credentials.token["access_token"]
    adfv2name = pytestconfig.getoption('adfv2name')
    adlsgen2stor = pytestconfig.getoption('adlsgen2stor')
    accesskeyadls = pytestconfig.getoption('accesskeyadls')
    subscriptionid = pytestconfig.getoption('subscriptionid')
    rg = pytestconfig.getoption('rg')

    # Since the Azure DevOps SPN created the ADFv2 instance, it has owner rights
    # and can execute the pipeline using REST (Contributor is minimally required)
    tokenadf = pytestconfig.getoption('tokenadf')
    adfv2namepipeline = "adlsgen2-dataflows-delete-piicolumns"
    url = "https://management.azure.com/subscriptions/{}/resourceGroups/{}/providers/Microsoft.DataFactory/factories/{}/pipelines/{}/createRun?api-version=2018-06-01".format(subscriptionid, rg, adfv2name, adfv2namepipeline)
    response = requests.post(url, headers={'Authorization': "Bearer " + tokenadf}, json={"outputfolder": "curated"})
    assert response.status_code == 200, "test failed, pipeline not started, " + str(response.content)

    runid = response.json()['runId']
    count = 0
    while True:
        response = requests.get(
            "https://management.azure.com/subscriptions/{}/resourceGroups/{}/providers/Microsoft.DataFactory/factories/{}/pipelineruns/{}?api-version=2018-06-01".format(subscriptionid, rg, adfv2name, runid),
            headers={'Authorization': "Bearer " + tokenadf}
        )
        status = response.json()['status']
        if status == "InProgress" or status == "Queued":
            count += 1
            if count < 30:
                time.sleep(30)  # wait 30 seconds before the next status poll
            else:
                break  # timeout
        else:
            break  # pipeline has reached an end state, polling is done

    assert count < 30, "test failed, time out"

    # credential = CustomTokenCredential(tokenadls)
    credential = accesskeyadls
    storage_account_source_url = "https://" + adlsgen2stor + ".blob.core.windows.net"
    client_source = BlobServiceClient(account_url=storage_account_source_url, credential=credential)
    container_source = client_source.get_container_client("curated")

    blob_list = container_source.list_blobs(include=['snapshots'])
    for blob in blob_list:
        bottled_file = blob.name
        assert bottled_file == "AdultCensusIncomePIIremoved.parquet", "parquet file not found"

    blob_client = client_source.get_blob_client(container="curated", blob="AdultCensusIncomePIIremoved.parquet")
    with open("AdultCensusIncomePIIremoved.parquet", "wb") as my_blob:
        download_stream = blob_client.download_blob()
        my_blob.write(download_stream.readall())

    # Scan the parquet schema; the loop stops early only if the PII column "age" survived
    parquet_file = pq.ParquetFile('AdultCensusIncomePIIremoved.parquet')
    i = 0
    while i < parquet_file.metadata.row_group(0).num_columns:
        print(parquet_file.metadata.row_group(0).column(i).path_in_schema)
        if parquet_file.metadata.row_group(0).column(i).path_in_schema == "age":
            break
        i += 1

    assert i == parquet_file.metadata.row_group(0).num_columns, "PII age data still present"
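# The commented-out CustomTokenCredential above suggests a thin TokenCredential
# wrapper around a pre-fetched AAD bearer token. A minimal sketch under that
# assumption (the class shape is inferred, not taken from this repo):
import time
from azure.core.credentials import AccessToken


class CustomTokenCredential(object):
    def __init__(self, token, expires_in=3600):
        self._token = token
        self._expires_on = int(time.time()) + expires_in

    def get_token(self, *scopes, **kwargs):
        # BlobServiceClient calls this through its bearer-token pipeline policy
        return AccessToken(self._token, self._expires_on)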
class StorageAppendBlobTest(StorageTestCase): def setUp(self): super(StorageAppendBlobTest, self).setUp() url = self._get_account_url() credential = self._get_shared_key_credential() self.bsc = BlobServiceClient(url, credential=credential, max_block_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') if not self.is_playback(): self.bsc.create_container(self.container_name) def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass return super(StorageAppendBlobTest, self).tearDown() #--Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.create_append_blob() return blob def assertBlobEqual(self, blob, expected_data): stream = blob.download_blob() actual_data = b"".join(list(stream)) self.assertEqual(actual_data, expected_data) class NonSeekableFile(object): def __init__(self, wrapped_file): self.wrapped_file = wrapped_file def write(self, data): self.wrapped_file.write(data) def read(self, count): return self.wrapped_file.read(count) #--Test cases for block blobs -------------------------------------------- @record def test_create_blob(self): # Arrange blob_name = self._get_blob_reference() # Act blob = self.bsc.get_blob_client(self.container_name, blob_name) create_resp = blob.create_append_blob() # Assert blob_properties = blob.get_blob_properties() self.assertIsNotNone(blob_properties) self.assertEqual(blob_properties.etag, create_resp.get('etag')) self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified')) @record def test_create_blob_with_lease_id(self): # Arrange blob = self._create_blob() # Act lease = blob.acquire_lease() create_resp = blob.create_append_blob(lease=lease) # Assert blob_properties = blob.get_blob_properties() self.assertIsNotNone(blob_properties) self.assertEqual(blob_properties.etag, create_resp.get('etag')) self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified')) @record def test_create_blob_with_metadata(self): # Arrange metadata = {'hello': 'world', 'number': '42'} blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.create_append_blob(metadata=metadata) # Assert md = blob.get_blob_properties().metadata self.assertDictEqual(md, metadata) @record def test_append_block(self): # Arrange blob = self._create_blob() # Act for i in range(5): resp = blob.append_block(u'block {0}'.format(i).encode('utf-8')) self.assertEqual(int(resp['blob_append_offset']), 7 * i) self.assertEqual(resp['blob_committed_block_count'], i + 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4') @record def test_append_block_unicode(self): # Arrange blob = self._create_blob() # Act resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16') self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_append_block_with_md5(self): # Arrange blob = self._create_blob() # Act resp = blob.append_block(b'block', validate_content=True) 
self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_create_append_blob_with_no_overwrite(self): # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob(data1, overwrite=True, blob_type=BlobType.AppendBlob, metadata={'BlobData': 'Data1'}) update_resp = blob.upload_blob(data2, overwrite=False, blob_type=BlobType.AppendBlob, metadata={'BlobData': 'Data2'}) props = blob.get_blob_properties() # Assert appended_data = data1 + data2 self.assertBlobEqual(blob, appended_data) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.blob_type, BlobType.AppendBlob) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.metadata, {'BlobData': 'Data1'}) self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512) @record def test_create_append_blob_with_overwrite(self): # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data1 = self.get_random_bytes(LARGE_BLOB_SIZE) data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512) # Act create_resp = blob.upload_blob(data1, overwrite=True, blob_type=BlobType.AppendBlob, metadata={'BlobData': 'Data1'}) update_resp = blob.upload_blob(data2, overwrite=True, blob_type=BlobType.AppendBlob, metadata={'BlobData': 'Data2'}) props = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data2) self.assertEqual(props.etag, update_resp.get('etag')) self.assertEqual(props.last_modified, update_resp.get('last_modified')) self.assertEqual(props.metadata, {'BlobData': 'Data2'}) self.assertEqual(props.blob_type, BlobType.AppendBlob) self.assertEqual(props.size, LARGE_BLOB_SIZE + 512) @record def test_append_blob_from_bytes(self): # Arrange blob = self._create_blob() # Act data = b'abcdefghijklmnopqrstuvwxyz' append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data) self.assertEqual(blob_properties.etag, append_resp['etag']) self.assertEqual(blob_properties.last_modified, append_resp['last_modified']) @record def test_append_blob_from_0_bytes(self): # Arrange blob = self._create_blob() # Act data = b'' append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) # appending nothing should not make any network call self.assertIsNone(append_resp.get('etag')) self.assertIsNone(append_resp.get('last_modified')) @record def test_append_blob_from_bytes_with_progress(self): # Arrange blob = self._create_blob() data = b'abcdefghijklmnopqrstuvwxyz' # Act progress = [] def progress_gen(upload): progress.append((0, len(upload))) yield upload upload_data = progress_gen(data) blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) @record def test_append_blob_from_bytes_with_index(self): # Arrange blob = self._create_blob() # Act data = b'abcdefghijklmnopqrstuvwxyz' blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data[3:]) @record def test_append_blob_from_bytes_with_index_and_count(self): # Arrange 
blob = self._create_blob() # Act data = b'abcdefghijklmnopqrstuvwxyz' blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data[3:8]) @record def test_append_blob_from_bytes_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data) self.assertEqual(blob_properties.etag, append_resp['etag']) self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified')) @record def test_append_blob_from_bytes_with_progress_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) # Act progress = [] def progress_gen(upload): n = self.config.max_block_size total = len(upload) current = 0 while upload: progress.append((current, total)) yield upload[:n] current += len(upload[:n]) upload = upload[n:] upload_data = progress_gen(data) blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) @record def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) index = 33 blob_size = len(data) - 66 # Act blob.upload_blob(data[index:], length=blob_size, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data[index:index + blob_size]) @record def test_append_blob_from_path_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data) self.assertEqual(blob_properties.etag, append_resp.get('etag')) self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified')) @record def test_append_blob_from_path_with_progress_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def progress_gen(upload): n = self.config.max_block_size total = LARGE_BLOB_SIZE current = 0 while upload: chunk = upload.read(n) if not chunk: break progress.append((current, total)) yield chunk current += len(chunk) with open(FILE_PATH, 'rb') as stream: upload_data = progress_gen(stream) blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) @record def test_append_blob_from_stream_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data) self.assertEqual(blob_properties.etag, append_resp.get('etag')) self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified')) @record def test_append_blob_from_stream_non_seekable_chunked_upload_known_size( self): # Arrange blob = self._create_blob() data = 
self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) blob_size = len(data) - 66 # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, length=blob_size, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data[:blob_size]) @record def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size( self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream) blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) @record def test_append_blob_from_stream_with_multiple_appends(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream1: stream1.write(data) with open(FILE_PATH, 'wb') as stream2: stream2.write(data) # Act with open(FILE_PATH, 'rb') as stream1: blob.upload_blob(stream1, blob_type=BlobType.AppendBlob) with open(FILE_PATH, 'rb') as stream2: blob.upload_blob(stream2, blob_type=BlobType.AppendBlob) # Assert data = data * 2 self.assertBlobEqual(blob, data) @record def test_append_blob_from_stream_chunked_upload_with_count(self): # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data[:blob_size]) def test_append_blob_from_stream_chunked_upload_with_count_parallel(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._create_blob() data = self.get_random_bytes(LARGE_BLOB_SIZE) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: append_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data[:blob_size]) self.assertEqual(blob_properties.etag, append_resp.get('etag')) self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified')) @record def test_append_blob_from_text(self): # Arrange blob = self._create_blob() text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-8') # Act append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob) blob_properties = blob.get_blob_properties() # Assert self.assertBlobEqual(blob, data) self.assertEqual(blob_properties.etag, append_resp.get('etag')) self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified')) @record def test_append_blob_from_text_with_encoding(self): # Arrange blob = self._create_blob() text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act blob.upload_blob(text, encoding='utf-16', blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, data) @record def test_append_blob_from_text_with_encoding_and_progress(self): # Arrange blob = self._create_blob() text = u'hello 啊齄丂狛狜 world' data = text.encode('utf-16') # Act progress = [] def progress_gen(upload): progress.append((0, len(data))) yield upload upload_data = progress_gen(text) blob.upload_blob(upload_data, 
encoding='utf-16', blob_type=BlobType.AppendBlob) # Assert self.assert_upload_progress(len(data), self.config.max_block_size, progress) @record def test_append_blob_from_text_chunked_upload(self): # Arrange blob = self._create_blob() data = self.get_random_text_data(LARGE_BLOB_SIZE) encoded_data = data.encode('utf-8') # Act blob.upload_blob(data, blob_type=BlobType.AppendBlob) # Assert self.assertBlobEqual(blob, encoded_data) @record def test_append_blob_with_md5(self): # Arrange blob = self._create_blob() data = b'hello world' # Act blob.append_block(data, validate_content=True)
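# ---------------------------------------------------------------------------
# Quick usage sketch (illustrative, not part of the suite) of the append-block
# contract the tests above assert on: each append reports the offset it was
# written at and the running committed-block count.
def demo_append(bsc, container_name, blob_name):
    blob = bsc.get_blob_client(container_name, blob_name)
    blob.create_append_blob()
    for i, chunk in enumerate([b'alpha', b'bravo', b'gamma']):  # 5 bytes each
        resp = blob.append_block(chunk)
        assert int(resp['blob_append_offset']) == 5 * i
        assert resp['blob_committed_block_count'] == i + 1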
class AzureBlobFileSystem(AbstractFileSystem): def __init__( self, account_name: str, account_key: str = None, connection_string: str = None, credential: str = None, sas_token: str = None, request_session=None, socket_timeout: int = None, client_id: str = None, client_secret: str = None, tenant_id: str = None, ): super().__init__() self.account_name = account_name self.account_key = account_key self.connection_string = connection_string self.credential = credential self.sas_token = sas_token self.request_session = request_session self.socket_timeout = socket_timeout self.client_id = client_id self.client_secret = client_secret self.tenant_id = tenant_id if (self.credential is None and self.account_key is None and self.sas_token is None and self.client_id is not None): self.credential = self._get_credential_from_service_principal() self.do_connect() def _get_credential_from_service_principal(self): """ Create a TokenCredential for authentication from the configured client_id, client_secret and tenant_id Returns ------- Credential """ from azure.identity import ClientSecretCredential sp_token = ClientSecretCredential( tenant_id=self.tenant_id, client_id=self.client_id, client_secret=self.client_secret, ) return sp_token def do_connect(self): """Connect to the BlobServiceClient, using user-specified connection details. Tries the explicit credential first, then the connection string, the account key and the SAS token, and finally falls back to anonymous access Raises ------ ValueError if the client cannot be constructed from the given connection details """ try: self.account_url: str = f"https://{self.account_name}.blob.core.windows.net" if self.credential is not None: self.service_client = BlobServiceClient( account_url=self.account_url, credential=self.credential) elif self.connection_string is not None: self.service_client = BlobServiceClient.from_connection_string( conn_str=self.connection_string) elif self.account_key is not None: self.service_client = BlobServiceClient( account_url=self.account_url, credential=self.account_key) elif self.sas_token is not None: self.service_client = BlobServiceClient( account_url=self.account_url + self.sas_token, credential=None) else: self.service_client = BlobServiceClient( account_url=self.account_url) except Exception as e: raise ValueError(f"unable to connect to account: {e}") def exists(self, path): """ Checks whether the given path exists in the File System Returns ------- Boolean """ split_path = path.split("/") container_name = split_path[0] sub_path = "/".join(split_path[1:]) container = self.service_client.get_container_client(container_name) it = container.list_blobs(name_starts_with=sub_path) return len(list(it)) > 0 def ls(self, path, refresh=True): """ Lists all the files under the given path (delegates to find; refresh is accepted for API compatibility but unused) Returns ------- List of full paths of all files found in given path """ return self.find(path) def isfile(self, path): """Is this entry file-like? Azure fs only stores paths to files and not folders.
This is always true """ return True def find(self, path): """ Finds all the files in the given path in the File System Returns ------- List of full paths of all files found in given path """ split_path = path.split("/") container_name = split_path[0] sub_path = "/".join(split_path[1:]) container = self.service_client.get_container_client(container_name) it = container.list_blobs(name_starts_with=sub_path) return [f"{container_name}/{item['name']}" for item in it] def rm(self, path, recursive=False, maxdepth=None): """Removes all the files in the given path""" split_path = path.split("/") container_name = split_path[0] sub_path = "/".join(split_path[1:]) container = self.service_client.get_container_client(container_name) it = container.list_blobs(name_starts_with=sub_path) for item in it: container.delete_blob(item) def makedirs(self, path, exist_ok=False): """Recursively creates directories in path""" # in azure empty directories have no meaning, so makedirs not needed return def get_mapper(self, root, check=False, create=False): """Create key-value interface for given root""" return FSMap(root, self) def upload(self, path, value): """Uploads value to the given path""" split_path = path.split("/") container_name = split_path[0] sub_path = "/".join(split_path[1:]) blob_client = self.service_client.get_blob_client( container_name, sub_path) blob_client.upload_blob(value, overwrite=True) def download(self, path): """Downloads the value from the given path""" if not self.exists(path): raise KeyError() split_path = path.split("/") container_name = split_path[0] sub_path = "/".join(split_path[1:]) blob_client = self.service_client.get_blob_client( container_name, sub_path) return blob_client.download_blob().readall() def cat_file(self, path): return self.download(path) def pipe_file(self, path, value): return self.upload(path, value)
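# Usage sketch for AzureBlobFileSystem (illustrative; the account, container
# and paths below are placeholders):
fs = AzureBlobFileSystem(account_name="myaccount", account_key="<account-key>")
fs.upload("mycontainer/data/hello.txt", b"hello world")
print(fs.ls("mycontainer/data"))                  # ['mycontainer/data/hello.txt']
print(fs.download("mycontainer/data/hello.txt"))  # b'hello world'
mapper = fs.get_mapper("mycontainer/data")        # key-value view for zarr-style consumers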
class StorageBlobEncryptionTest(StorageTestCase): def setUp(self): super(StorageBlobEncryptionTest, self).setUp() url = self._get_account_url() credential = self._get_shared_key_credential() # test chunking functionality by reducing the threshold # for chunking and the size of each chunk, otherwise # the tests would take too long to execute self.bsc = BlobServiceClient( url, credential=credential, max_single_put_size=32 * 1024, max_block_size=4 * 1024, max_page_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') self.blob_types = (BlobType.BlockBlob, BlobType.PageBlob, BlobType.AppendBlob) self.bytes = b'Foo' if not self.is_playback(): container = self.bsc.get_container_client(self.container_name) container.create_container() def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass if path.isfile(FILE_PATH): try: remove(FILE_PATH) except: pass return super(StorageBlobEncryptionTest, self).tearDown() #--Helpers----------------------------------------------------------------- def _get_container_reference(self): return self.get_resource_name(TEST_CONTAINER_PREFIX) def _get_blob_reference(self, blob_type): return self.get_resource_name(TEST_BLOB_PREFIXES[blob_type.value]) def _create_small_blob(self, blob_type): blob_name = self._get_blob_reference(blob_type) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(self.bytes, blob_type=blob_type) return blob #--Test cases for blob encryption ---------------------------------------- @record def test_missing_attribute_kek_wrap(self): # In the shared method _generate_blob_encryption_key # Arrange self.bsc.require_encryption = True valid_key = KeyWrapper('key1') # Act invalid_key_1 = lambda: None #functions are objects, so this effectively creates an empty object invalid_key_1.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm invalid_key_1.get_kid = valid_key.get_kid # No attribute wrap_key self.bsc.key_encryption_key = invalid_key_1 with self.assertRaises(AttributeError): self._create_small_blob(BlobType.BlockBlob) invalid_key_2 = lambda: None #functions are objects, so this effectively creates an empty object invalid_key_2.wrap_key = valid_key.wrap_key invalid_key_2.get_kid = valid_key.get_kid # No attribute get_key_wrap_algorithm self.bsc.key_encryption_key = invalid_key_2 with self.assertRaises(AttributeError): self._create_small_blob(BlobType.BlockBlob) invalid_key_3 = lambda: None #functions are objects, so this effectively creates an empty object invalid_key_3.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm invalid_key_3.wrap_key = valid_key.wrap_key # No attribute get_kid self.bsc.key_encryption_key = invalid_key_3 with self.assertRaises(AttributeError): self._create_small_blob(BlobType.BlockBlob) @record def test_invalid_value_kek_wrap(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.key_encryption_key.get_key_wrap_algorithm = None try: self._create_small_blob(BlobType.BlockBlob) self.fail() except AttributeError as e: self.assertEqual(str(e), _ERROR_OBJECT_INVALID.format('key encryption key', 'get_key_wrap_algorithm')) self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.key_encryption_key.get_kid = None with self.assertRaises(AttributeError): self._create_small_blob(BlobType.BlockBlob) self.bsc.key_encryption_key = KeyWrapper('key1')
self.bsc.key_encryption_key.wrap_key = None with self.assertRaises(AttributeError): self._create_small_blob(BlobType.BlockBlob) @record def test_missing_attribute_kek_unwrap(self): # Shared between all services in decrypt_blob # Arrange self.bsc.require_encryption = True valid_key = KeyWrapper('key1') self.bsc.key_encryption_key = valid_key blob = self._create_small_blob(BlobType.BlockBlob) # Act # Note that KeyWrapper has a default value for key_id, so these Exceptions # are not due to non_matching kids. invalid_key_1 = lambda: None #functions are objects, so this effectively creates an empty object invalid_key_1.get_kid = valid_key.get_kid #No attribute unwrap_key blob.key_encryption_key = invalid_key_1 with self.assertRaises(HttpResponseError): blob.download_blob().content_as_bytes() invalid_key_2 = lambda: None #functions are objects, so this effectively creates an empty object invalid_key_2.unwrap_key = valid_key.unwrap_key blob.key_encryption_key = invalid_key_2 #No attribute get_kid with self.assertRaises(HttpResponseError): blob.download_blob().content_as_bytes() @record def test_invalid_value_kek_unwrap(self): if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act blob.key_encryption_key = KeyWrapper('key1') blob.key_encryption_key.unwrap_key = None with self.assertRaises(HttpResponseError) as e: blob.download_blob().content_as_bytes() self.assertEqual(str(e.exception), 'Decryption failed.') @record def test_get_blob_kek(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act content = blob.download_blob() # Assert self.assertEqual(b"".join(list(content)), self.bytes) @record def test_get_blob_resolver(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') key_resolver = KeyResolver() key_resolver.put_key(self.bsc.key_encryption_key) self.bsc.key_resolver_function = key_resolver.resolve_key blob = self._create_small_blob(BlobType.BlockBlob) # Act self.bsc.key_encryption_key = None content = blob.download_blob().content_as_bytes() # Assert self.assertEqual(content, self.bytes) def test_get_blob_kek_RSA(self): # We can only generate random RSA keys, so this must be run live or # the playback test will fail due to a change in kek values. 
if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = RSAKeyWrapper('key2') blob = self._create_small_blob(BlobType.BlockBlob) # Act content = blob.download_blob() # Assert self.assertEqual(b"".join(list(content)), self.bytes) @record def test_get_blob_nonmatching_kid(self): if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act self.bsc.key_encryption_key.kid = 'Invalid' # Assert with self.assertRaises(HttpResponseError) as e: blob.download_blob().content_as_bytes() self.assertEqual(str(e.exception), 'Decryption failed.') @record def test_put_blob_invalid_stream_type(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') small_stream = StringIO(u'small') large_stream = StringIO(u'large' * self.config.max_single_put_size) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Assert # Block blob specific single shot with self.assertRaises(TypeError) as e: blob.upload_blob(small_stream, length=5) self.assertTrue('Blob data should be of type bytes.' in str(e.exception)) # Generic blob chunked with self.assertRaises(TypeError) as e: blob.upload_blob(large_stream) self.assertTrue('Blob data should be of type bytes.' in str(e.exception)) def test_put_blob_chunking_required_mult_of_block_size(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes( self.config.max_single_put_size + self.config.max_block_size) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content, max_concurrency=3) blob_content = blob.download_blob().content_as_bytes(max_concurrency=3) # Assert self.assertEqual(content, blob_content) def test_put_blob_chunking_required_non_mult_of_block_size(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = urandom(self.config.max_single_put_size + 1) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content, max_concurrency=3) blob_content = blob.download_blob().content_as_bytes(max_concurrency=3) # Assert self.assertEqual(content, blob_content) def test_put_blob_chunking_required_range_specified(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(self.config.max_single_put_size * 2) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob( content, length=self.config.max_single_put_size + 53, max_concurrency=3) blob_content = blob.download_blob().content_as_bytes(max_concurrency=3) # Assert self.assertEqual(content[:self.config.max_single_put_size+53], blob_content) @record def 
test_put_block_blob_single_shot(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = b'small' blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = blob.download_blob().content_as_bytes() # Assert self.assertEqual(content, blob_content) @record def test_put_blob_range(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') content = b'Random repeats' * self.config.max_single_put_size * 5 # All page blob uploads call _upload_chunks, so this will test the ability # of that function to handle ranges even though it's a small blob blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob( content[2:], length=self.config.max_single_put_size + 5, max_concurrency=1) blob_content = blob.download_blob().content_as_bytes(max_concurrency=1) # Assert self.assertEqual(content[2:2 + self.config.max_single_put_size + 5], blob_content) @record def test_put_blob_empty(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = b'' blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = blob.download_blob().content_as_bytes(max_concurrency=2) # Assert self.assertEqual(content, blob_content) @record def test_put_blob_serial_upload_chunking(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(self.config.max_single_put_size + 1) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content, max_concurrency=1) blob_content = blob.download_blob().content_as_bytes(max_concurrency=1) # Assert self.assertEqual(content, blob_content) @record def test_get_blob_range_beginning_to_middle(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content, max_concurrency=1) blob_content = blob.download_blob(offset=0, length=50).content_as_bytes(max_concurrency=1) # Assert self.assertEqual(content[:50], blob_content) @record def test_get_blob_range_middle_to_end(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content, max_concurrency=1) blob_content = blob.download_blob(offset=100, length=28).content_as_bytes() blob_content2 = blob.download_blob(offset=100).content_as_bytes() # Assert self.assertEqual(content[100:], blob_content) self.assertEqual(content[100:], blob_content2) @record def test_get_blob_range_middle_to_middle(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = 
blob.download_blob(offset=5, length=93).content_as_bytes() # Assert self.assertEqual(content[5:98], blob_content) @record def test_get_blob_range_aligns_on_16_byte_block(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = blob.download_blob(offset=48, length=16).content_as_bytes() # Assert self.assertEqual(content[48:64], blob_content) @record def test_get_blob_range_expanded_to_beginning_block_align(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = blob.download_blob(offset=5, length=50).content_as_bytes() # Assert self.assertEqual(content[5:55], blob_content) @record def test_get_blob_range_expanded_to_beginning_iv(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') self.bsc.require_encryption = True content = self.get_random_bytes(128) blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.upload_blob(content) blob_content = blob.download_blob(offset=22, length=20).content_as_bytes() # Assert self.assertEqual(content[22:42], blob_content) @record def test_put_blob_strict_mode(self): # Arrange self.bsc.require_encryption = True content = urandom(512) # Assert for service in self.blob_types: blob_name = self._get_blob_reference(service) blob = self.bsc.get_blob_client(self.container_name, blob_name) with self.assertRaises(ValueError): blob.upload_blob(content, blob_type=service) stream = BytesIO(content) with self.assertRaises(ValueError): blob.upload_blob(stream, length=512, blob_type=service) FILE_PATH = 'blob_input.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(content) with open(FILE_PATH, 'rb') as stream: with self.assertRaises(ValueError): blob.upload_blob(stream, blob_type=service) with self.assertRaises(ValueError): blob.upload_blob('To encrypt', blob_type=service) @record def test_get_blob_strict_mode_no_policy(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act blob.key_encryption_key = None # Assert with self.assertRaises(ValueError): blob.download_blob().content_as_bytes() @record def test_get_blob_strict_mode_unencrypted_blob(self): # Arrange blob = self._create_small_blob(BlobType.BlockBlob) # Act blob.require_encryption = True blob.key_encryption_key = KeyWrapper('key1') # Assert with self.assertRaises(HttpResponseError): blob.download_blob().content_as_bytes() @record def test_invalid_methods_fail_block(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') blob_name = self._get_blob_reference(BlobType.BlockBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Assert with self.assertRaises(ValueError) as e: blob.stage_block('block1', urandom(32)) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) with self.assertRaises(ValueError) as e: blob.commit_block_list(['block1']) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) @record def test_invalid_methods_fail_append(self): # Arrange 
self.bsc.key_encryption_key = KeyWrapper('key1') blob_name = self._get_blob_reference(BlobType.AppendBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Assert with self.assertRaises(ValueError) as e: blob.append_block(urandom(32)) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) with self.assertRaises(ValueError) as e: blob.create_append_blob() self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) # All append_from operations funnel into append_from_stream, so testing one is sufficient with self.assertRaises(ValueError) as e: blob.upload_blob(b'To encrypt', blob_type=BlobType.AppendBlob) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) @record def test_invalid_methods_fail_page(self): # Arrange self.bsc.key_encryption_key = KeyWrapper('key1') blob_name = self._get_blob_reference(BlobType.PageBlob) blob = self.bsc.get_blob_client(self.container_name, blob_name) # Assert with self.assertRaises(ValueError) as e: blob.upload_page(urandom(512), offset=0, length=512) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) with self.assertRaises(ValueError) as e: blob.create_page_blob(512) self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION) @record def test_validate_encryption(self): # Arrange self.bsc.require_encryption = True kek = KeyWrapper('key1') self.bsc.key_encryption_key = kek blob = self._create_small_blob(BlobType.BlockBlob) # Act blob.require_encryption = False blob.key_encryption_key = None content = blob.download_blob() data = content.content_as_bytes() encryption_data = _dict_to_encryption_data(loads(content.properties.metadata['encryptiondata'])) iv = encryption_data.content_encryption_IV content_encryption_key = _validate_and_unwrap_cek(encryption_data, kek, None) cipher = _generate_AES_CBC_cipher(content_encryption_key, iv) decryptor = cipher.decryptor() unpadder = PKCS7(128).unpadder() content = decryptor.update(data) + decryptor.finalize() content = unpadder.update(content) + unpadder.finalize() self.assertEqual(self.bytes, content) @record def test_create_block_blob_from_star(self): self._create_blob_from_star(BlobType.BlockBlob, self.bytes, self.bytes) stream = BytesIO(self.bytes) self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream) FILE_PATH = 'blob_input.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(self.bytes) with open(FILE_PATH, 'rb') as stream: self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream) self._create_blob_from_star(BlobType.BlockBlob, b'To encrypt', 'To encrypt') @record def test_create_page_blob_from_star(self): content = self.get_random_bytes(512) self._create_blob_from_star(BlobType.PageBlob, content, content) stream = BytesIO(content) self._create_blob_from_star(BlobType.PageBlob, content, stream, length=512) FILE_PATH = 'blob_input.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(content) with open(FILE_PATH, 'rb') as stream: self._create_blob_from_star(BlobType.PageBlob, content, stream) def _create_blob_from_star(self, blob_type, content, data, **kwargs): blob_name = self._get_blob_reference(blob_type) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.key_encryption_key = KeyWrapper('key1') blob.require_encryption = True blob.upload_blob(data, blob_type=blob_type, **kwargs) blob_content = blob.download_blob().content_as_bytes() self.assertEqual(content, blob_content) blob.delete_blob() @record def test_get_blob_to_star(self): # Arrange 
self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act iter_blob = b"".join(list(blob.download_blob())) bytes_blob = blob.download_blob().content_as_bytes() stream_blob = BytesIO() blob.download_blob().download_to_stream(stream_blob) stream_blob.seek(0) text_blob = blob.download_blob().content_as_text() # Assert self.assertEqual(self.bytes, iter_blob) self.assertEqual(self.bytes, bytes_blob) self.assertEqual(self.bytes, stream_blob.read()) self.assertEqual(self.bytes.decode(), text_blob)
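The KeyWrapper and RSAKeyWrapper fixtures these tests rely on are defined elsewhere in the test suite. Client-side encryption in the SDK only requires a key-encryption-key object exposing wrap_key, unwrap_key, get_key_wrap_algorithm, and get_kid; below is a minimal local stand-in, assuming the cryptography package is available (the class name LocalKeyWrapper and the 'A256KW' label are illustrative).

import os
from cryptography.hazmat.primitives.keywrap import aes_key_wrap, aes_key_unwrap

class LocalKeyWrapper:
    # Minimal key-encryption-key: wraps the content encryption key with a
    # locally generated AES-256 key using RFC 3394 key wrapping.
    def __init__(self, kid='local:key1'):
        self.kek = os.urandom(32)
        self.kid = kid

    def wrap_key(self, key):
        return aes_key_wrap(self.kek, key)

    def unwrap_key(self, key, algorithm):
        return aes_key_unwrap(self.kek, key)

    def get_key_wrap_algorithm(self):
        return 'A256KW'

    def get_kid(self):
        return self.kid

Assigning such an object to a client's key_encryption_key, together with require_encryption = True, is all these tests need to round-trip encrypted blobs.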
datetime.now().strftime("Time:%H:%M:%S-Azure Connection - OK")) except: logging.info( datetime.now().strftime("Time:%H:%M:%S-Azure Connection - FAILED")) try: #Local File local_path = os.path.abspath(r"../Files/") csvfile = os.listdir(local_path) local_file_name = csvfile[0] full_path_to_file = os.path.join(local_path, local_file_name) logging.info(datetime.now().strftime("Time:%H:%M:%S-Local File - OK")) except: logging.info(datetime.now().strftime("Time:%H:%M:%S-Local File - ERROR")) try: #Upload Local File to Container logging.info( datetime.now().strftime("Time:%H:%M:%S-CSV to Azure-Uploading")) blob_client = blob_service.get_blob_client(container=container_name, blob=local_file_name) with open(full_path_to_file, "rb") as data: blob_client.upload_blob(data, overwrite=True) logging.info( datetime.now().strftime("Time:%H:%M:%S-CSV to Azure-Finished")) except: logging.info( datetime.now().strftime("Time:%H:%M:%S-Upload CSV to Azure - FAILED"))
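The script above swallows failures with bare except blocks, which makes diagnosis hard. A compact variant of the same upload flow that logs full tracebacks is sketched below; the connection string, container name, and path are placeholders.

import logging
import os
from azure.storage.blob import BlobServiceClient

def upload_csv(connection_string, container_name, local_path):
    # Upload a single local file as a block blob, logging the traceback on failure.
    service = BlobServiceClient.from_connection_string(connection_string)
    blob_name = os.path.basename(local_path)
    blob_client = service.get_blob_client(container=container_name, blob=blob_name)
    try:
        with open(local_path, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)
        logging.info("CSV to Azure - Finished: %s", blob_name)
    except Exception:
        logging.exception("Upload CSV to Azure - FAILED: %s", blob_name)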
def traverse_and_create_index(dir, sas_url=None, overwrite_files=False, template_fun=create_plain_index, basepath=None): ''' Recursively traverses the local directory *dir* and generates an index file for each folder using *template_fun* to generate the HTML output. Excludes hidden files. Args: dir: string, path to directory template_fun: function taking four arguments (string, list of string, list of string, string or None) representing the current root, the list of folders, the list of files, and the directory name relative to *basepath*. Should return the HTML source of the index file Return: None ''' print("Traversing {}".format(dir)) # Make sure we remove the trailing / dir = os.path.normpath(dir) # If we want to set the content type in blob storage using a SAS URL if sas_url: # Example: sas_url = 'https://accname.blob.core.windows.net/bname/path/to/folder?st=...&se=...&sp=...&...' if '?' in sas_url: # 'https://accname.blob.core.windows.net/bname/path/to/folder' and 'st=...&se=...&sp=...&...' base_url, sas_token = sas_url.split('?', 1) else: # 'https://accname.blob.core.windows.net/bname/path/to/folder' and None base_url, sas_token = sas_url, None # Remove https:// from base url # 'accname.blob.core.windows.net/bname/path/to/folder' base_url = base_url.split("//", 1)[1] # Everything up to the first dot is account name # 'accname' account_name = base_url.split(".", 1)[0] # get everything after the first / # 'bname/path/to/folder' query_string = base_url.split("/", 1)[1] # Get container name and subfolder if '/' in query_string: # 'bname', 'path/to/folder' container_name, container_folder = query_string.split("/", 1) else: container_name, container_folder = query_string, '' # Prepare the storage access; the account URL needs the https:// scheme target_settings = ContentSettings(content_type='text/html') blob_service = BlobServiceClient( account_url=f'https://{account_name}.blob.core.windows.net', credential=sas_token) # Traverse directory and all sub directories, excluding hidden files for root, dirs, files in os.walk(dir): # Exclude files and folders that are hidden files = [f for f in files if not f[0] == '.'] dirs[:] = [d for d in dirs if not d[0] == '.'] # Output is written to file *root*/index.html output_file = os.path.join(root, "index.html") if not overwrite_files and os.path.isfile(output_file): print('Skipping {}, file exists'.format(output_file)) continue print("Generating {}".format(output_file)) # Generate HTML with template function dirname = None if basepath is not None: dirname = os.path.relpath(root, basepath) html = template_fun(root[len(dir):], dirs, files, dirname) # Write to file with open(output_file, 'wt') as fi: fi.write(html) # Set content type in blob storage if sas_url: if container_folder: output_blob_path = container_folder + '/' + output_file[ len(dir) + 1:] else: output_blob_path = output_file[len(dir) + 1:] try: blob_client = blob_service.get_blob_client( container_name, output_blob_path) blob_client.set_http_headers(content_settings=target_settings) except azure.core.exceptions.HttpResponseError: # the v12 SDK surfaces auth and missing-resource failures as HttpResponseError print( 'ERROR: It seems the SAS URL is incorrect or does not allow setting properties.' ) return
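The manual split chain above can be condensed with urllib.parse; a sketch under the same SAS URL shape (the function name is illustrative):

from urllib.parse import urlsplit

def parse_sas_url(sas_url):
    # 'https://accname.blob.core.windows.net/bname/path/to/folder?st=...'
    parts = urlsplit(sas_url)
    account_name = parts.netloc.split('.', 1)[0]          # 'accname'
    path = parts.path.lstrip('/')                         # 'bname/path/to/folder'
    container_name, _, container_folder = path.partition('/')
    return account_name, container_name, container_folder, parts.query or None

# parse_sas_url('https://acc.blob.core.windows.net/bname/sub?st=x')
# -> ('acc', 'bname', 'sub', 'st=x')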
class AzureStorage: # Initialize the Azure Storage client def __init__(self, storage_url, container_name): self.account_url = storage_url self.container_name = container_name # Acquire a credential object for the app identity. When running in the cloud, # DefaultAzureCredential uses the app's managed identity or user-assigned service principal. # When run locally, DefaultAzureCredential relies on environment variables named # AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, and AZURE_TENANT_ID. credential = DefaultAzureCredential() # Create the BlobServiceClient and connect to the storage container try: self.blob_service_client = BlobServiceClient( account_url=self.account_url, credential=credential) self.container_client = self.blob_service_client.get_container_client( self.container_name) except Exception as e: logger.error(e) # Upload blob to Azure Storage def upload_blob(self, file, subfolder=''): if subfolder == '': target_blob = os.path.basename(file) else: target_blob = subfolder + "/" + os.path.basename(file) try: # Create a blob client using the local file name as the name for the blob blob_client = self.blob_service_client.get_blob_client( container=self.container_name, blob=target_blob) try: # Check if blob already exists if blob_client.get_blob_properties()['size'] > 0: logger.warning( f"{target_blob} already exists in the selected path. Skipping upload." ) return None except ResourceNotFoundError: # the blob does not exist, so we are clear to upload the file pass logger.info(f"Uploading {target_blob} to Azure Storage") # Upload the file and measure upload time elapsed_time = time.time() with open(file, "rb") as data: blob_client.upload_blob(data) elapsed_time = round(time.time() - elapsed_time, 2) logger.info( f"Upload succeeded after {str(elapsed_time)} seconds for: {target_blob}" ) except Exception as e: logger.error(e) return None # Ensure a single '/' between the account URL and the container name blob_url = self.account_url.rstrip('/') + '/' + self.container_name + '/' + target_blob return blob_url # Download blob from Azure Storage def download_blob(self, destination_file, source_file, destination_folder='', source_folder=''): # Check if file was included in the Post, if not return warning filename = secure_filename(source_file) if not filename: logger.warning("Must select a file to download first!") return None if source_folder == '': target_blob = filename else: target_blob = source_folder + '/' + filename if destination_folder == '': out_file = os.path.join(os.getcwd(), destination_file) else: out_file = os.path.join(destination_folder, destination_file) try: # Create a blob client for the target blob blob_client = self.blob_service_client.get_blob_client( container=self.container_name, blob=target_blob) try: # Attempt download of blob to local storage with open(out_file, "wb") as my_blob: blob_data = blob_client.download_blob() blob_data.readinto(my_blob) except ResourceNotFoundError: logger.error(f"Download file failed. 
{target_blob} not found") return None logger.info(f"Downloaded {target_blob} to {out_file}") except Exception as e: logger.error(e) return None return out_file # Delete specified blob def delete_blob(self, blob_name): if blob_name is None: logger.warning("Sent delete request without specified blob name") else: try: blob_client = self.blob_service_client.get_blob_client( container=self.container_name, blob=blob_name) logger.info(f"Deleting blob: {blob_name}") blob_client.delete_blob(delete_snapshots=False) except ResourceNotFoundError: logger.warning( f"Sent delete request for: { blob_name } but blob was not found" ) # Return list of blobs in the container def list_blobs(self): try: blob_list = self.container_client.list_blobs() except Exception: logger.error( f"Failed to list Blobs in container {self.container_name}") return None return blob_list # Delete all blobs in the storage container def clear_storage(self): blob_list = self.list_blobs() for blob in blob_list: self.delete_blob(blob['name'])
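A possible wiring of the class above; the account URL and container name are placeholders, and DefaultAzureCredential assumes a managed identity or the AZURE_* environment variables are configured.

storage = AzureStorage("https://myaccount.blob.core.windows.net", "uploads")
blob_url = storage.upload_blob("report.csv", subfolder="2024")
blobs = storage.list_blobs()
if blobs is not None:
    for blob in blobs:
        print(blob.name)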
class AzureStorageHelper(object): def __init__(self, *args, **kwargs): if "stay_on_remote" in kwargs: del kwargs["stay_on_remote"] # if not handed down explicitly, try to read credentials from # environment variables. for (csavar, envvar) in [ ("account_url", "AZ_BLOB_ACCOUNT_URL"), ("credential", "AZ_BLOB_CREDENTIAL"), ]: if csavar not in kwargs and envvar in os.environ: kwargs[csavar] = os.environ.get(envvar) assert ( "account_url" in kwargs ), "Missing AZ_BLOB_ACCOUNT_URL env var (and possibly AZ_BLOB_CREDENTIAL)" # remove leading '?' from SAS if needed # if kwargs.get("sas_token", "").startswith("?"): # kwargs["sas_token"] = kwargs["sas_token"][1:] # in principle only account_key or sas_token should be set, but we let # BlobServiceClient deal with the ambiguity self.blob_service_client = BlobServiceClient(**kwargs) def container_exists(self, container_name): # note: list_containers matches by prefix (name_starts_with) return any( True for _ in self.blob_service_client.list_containers(container_name)) def upload_to_azure_storage( self, container_name, file_path, blob_name=None, use_relative_path_for_blob_name=True, relative_start_dir=None, extra_args=None, ): """ Upload a file to Azure Storage This function uploads a file to an Azure Storage Container as a blob. Args: container_name: the name of the Azure container to use file_path: The path to the file to upload. blob_name: The name to set for the blob on Azure. If not specified, this will default to the name of the file. Returns: The blob_name of the file on Azure if written, None otherwise """ file_path = os.path.realpath(os.path.expanduser(file_path)) assert container_name, "container_name must be specified" assert os.path.exists(file_path), ( "The file path specified does not exist: %s" % file_path) assert os.path.isfile(file_path), ( "The file path specified does not appear to be a file: %s" % file_path) container_client = self.blob_service_client.get_container_client( container_name) try: container_client.create_container() except azure.core.exceptions.ResourceExistsError: pass if not blob_name: if use_relative_path_for_blob_name: if relative_start_dir: path_blob_name = os.path.relpath(file_path, relative_start_dir) else: path_blob_name = os.path.relpath(file_path) else: path_blob_name = os.path.basename(file_path) blob_name = path_blob_name blob_client = container_client.get_blob_client(blob_name) # upload_blob fails if the blob already exists if self.exists_in_container(container_name, blob_name): blob_client.delete_blob() try: with open(file_path, "rb") as data: blob_client.upload_blob(data, blob_type="BlockBlob") return blob_client.get_blob_properties().name except Exception as e: raise WorkflowError("Error in creating blob. %s" % str(e)) # return None def download_from_azure_storage( self, container_name, blob_name, destination_path=None, expandBlobNameIntoDirs=True, make_dest_dirs=True, create_stub_only=False, ): """ Download a file from Azure Storage This function downloads an object from a specified Azure Storage container. Args: container_name: the name of the Azure Storage container to use (container name only) blob_name: the name of the blob to download destination_path: If specified, the file will be saved to this path, otherwise cwd. expandBlobNameIntoDirs: Since Azure blob names can include slashes, if this is True (default) then Azure blob names with slashes are expanded into directories on the receiving end. If it is False, the blob name is passed to os.path.basename() to get the substring following the last slash. make_dest_dirs: If this is True (default) and the destination path includes directories that do not exist, they will be created. Returns: The destination path of the downloaded file on the receiving end, or None if the destination_path could not be downloaded """ assert container_name, "container_name must be specified" assert blob_name, "blob_name must be specified" if destination_path: destination_path = os.path.realpath( os.path.expanduser(destination_path)) else: if expandBlobNameIntoDirs: destination_path = os.path.join(os.getcwd(), blob_name) else: destination_path = os.path.join(os.getcwd(), os.path.basename(blob_name)) # create any missing directories in the destination path if make_dest_dirs: os.makedirs(os.path.dirname(destination_path), exist_ok=True) b = self.blob_service_client.get_blob_client(container_name, blob_name) if not create_stub_only: with open(destination_path, "wb") as my_blob: blob_data = b.download_blob() blob_data.readinto(my_blob) else: # just create an empty file with the right timestamps ts = b.get_blob_properties().last_modified.timestamp() with open(destination_path, "wb") as fp: os.utime( fp.name, (ts, ts), ) return destination_path def delete_from_container(self, container_name, blob_name): """ Delete a file from Azure Storage container This function deletes an object from a specified Azure Storage container. Args: container_name: the name of the Azure Storage container to use (container name only, not endpoint) blob_name: the name of the blob to delete from the container Returns: nothing """ assert container_name, "container_name must be specified" assert blob_name, "blob_name must be specified" b = self.blob_service_client.get_blob_client(container_name, blob_name) b.delete_blob() def exists_in_container(self, container_name, blob_name): """ Returns whether the blob exists in the container Args: container_name: the name of the Azure Storage container (container name only, not endpoint) blob_name: the blob_name of the object to check for in the container Returns: True | False """ assert ( container_name ), 'container_name must be specified (did you try to write to "root" or forgot to set --default-remote-prefix?)' assert blob_name, "blob_name must be specified" cc = self.blob_service_client.get_container_client(container_name) return any(True for _ in cc.list_blobs(name_starts_with=blob_name)) def blob_size(self, container_name, blob_name): """ Returns the size of a blob Args: container_name: the name of the Azure Storage container (container name only, not endpoint) blob_name: the blob_name of the object to measure Returns: Size in KB (integer division by 1024) """ assert container_name, "container_name must be specified" assert blob_name, "blob_name must be specified" b = self.blob_service_client.get_blob_client(container_name, blob_name) return b.get_blob_properties().size // 1024 def blob_last_modified(self, container_name, blob_name): """ Returns the last-modified timestamp of a blob Args: container_name: the name of the Azure Storage container (container name only, not endpoint) blob_name: the blob_name of the object to inspect Returns: timestamp """ assert container_name, "container_name must be specified" assert blob_name, "blob_name must be specified" b = self.blob_service_client.get_blob_client(container_name, blob_name) return b.get_blob_properties().last_modified.timestamp() def list_blobs(self, container_name): """ Returns a list of blobs from the container Args: container_name: the name of the Azure Storage container (container name only, not endpoint) Returns: 
list of blobs """ assert container_name, "container_name must be specified" c = self.blob_service_client.get_container_client(container_name) return [b.name for b in c.list_blobs()]
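The constructor reads AZ_BLOB_ACCOUNT_URL and AZ_BLOB_CREDENTIAL when they are not passed explicitly, so typical use looks like the sketch below; the URL, credential, and names are placeholders.

import os

os.environ["AZ_BLOB_ACCOUNT_URL"] = "https://myaccount.blob.core.windows.net"
os.environ["AZ_BLOB_CREDENTIAL"] = "<sas-token-or-account-key>"

helper = AzureStorageHelper()
if not helper.exists_in_container("results", "run1/output.txt"):
    helper.upload_to_azure_storage("results", "/tmp/output.txt",
                                   blob_name="run1/output.txt")
print(helper.list_blobs("results"))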
class StorageBlockBlobTest(StorageTestCase): def _setup(self, storage_account_name, key, container_prefix='utcontainer'): account_url = self.account_url(storage_account_name, "blob") if not isinstance(account_url, str): account_url = account_url.encode('utf-8') key = key.encode('utf-8') self.bsc = BlobServiceClient(account_url, credential=key, connection_data_block_size=4 * 1024, max_single_put_size=32 * 1024, max_block_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name(container_prefix) # create source blob to be copied from self.source_blob_name = self.get_resource_name('srcblob') self.source_blob_name_with_special_chars = 'भारत¥test/testsubÐirÍ/' + self.get_resource_name( 'srcÆblob') self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE) self.source_blob_with_special_chars_data = self.get_random_bytes( SOURCE_BLOB_SIZE) blob = self.bsc.get_blob_client(self.container_name, self.source_blob_name) blob_with_special_chars = self.bsc.get_blob_client( self.container_name, self.source_blob_name_with_special_chars) if self.is_live: self.bsc.create_container(self.container_name) blob.upload_blob(self.source_blob_data) blob_with_special_chars.upload_blob( self.source_blob_with_special_chars_data) # generate a SAS so that it is accessible with a URL sas_token = generate_blob_sas( blob.account_name, blob.container_name, blob.blob_name, snapshot=blob.snapshot, account_key=blob.credential.account_key, permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1), ) # generate a SAS so that it is accessible with a URL sas_token_for_special_chars = generate_blob_sas( blob_with_special_chars.account_name, blob_with_special_chars.container_name, blob_with_special_chars.blob_name, snapshot=blob_with_special_chars.snapshot, account_key=blob_with_special_chars.credential.account_key, permission=BlobSasPermissions(read=True), expiry=datetime.utcnow() + timedelta(hours=1), ) self.source_blob_url_without_sas = blob.url self.source_blob_url = BlobClient.from_blob_url( blob.url, credential=sas_token).url self.source_blob_url_with_special_chars = BlobClient.from_blob_url( blob_with_special_chars.url, credential=sas_token_for_special_chars).url @BlobPreparer() def test_put_block_from_url_with_oauth(self, storage_account_name, storage_account_key): # Arrange self._setup(storage_account_name, storage_account_key, container_prefix="container1") split = 4 * 1024 destination_blob_name = self.get_resource_name('destblob') destination_blob_client = self.bsc.get_blob_client( self.container_name, destination_blob_name) token = "Bearer {}".format(self.generate_oauth_token().get_token( "https://storage.azure.com/.default").token) # Assert this operation fails without a credential with self.assertRaises(HttpResponseError): destination_blob_client.stage_block_from_url( block_id=1, source_url=self.source_blob_url_without_sas, source_offset=0, source_length=split) # Assert it passes after passing an oauth credential destination_blob_client.stage_block_from_url( block_id=1, source_url=self.source_blob_url_without_sas, source_offset=0, source_length=split, source_authorization=token) destination_blob_client.stage_block_from_url( block_id=2, source_url=self.source_blob_url_without_sas, source_offset=split, source_length=split, source_authorization=token) committed, uncommitted = destination_blob_client.get_block_list('all') self.assertEqual(len(uncommitted), 2) self.assertEqual(len(committed), 0) # Act part 2: commit the blocks 
destination_blob_client.commit_block_list(['1', '2']) # Assert destination blob has right content destination_blob_data = destination_blob_client.download_blob( ).readall() self.assertEqual(len(destination_blob_data), 8 * 1024) self.assertEqual(destination_blob_data, self.source_blob_data) @BlobPreparer() def test_put_block_from_url_and_commit(self, storage_account_name, storage_account_key): self._setup(storage_account_name, storage_account_key) dest_blob_name = self.get_resource_name('destblob') dest_blob = self.bsc.get_blob_client(self.container_name, dest_blob_name) # Act part 1: make put block from url calls split = 4 * 1024 dest_blob.stage_block_from_url(block_id=1, source_url=self.source_blob_url, source_offset=0, source_length=split) dest_blob.stage_block_from_url(block_id=2, source_url=self.source_blob_url, source_offset=split, source_length=split) # Assert blocks committed, uncommitted = dest_blob.get_block_list('all') self.assertEqual(len(uncommitted), 2) self.assertEqual(len(committed), 0) # Act part 2: commit the blocks dest_blob.commit_block_list(['1', '2']) # Assert destination blob has right content content = dest_blob.download_blob().readall() self.assertEqual(len(content), 8 * 1024) self.assertEqual(content, self.source_blob_data) dest_blob.stage_block_from_url( block_id=3, source_url=self.source_blob_url_with_special_chars, source_offset=0, source_length=split) dest_blob.stage_block_from_url( block_id=4, source_url=self.source_blob_url_with_special_chars, source_offset=split, source_length=split) # Assert blocks committed, uncommitted = dest_blob.get_block_list('all') self.assertEqual(len(uncommitted), 2) self.assertEqual(len(committed), 2) # Act part 3: commit the new blocks dest_blob.commit_block_list(['3', '4']) # Assert destination blob has right content content = dest_blob.download_blob().readall() self.assertEqual(len(content), 8 * 1024) self.assertEqual(content, self.source_blob_with_special_chars_data) @BlobPreparer() def test_put_block_from_url_and_validate_content_md5( self, storage_account_name, storage_account_key): self._setup(storage_account_name, storage_account_key) dest_blob_name = self.get_resource_name('destblob') dest_blob = self.bsc.get_blob_client(self.container_name, dest_blob_name) src_md5 = StorageContentValidation.get_content_md5( self.source_blob_data) # Act part 1: put block from url with md5 validation dest_blob.stage_block_from_url(block_id=1, source_url=self.source_blob_url, source_content_md5=src_md5, source_offset=0, source_length=8 * 1024) # Assert block was staged committed, uncommitted = dest_blob.get_block_list('all') self.assertEqual(len(uncommitted), 1) self.assertEqual(len(committed), 0) # Act part 2: put block from url with wrong md5 fake_md5 = StorageContentValidation.get_content_md5(b"POTATO") with self.assertRaises(HttpResponseError) as error: dest_blob.stage_block_from_url(block_id=2, source_url=self.source_blob_url, source_content_md5=fake_md5, source_offset=0, source_length=8 * 1024) self.assertEqual(error.exception.error_code, StorageErrorCode.md5_mismatch) # Assert block was not staged committed, uncommitted = dest_blob.get_block_list('all') self.assertEqual(len(uncommitted), 1) self.assertEqual(len(committed), 0) @BlobPreparer() def test_copy_blob_sync(self, storage_account_name, storage_account_key): self._setup(storage_account_name, storage_account_key) dest_blob_name = self.get_resource_name('destblob') dest_blob = self.bsc.get_blob_client(self.container_name, 
dest_blob_name) # Act copy_props = dest_blob.start_copy_from_url(self.source_blob_url, requires_sync=True) # Assert self.assertIsNotNone(copy_props) self.assertIsNotNone(copy_props['copy_id']) self.assertEqual('success', copy_props['copy_status']) # Verify content content = dest_blob.download_blob().readall() self.assertEqual(self.source_blob_data, content) copy_props_with_special_chars = dest_blob.start_copy_from_url( self.source_blob_url_with_special_chars, requires_sync=True) # Assert self.assertIsNotNone(copy_props_with_special_chars) self.assertIsNotNone(copy_props_with_special_chars['copy_id']) self.assertEqual('success', copy_props_with_special_chars['copy_status']) # Verify content content = dest_blob.download_blob().readall() self.assertEqual(self.source_blob_with_special_chars_data, content) @pytest.mark.playback_test_only @BlobPreparer() def test_sync_copy_blob_returns_vid(self, storage_account_name, storage_account_key): self._setup(storage_account_name, storage_account_key) dest_blob_name = self.get_resource_name('destblob') dest_blob = self.bsc.get_blob_client(self.container_name, dest_blob_name) # Act copy_props = dest_blob.start_copy_from_url(self.source_blob_url, requires_sync=True) # Assert self.assertIsNotNone(copy_props['version_id']) self.assertIsNotNone(copy_props) self.assertIsNotNone(copy_props['copy_id']) self.assertEqual('success', copy_props['copy_status']) # Verify content content = dest_blob.download_blob().readall() self.assertEqual(self.source_blob_data, content)
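_setup builds a readable source URL by appending a blob-level SAS. Stripped of the test plumbing, the pattern is the sketch below; account name, key, and blob names are placeholders.

from datetime import datetime, timedelta
from azure.storage.blob import generate_blob_sas, BlobSasPermissions

sas = generate_blob_sas(
    account_name="myaccount",
    container_name="utcontainer",
    blob_name="srcblob",
    account_key="<account-key>",
    permission=BlobSasPermissions(read=True),
    expiry=datetime.utcnow() + timedelta(hours=1),
)
# A copy or stage-from-URL source must carry its own authorization
source_url = f"https://myaccount.blob.core.windows.net/utcontainer/srcblob?{sas}"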
def main(message: func.ServiceBusMessage): # Log the Service Bus Message as plaintext message_body = message.get_body().decode("utf-8") logging.info('Python ServiceBus topic trigger processed message.') logging.info(f'Message Body: {message_body}') quarantine_storage_connection_string = os.environ.get( 'QUARANTINE_STORAGE_CONNECTION_STRING') promote_storage_connection_string = os.environ.get( 'PROMOTE_STORAGE_CONNECTION_STRING') promote_mode = get_promote_mode() quarantine_mode = get_quarantine_mode() message = json.loads(message_body) file_url = message['file_url'] (_, blob_container, blob_name) = parse_blob_information(file_url) credential = DefaultAzureCredential(exclude_environment_credential=True) source_blob_service_client = BlobServiceClient( account_url=get_blob_account_url(file_url), credential=credential) blob_url_sas = get_blob_url_sas(source_blob_service_client, file_url) protecting_blob_client = source_blob_service_client.get_blob_client( blob_container, blob_name) existing_metadata = get_existing_metadata(protecting_blob_client) existing_tags = get_existing_tags(protecting_blob_client) result = message['scanning_result'] findings = result['Findings'] logging.info(f'findings: {json.dumps(findings)}') operation = 'quarantine' if findings else 'promotion' mode = quarantine_mode if findings else promote_mode dest_storage_connection_string = quarantine_storage_connection_string if findings else promote_storage_connection_string scan_result = 'malicious' if findings else 'no issues found' scan_date = time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(message['timestamp'])) if not dest_storage_connection_string: print(f'Skip: No storage connection string specified for {operation}') return codes = result['Codes'] code = CODE_EMPTY if len(codes) > 0: code = CODE_SKIP_MULTIPLE if len(codes) > 1 else codes[0] fssTags = { 'scanned': 'true', 'scanDate': scan_date, 'scanResult': scan_result, 'scanDetailCode': str(code), 'scanDetailMessage': CODE_MESSAGES.get(code, CODE_MESSAGES[CODE_MISC]) } logging.info(f'FSS tags: {fssTags}') metadata = compose_metadata(existing_metadata, fssTags) tags = compose_tags(existing_tags, fssTags) dest_blob_service_client = BlobServiceClient.from_connection_string( dest_storage_connection_string) copy_object( source_blob_url=blob_url_sas, container=blob_container, blob_name=blob_name, metadata=metadata, tags=tags, dest_blob_service_client=dest_blob_service_client, ) if mode == 'move': protecting_blob_client.delete_blob() logging.info(f'File {operation} is successful (mode: {mode})')
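copy_object is not defined in this snippet. A plausible minimal sketch, assuming it performs a synchronous server-side copy that also stamps the composed metadata and tags (the v12 start_copy_from_url accepts all three keywords); the parameter names mirror the call site above.

def copy_object(source_blob_url, container, blob_name, metadata, tags,
                dest_blob_service_client):
    # Copy the source blob (already authorized via SAS in its URL) into the
    # destination account, applying the merged metadata and blob index tags.
    dest_client = dest_blob_service_client.get_blob_client(container, blob_name)
    dest_client.start_copy_from_url(
        source_blob_url,
        metadata=metadata,
        tags=tags,
        requires_sync=True,
    )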
class StorageLargeBlockBlobTest(StorageTestCase): def setUp(self): super(StorageLargeBlockBlobTest, self).setUp() url = self._get_account_url() credential = self._get_shared_key_credential() # test chunking functionality by reducing the threshold # for chunking and the size of each chunk, otherwise # the tests would take too long to execute self.bsc = BlobServiceClient(url, credential=credential, max_single_put_size=32 * 1024, max_block_size=2 * 1024 * 1024, min_large_block_upload_threshold=1 * 1024 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') if not self.is_playback(): self.bsc.create_container(self.container_name) def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass return super(StorageLargeBlockBlobTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(b'') return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bsc.get_blob_client(container_name, blob_name) actual_data = blob.download_blob() self.assertEqual(b"".join(list(actual_data.chunks())), expected_data) # --Test cases for block blobs -------------------------------------------- def test_put_block_bytes_large(self): if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._create_blob() # Act for i in range(5): resp = blob.stage_block('block {0}'.format(i).encode('utf-8'), os.urandom(LARGE_BLOCK_SIZE)) self.assertIsNone(resp) # Assert def test_put_block_bytes_large_with_md5(self): if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._create_blob() # Act for i in range(5): resp = blob.stage_block('block {0}'.format(i).encode('utf-8'), os.urandom(LARGE_BLOCK_SIZE), validate_content=True) self.assertIsNone(resp) def test_put_block_stream_large(self): if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._create_blob() # Act for i in range(5): stream = BytesIO(bytearray(LARGE_BLOCK_SIZE)) resp = blob.stage_block( 'block {0}'.format(i).encode('utf-8'), stream, length=LARGE_BLOCK_SIZE) self.assertIsNone(resp) # Assert def test_put_block_stream_large_with_md5(self): if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self._create_blob() # Act for i in range(5): stream = BytesIO(bytearray(LARGE_BLOCK_SIZE)) resp = blob.stage_block( 'block {0}'.format(i).encode('utf-8'), stream, length=LARGE_BLOCK_SIZE, validate_content=True) self.assertIsNone(resp) # Assert def test_create_large_blob_from_path(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) def test_create_large_blob_from_path_with_md5(self): # parallel tests introduce random order of requests, can only run live if 
TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, validate_content=True, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) def test_create_large_blob_from_path_non_parallel(self): if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(self.get_random_bytes(100)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=1) # Assert self.assertBlobEqual(self.container_name, blob_name, data) def test_create_large_blob_from_path_with_progress(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) def test_create_large_blob_from_path_with_properties(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) def test_create_large_blob_from_stream_chunked_upload(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) def test_create_large_blob_from_stream_with_progress_chunked_upload(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = 
self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act progress = [] def callback(response): current = response.context['upload_stream_current'] total = response.context['data_stream_total'] if current is not None: progress.append((current, total)) with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, max_concurrency=2, raw_response_hook=callback) # Assert self.assertBlobEqual(self.container_name, blob_name, data) self.assert_upload_progress(len(data), self.config.max_block_size, progress) def test_create_large_blob_from_stream_chunked_upload_with_count(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) def test_create_large_blob_from_stream_chunked_upload_with_count_and_properties( self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') blob_size = len(data) - 301 with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, length=blob_size, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data[:blob_size]) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) def test_create_large_blob_from_stream_chunked_upload_with_properties( self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) data = bytearray(os.urandom(LARGE_BLOB_SIZE)) with open(FILE_PATH, 'wb') as stream: stream.write(data) # Act content_settings = ContentSettings(content_type='image/png', content_language='spanish') with open(FILE_PATH, 'rb') as stream: blob.upload_blob(stream, content_settings=content_settings, max_concurrency=2) # Assert self.assertBlobEqual(self.container_name, blob_name, data) properties = blob.get_blob_properties() self.assertEqual(properties.content_settings.content_type, content_settings.content_type) self.assertEqual(properties.content_settings.content_language, content_settings.content_language)
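The behavior under test is driven entirely by the client knobs set in setUp. Outside the test harness, the same chunked, parallel upload looks like this sketch; the URL, credential, container, and file names are placeholders.

from azure.storage.blob import BlobServiceClient

bsc = BlobServiceClient(
    "https://myaccount.blob.core.windows.net",
    credential="<account-key>",
    max_single_put_size=32 * 1024,                 # uploads above this are chunked
    max_block_size=2 * 1024 * 1024,                # size of each staged block
    min_large_block_upload_threshold=1024 * 1024,  # switch to the large-block path
)
with open("large.bin", "rb") as stream:
    bsc.get_blob_client("utcontainer", "large.bin").upload_blob(
        stream, max_concurrency=2, overwrite=True)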
def create_blob_client(self, blob_service_client, container_name, blob_name): """Create a blob client using the given blob service client""" client = blob_service_client.get_blob_client(container=container_name, blob=blob_name) return client
class BlobStorageAccountTest(StorageTestCase): def setUp(self): super(BlobStorageAccountTest, self).setUp() url = self._get_account_url() credential = self._get_shared_key_credential() self.bsc = BlobServiceClient(url, credential=credential) self.container_name = self.get_resource_name('utcontainer') if not self.is_playback(): self.bsc.create_container(self.container_name) def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass return super(BlobStorageAccountTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): blob_name = self.get_resource_name(TEST_BLOB_PREFIX) return self.bsc.get_blob_client(self.container_name, blob_name) def _create_blob(self): blob = self._get_blob_reference() blob.upload_blob(b'') return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bsc.get_blob_client(container_name, blob_name) actual_data = blob.download_blob().content_as_bytes() self.assertEqual(actual_data, expected_data) # --Tests specific to Blob Storage Accounts (not general purpose)------------ @record def test_standard_blob_tier_set_tier_api(self): container = self.bsc.get_container_client(self.container_name) tiers = [ StandardBlobTier.Archive, StandardBlobTier.Cool, StandardBlobTier.Hot ] for tier in tiers: blob = self._get_blob_reference() data = b'hello world' blob.upload_blob(data) blob_ref = blob.get_blob_properties() self.assertIsNotNone(blob_ref.blob_tier) self.assertTrue(blob_ref.blob_tier_inferred) self.assertIsNone(blob_ref.blob_tier_change_time) blobs = list(container.list_blobs()) # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob.blob_name) self.assertIsNotNone(blobs[0].blob_tier) self.assertTrue(blobs[0].blob_tier_inferred) self.assertIsNone(blobs[0].blob_tier_change_time) blob.set_standard_blob_tier(tier) blob_ref2 = blob.get_blob_properties() self.assertEqual(tier, blob_ref2.blob_tier) self.assertFalse(blob_ref2.blob_tier_inferred) self.assertIsNotNone(blob_ref2.blob_tier_change_time) blobs = list(container.list_blobs()) # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob.blob_name) self.assertEqual(blobs[0].blob_tier, tier) self.assertFalse(blobs[0].blob_tier_inferred) self.assertIsNotNone(blobs[0].blob_tier_change_time) blob.delete_blob() @record def test_rehydration_status(self): blob_name = 'rehydration_test_blob_1' blob_name2 = 'rehydration_test_blob_2' container = self.bsc.get_container_client(self.container_name) data = b'hello world' blob = container.upload_blob(blob_name, data) blob.set_standard_blob_tier(StandardBlobTier.Archive) blob.set_standard_blob_tier(StandardBlobTier.Cool) blob_ref = blob.get_blob_properties() self.assertEqual(StandardBlobTier.Archive, blob_ref.blob_tier) self.assertEqual("rehydrate-pending-to-cool", blob_ref.archive_status) self.assertFalse(blob_ref.blob_tier_inferred) blobs = list(container.list_blobs()) blob.delete_blob() # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob.blob_name) self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier) self.assertEqual("rehydrate-pending-to-cool", blobs[0].archive_status) self.assertFalse(blobs[0].blob_tier_inferred) blob2 = 
container.upload_blob(blob_name2, data) blob2.set_standard_blob_tier(StandardBlobTier.Archive) blob2.set_standard_blob_tier(StandardBlobTier.Hot) blob_ref2 = blob2.get_blob_properties() self.assertEqual(StandardBlobTier.Archive, blob_ref2.blob_tier) self.assertEqual("rehydrate-pending-to-hot", blob_ref2.archive_status) self.assertFalse(blob_ref2.blob_tier_inferred) blobs = list(container.list_blobs()) # Assert self.assertIsNotNone(blobs) self.assertGreaterEqual(len(blobs), 1) self.assertIsNotNone(blobs[0]) self.assertNamedItemInContainer(blobs, blob2.blob_name) self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier) self.assertEqual("rehydrate-pending-to-hot", blobs[0].archive_status) self.assertFalse(blobs[0].blob_tier_inferred)
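The rehydration assertions reflect service behavior: re-tiering an archived blob only queues rehydration. A condensed sketch, assuming blob_client is a BlobClient for an existing blob on a standard account:

from azure.storage.blob import StandardBlobTier

blob_client.set_standard_blob_tier(StandardBlobTier.Archive)
blob_client.set_standard_blob_tier(StandardBlobTier.Cool)   # queues rehydration
props = blob_client.get_blob_properties()
assert props.blob_tier == StandardBlobTier.Archive          # still archived
assert props.archive_status == "rehydrate-pending-to-cool"  # until rehydration completes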
def test_blob_tier_copy_blob(self): url = self._get_premium_account_url() credential = self._get_premium_shared_key_credential() pbs = BlobServiceClient(url, credential=credential) try: container_name = self.get_resource_name('utpremiumcontainer') container = pbs.get_container_client(container_name) if not self.is_playback(): try: container.create_container() except ResourceExistsError: pass # Arrange source_blob = pbs.get_blob_client( container_name, self.get_resource_name(TEST_BLOB_PREFIX)) source_blob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10) # Act source_blob_url = '{0}/{1}/{2}'.format( self._get_premium_account_url(), container_name, source_blob.blob_name) copy_blob = pbs.get_blob_client(container_name, 'blob1copy') copy = copy_blob.start_copy_from_url(source_blob_url, premium_page_blob_tier=PremiumPageBlobTier.P30) # Assert self.assertIsNotNone(copy) self.assertEqual(copy['copy_status'], 'success') self.assertIsNotNone(copy['copy_id']) copy_ref = copy_blob.get_blob_properties() self.assertEqual(copy_ref.blob_tier, PremiumPageBlobTier.P30) source_blob2 = pbs.get_blob_client( container_name, self.get_resource_name(TEST_BLOB_PREFIX)) source_blob2.create_page_blob(1024) source_blob2_url = '{0}/{1}/{2}'.format( self._get_premium_account_url(), source_blob2.container_name, source_blob2.blob_name) copy_blob2 = pbs.get_blob_client(container_name, 'blob2copy') copy2 = copy_blob2.start_copy_from_url(source_blob2_url, premium_page_blob_tier=PremiumPageBlobTier.P60) self.assertIsNotNone(copy2) self.assertEqual(copy2['copy_status'], 'success') self.assertIsNotNone(copy2['copy_id']) copy_ref2 = copy_blob2.get_blob_properties() self.assertEqual(copy_ref2.blob_tier, PremiumPageBlobTier.P60) self.assertFalse(copy_ref2.blob_tier_inferred) copy_blob3 = pbs.get_blob_client(container_name, 'blob3copy') copy3 = copy_blob3.start_copy_from_url(source_blob2_url) self.assertIsNotNone(copy3) self.assertEqual(copy3['copy_status'], 'success') self.assertIsNotNone(copy3['copy_id']) copy_ref3 = copy_blob3.get_blob_properties() self.assertEqual(copy_ref3.blob_tier, PremiumPageBlobTier.P10) self.assertTrue(copy_ref3.blob_tier_inferred) finally: container.delete_container()
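Reduced to its essence, the tier-on-copy behavior verified above is the following sketch; pbs is assumed to be a BlobServiceClient for a premium account, the names are placeholders, and the same-account copy relies on shared-key auth as in the test.

from azure.storage.blob import PremiumPageBlobTier

src = pbs.get_blob_client("utpremiumcontainer", "source")
src.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10)
dst = pbs.get_blob_client("utpremiumcontainer", "copy")
# The destination tier can be set at copy time; omit it and the tier is inferred.
dst.start_copy_from_url(src.url, premium_page_blob_tier=PremiumPageBlobTier.P30)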
class AzureStorageClient(object): """Connects to an Azure Blob Storage service account.""" def __init__( self, container: str, connection_string: Optional[str] = None, account_url: Optional[str] = None, credential: Optional[str] = None, ) -> None: if connection_string: self.client = BlobServiceClient.from_connection_string(connection_string) elif account_url: self.client = BlobServiceClient(account_url, credential) logging.info("Trying to create Azure Blob Storage Container: {}.".format(container)) try: self.client.create_container(container.split("/")[0]) logging.info("Successfully created container {}.".format(container)) except ResourceExistsError: logging.info( "Container {} already exists, and will be used to store checkpoints.".format( container ) ) except HttpResponseError as e: if e.error_code == StorageErrorCode.invalid_uri: # type: ignore logging.warning( ( "The storage client raised the following HttpResponseError:\n{}\nPlease " "ignore this warning if this is because the account url provided points " "to a container instead of a storage account; otherwise, it may be " "necessary to fix your config.yaml." ).format(e) ) else: logging.error("Failed while trying to create container {}.".format(container)) raise e @util.preserve_random_state def put(self, container_name: str, blob_name: str, filename: Union[str, Path]) -> None: """Upload a file to the specified blob in the specified container.""" with open(filename, "rb") as file: self.client.get_blob_client(container_name, blob_name).upload_blob(file) @util.preserve_random_state def get(self, container_name: str, blob_name: str, filename: str) -> None: """Download the specified blob in the specified container to a file.""" with open(filename, "wb") as file: stream = self.client.get_blob_client(container_name, blob_name).download_blob() stream.readinto(file) @util.preserve_random_state def delete_files(self, container_name: str, files: List[str]) -> None: """Deletes the specified files from the specified container.""" for file in files: self.client.get_blob_client(container_name, file).delete_blob() @util.preserve_random_state def list_files( self, container_name: str, file_prefix: Optional[Union[str, Path]] = None ) -> List[str]: """Lists files within the specified container that have the specified file prefix. Lists all files if file_prefix is None. """ container = self.client.get_container_client(container_name) files = [blob["name"] for blob in container.list_blobs(name_starts_with=file_prefix)] return files
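A usage sketch for the checkpoint client above; the connection string and paths are placeholders.

client = AzureStorageClient("checkpoints", connection_string="<connection-string>")
client.put("checkpoints", "trial-1/state.pkl", "/tmp/state.pkl")
client.get("checkpoints", "trial-1/state.pkl", "/tmp/restored.pkl")
print(client.list_files("checkpoints", file_prefix="trial-1/"))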