def upload_cube(data):
    """Create a SEG-Y file from ``data`` and upload it to Azure blob storage.

    The array is written to a temporary SEG-Y file, scanned to obtain its
    metadata, and uploaded through ``oneseismic.upload.upload``. Any existing
    "results" container and any container named after the cube's guid are
    deleted first. The temporary file is removed before returning, whether or
    not the upload succeeded.

    Args:
        data: Array handed to ``segyio.tools.from_array``.

    Returns:
        The guid of the cube, read from the scan metadata.
    """
    import os

    from oneseismic.scan.__main__ import main as scan_main
    from oneseismic.internal import blobfs
    from oneseismic.internal import localfs
    import oneseismic.upload

    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, fname = tempfile.mkstemp(suffix=".segy")
    os.close(fd)
    try:
        segyio.tools.from_array(fname, data)
        meta = json.loads(scan_main([fname]))

        credential = CustomTokenCredential()
        blob_service_client = BlobServiceClient(STORAGE_URL, credential)
        outputfs = blobfs(blob_service_client)

        # Start from a clean slate; a missing container is not an error.
        for container in ("results", meta["guid"]):
            try:
                blob_service_client.delete_container(container)
            except ResourceNotFoundError:
                pass

        # Passed to upload() as its shape argument
        # (presumably the subcube dimensions -- confirm against oneseismic).
        shape = [64, 64, 64]
        inputfs = localfs('.')
        with inputfs.open(fname, 'rb') as f:
            oneseismic.upload.upload(meta, shape, f, outputfs)
    finally:
        # The file name never leaves this function, so nothing else can
        # clean it up later.
        os.remove(fname)

    return meta["guid"]
def create_dls(wait_a_sec):
    """Populate a fresh "dls" container with dummy JSON data, then clean up.

    Generator: everything before the ``yield`` is set-up, the statement after
    it is tear-down. Any existing "dls" container is deleted (failures are
    ignored), the container is recreated, and the same dummy payload is
    uploaded once per (data set, file) combination.

    Args:
        wait_a_sec: Not used in the body.

    Yields:
        The dummy payload, a dict produced by
        ``DataFrame.to_dict(orient="split")``.
    """
    container = "dls"
    token = CustomTokenCredential()
    service = BlobServiceClient(STORAGE_URL, token)

    # Best-effort removal of leftovers from an earlier run.
    try:
        service.delete_container(container)
    except Exception:
        pass
    service.create_container(container)

    frame = pd.DataFrame.from_dict({
        "key0": ["val00", "val01"],
        "key1": ["val10", "val11"],
    })
    dummy_data = frame.to_dict(orient="split")

    data_sets = ["deep", "shallow"]
    file_names = ["systems", "complexes", "elements"]
    for data_set in data_sets:
        for file_name in file_names:
            blob_path = f"{FOLDER_NAME}/{data_set}/{file_name}.json"
            upload(service, dummy_data, blob_path)

    yield dummy_data

    service.delete_container(container)
def add_sanitizers(test_proxy):
    """Register recording sanitizers, run the tests, then clean up blobs.

    Generator: the sanitizers are registered before the ``yield``. After it,
    when running live against the "Dogfood" environment, every container in
    the translation storage account is deleted.

    Args:
        test_proxy: Not used in the body.
    """
    # Headers that must not end up in recordings.
    for header in ("Ocp-Apim-Subscription-Key", "Retry-After"):
        add_remove_header_sanitizer(headers=header)

    # Replace real account names in service and storage host names.
    add_general_regex_sanitizer(
        value="fakeendpoint",
        regex="(?<=\\/\\/)[a-z-]+(?=\\.cognitiveservices\\.azure\\.com)",
    )
    add_general_regex_sanitizer(
        regex="(?<=\\/\\/)[a-z]+(?=(?:|-secondary)\\.(?:table|blob|queue)\\.core\\.windows\\.net)",
        value="fakeendpoint",
    )
    add_oauth_response_sanitizer()

    # run tests
    yield

    # Dogfood env uses a static storage account so we clean up the blob resources.
    # This is unnecessary for AzureCloud where each storage account is deleted
    # at the end of testing.
    if not is_live() or os.getenv("TRANSLATION_ENVIRONMENT") != "Dogfood":
        return

    storage_name = os.getenv("TRANSLATION_DOCUMENT_STORAGE_NAME")
    account_url = "https://" + storage_name + ".blob.core.windows.net/"
    storage_key = os.getenv("TRANSLATION_DOCUMENT_STORAGE_KEY")
    blob_service = BlobServiceClient(account_url, storage_key)
    for leftover in blob_service.list_containers():
        blob_service.delete_container(leftover)
def azblob_file(azblob_credentials, cloud_bucket_name, download_gcs_public_data, public=False):
    """Upload a local file to a new Azure blob container and yield its path.

    Generator: a container named ``cloud_bucket_name`` plus a random
    three-character suffix (and "public" when ``public`` is set) is created
    if it does not exist, the file is uploaded as "myfile.csv", and the
    container is deleted once the generator is resumed or closed.

    Args:
        azblob_credentials: Mapping with "storage_account" and "shared_key".
        cloud_bucket_name: Prefix for the container name.
        download_gcs_public_data: Path of the local file to upload; it is
            opened in text mode.
        public: When true, the container is created with
            ``public_access="container"``; otherwise with no public access.

    Yields:
        "<container_name>/myfile.csv".
    """
    acc_url = f"https://{azblob_credentials['storage_account']}.blob.core.windows.net"
    azblob_client = BlobServiceClient(
        account_url=acc_url, credential=azblob_credentials["shared_key"])

    container_name = cloud_bucket_name + random_char(3).lower()
    if public:
        container_name += "public"

    # Report the visibility that is actually being requested.
    visibility = "public" if public else "private"
    print(f"\nUpload dataset to {visibility} azure blob container {container_name}")

    existing_names = {cntr["name"] for cntr in azblob_client.list_containers()}
    if container_name not in existing_names:
        azblob_client.create_container(
            name=container_name,
            metadata=None,
            public_access="container" if public else None,
        )

    # try/finally so the container is also removed when the upload fails or
    # the consumer raises while the generator is suspended.
    try:
        blob_client = azblob_client.get_blob_client(container_name, "myfile.csv")
        with open(download_gcs_public_data, "r") as f:
            blob_client.upload_blob(f.read(), blob_type="BlockBlob", overwrite=True)

        yield f"{container_name}/myfile.csv"
    finally:
        azblob_client.delete_container(container_name)
        print(
            f"\nAzure Blob Container {container_name} is now marked for deletion")
def test_request_callback_signed_header(self, resource_group, location, storage_account, storage_account_key):
    """Check that a header added by a raw request hook reaches the service.

    The hook adds an ``x-ms-meta-hello`` header to PUT requests while the
    container is created; the container's metadata must then equal
    ``{'hello': 'world'}``. The container is deleted afterwards regardless of
    the outcome.
    """
    # Set-up: client and a fresh container name.
    account_url = self._account_url(storage_account.name)
    bsc = BlobServiceClient(account_url, credential=storage_account_key)
    container_name = self.get_resource_name('cont')

    def inject_metadata_header(request):
        # Only the create (PUT) request carries the extra metadata header.
        if request.http_request.method != 'PUT':
            return
        request.http_request.headers['x-ms-meta-hello'] = 'world'

    # Exercise and verify.
    try:
        created = bsc.create_container(
            container_name, raw_request_hook=inject_metadata_header)
        props = created.get_container_properties()
        self.assertEqual(props.metadata, {'hello': 'world'})
    finally:
        bsc.delete_container(container_name)
class AzBlobManagerSync:
    """A utility class to help working with Azure Storage.

    This class implements synchronous methods based on the
    Microsoft Python SDK azure.storage.blob

    See:
        https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob?view=azure-python

    Available:
        - Basic methods to work with containers and blobs

    Errors are logged instead of raised: every public method catches
    ``AzureError`` and ``Exception`` and reports failure through its return
    value.
    """

    def __init__(self, connection_string=None, account_url=None, credential=None):
        """Instantiate a synchronous AzBlobManagerSync object.

        Args:
            connection_string (str): A connection string to an Azure Storage account.
            account_url (str): The URL to the blob storage account. Any other entities included in the URL path
                (e.g. container or blob) will be discarded. This URL can be optionally authenticated with a SAS token.
            credential (str): The credentials with which to authenticate. This is optional if the account URL
                already has a SAS token, or the connection string already has shared access key values.
                The value can be a SAS token string, an account shared access key, or an instance of a
                TokenCredentials class from azure.identity. Credentials provided here will take precedence
                over those in the connection string.

        Examples:
            Creating the AzBlobManagerSync with account url and a shared access key:
                azStorageManager = AzBlobManagerSync(account_url=self.url, credential=self.shared_access_key)

            Creating the AzBlobManagerSync with a connection string that has the shared access key:
                azStorageManager = AzBlobManagerSync(connection_string='DefaultEndpointsProtocol=http;...')
        """
        self.connection_string = connection_string
        self.account_url = account_url
        self.credential = credential
        # Define the attribute up front so it exists even when the SDK import
        # or the client construction below fails (the failure is only logged).
        self.blob_service_client = None
        try:
            from azure.storage.blob import BlobServiceClient
            if self.connection_string is not None:
                # Create BlobServiceClient from a Connection String
                self.blob_service_client = BlobServiceClient.from_connection_string(
                    conn_str=self.connection_string, credential=self.credential)
            else:
                # Create the BlobServiceClient with account url and credential.
                self.blob_service_client = BlobServiceClient(
                    account_url=self.account_url, credential=self.credential)
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')

    def _logAzureError(self, err=AzureError):
        """Log the first line of an AzureError's message at error level."""
        msg = err.message.split('\n')[0]
        logger.error(f'AzureError error: {msg}')

    def create_container(self, container_name):
        """Creates a new container.

        Args:
            container_name (str): The name of the container.
                See https://docs.microsoft.com/en-us/rest/api/storageservices/naming-and-referencing-containers--blobs--and-metadata
                for naming convention

        Returns:
            bool: The return value. True for success, False otherwise
                (including when the container already exists).
        """
        success = False
        try:
            new_container = self.blob_service_client.create_container(
                container_name)
            properties = new_container.get_container_properties()
            success = properties is not None and properties.name == container_name
        except ResourceExistsError:
            logger.info(f'Container "{container_name}" already exists.')
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def delete_container(self, container_name):
        """Deletes a container.

        Args:
            container_name (str): The name of the container.

        Returns:
            bool: The return value. True for success, False otherwise
                (including when the container does not exist).
        """
        success = False
        try:
            self.blob_service_client.delete_container(container_name)
            success = True
        except ResourceNotFoundError:
            logger.info(f'Container "{container_name}" doesn not exist.')
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def _list_containers(self, name_starts_with=None, include_metadata=False):
        """Lists containers.

        Args:
            name_starts_with (str): Filters the results to return only containers whose names
                begin with the specified prefix.
            include_metadata (bool): Specifies that container metadata to be returned in the response.

        Returns:
            list or None: The items produced by the SDK's ``list_containers``,
                collected into a list, or None when the listing failed.
        """
        try:
            return list(self.blob_service_client.list_containers(
                name_starts_with=name_starts_with,
                include_metadata=include_metadata))
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return None

    def list_containers_name(self, name_starts_with=None):
        """Lists containers' name.

        Args:
            name_starts_with (str): Filters the results to return only containers whose names
                begin with the specified prefix.

        Returns:
            list: A list of strings representing the container names.
                Empty when the listing failed.
        """
        containers = self._list_containers(
            name_starts_with=name_starts_with, include_metadata=False)
        if containers is None:
            return []
        return [container['name'] for container in containers]

    def create_append_blob(self, container_name, blob_name, replace_blob=False):
        """Creates an append blob in an existing container.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            replace_blob (bool): If True, an existing blob with the same name is
                created again; otherwise an existing blob is left untouched.

        Returns:
            bool: The return value. True when the blob was created, False
                otherwise (including when it already exists and
                ``replace_blob`` is not True).
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            try:
                # Raises ResourceNotFoundError if the blob does not exist.
                blob_client.get_blob_properties()
                blob_exists = True
            except ResourceNotFoundError:
                blob_exists = False
            # The create call sits outside the handler above so that its own
            # failures are caught and logged below instead of escaping.
            if not blob_exists or replace_blob is True:
                blob_client.create_append_blob()
                success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def create_page_blob(self,
                         container_name, blob_name, size=1024, content_settings=None,
                         metadata=None, premium_page_blob_tier=None):
        """Creates a page blob in an existing container.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            size (int): This specifies the maximum size for the page blob, up to 1 TB.
                The page blob size must be aligned to a 512-byte boundary
            content_settings (ContentSettings): ContentSettings object used to set blob properties.
                Used to set content type, encoding, language, disposition, md5, and cache control.
            metadata (dict(str, str)): Name-value pairs associated with the blob as metadata
            premium_page_blob_tier (PremiumPageBlobTier): A page blob tier value to set the blob to

        Returns:
            bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.create_page_blob(
                size, content_settings, metadata, premium_page_blob_tier)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def delete_blob(self, container_name, blob_name):
        """Deletes a blob.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.

        Returns:
            bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.delete_blob()
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def list_blobs(self, container_name):
        """Lists the blobs in the specified container.

        Args:
            container_name (str): The name of the container.

        Returns:
            list: The items produced by the container client's ``list_blobs``
                (blob property objects, not plain names). If the listing
                fails part-way, the items collected so far are returned.
        """
        blobs_list = []
        try:
            container_client = self.blob_service_client.get_container_client(
                container_name)
            for blob in container_client.list_blobs():
                blobs_list.append(blob)
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('Fatal error')
        return blobs_list

    def upload_data(self, data, container_name, blob_name, blob_type='BlockBlob'):
        """Creates a new blob from a data source with automatic chunking.

        Args:
            data: The blob data to upload.
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.
            blob_type (str): The type of the blob. This can be either BlockBlob, PageBlob or AppendBlob.

        Returns:
            bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            # Forward the requested type; it used to be accepted but ignored.
            blob_client.upload_blob(data, blob_type=blob_type)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def append_block(self, data, container_name, blob_name):
        """Commits a new block of data to the end of the existing append blob.

        Args:
            data: Content of the block.
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.

        Returns:
            bool: The return value. True for success, False otherwise.
        """
        success = False
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            blob_client.append_block(data)
            success = True
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return success

    def download_data(self, container_name, blob_name):
        """Downloads a blob.

        Args:
            container_name (str): The name of the container.
            blob_name (str): The name of the blob.

        Returns:
            The full blob content as returned by the download stream's
            ``readall()``, or None when the download failed.
        """
        try:
            blob_client = self.blob_service_client.get_blob_client(
                container_name, blob_name)
            stream = blob_client.download_blob()
            return stream.readall()
        except AzureError as err:
            self._logAzureError(err=err)
        except Exception:
            logger.exception('')
        return None
class StorageAppendBlobTest(StorageTestCase): def setUp(self): super(StorageAppendBlobTest, self).setUp() url = self._get_account_url() credential = self._get_shared_key_credential() self.bsc = BlobServiceClient(url, credential=credential, max_block_size=4 * 1024) self.config = self.bsc._config self.container_name = self.get_resource_name('utcontainer') self.source_container_name = self.get_resource_name('utcontainersource') if not self.is_playback(): self.bsc.create_container(self.container_name) self.bsc.create_container(self.source_container_name) def tearDown(self): if not self.is_playback(): try: self.bsc.delete_container(self.container_name) except: pass try: self.bsc.delete_container(self.source_container_name) except: pass if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass return super(StorageAppendBlobTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.get_resource_name(TEST_BLOB_PREFIX) def _create_blob(self): blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client( self.container_name, blob_name) blob.create_append_blob() return blob def _create_source_blob(self, data): blob_client = self.bsc.get_blob_client(self.source_container_name, self.get_resource_name(TEST_BLOB_PREFIX)) blob_client.create_append_blob() blob_client.append_block(data) return blob_client def assertBlobEqual(self, blob, expected_data): stream = blob.download_blob() actual_data = stream.readall() self.assertEqual(actual_data, expected_data) class NonSeekableFile(object): def __init__(self, wrapped_file): self.wrapped_file = wrapped_file def write(self, data): self.wrapped_file.write(data) def read(self, count): return self.wrapped_file.read(count) # --Test cases for block blobs -------------------------------------------- @record def test_create_blob(self): # Arrange blob_name = self._get_blob_reference() # Act blob = self.bsc.get_blob_client(self.container_name, 
blob_name) create_resp = blob.create_append_blob() # Assert blob_properties = blob.get_blob_properties() self.assertIsNotNone(blob_properties) self.assertEqual(blob_properties.etag, create_resp.get('etag')) self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified')) @record def test_create_blob_with_lease_id(self): # Arrange blob = self._create_blob() # Act lease = blob.acquire_lease() create_resp = blob.create_append_blob(lease=lease) # Assert blob_properties = blob.get_blob_properties() self.assertIsNotNone(blob_properties) self.assertEqual(blob_properties.etag, create_resp.get('etag')) self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified')) @record def test_create_blob_with_metadata(self): # Arrange metadata = {'hello': 'world', 'number': '42'} blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) # Act blob.create_append_blob(metadata=metadata) # Assert md = blob.get_blob_properties().metadata self.assertDictEqual(md, metadata) @record def test_append_block(self): # Arrange blob = self._create_blob() # Act for i in range(5): resp = blob.append_block(u'block {0}'.format(i).encode('utf-8')) self.assertEqual(int(resp['blob_append_offset']), 7 * i) self.assertEqual(resp['blob_committed_block_count'], i + 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4') @record def test_append_block_unicode(self): # Arrange blob = self._create_blob() # Act resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16') self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_append_block_with_md5(self): # Arrange blob = self._create_blob() # Act resp = blob.append_block(b'block', validate_content=True) 
self.assertEqual(int(resp['blob_append_offset']), 0) self.assertEqual(resp['blob_committed_block_count'], 1) self.assertIsNotNone(resp['etag']) self.assertIsNotNone(resp['last_modified']) # Assert @record def test_append_block_from_url(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act: make append block from url calls split = 4 * 1024 resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=split) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=split, source_length=LARGE_BLOB_SIZE - split) self.assertEqual(resp.get('blob_append_offset'), str(4 * 1024)) self.assertEqual(resp.get('blob_committed_block_count'), 2) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Missing start range shouldn't pass the validation with self.assertRaises(ValueError): destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_length=LARGE_BLOB_SIZE) @record def test_append_block_from_url_and_validate_content_md5(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) src_md5 = StorageContentValidation.get_content_md5(source_blob_data) sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls with correct md5 resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' 
+ sas, source_content_md5=src_md5) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) # Act part 2: put block from url with wrong md5 with self.assertRaises(HttpResponseError): destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_content_md5=StorageContentValidation.get_content_md5( b"POTATO")) @record def test_append_block_from_url_with_source_if_modified(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_modified_since=source_blob_properties.get( 'last_modified') - timedelta(hours=15)) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceNotFoundError): destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_modified_since=source_blob_properties.get( 'last_modified')) @record def test_append_block_from_url_with_source_if_unmodified(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client.append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_if_unmodified_since=source_blob_properties.get( 'last_modified')) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceModifiedError): destination_blob_client \ .append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, if_unmodified_since=source_blob_properties.get('last_modified') - timedelta( hours=15)) @record def test_append_block_from_url_with_source_if_match(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client. \ append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_etag=source_blob_properties.get('etag'), source_match_condition=MatchConditions.IfNotModified) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceNotFoundError): destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_etag='0x111111111111111', source_match_condition=MatchConditions.IfNotModified) @record def test_append_block_from_url_with_source_if_none_match(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) source_blob_properties = source_blob_client.get_blob_properties() sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_client = self._create_blob() # Act part 1: make append block from url calls resp = destination_blob_client. \ append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_etag='0x111111111111111', source_match_condition=MatchConditions.IfModified) self.assertEqual(resp.get('blob_append_offset'), '0') self.assertEqual(resp.get('blob_committed_block_count'), 1) self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) # Assert the destination blob is constructed correctly destination_blob_properties = destination_blob_client.get_blob_properties() self.assertBlobEqual(destination_blob_client, source_blob_data) self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag')) self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified')) self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE) # Act part 2: put block from url with failing condition with self.assertRaises(ResourceNotFoundError): destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas, source_offset=0, source_length=LARGE_BLOB_SIZE, source_etag=source_blob_properties.get('etag'), source_match_condition=MatchConditions.IfModified) @record def test_append_block_from_url_with_if_match(self): # Arrange source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE) source_blob_client = self._create_source_blob(source_blob_data) sas = generate_blob_sas( source_blob_client.account_name, source_blob_client.container_name, source_blob_client.blob_name, snapshot=source_blob_client.snapshot, account_key=source_blob_client.credential.account_key, permission=BlobSasPermissions(read=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) destination_blob_name = self._get_blob_reference() destination_blob_client = self.bsc.get_blob_client( self.container_name, destination_blob_name) destination_blob_properties_on_creation = destination_blob_client.create_append_blob() # Act part 1: make append block from url calls resp = destination_blob_client. \ append_block_from_url(source_blob_client.url + '?' 
+ sas, source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  etag=destination_blob_properties_on_creation.get('etag'),
                                  match_condition=MatchConditions.IfNotModified)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          etag='0x111111111111111',
                                                          match_condition=MatchConditions.IfNotModified)

    @record
    def test_append_block_from_url_with_if_none_match(self):
        """Append from a SAS source URL under an ``IfModified`` etag condition.

        A made-up etag lets the append through; repeating the call with the
        destination's current etag must raise ``ResourceModifiedError``.
        """
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  etag='0x111111111111111', match_condition=MatchConditions.IfModified)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          etag=destination_blob_properties.get('etag'),
                                                          match_condition=MatchConditions.IfModified)

    @record
    def test_append_block_from_url_with_maxsize_condition(self):
        """Append from a SAS source URL with ``maxsize_condition=LARGE_BLOB_SIZE + 1``.

        The first append succeeds; a second identical append must raise
        ``HttpResponseError``.
        """
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  maxsize_condition=LARGE_BLOB_SIZE + 1)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          maxsize_condition=LARGE_BLOB_SIZE + 1)

    @record
    def test_append_block_from_url_with_appendpos_condition(self):
        """Append from a SAS source URL with ``appendpos_condition=0``.

        The first append succeeds; repeating it with the same position
        condition must raise ``HttpResponseError``.
        """
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  appendpos_condition=0)
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          appendpos_condition=0)

    @record
    def test_append_block_from_url_with_if_modified(self):
        """Append from a SAS source URL under an ``if_modified_since`` condition.

        A timestamp 15 minutes before the source's last-modified time lets
        the append through; using the destination's own last-modified time
        must raise ``HttpResponseError``.
        """
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        source_properties = source_blob_client.get_blob_properties()
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  if_modified_since=source_properties.get('last_modified') - timedelta(minutes=15))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(HttpResponseError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          if_modified_since=destination_blob_properties.get(
                                                              'last_modified'))

    @record
    def test_append_block_from_url_with_if_unmodified(self):
        """Append from a SAS source URL under an ``if_unmodified_since`` condition.

        The timestamp returned by an extra ``append_block`` on the source lets
        the append through; a timestamp 15 minutes earlier must raise
        ``ResourceModifiedError``.
        """
        # Arrange
        source_blob_data = self.get_random_bytes(LARGE_BLOB_SIZE)
        source_blob_client = self._create_source_blob(source_blob_data)
        # The append response is used only for its 'last_modified' value below.
        source_properties = source_blob_client.append_block(source_blob_data)
        sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True, delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        destination_blob_client = self._create_blob()

        # Act part 1: make append block from url calls
        resp = destination_blob_client. \
            append_block_from_url(source_blob_client.url + '?' + sas,
                                  source_offset=0, source_length=LARGE_BLOB_SIZE,
                                  if_unmodified_since=source_properties.get('last_modified'))
        self.assertEqual(resp.get('blob_append_offset'), '0')
        self.assertEqual(resp.get('blob_committed_block_count'), 1)
        self.assertIsNotNone(resp.get('etag'))
        self.assertIsNotNone(resp.get('last_modified'))

        # Assert the destination blob is constructed correctly
        destination_blob_properties = destination_blob_client.get_blob_properties()
        self.assertBlobEqual(destination_blob_client, source_blob_data)
        self.assertEqual(destination_blob_properties.get('etag'), resp.get('etag'))
        self.assertEqual(destination_blob_properties.get('last_modified'), resp.get('last_modified'))
        self.assertEqual(destination_blob_properties.get('size'), LARGE_BLOB_SIZE)

        # Act part 2: put block from url with failing condition
        with self.assertRaises(ResourceModifiedError):
            destination_blob_client.append_block_from_url(source_blob_client.url + '?' + sas,
                                                          source_offset=0, source_length=LARGE_BLOB_SIZE,
                                                          if_unmodified_since=source_properties.get(
                                                              'last_modified') - timedelta(minutes=15))

    @record
    def test_create_append_blob_with_no_overwrite(self):
        """Upload twice, the second time with ``overwrite=False``.

        The blob must then hold ``data1 + data2`` and still carry the
        metadata given on the first upload.
        """
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(
            self.container_name,
            blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})

        update_resp = blob.upload_blob(
            data2,
            overwrite=False,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        appended_data = data1 + data2
        self.assertBlobEqual(blob, appended_data)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512)

    @record
    def test_create_append_blob_with_overwrite(self):
        """Upload twice with ``overwrite=True`` both times.

        The blob must then hold only ``data2`` with the second upload's
        metadata.
        """
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(
            self.container_name,
            blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_append_blob_from_bytes(self):
        """Upload a short bytes value and check content, etag and last_modified."""
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp['last_modified'])

    @record
    def test_append_blob_from_0_bytes(self):
        """Upload empty bytes and expect a response without etag or last_modified."""
        # Arrange
        blob = self._create_blob()

        # Act
        data = b''
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        # appending nothing should not make any network call
        self.assertIsNone(append_resp.get('etag'))
        self.assertIsNone(append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress(self):
        """Upload bytes through a one-shot generator that records progress."""
        # Arrange
        blob = self._create_blob()
        data = b'abcdefghijklmnopqrstuvwxyz'

        # Act
        progress = []

        def progress_gen(upload):
            # Records a single (0, total) entry, then yields the payload whole.
            progress.append((0, len(upload)))
            yield upload

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_with_index(self):
        """Upload the slice ``data[3:]`` and check the blob equals that slice."""
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:])

    @record
    def test_append_blob_from_bytes_with_index_and_count(self):
        """Upload ``data[3:]`` with ``length=5`` and expect ``data[3:8]`` in the blob."""
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:8])

    @record
    def test_append_blob_from_bytes_chunked_upload(self):
        """Upload ``LARGE_BLOB_SIZE`` random bytes and check content and properties."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress_chunked_upload(self):
        """Upload large bytes through a generator yielding ``max_block_size`` slices."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def progress_gen(upload):
            # Yields successive slices of at most n bytes, recording
            # (bytes already yielded, total) before each one.
            n = self.config.max_block_size
            total = len(upload)
            current = 0
            while upload:
                progress.append((current, total))
                yield upload[:n]
                current += len(upload[:n])
                upload = upload[n:]

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self):
        """Upload ``data[33:]`` with ``length=len(data) - 66`` and check that window."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 33
        blob_size = len(data) - 66

        # Act
        blob.upload_blob(data[index:], length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[index:index + blob_size])

    @record
    def test_append_blob_from_path_chunked_upload(self):
        """Write large data to ``FILE_PATH``, upload the open file, check properties."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)

        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_path_with_progress_chunked_upload(self):
        """Upload a file via a generator that reads ``max_block_size`` chunks."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def progress_gen(upload):
            # Reads the stream n bytes at a time until read() returns empty,
            # recording (bytes already yielded, total) before each chunk.
            n = self.config.max_block_size
            total = LARGE_BLOB_SIZE
            current = 0
            while upload:
                chunk = upload.read(n)
                if not chunk:
                    break
                progress.append((current, total))
                yield chunk
                current += len(chunk)

        with open(FILE_PATH, 'rb') as stream:
            upload_data = progress_gen(stream)
            blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_stream_chunked_upload(self):
        """Upload large data from an open file stream and check properties."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_known_size(self):
        """Upload from a ``NonSeekableFile`` wrapper with an explicit ``length``."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_size = len(data) - 66

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size(self):
        """Upload from a ``NonSeekableFile`` wrapper without passing ``length``."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_with_multiple_appends(self):
        """Upload the same file twice and expect the blob to hold the data doubled."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        # Both writes target FILE_PATH with the same payload.
        with open(FILE_PATH, 'wb') as stream1:
            stream1.write(data)
        with open(FILE_PATH, 'wb') as stream2:
            stream2.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream1:
            blob.upload_blob(stream1, blob_type=BlobType.AppendBlob)
        with open(FILE_PATH, 'rb') as stream2:
            blob.upload_blob(stream2, blob_type=BlobType.AppendBlob)

        # Assert
        data = data * 2
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_chunked_upload_with_count(self):
        """Upload a file stream with ``length=len(data) - 301`` and check the prefix."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    def test_append_blob_from_stream_chunked_upload_with_count_parallel(self):
        """Live-only variant of the counted stream upload that also checks properties.

        Returns immediately when the test mode needs a recording file.
        """
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text(self):
        """Upload a unicode string and expect its UTF-8 encoding in the blob."""
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text_with_encoding(self):
        """Upload a unicode string with ``encoding='utf-16'`` and check the bytes."""
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_text_with_encoding_and_progress(self):
        """Upload text via a generator with ``encoding='utf-16'`` and check progress only."""
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def progress_gen(upload):
            # Total is the length of the UTF-16 encoded bytes, not of the text.
            progress.append((0, len(data)))
            yield upload

        upload_data = progress_gen(text)
        blob.upload_blob(upload_data, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    @record
    def test_append_blob_from_text_chunked_upload(self):
        """Upload ``LARGE_BLOB_SIZE`` of random text and expect its UTF-8 bytes."""
        # Arrange
        blob = self._create_blob()
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, encoded_data)

    @record
    def test_append_blob_with_md5(self):
        """Append a block with ``validate_content=True``; there are no further assertions."""
        # Arrange
        blob = self._create_blob()
        data = b'hello world'

        # Act
        blob.append_block(data, validate_content=True)
class StorageBlobEncryptionTest(StorageTestCase):
    """Tests for blob uploads/downloads with a key-encryption-key configured.

    Each test sets ``key_encryption_key`` and/or ``require_encryption`` on the
    service client (or on a blob client) and checks round-trips, ranged
    downloads, and the errors raised for invalid keys or unsupported calls.
    """

    def setUp(self):
        """Create the service client with small chunk sizes and the test container."""
        super(StorageBlobEncryptionTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=credential,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024,
            max_page_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')
        self.blob_types = (BlobType.BlockBlob, BlobType.PageBlob, BlobType.AppendBlob)
        # NOTE(review): repeats the assignment two lines up. Kept because
        # get_resource_name may be stateful and recordings could depend on the
        # number of calls -- confirm, then drop one of them.
        self.container_name = self.get_resource_name('utcontainer')
        self.bytes = b'Foo'

        if not self.is_playback():
            container = self.bsc.get_container_client(self.container_name)
            container.create_container()

    def tearDown(self):
        """Best-effort removal of the test container and the temp file."""
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                # Cleanup only: a failed delete must not fail the test, but
                # KeyboardInterrupt/SystemExit are allowed to propagate.
                pass
        if path.isfile(FILE_PATH):
            try:
                remove(FILE_PATH)
            except OSError:
                # Cleanup only: ignore a file that cannot be removed.
                pass

        return super(StorageBlobEncryptionTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------

    def _get_container_reference(self):
        """Return a resource name built from ``TEST_CONTAINER_PREFIX``."""
        return self.get_resource_name(TEST_CONTAINER_PREFIX)

    def _get_blob_reference(self, blob_type):
        """Return a resource name using the prefix registered for ``blob_type``."""
        return self.get_resource_name(TEST_BLOB_PREFIXES[blob_type.value])

    def _create_small_blob(self, blob_type):
        """Upload ``self.bytes`` as a new blob of ``blob_type`` and return its client."""
        blob_name = self._get_blob_reference(blob_type)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(self.bytes, blob_type=blob_type)
        return blob

    #--Test cases for blob encryption ----------------------------------------

    @record
    def test_missing_attribute_kek_wrap(self):
        """Upload must raise ``AttributeError`` when the key lacks a wrap-side attribute.

        Three keys are tried, each missing exactly one of ``wrap_key``,
        ``get_key_wrap_algorithm`` and ``get_kid``.
        """
        # In the shared method _generate_blob_encryption_key
        # Arrange
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')

        # Act
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute wrap_key
        self.bsc.key_encryption_key = invalid_key_1
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.wrap_key = valid_key.wrap_key
        invalid_key_2.get_kid = valid_key.get_kid
        # No attribute get_key_wrap_algorithm
        self.bsc.key_encryption_key = invalid_key_2
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        invalid_key_3 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_3.get_key_wrap_algorithm = valid_key.get_key_wrap_algorithm
        invalid_key_3.wrap_key = valid_key.wrap_key
        # No attribute get_kid
        # Install invalid_key_3 itself so the missing-get_kid case is the one exercised.
        self.bsc.key_encryption_key = invalid_key_3
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @record
    def test_invalid_value_kek_wrap(self):
        """Upload must raise ``AttributeError`` when a wrap-side attribute is ``None``."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')

        self.bsc.key_encryption_key.get_key_wrap_algorithm = None
        try:
            self._create_small_blob(BlobType.BlockBlob)
            self.fail()
        except AttributeError as e:
            self.assertEqual(
                str(e), _ERROR_OBJECT_INVALID.format('key encryption key', 'get_key_wrap_algorithm'))

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.get_kid = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.key_encryption_key.wrap_key = None
        with self.assertRaises(AttributeError):
            self._create_small_blob(BlobType.BlockBlob)

    @record
    def test_missing_attribute_kek_unwrap(self):
        """Download must raise ``HttpResponseError`` when the key lacks an unwrap-side attribute."""
        # Shared between all services in decrypt_blob
        # Arrange
        self.bsc.require_encryption = True
        valid_key = KeyWrapper('key1')
        self.bsc.key_encryption_key = valid_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        # Note that KeyWrapper has a default value for key_id, so these Exceptions
        # are not due to non_matching kids.
        invalid_key_1 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_1.get_kid = valid_key.get_kid
        # No attribute unwrap_key
        blob.key_encryption_key = invalid_key_1
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

        invalid_key_2 = lambda: None  # functions are objects, so this effectively creates an empty object
        invalid_key_2.unwrap_key = valid_key.unwrap_key
        blob.key_encryption_key = invalid_key_2
        # No attribute get_kid
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @record
    def test_invalid_value_kek_unwrap(self):
        """Download must fail with 'Decryption failed.' when ``unwrap_key`` is ``None``.

        Returns immediately when the test mode needs a recording file.
        """
        if TestMode.need_recording_file(self.test_mode):
            return
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = KeyWrapper('key1')
        blob.key_encryption_key.unwrap_key = None

        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        # Checked after the context exits; inside it the line would be
        # unreachable once the download raises.
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @record
    def test_get_blob_kek(self):
        """Round-trip a small blob with a key-encryption-key and iterate the download."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content)), self.bytes)

    @record
    def test_get_blob_resolver(self):
        """Download using only ``key_resolver_function`` after clearing the key on the service client."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        key_resolver = KeyResolver()
        key_resolver.put_key(self.bsc.key_encryption_key)
        self.bsc.key_resolver_function = key_resolver.resolve_key
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key = None
        content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, self.bytes)

    def test_get_blob_kek_RSA(self):
        """Live-only round-trip of a small blob using an ``RSAKeyWrapper``."""
        # We can only generate random RSA keys, so this must be run live or
        # the playback test will fail due to a change in kek values.
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = RSAKeyWrapper('key2')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        content = blob.download_blob()

        # Assert
        self.assertEqual(b"".join(list(content)), self.bytes)

    @record
    def test_get_blob_nonmatching_kid(self):
        """Download must fail with 'Decryption failed.' after the key's ``kid`` is changed.

        Returns immediately when the test mode needs a recording file.
        """
        if TestMode.need_recording_file(self.test_mode):
            return
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        self.bsc.key_encryption_key.kid = 'Invalid'

        # Assert
        with self.assertRaises(HttpResponseError) as e:
            blob.download_blob().content_as_bytes()
        # Checked after the context exits so the assertion actually runs.
        self.assertEqual(str(e.exception), 'Decryption failed.')

    @record
    def test_put_blob_invalid_stream_type(self):
        """Uploading a text ``StringIO`` with encryption on must raise ``TypeError``."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        small_stream = StringIO(u'small')
        large_stream = StringIO(u'large' * self.config.max_single_put_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        # Block blob specific single shot
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(small_stream, length=5)
        # Message checks sit after each context so they actually run.
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

        # Generic blob chunked
        with self.assertRaises(TypeError) as e:
            blob.upload_blob(large_stream)
        self.assertTrue(
            'Blob data should be of type bytes.' in str(e.exception))

    def test_put_blob_chunking_required_mult_of_block_size(self):
        """Live-only round-trip of ``max_single_put_size + max_block_size`` bytes with 3 connections."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(
            self.config.max_single_put_size + self.config.max_block_size)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_connections=3)
        blob_content = blob.download_blob().content_as_bytes(max_connections=3)

        # Assert
        self.assertEqual(content, blob_content)

    def test_put_blob_chunking_required_non_mult_of_block_size(self):
        """Live-only round-trip of ``max_single_put_size + 1`` bytes with 3 connections."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = urandom(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_connections=3)
        blob_content = blob.download_blob().content_as_bytes(max_connections=3)

        # Assert
        self.assertEqual(content, blob_content)

    def test_put_blob_chunking_required_range_specified(self):
        """Live-only upload limited by ``length=max_single_put_size + 53`` with 3 connections."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size * 2)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content,
            length=self.config.max_single_put_size + 53,
            max_connections=3)
        blob_content = blob.download_blob().content_as_bytes(max_connections=3)

        # Assert
        self.assertEqual(content[:self.config.max_single_put_size + 53], blob_content)

    @record
    def test_put_block_blob_single_shot(self):
        """Round-trip the 5-byte payload ``b'small'`` with encryption required."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b'small'
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes()

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_put_blob_range(self):
        """Upload ``content[2:]`` limited by ``length`` and compare with the same window."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        content = b'Random repeats' * self.config.max_single_put_size * 5

        # All page blob uploads call _upload_chunks, so this will test the ability
        # of that function to handle ranges even though it's a small blob
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(
            content[2:],
            length=self.config.max_single_put_size + 5,
            max_connections=1)
        blob_content = blob.download_blob().content_as_bytes(max_connections=1)

        # Assert
        self.assertEqual(content[2:2 + self.config.max_single_put_size + 5], blob_content)

    @record
    def test_put_blob_empty(self):
        """Round-trip an empty payload with encryption required."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = b''
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob().content_as_bytes(max_connections=2)

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_put_blob_serial_upload_chunking(self):
        """Round-trip ``max_single_put_size + 1`` bytes using a single connection."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(self.config.max_single_put_size + 1)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_connections=1)
        blob_content = blob.download_blob().content_as_bytes(max_connections=1)

        # Assert
        self.assertEqual(content, blob_content)

    @record
    def test_get_blob_range_beginning_to_middle(self):
        """Download with ``offset=0, length=50`` and expect ``content[:51]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_connections=1)
        blob_content = blob.download_blob(
            offset=0, length=50).content_as_bytes(max_connections=1)

        # Assert
        self.assertEqual(content[:51], blob_content)

    @record
    def test_get_blob_range_middle_to_end(self):
        """Download from offset 50 with and without ``length=127``; both expect ``content[50:]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content, max_connections=1)
        blob_content = blob.download_blob(offset=50, length=127).content_as_bytes()
        blob_content2 = blob.download_blob(offset=50).content_as_bytes()

        # Assert
        self.assertEqual(content[50:], blob_content)
        self.assertEqual(content[50:], blob_content2)

    @record
    def test_get_blob_range_middle_to_middle(self):
        """Download with ``offset=50, length=93`` and expect ``content[50:94]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=50, length=93).content_as_bytes()

        # Assert
        self.assertEqual(content[50:94], blob_content)

    @record
    def test_get_blob_range_aligns_on_16_byte_block(self):
        """Download with ``offset=48, length=63`` and expect ``content[48:64]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=48, length=63).content_as_bytes()

        # Assert
        self.assertEqual(content[48:64], blob_content)

    @record
    def test_get_blob_range_expanded_to_beginning_block_align(self):
        """Download with ``offset=5, length=50`` and expect ``content[5:51]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=5, length=50).content_as_bytes()

        # Assert
        self.assertEqual(content[5:51], blob_content)

    @record
    def test_get_blob_range_expanded_to_beginning_iv(self):
        """Download with ``offset=22, length=42`` and expect ``content[22:43]``."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        self.bsc.require_encryption = True
        content = self.get_random_bytes(128)
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.upload_blob(content)
        blob_content = blob.download_blob(offset=22, length=42).content_as_bytes()

        # Assert
        self.assertEqual(content[22:43], blob_content)

    @record
    def test_put_blob_strict_mode(self):
        """With ``require_encryption`` set but no key, every upload form must raise ``ValueError``.

        Bytes, a ``BytesIO`` stream, a file stream and a text string are
        tried for each blob type in ``self.blob_types``.
        """
        # Arrange
        self.bsc.require_encryption = True
        content = urandom(512)

        # Assert
        for service in self.blob_types:
            blob_name = self._get_blob_reference(service)
            blob = self.bsc.get_blob_client(self.container_name, blob_name)

            with self.assertRaises(ValueError):
                blob.upload_blob(content, blob_type=service)

            stream = BytesIO(content)
            with self.assertRaises(ValueError):
                blob.upload_blob(stream, length=512, blob_type=service)

            # Local name; this assignment does not rebind the module-level FILE_PATH.
            FILE_PATH = 'blob_input.temp.dat'
            with open(FILE_PATH, 'wb') as stream:
                stream.write(content)
            with open(FILE_PATH, 'rb') as stream:
                with self.assertRaises(ValueError):
                    blob.upload_blob(stream, blob_type=service)

            with self.assertRaises(ValueError):
                blob.upload_blob('To encrypt', blob_type=service)

    @record
    def test_get_blob_strict_mode_no_policy(self):
        """Download must raise ``ValueError`` once the blob client's key is cleared."""
        # Arrange
        self.bsc.require_encryption = True
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.key_encryption_key = None

        # Assert
        with self.assertRaises(ValueError):
            blob.download_blob().content_as_bytes()

    @record
    def test_get_blob_strict_mode_unencrypted_blob(self):
        """Download of a blob uploaded without a key must raise ``HttpResponseError`` in strict mode."""
        # Arrange
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = True
        blob.key_encryption_key = KeyWrapper('key1')

        # Assert
        with self.assertRaises(HttpResponseError):
            blob.download_blob().content_as_bytes()

    @record
    def test_invalid_methods_fail_block(self):
        """``stage_block`` and ``commit_block_list`` must raise ``ValueError`` when a key is set."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.BlockBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.stage_block('block1', urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.commit_block_list(['block1'])
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_invalid_methods_fail_append(self):
        """Append-blob operations must raise ``ValueError`` when a key is set."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.AppendBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.append_block(urandom(32))
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_append_blob()
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        # All append_from operations funnel into append_from_stream, so testing one is sufficient
        with self.assertRaises(ValueError) as e:
            blob.upload_blob(b'To encrypt', blob_type=BlobType.AppendBlob)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_invalid_methods_fail_page(self):
        """``upload_page`` and ``create_page_blob`` must raise ``ValueError`` when a key is set."""
        # Arrange
        self.bsc.key_encryption_key = KeyWrapper('key1')
        blob_name = self._get_blob_reference(BlobType.PageBlob)
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Assert
        with self.assertRaises(ValueError) as e:
            blob.upload_page(urandom(512), 0, 511, blob_type=BlobType.PageBlob)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

        with self.assertRaises(ValueError) as e:
            blob.create_page_blob(512)
        self.assertEqual(str(e.exception), _ERROR_UNSUPPORTED_METHOD_FOR_ENCRYPTION)

    @record
    def test_validate_encryption(self):
        """Download the raw ciphertext and decrypt it by hand to verify the stored format.

        The blob is fetched with encryption disabled on the client, the
        content key is unwrapped from the 'encryptiondata' metadata, and the
        payload is decrypted with AES-CBC and PKCS7-unpadded.
        """
        # Arrange
        self.bsc.require_encryption = True
        kek = KeyWrapper('key1')
        self.bsc.key_encryption_key = kek
        blob = self._create_small_blob(BlobType.BlockBlob)

        # Act
        blob.require_encryption = False
        blob.key_encryption_key = None
        content = blob.download_blob()
        data = content.content_as_bytes()

        encryption_data = _dict_to_encryption_data(
            loads(content.properties.metadata['encryptiondata']))
        iv = encryption_data.content_encryption_IV
        content_encryption_key = _validate_and_unwrap_cek(
            encryption_data, kek, None)
        cipher = _generate_AES_CBC_cipher(content_encryption_key, iv)
        decryptor = cipher.decryptor()
        unpadder = PKCS7(128).unpadder()

        content = decryptor.update(data) + decryptor.finalize()
        content = unpadder.update(content) + unpadder.finalize()

        self.assertEqual(self.bytes, content)

    @record
    def test_create_block_blob_from_star(self):
        """Run ``_create_blob_from_star`` for bytes, a ``BytesIO``, a file stream and text."""
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, self.bytes)

        stream = BytesIO(self.bytes)
        self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        # Local name; this assignment does not rebind the module-level FILE_PATH.
        FILE_PATH = 'blob_input.temp.dat'
        with open(FILE_PATH, 'wb') as stream:
            stream.write(self.bytes)
        with open(FILE_PATH, 'rb') as stream:
            self._create_blob_from_star(BlobType.BlockBlob, self.bytes, stream)

        self._create_blob_from_star(BlobType.BlockBlob, b'To encrypt', 'To encrypt')

    @record
    def test_create_page_blob_from_star(self):
        content = self.get_random_bytes(512)
self._create_blob_from_star(BlobType.PageBlob, content, content) stream = BytesIO(content) self._create_blob_from_star(BlobType.PageBlob, content, stream, length=512) FILE_PATH = 'blob_input.temp.dat' with open(FILE_PATH, 'wb') as stream: stream.write(content) with open(FILE_PATH, 'rb') as stream: self._create_blob_from_star(BlobType.PageBlob, content, stream) def _create_blob_from_star(self, blob_type, content, data, **kwargs): blob_name = self._get_blob_reference(blob_type) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.key_encryption_key = KeyWrapper('key1') blob.require_encryption = True blob.upload_blob(data, blob_type=blob_type, **kwargs) blob_content = blob.download_blob().content_as_bytes() self.assertEqual(content, blob_content) @record def test_get_blob_to_star(self): # Arrange self.bsc.require_encryption = True self.bsc.key_encryption_key = KeyWrapper('key1') blob = self._create_small_blob(BlobType.BlockBlob) # Act iter_blob = b"".join(list(blob.download_blob())) bytes_blob = blob.download_blob().content_as_bytes() stream_blob = BytesIO() blob.download_blob().download_to_stream(stream_blob) stream_blob.seek(0) text_blob = blob.download_blob().content_as_text() # Assert self.assertEqual(self.bytes, iter_blob) self.assertEqual(self.bytes, bytes_blob) self.assertEqual(self.bytes, stream_blob.read()) self.assertEqual(self.bytes.decode(), text_blob)
class StorageAppendBlobTest(StorageTestCase):
    """Tests for append blobs: creation, ``append_block`` and ``upload_blob``.

    ``setUp`` builds a ``BlobServiceClient`` with a 4 KiB ``max_block_size``
    and, outside playback mode, creates a container for the test;
    ``tearDown`` removes that container and the scratch file ``FILE_PATH``.
    """

    def setUp(self):
        super(StorageAppendBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # NOTE(review): the small block size presumably forces the
        # LARGE_BLOB_SIZE uploads below to be chunked -- confirm against
        # the value of LARGE_BLOB_SIZE.
        self.bsc = BlobServiceClient(url, credential=credential, max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        # Cleanup is best-effort: a failure here must not mask the test
        # result, but KeyboardInterrupt/SystemExit must still propagate,
        # hence no bare ``except:``.
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except OSError:
                pass

        return super(StorageAppendBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        """Return a fresh resource name based on ``TEST_BLOB_PREFIX``."""
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        """Create an empty append blob in the test container and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob()
        return blob

    def assertBlobEqual(self, blob, expected_data):
        """Download ``blob`` chunk by chunk and compare it with ``expected_data``."""
        stream = blob.download_blob()
        actual_data = b"".join(list(stream))
        self.assertEqual(actual_data, expected_data)

    class NonSeekableFile(object):
        """File wrapper exposing only ``read``/``write`` (no ``seek``/``tell``)."""

        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for append blobs --------------------------------------------
    @record
    def test_create_blob(self):
        # Arrange
        blob_name = self._get_blob_reference()

        # Act
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        create_resp = blob.create_append_blob()

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_with_lease_id(self):
        # Arrange
        blob = self._create_blob()

        # Act
        lease = blob.acquire_lease()
        create_resp = blob.create_append_blob(lease=lease)

        # Assert
        blob_properties = blob.get_blob_properties()
        self.assertIsNotNone(blob_properties)
        self.assertEqual(blob_properties.etag, create_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        blob.create_append_blob(metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    @record
    def test_append_block(self):
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.append_block(u'block {0}'.format(i).encode('utf-8'))
            # Every payload ("block N") is 7 bytes long, so block i starts at 7 * i.
            self.assertEqual(int(resp['blob_append_offset']), 7 * i)
            self.assertEqual(resp['blob_committed_block_count'], i + 1)
            self.assertIsNotNone(resp['etag'])
            self.assertIsNotNone(resp['last_modified'])

        # Assert
        self.assertBlobEqual(blob, b'block 0block 1block 2block 3block 4')

    @record
    def test_append_block_unicode(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(u'啊齄丂狛狜', encoding='utf-16')

        # Assert
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

    @record
    def test_append_block_with_md5(self):
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.append_block(b'block', validate_content=True)

        # Assert
        self.assertEqual(int(resp['blob_append_offset']), 0)
        self.assertEqual(resp['blob_committed_block_count'], 1)
        self.assertIsNotNone(resp['etag'])
        self.assertIsNotNone(resp['last_modified'])

    @record
    def test_create_append_blob_with_no_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2,
            overwrite=False,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})
        props = blob.get_blob_properties()

        # Assert: the second upload is appended and the first upload's
        # metadata is kept.
        appended_data = data1 + data2
        self.assertBlobEqual(blob, appended_data)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + LARGE_BLOB_SIZE + 512)

    @record
    def test_create_append_blob_with_overwrite(self):
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        blob.upload_blob(
            data1,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(
            data2,
            overwrite=True,
            blob_type=BlobType.AppendBlob,
            metadata={'BlobData': 'Data2'})
        props = blob.get_blob_properties()

        # Assert: the second upload replaces both content and metadata.
        self.assertBlobEqual(blob, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.blob_type, BlobType.AppendBlob)
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_append_blob_from_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp['last_modified'])

    @record
    def test_append_blob_from_0_bytes(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b''
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        # appending nothing should not make any network call
        self.assertIsNone(append_resp.get('etag'))
        self.assertIsNone(append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress(self):
        # Arrange
        blob = self._create_blob()
        data = b'abcdefghijklmnopqrstuvwxyz'

        # Act
        progress = []

        def progress_gen(upload):
            # Single-chunk generator that records one progress tuple.
            progress.append((0, len(upload)))
            yield upload

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.blob_settings.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_with_index(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:])

    @record
    def test_append_blob_from_bytes_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()

        # Act
        data = b'abcdefghijklmnopqrstuvwxyz'
        blob.upload_blob(data[3:], length=5, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[3:8])

    @record
    def test_append_blob_from_bytes_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        append_resp = blob.upload_blob(data, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp['etag'])
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_bytes_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def progress_gen(upload):
            # Yield ``upload`` in max_block_size slices, recording the
            # offset reached before each slice.
            n = self.config.blob_settings.max_block_size
            total = len(upload)
            current = 0
            while upload:
                progress.append((current, total))
                yield upload[:n]
                current += len(upload[:n])
                upload = upload[n:]

        upload_data = progress_gen(data)
        blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.blob_settings.max_block_size, progress)

    @record
    def test_append_blob_from_bytes_chunked_upload_with_index_and_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        index = 33
        blob_size = len(data) - 66

        # Act
        blob.upload_blob(data[index:], length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[index:index + blob_size])

    @record
    def test_append_blob_from_path_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_path_with_progress_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def progress_gen(upload):
            # Read the file in max_block_size chunks, recording the offset
            # reached before each chunk.
            n = self.config.blob_settings.max_block_size
            total = LARGE_BLOB_SIZE
            current = 0
            while upload:
                chunk = upload.read(n)
                if not chunk:
                    break
                progress.append((current, total))
                yield chunk
                current += len(chunk)

        with open(FILE_PATH, 'rb') as stream:
            upload_data = progress_gen(stream)
            blob.upload_blob(upload_data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)
        self.assert_upload_progress(len(data), self.config.blob_settings.max_block_size, progress)

    @record
    def test_append_blob_from_stream_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_known_size(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        blob_size = len(data) - 66

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    @record
    def test_append_blob_from_stream_non_seekable_chunked_upload_unknown_size(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageAppendBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_with_multiple_appends(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        # One write is enough: both uploads below read the same file.
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream1:
            blob.upload_blob(stream1, blob_type=BlobType.AppendBlob)
        with open(FILE_PATH, 'rb') as stream2:
            blob.upload_blob(stream2, blob_type=BlobType.AppendBlob)

        # Assert
        data = data * 2
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_stream_chunked_upload_with_count(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])

    def test_append_blob_from_stream_chunked_upload_with_count_parallel(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            append_resp = blob.upload_blob(stream, length=blob_size, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data[:blob_size])
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        append_resp = blob.upload_blob(text, blob_type=BlobType.AppendBlob)
        blob_properties = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(blob, data)
        self.assertEqual(blob_properties.etag, append_resp.get('etag'))
        self.assertEqual(blob_properties.last_modified, append_resp.get('last_modified'))

    @record
    def test_append_blob_from_text_with_encoding(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, data)

    @record
    def test_append_blob_from_text_with_encoding_and_progress(self):
        # Arrange
        blob = self._create_blob()
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def progress_gen(upload):
            # Progress is measured against the encoded byte length.
            progress.append((0, len(data)))
            yield upload

        upload_data = progress_gen(text)
        blob.upload_blob(upload_data, encoding='utf-16', blob_type=BlobType.AppendBlob)

        # Assert
        self.assert_upload_progress(len(data), self.config.blob_settings.max_block_size, progress)

    @record
    def test_append_blob_from_text_chunked_upload(self):
        # Arrange
        blob = self._create_blob()
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data, blob_type=BlobType.AppendBlob)

        # Assert
        self.assertBlobEqual(blob, encoded_data)

    @record
    def test_append_blob_with_md5(self):
        # Arrange
        blob = self._create_blob()
        data = b'hello world'

        # Act
        blob.append_block(data, validate_content=True)
class AzureBlobFileSystem(AbstractFileSystem):
    """
    Access Azure Datalake Gen2 and Azure Storage as if it were a file system,
    using Multiprotocol Access

    Parameters
    ----------
    account_name: str
        The storage account name. This is used to authenticate requests
        signed with an account key and to construct the storage endpoint. It
        is required unless a connection string is given, or if a custom
        domain is used with anonymous authentication.
    account_key: str
        The storage account key. This is used for shared key authentication.
        If none of account key, sas token or client_id is specified, anonymous
        access will be used.
    sas_token: str
        A shared access signature token to use to authenticate requests
        instead of the account key. If account key and sas token are both
        specified, account key will be used to sign. If none of account key,
        sas token or client_id are specified, anonymous access will be used.
    request_session: Session
        The session object to use for http requests.
    connection_string: str
        If specified, this will override all other parameters besides
        request session. See
        http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
        for the connection string format.
    socket_timeout: int
        If specified, this will override the default socket timeout. The
        timeout specified is in seconds.
        See DEFAULT_SOCKET_TIMEOUT in _constants.py for the default value.
    token_credential: TokenCredential
        A token credential used to authenticate HTTPS requests. The token
        value should be updated before its expiration.
    blocksize: int
        The block size to use for download/upload operations. Defaults to the
        value of ``BlockBlobService.MAX_BLOCK_SIZE``
    client_id: str
        Client ID to use when authenticating using an AD Service Principal
        client/secret.
    client_secret: str
        Client secret to use when authenticating using an AD Service
        Principal client/secret.
    tenant_id: str
        Tenant ID to use when authenticating using an AD Service Principal
        client/secret.

    Examples
    --------
    >>> abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX")
    >>> abfs.ls('')

    **  Sharded Parquet & csv files can be read as: **
        ------------------------------------------
        ddf = dd.read_csv('abfs://container_name/folder/*.csv', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY})

        ddf = dd.read_parquet('abfs://container_name/folder.parquet', storage_options={
        ...    'account_name': ACCOUNT_NAME, 'account_key': ACCOUNT_KEY,})
    """

    protocol = "abfs"

    def __init__(
        self,
        account_name: str,
        account_key: str = None,
        connection_string: str = None,
        credential: str = None,
        sas_token: str = None,
        request_session=None,
        socket_timeout: int = None,
        token_credential=None,
        blocksize: int = create_configuration(storage_sdk="blob").max_block_size,
        client_id: str = None,
        client_secret: str = None,
        tenant_id: str = None,
    ):
        AbstractFileSystem.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.connection_string = connection_string
        self.credential = credential
        self.sas_token = sas_token
        self.request_session = request_session
        self.socket_timeout = socket_timeout
        self.token_credential = token_credential
        self.blocksize = blocksize
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id

        # Only fall back to a service-principal token when no other
        # credential was supplied explicitly.
        if (
            self.token_credential is None
            and self.account_key is None
            and self.sas_token is None
            and self.client_id is not None
        ):
            self.token_credential = self._get_token_from_service_principal()
        self.do_connect()

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Remove the protocol from the input path

        Parameters
        ----------
        path: str
            Path to remove the protocol from

        Returns
        -------
        str
            Returns a path without the protocol
        """
        logging.debug(f"_strip_protocol for {path}")
        ops = infer_storage_options(path)

        # we need to make sure that the path retains
        # the format {host}/{path}
        # here host is the container_name
        if ops.get("host", None):
            ops["path"] = ops["host"] + ops["path"]
        ops["path"] = ops["path"].lstrip("/")

        logging.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"]

    def _get_token_from_service_principal(self):
        """
        Create a TokenCredential given a client_id, client_secret and tenant_id

        Returns
        -------
        TokenCredential
        """
        from azure.common.credentials import ServicePrincipalCredentials
        from azure.storage.common import TokenCredential

        sp_cred = ServicePrincipalCredentials(
            client_id=self.client_id,
            secret=self.client_secret,
            tenant=self.tenant_id,
            resource="https://storage.azure.com/",
        )

        token_cred = TokenCredential(sp_cred.token["access_token"])
        return token_cred

    def do_connect(self):
        """Connect to the BlobServiceClient, using user-specified connection details.

        Tries ``credential`` first, then the connection string, then the
        account key and finally the SAS token.

        Raises
        ------
        ValueError if none of the connection details are available
        """
        self.account_url: str = f"https://{self.account_name}.blob.core.windows.net"
        if self.credential is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.credential
            )
        elif self.connection_string is not None:
            self.service_client = BlobServiceClient.from_connection_string(
                conn_str=self.connection_string
            )
        elif self.account_key is not None:
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.account_key
            )
        elif self.sas_token is not None:
            # The account key wins when both are given (see class docstring),
            # so the SAS token is only consulted after it.
            self.service_client = BlobServiceClient(
                account_url=self.account_url, credential=self.sas_token
            )
        else:
            raise ValueError("unable to connect with provided params!!")

    def split_path(self, path, delimiter="/", return_container: bool = False, **kwargs):
        """
        Normalize ABFS path string into bucket and key.

        Parameters
        ----------
        path : string
            Input path, like `abfs://my_container/path/to/file`

        delimiter: string
            Delimiter used to split the path

        return_container: bool
            Currently unused.

        Examples
        --------
        >>> split_path("abfs://my_container/path/to/file")
        ['my_container', 'path/to/file']
        """
        if path in ["", delimiter]:
            return "", ""

        path = self._strip_protocol(path)
        path = path.lstrip(delimiter)
        # Test for the same delimiter that is used for splitting, otherwise
        # a custom delimiter would yield a one-element result that callers
        # cannot unpack.
        if delimiter not in path:
            # this means path is the container_name
            return path, ""
        return path.split(delimiter, 1)

    def _container_exists(self, container_name: str) -> bool:
        """Return True if ``container_name`` is a container of this account.

        ``ls("")`` reports containers with a trailing ``/``; that suffix is
        stripped before comparing so bare names match.
        """
        return any(entry.rstrip("/") == container_name for entry in self.ls(""))

    def _listing(self, blobs, detail: bool, delimiter: str, return_glob: bool = False):
        """Format ``blobs`` as detail dicts or as ``container/name`` strings."""
        if return_glob:
            return self._details(blobs, return_glob=True)
        if detail:
            return self._details(blobs)
        return [f"{blob.container}{delimiter}{blob.name}" for blob in blobs]

    def ls(
        self,
        path: str,
        detail: bool = False,
        invalidate_cache: bool = True,
        delimiter: str = "/",
        return_glob: bool = False,
        **kwargs,
    ):
        """
        Create a list of blob names from a blob container

        Parameters
        ----------
        path: str
            Path to an Azure Blob with its container name

        detail: bool
            If False, return a list of blob names, else a list of dictionaries with blob details

        invalidate_cache: bool
            If True, do not use the cache

        delimiter: str
            Delimiter used to split paths

        return_glob: bool
            If True, return detail dictionaries whose names carry no
            trailing "/", and return an empty list instead of raising when
            nothing matches.

        Raises
        ------
        FileNotFoundError
            If the listing fails, or nothing matches and ``return_glob`` is
            False.
        """
        logging.debug(f"abfs.ls() is searching for {path}")

        container, path = self.split_path(path)
        roots = ["", delimiter]

        if container in roots and path in roots:
            # This is the case where only the containers are being returned
            logging.info(
                "Returning a list of containers in the azure blob storage account"
            )
            if detail:
                contents = self.service_client.list_containers(include_metadata=True)
                return self._details(contents)
            contents = self.service_client.list_containers()
            return [f"{c.name}{delimiter}" for c in contents]

        if container in roots:
            # A path without a container cannot be resolved.
            raise FileNotFoundError(path)

        # This is the case where the container name is passed
        container_client = self.service_client.get_container_client(
            container=container
        )
        blobs = container_client.walk_blobs(name_starts_with=path)
        try:
            # The service is only queried once the pager is consumed.
            blobs = list(blobs)
        except Exception as err:
            raise FileNotFoundError(path) from err

        if not blobs:
            if return_glob or (path in roots):
                return []
            raise FileNotFoundError(path)

        if len(blobs) > 1:
            return self._listing(blobs, detail, delimiter, return_glob)

        only = blobs[0]
        if (only.name.rstrip(delimiter) == path) and not only.has_key(  # NOQA
            "blob_type"
        ):
            # The single hit is the directory prefix itself: list what is
            # underneath it instead.
            children = container_client.walk_blobs(name_starts_with=only.name)
            return self._listing(children, detail, delimiter, return_glob)
        if isinstance(only, BlobPrefix):
            if detail:
                return self._details(only)
            return [f"{blob.container}{delimiter}{blob.name}" for blob in only]
        if only["blob_type"] == "BlockBlob":
            return self._listing(blobs, detail, delimiter)
        if isinstance(only, ItemPaged):
            # Flatten the page; return it rather than falling through to
            # an implicit None.
            return [b for page in blobs for b in page]
        raise FileNotFoundError(
            f"Unable to identify blobs in {path} for {blobs[0].name}"
        )

    def _details(self, contents, delimiter="/", return_glob: bool = False, **kwargs):
        """
        Return a list of dictionaries of specifying details about the contents

        Parameters
        ----------
        contents

        delimiter: str
            Delimiter used to separate containers and files

        return_glob: bool
            If True, strip the trailing "/" from every name.

        Returns
        -------
        List of dicts
            Returns details about the contents, such as name, size and type
        """
        pathlist = []
        for c in contents:
            data = {}
            if c.has_key("container"):  # NOQA
                data["name"] = f"{c.container}{delimiter}{c.name}"
                data["size"] = c.size if c.has_key("size") else 0  # NOQA
                # Zero-sized entries are reported as directories.
                data["type"] = "directory" if data["size"] == 0 else "file"
            else:
                # Entries without a container attribute are containers.
                data["name"] = f"{c.name}{delimiter}"
                data["size"] = 0
                data["type"] = "directory"
            if return_glob:
                data["name"] = data["name"].rstrip("/")

            pathlist.append(data)
        return pathlist

    def walk(self, path: str, maxdepth=None, **kwargs):
        """ Return all files belows path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into

        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.

        **kwargs are passed to ``ls``
        """
        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, return_glob=True, **kwargs)
        except (FileNotFoundError, IOError):
            # Nothing to walk: end the generator without yielding.
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[pathname] = info
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as give path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in full_dirs:
            yield from self.walk(d, maxdepth=maxdepth, detail=detail, **kwargs)

    def mkdir(self, path, delimiter="/", exists_ok=False, **kwargs):
        """
        Create directory entry at path

        A path naming only a container creates that container; a path
        inside an existing container creates an empty blob at that path.

        Parameters
        ----------
        path: str
            The path to create

        delimiter: str
            Delimiter to use when splitting the path

        exists_ok: bool
            If False, raise an exception when the entry cannot be created,
            e.g. because the container already exists. If True, such cases
            are silently ignored. Defaults to False

        Raises
        ------
        RuntimeError
            If ``exists_ok`` is False and the container already exists, or
            the path lies in a container that does not exist.
        """
        container_name, path = self.split_path(path, delimiter=delimiter)
        container_exists = self._container_exists(container_name)

        if not path and not container_exists:
            # create new container
            self.service_client.create_container(name=container_name)
        elif path and container_exists:
            # attempt to create prefix
            container_client = self.service_client.get_container_client(
                container=container_name
            )
            container_client.upload_blob(name=path, data="")
        elif not exists_ok:
            # everything else
            raise RuntimeError(f"Cannot create {container_name}{delimiter}{path}.")

    def rmdir(self, path: str, delimiter="/", **kwargs):
        """
        Remove a directory, if empty

        Only whole containers are removed; a path below a container is
        ignored.

        Parameters
        ----------
        path: str
            Path of directory to remove

        delimiter: str
            Delimiter to use when splitting the path
        """
        container_name, path = self.split_path(path, delimiter=delimiter)
        if not path and self._container_exists(container_name):
            # delete container
            self.service_client.delete_container(container_name)

    def _rm(self, path, delimiter="/", **kwargs):
        """
        Delete a given file

        Parameters
        ----------
        path: str
            Path to file to delete

        delimiter: str
            Delimiter to use when splitting the path

        Raises
        ------
        RuntimeError
            If ``path`` is neither a file nor a directory.
        """
        if self.isfile(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            container_client = self.service_client.get_container_client(
                container=container_name
            )
            logging.debug(f"Delete blob {path} in {container_name}")
            container_client.delete_blob(path)
        elif self.isdir(path):
            container_name, path = self.split_path(path, delimiter=delimiter)
            # Only whole containers are removed; a directory below a
            # container is left untouched.
            if not path and self._container_exists(container_name):
                logging.debug(f"Delete container {container_name}")
                # ContainerClient.delete_container() takes no container
                # name, so delete through the service client instead.
                self.service_client.delete_container(container_name)
        else:
            raise RuntimeError(f"cannot delete {path}")

    def _open(
        self,
        path: str,
        mode: str = "rb",
        block_size: int = None,
        autocommit: bool = True,
        cache_options=None,
        **kwargs,
    ):
        """Open a file on the datalake, or a block blob

        Parameters
        ----------
        path: str
            Path to file to open

        mode: str
            What mode to open the file in - defaults to "rb"

        block_size: int
            Size per block for multi-part downloads. Falls back to the
            filesystem's ``blocksize`` when not given.

        autocommit: bool
            Whether or not to write to the destination directly

        cache_options
            Passed through unchanged to ``AzureBlobFile``.

        **kwargs
            Passed through to ``AzureBlobFile``, e.g. ``cache_type``: one of
            "readahead", "none", "mmap", "bytes". See the definitions here:
            https://filesystem-spec.readthedocs.io/en/latest/api.html#readbuffering
        """
        logging.debug(f"_open:  {path}")
        return AzureBlobFile(
            fs=self,
            path=path,
            mode=mode,
            block_size=block_size or self.blocksize,
            autocommit=autocommit,
            cache_options=cache_options,
            **kwargs,
        )
for c in missing_containers: print(c) print('\nExtra containers:') for c in extra_containers: print(c) #%% Delete extra containers if delete_extra_containers: # c = extra_containers[0] for c in extra_containers: print('Delete container {} from storage account {}?'.format( c, target_blob_service_client.account_name)) if confirm(): target_blob_service_client.delete_container(c) #%% Create missing containers # c = missing_containers[0] for c in missing_containers: print('Creating container {} in storage account {}'.format( c, target_blob_service_client.account_name)) target_blob_service_client.create_container(c) #%% Generate azcopy commands azcopy_commands = [] # c = source_container_names[0] for c in source_container_names:
class BlobStorageAccountTest(StorageTestCase):
    """Tests for standard blob access tiers and archive rehydration.

    Every test works inside one container that is created in ``setUp`` and
    deleted in ``tearDown`` (both skipped in playback mode).
    """

    def setUp(self):
        """Create the service client and, when not in playback, the test container."""
        super(BlobStorageAccountTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        self.bsc = BlobServiceClient(url, credential=credential)
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        """Best-effort removal of the test container."""
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                # Cleanup must never fail a test, but a bare ``except`` would
                # also swallow KeyboardInterrupt/SystemExit.
                pass

        return super(BlobStorageAccountTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        """Return a blob client for a fresh resource name in the test container."""
        blob_name = self.get_resource_name(TEST_BLOB_PREFIX)
        return self.bsc.get_blob_client(self.container_name, blob_name)

    def _create_blob(self):
        """Upload an empty blob and return its client."""
        blob = self._get_blob_reference()
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        """Assert that the full content of the named blob equals ``expected_data``."""
        blob = self.bsc.get_blob_client(container_name, blob_name)
        # readall() supersedes the deprecated content_as_bytes().
        actual_data = blob.download_blob().readall()
        self.assertEqual(actual_data, expected_data)

    # --Tests specific to Blob Storage Accounts (not general purpose)------------

    @record
    def test_standard_blob_tier_set_tier_api(self):
        container = self.bsc.get_container_client(self.container_name)
        tiers = [
            StandardBlobTier.Archive,
            StandardBlobTier.Cool,
            StandardBlobTier.Hot,
        ]

        for tier in tiers:
            blob = self._get_blob_reference()
            data = b'hello world'
            blob.upload_blob(data)

            # Before an explicit tier is set, the tier is inferred.
            blob_ref = blob.get_blob_properties()
            self.assertIsNotNone(blob_ref.blob_tier)
            self.assertTrue(blob_ref.blob_tier_inferred)
            self.assertIsNone(blob_ref.blob_tier_change_time)

            blobs = list(container.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)
            self.assertIsNotNone(blobs[0].blob_tier)
            self.assertTrue(blobs[0].blob_tier_inferred)
            self.assertIsNone(blobs[0].blob_tier_change_time)

            blob.set_standard_blob_tier(tier)

            # After setting, the tier is explicit and has a change time.
            blob_ref2 = blob.get_blob_properties()
            self.assertEqual(tier, blob_ref2.blob_tier)
            self.assertFalse(blob_ref2.blob_tier_inferred)
            self.assertIsNotNone(blob_ref2.blob_tier_change_time)

            blobs = list(container.list_blobs())

            # Assert
            self.assertIsNotNone(blobs)
            self.assertGreaterEqual(len(blobs), 1)
            self.assertIsNotNone(blobs[0])
            self.assertNamedItemInContainer(blobs, blob.blob_name)
            self.assertEqual(blobs[0].blob_tier, tier)
            self.assertFalse(blobs[0].blob_tier_inferred)
            self.assertIsNotNone(blobs[0].blob_tier_change_time)

            # Remove the blob so the next iteration sees only its own blob
            # at index 0 of the listing.
            blob.delete_blob()

    @record
    def test_set_standard_blob_tier_with_rehydrate_priority(self):
        # Arrange
        blob_client = self._create_blob()
        blob_tier = StandardBlobTier.Archive
        rehydrate_tier = StandardBlobTier.Cool
        rehydrate_priority = RehydratePriority.standard

        # Act
        blob_client.set_standard_blob_tier(
            blob_tier, rehydrate_priority=rehydrate_priority)
        blob_client.set_standard_blob_tier(rehydrate_tier)
        blob_props = blob_client.get_blob_properties()

        # Assert
        # assertEqual, not the assertEquals alias (removed in Python 3.12).
        self.assertEqual('rehydrate-pending-to-cool', blob_props.archive_status)

    @record
    def test_rehydration_status(self):
        blob_name = 'rehydration_test_blob_1'
        blob_name2 = 'rehydration_test_blob_2'
        container = self.bsc.get_container_client(self.container_name)

        data = b'hello world'
        blob = container.upload_blob(blob_name, data)
        blob.set_standard_blob_tier(StandardBlobTier.Archive)
        blob.set_standard_blob_tier(StandardBlobTier.Cool)

        blob_ref = blob.get_blob_properties()
        self.assertEqual(StandardBlobTier.Archive, blob_ref.blob_tier)
        self.assertEqual("rehydrate-pending-to-cool", blob_ref.archive_status)
        self.assertFalse(blob_ref.blob_tier_inferred)

        # Snapshot the listing first, then delete, so the second half of the
        # test finds blob2 at index 0.
        blobs = list(container.list_blobs())
        blob.delete_blob()

        # Assert
        self.assertIsNotNone(blobs)
        self.assertGreaterEqual(len(blobs), 1)
        self.assertIsNotNone(blobs[0])
        self.assertNamedItemInContainer(blobs, blob.blob_name)
        self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier)
        self.assertEqual("rehydrate-pending-to-cool", blobs[0].archive_status)
        self.assertFalse(blobs[0].blob_tier_inferred)

        blob2 = container.upload_blob(blob_name2, data)
        blob2.set_standard_blob_tier(StandardBlobTier.Archive)
        blob2.set_standard_blob_tier(StandardBlobTier.Hot)

        blob_ref2 = blob2.get_blob_properties()
        self.assertEqual(StandardBlobTier.Archive, blob_ref2.blob_tier)
        self.assertEqual("rehydrate-pending-to-hot", blob_ref2.archive_status)
        self.assertFalse(blob_ref2.blob_tier_inferred)

        blobs = list(container.list_blobs())

        # Assert
        self.assertIsNotNone(blobs)
        self.assertGreaterEqual(len(blobs), 1)
        self.assertIsNotNone(blobs[0])
        self.assertNamedItemInContainer(blobs, blob2.blob_name)
        self.assertEqual(StandardBlobTier.Archive, blobs[0].blob_tier)
        self.assertEqual("rehydrate-pending-to-hot", blobs[0].archive_status)
        self.assertFalse(blobs[0].blob_tier_inferred)
class StorageCPKTest(StorageTestCase):
    """Tests for blob APIs that accept a ``cpk`` encryption key argument.

    Most tests follow the same shape: perform a write with
    ``cpk=TEST_ENCRYPTION_KEY``, check the response reports server-side
    encryption with the matching key hash, verify that reading without the
    key fails with ``HttpResponseError``, then read back with the key.
    """

    def setUp(self):
        """Create a small-chunk service client, test data and the test container."""
        super(StorageCPKTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=self.settings.STORAGE_ACCOUNT_KEY,
            connection_data_block_size=1024,
            max_single_put_size=1024,
            min_large_block_upload_threshold=1024,
            max_block_size=1024,
            max_page_size=1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        # prep some test data so that they can be used in upload tests
        self.byte_data = self.get_random_bytes(64 * 1024)

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        """Best-effort removal of the test container."""
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                # Cleanup must never fail a test, but a bare ``except`` would
                # also swallow KeyboardInterrupt/SystemExit.
                pass

        return super(StorageCPKTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------

    def _get_blob_reference(self):
        """Return a fresh blob *name* (not a client) for this test."""
        return self.get_resource_name("cpk")

    def _create_block_blob(self, blob_name=None, data=None, cpk=None, max_concurrency=1):
        """Upload ``data`` (default empty) as a block blob; return ``(client, response)``."""
        blob_name = blob_name if blob_name else self._get_blob_reference()
        blob_client = self.bsc.get_blob_client(self.container_name, blob_name)
        data = data if data else b''
        resp = blob_client.upload_blob(data, cpk=cpk, max_concurrency=max_concurrency)
        return blob_client, resp

    def _create_append_blob(self, cpk=None):
        """Create an empty append blob and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_append_blob(cpk=cpk)
        return blob

    def _create_page_blob(self, cpk=None):
        """Create a 1 MiB page blob and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.create_page_blob(1024 * 1024, cpk=cpk)
        return blob

    def _create_source_blob_url(self):
        """Upload ``self.byte_data`` as an unencrypted source blob; return its read-SAS URL."""
        source_blob_name = self.get_resource_name("sourceblob")
        # Make sure using chunk upload, then we can record the request
        self.config.use_byte_buffer = True
        source_blob_client, _ = self._create_block_blob(
            blob_name=source_blob_name, data=self.byte_data)
        source_blob_sas = generate_blob_sas(
            source_blob_client.account_name,
            source_blob_client.container_name,
            source_blob_client.blob_name,
            snapshot=source_blob_client.snapshot,
            account_key=source_blob_client.credential.account_key,
            permission=BlobSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1))
        source_blob_url = source_blob_client.url + "?" + source_blob_sas
        # Restore the setting before the caller creates the destination blob.
        self.config.use_byte_buffer = False
        return source_blob_url

    # -- Test cases for APIs supporting CPK ----------------------------------------------

    @record
    def test_put_block_and_put_block_list(self):
        # Arrange
        blob_client, _ = self._create_block_blob()
        blob_client.stage_block('1', b'AAA', cpk=TEST_ENCRYPTION_KEY)
        blob_client.stage_block('2', b'BBB', cpk=TEST_ENCRYPTION_KEY)
        blob_client.stage_block('3', b'CCC', cpk=TEST_ENCRYPTION_KEY)

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3'),
        ]
        put_block_list_resp = blob_client.commit_block_list(
            block_list, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(put_block_list_resp['etag'])
        self.assertIsNotNone(put_block_list_resp['last_modified'])
        self.assertTrue(put_block_list_resp['request_server_encrypted'])
        self.assertEqual(put_block_list_resp['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), b'AAABBBCCC')
        self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
        self.assertEqual(blob.properties.last_modified, put_block_list_resp['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    def test_create_block_blob_with_chunks(self):
        # parallel operation
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        # to force the in-memory chunks to be used
        self.config.use_byte_buffer = True

        # Act
        # 64 KiB of data against 1 KiB size limits, uploaded with 2 workers
        blob_client, upload_response = self._create_block_blob(
            data=self.byte_data, cpk=TEST_ENCRYPTION_KEY, max_concurrency=2)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified, upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    def test_create_block_blob_with_sub_streams(self):
        # problem with the recording framework can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Act
        # same upload as the chunked test, but without use_byte_buffer set
        blob_client, upload_response = self._create_block_blob(
            data=self.byte_data, cpk=TEST_ENCRYPTION_KEY, max_concurrency=2)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified, upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_create_block_blob_with_single_chunk(self):
        # Act
        data = b'AAABBBCCC'
        # 9 bytes, well below the 1 KiB max_single_put_size set in setUp
        blob_client, upload_response = self._create_block_blob(
            data=data, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(upload_response['etag'])
        self.assertIsNotNone(upload_response['last_modified'])
        self.assertTrue(upload_response['request_server_encrypted'])
        self.assertEqual(upload_response['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), data)
        self.assertEqual(blob.properties.etag, upload_response['etag'])
        self.assertEqual(blob.properties.last_modified, upload_response['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_put_block_from_url_and_commit_with_cpk(self):
        # Arrange
        # create source blob and get source blob url
        source_blob_url = self._create_source_blob_url()

        # create destination blob
        destination_blob_client, _ = self._create_block_blob(
            cpk=TEST_ENCRYPTION_KEY)

        # Act part 1: make put block from url calls
        destination_blob_client.stage_block_from_url(
            block_id=1,
            source_url=source_blob_url,
            source_offset=0,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)
        destination_blob_client.stage_block_from_url(
            block_id=2,
            source_url=source_blob_url,
            source_offset=4 * 1024,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert blocks
        committed, uncommitted = destination_blob_client.get_block_list('all')
        self.assertEqual(len(uncommitted), 2)
        self.assertEqual(len(committed), 0)

        # commit the blocks without cpk should fail
        block_list = [BlobBlock(block_id='1'), BlobBlock(block_id='2')]
        with self.assertRaises(HttpResponseError):
            destination_blob_client.commit_block_list(block_list)

        # Act commit the blocks with cpk should succeed
        put_block_list_resp = destination_blob_client.commit_block_list(
            block_list, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(put_block_list_resp['etag'])
        self.assertIsNotNone(put_block_list_resp['last_modified'])
        self.assertTrue(put_block_list_resp['request_server_encrypted'])
        self.assertEqual(put_block_list_resp['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content
        blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data[0:8 * 1024])
        self.assertEqual(blob.properties.etag, put_block_list_resp['etag'])
        self.assertEqual(blob.properties.last_modified, put_block_list_resp['last_modified'])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_append_block(self):
        # Arrange
        blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        for content in [b'AAA', b'BBB', b'CCC']:
            append_blob_prop = blob_client.append_block(
                content, cpk=TEST_ENCRYPTION_KEY)

            # Assert
            self.assertIsNotNone(append_blob_prop['etag'])
            self.assertIsNotNone(append_blob_prop['last_modified'])
            self.assertTrue(append_blob_prop['request_server_encrypted'])
            self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), b'AAABBBCCC')
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_append_block_from_url(self):
        # Arrange
        source_blob_url = self._create_source_blob_url()
        destination_blob_client = self._create_append_blob(
            cpk=TEST_ENCRYPTION_KEY)

        # Act
        append_blob_prop = destination_blob_client.append_block_from_url(
            source_blob_url,
            source_offset=0,
            source_length=4 * 1024,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(append_blob_prop['etag'])
        self.assertIsNotNone(append_blob_prop['last_modified'])
        # TODO: verify that the swagger is correct, header wasn't added for the response
        # self.assertTrue(append_blob_prop['request_server_encrypted'])
        self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            destination_blob_client.download_blob()

        # Act get the blob content
        blob = destination_blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data[0:4 * 1024])
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_create_append_blob_with_chunks(self):
        # Arrange
        blob_client = self._create_append_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        append_blob_prop = blob_client.upload_blob(
            self.byte_data,
            blob_type=BlobType.AppendBlob,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(append_blob_prop['etag'])
        self.assertIsNotNone(append_blob_prop['last_modified'])
        self.assertTrue(append_blob_prop['request_server_encrypted'])
        self.assertEqual(append_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_update_page(self):
        # Arrange
        blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        page_blob_prop = blob_client.upload_page(
            self.byte_data,
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        # Only the written range is read back; the page blob itself is 1 MiB.
        blob = blob_client.download_blob(
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY,
        )

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    @record
    def test_update_page_from_url(self):
        # Arrange
        source_blob_url = self._create_source_blob_url()
        blob_client = self._create_page_blob(cpk=TEST_ENCRYPTION_KEY)

        # Act
        page_blob_prop = blob_client.upload_pages_from_url(
            source_blob_url,
            offset=0,
            length=len(self.byte_data),
            source_offset=0,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        # TODO: FIX SWAGGER
        # self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(
            offset=0,
            length=len(self.byte_data),
            cpk=TEST_ENCRYPTION_KEY,
        )

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    def test_create_page_blob_with_chunks(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Act
        blob_client = self.bsc.get_blob_client(self.container_name, self._get_blob_reference())
        page_blob_prop = blob_client.upload_blob(
            self.byte_data,
            blob_type=BlobType.PageBlob,
            max_concurrency=2,
            cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(page_blob_prop['etag'])
        self.assertIsNotNone(page_blob_prop['last_modified'])
        self.assertTrue(page_blob_prop['request_server_encrypted'])
        self.assertEqual(page_blob_prop['encryption_key_sha256'], TEST_ENCRYPTION_KEY.key_hash)

        # Act get the blob content without cpk should fail
        with self.assertRaises(HttpResponseError):
            blob_client.download_blob()

        # Act get the blob content
        blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)

        # Assert content was retrieved with the cpk
        self.assertEqual(blob.readall(), self.byte_data)
        self.assertEqual(blob.properties.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

    # TODO: verify why clear page works without providing cpk
    # @record
    # def test_clear_page(self):
    #     # Arrange
    #     blob_client = self.bsc.get_blob_client(self.container_name, self._get_blob_reference())
    #     data = self.get_random_bytes(1024)
    #     blob_client.upload_blob(data, blob_type=BlobType.PageBlob, cpk=TEST_ENCRYPTION_KEY)
    #
    #     # Act
    #     blob = blob_client.download_blob(cpk=TEST_ENCRYPTION_KEY)
    #     self.assertEqual(blob.readall(), data)
    #
    #     # with self.assertRaises(HttpResponseError):
    #     #     blob_client.clear_page(0, 511)
    #
    #     resp = blob_client.clear_page(0, 511, cpk=TEST_ENCRYPTION_KEY)
    #     blob = blob_client.download_blob(0, 511, cpk=TEST_ENCRYPTION_KEY)
    #
    #     # Assert
    #     self.assertIsNotNone(resp.get('etag'))
    #     self.assertIsNotNone(resp.get('last_modified'))
    #     self.assertIsNotNone(resp.get('blob_sequence_number'))
    #     self.assertEqual(blob.readall(), b'\x00' * 512)
    #
    #     blob = blob_client.download_blob(512, 1023, cpk=TEST_ENCRYPTION_KEY)
    #     self.assertEqual(blob.readall(), data[512:])

    @record
    def test_get_set_blob_metadata(self):
        # Arrange
        blob_client, _ = self._create_block_blob(data=b'AAABBBCCC', cpk=TEST_ENCRYPTION_KEY)

        # Act without the encryption key should fail
        with self.assertRaises(HttpResponseError):
            blob_client.get_blob_properties()

        # Act
        blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertTrue(blob_props.server_encrypted)
        self.assertEqual(blob_props.encryption_key_sha256, TEST_ENCRYPTION_KEY.key_hash)

        # Act set blob properties
        metadata = {'hello': 'world', 'number': '42', 'UP': 'UPval'}
        with self.assertRaises(HttpResponseError):
            blob_client.set_blob_metadata(
                metadata=metadata,
            )

        blob_client.set_blob_metadata(metadata=metadata, cpk=TEST_ENCRYPTION_KEY)

        # Assert
        blob_props = blob_client.get_blob_properties(cpk=TEST_ENCRYPTION_KEY)
        md = blob_props.metadata
        self.assertEqual(3, len(md))
        self.assertEqual(md['hello'], 'world')
        self.assertEqual(md['number'], '42')
        self.assertEqual(md['UP'], 'UPval')
        # Metadata key case must be preserved, not lower-cased.
        self.assertFalse('up' in md)

    @record
    def test_snapshot_blob(self):
        # Arrange
        blob_client, _ = self._create_block_blob(data=b'AAABBBCCC', cpk=TEST_ENCRYPTION_KEY)

        # Act without cpk should not work
        with self.assertRaises(HttpResponseError):
            blob_client.create_snapshot()

        # Act with cpk should work
        blob_snapshot = blob_client.create_snapshot(cpk=TEST_ENCRYPTION_KEY)

        # Assert
        self.assertIsNotNone(blob_snapshot)
class StoragePageBlobTest(StorageTestCase): def setUp(self): super(StoragePageBlobTest, self).setUp() url = self._get_account_url() # test chunking functionality by reducing the size of each chunk, # otherwise the tests would take too long to execute credential = self._get_shared_key_credential() self.bs = BlobServiceClient( url, credential=credential, connection_data_block_size=4 * 1024, max_page_size=4 * 1024) self.config = self.bs._config self.container_name = self.get_resource_name('utcontainer') if not self.is_playback(): self.bs.create_container(self.container_name) def tearDown(self): if not self.is_playback(): try: self.bs.delete_container(self.container_name) except: pass if os.path.isfile(FILE_PATH): try: os.remove(FILE_PATH) except: pass return super(StoragePageBlobTest, self).tearDown() #--Helpers----------------------------------------------------------------- def _get_blob_reference(self): return self.bs.get_blob_client( self.container_name, self.get_resource_name(TEST_BLOB_PREFIX)) def _create_blob(self, length=512): blob = self._get_blob_reference() blob.create_page_blob(size=length) return blob def assertBlobEqual(self, container_name, blob_name, expected_data): blob = self.bs.get_blob_client(container_name, blob_name) actual_data = blob.download_blob() self.assertEqual(b"".join(list(actual_data)), expected_data) def assertRangeEqual(self, container_name, blob_name, expected_data, start_range, end_range): blob = self.bs.get_blob_client(container_name, blob_name) actual_data = blob.download_blob(offset=start_range, length=end_range) self.assertEqual(b"".join(list(actual_data)), expected_data) class NonSeekableFile(object): def __init__(self, wrapped_file): self.wrapped_file = wrapped_file def write(self, data): self.wrapped_file.write(data) def read(self, count): return self.wrapped_file.read(count) #--Test cases for page blobs -------------------------------------------- @record def test_create_blob(self): # Arrange blob = 
self._get_blob_reference() # Act resp = blob.create_page_blob(1024) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertTrue(blob.get_blob_properties()) @record def test_create_blob_with_metadata(self): # Arrange blob = self._get_blob_reference() metadata = {'hello': 'world', 'number': '42'} # Act resp = blob.create_page_blob(512, metadata=metadata) # Assert md = blob.get_blob_properties() self.assertDictEqual(md.metadata, metadata) @record def test_put_page_with_lease_id(self): # Arrange blob = self._create_blob() lease = blob.acquire_lease() # Act data = self.get_random_bytes(512) blob.upload_page(data, 0, 511, lease=lease) # Assert content = blob.download_blob(lease=lease) self.assertEqual(b"".join(list(content)), data) @record def test_update_page(self): # Arrange blob = self._create_blob() # Act data = self.get_random_bytes(512) resp = blob.upload_page(data, 0, 511) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_create_8tb_blob(self): # Arrange blob = self._get_blob_reference() # Act resp = blob.create_page_blob(EIGHT_TB) props = blob.get_blob_properties() page_ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsInstance(props, BlobProperties) self.assertEqual(props.size, EIGHT_TB) self.assertEqual(0, len(page_ranges)) @record def test_create_larger_than_8tb_blob_fail(self): # Arrange blob = self._get_blob_reference() # Act with self.assertRaises(HttpResponseError): blob.create_page_blob(EIGHT_TB + 1) @record def test_update_8tb_blob_page(self): # Arrange blob = self._get_blob_reference() blob.create_page_blob(EIGHT_TB) # Act data = self.get_random_bytes(512) start_range = EIGHT_TB - 512 end_range = EIGHT_TB - 1 resp = 
blob.upload_page(data, start_range, end_range) props = blob.get_blob_properties() page_ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) self.assertRangeEqual(self.container_name, blob.blob_name, data, start_range, end_range) self.assertEqual(props.size, EIGHT_TB) self.assertEqual(1, len(page_ranges)) self.assertEqual(page_ranges[0]['start'], start_range) self.assertEqual(page_ranges[0]['end'], end_range) @record def test_update_page_with_md5(self): # Arrange blob = self._create_blob() # Act data = self.get_random_bytes(512) resp = blob.upload_page(data, 0, 511, validate_content=True) # Assert @record def test_clear_page(self): # Arrange blob = self._create_blob() # Act resp = blob.clear_page(0, 511) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) self.assertIsNotNone(resp.get('blob_sequence_number')) self.assertBlobEqual(self.container_name, blob.blob_name, b'\x00' * 512) @record def test_put_page_if_sequence_number_lt_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence + 1) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_lt_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_lt=start_sequence) # Assert @record def test_update_page_if_sequence_number_lte_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, 
sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_lte_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_lte=start_sequence - 1) # Assert @record def test_update_page_if_sequence_number_eq_success(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence) # Assert self.assertBlobEqual(self.container_name, blob.blob_name, data) @record def test_update_page_if_sequence_number_eq_failure(self): # Arrange blob = self._get_blob_reference() data = self.get_random_bytes(512) start_sequence = 10 blob.create_page_blob(512, sequence_number=start_sequence) # Act with self.assertRaises(HttpResponseError): blob.upload_page(data, 0, 511, if_sequence_number_eq=start_sequence - 1) # Assert @record def test_update_page_unicode(self): # Arrange blob = self._create_blob() # Act data = u'abcdefghijklmnop' * 32 resp = blob.upload_page(data, 0, 511) # Assert self.assertIsNotNone(resp.get('etag')) self.assertIsNotNone(resp.get('last_modified')) @record def test_get_page_ranges_no_pages(self): # Arrange blob = self._create_blob() # Act ranges, cleared = blob.get_page_ranges() # Assert self.assertIsNotNone(ranges) self.assertIsInstance(ranges, list) self.assertEqual(len(ranges), 0) @record def test_get_page_ranges_2_pages(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(512) resp1 = blob.upload_page(data, 0, 511) resp2 = blob.upload_page(data, 1024, 1535) # Act ranges, cleared = 
blob.get_page_ranges() # Assert self.assertIsNotNone(ranges) self.assertIsInstance(ranges, list) self.assertEqual(len(ranges), 2) self.assertEqual(ranges[0]['start'], 0) self.assertEqual(ranges[0]['end'], 511) self.assertEqual(ranges[1]['start'], 1024) self.assertEqual(ranges[1]['end'], 1535) @record def test_get_page_ranges_diff(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(1536) snapshot1 = blob.create_snapshot() blob.upload_page(data, 0, 1535) snapshot2 = blob.create_snapshot() blob.clear_page(512, 1023) # Act ranges1, cleared1 = blob.get_page_ranges(previous_snapshot_diff=snapshot1) ranges2, cleared2 = blob.get_page_ranges(previous_snapshot_diff=snapshot2['snapshot']) # Assert self.assertIsNotNone(ranges1) self.assertIsInstance(ranges1, list) self.assertEqual(len(ranges1), 2) self.assertIsInstance(cleared1, list) self.assertEqual(len(cleared1), 1) self.assertEqual(ranges1[0]['start'], 0) self.assertEqual(ranges1[0]['end'], 511) self.assertEqual(cleared1[0]['start'], 512) self.assertEqual(cleared1[0]['end'], 1023) self.assertEqual(ranges1[1]['start'], 1024) self.assertEqual(ranges1[1]['end'], 1535) self.assertIsNotNone(ranges2) self.assertIsInstance(ranges2, list) self.assertEqual(len(ranges2), 0) self.assertIsInstance(cleared2, list) self.assertEqual(len(cleared2), 1) self.assertEqual(cleared2[0]['start'], 512) self.assertEqual(cleared2[0]['end'], 1023) @record def test_update_page_fail(self): # Arrange blob = self._create_blob(2048) data = self.get_random_bytes(512) resp1 = blob.upload_page(data, 0, 511) # Act try: blob.upload_page(data, 1024, 1536) except ValueError as e: self.assertEqual(str(e), 'end_range must be an integer that aligns with 512 page size') return # Assert raise Exception('Page range validation failed to throw on failure case') @record def test_resize_blob(self): # Arrange blob = self._create_blob(1024) # Act resp = blob.resize_blob(512) # Assert self.assertIsNotNone(resp.get('etag')) 
@record
def test_set_sequence_number_blob(self):
    # Sets the blob's sequence number to a fixed value and checks both the
    # returned response and the properties read back afterwards.

    # Arrange
    page_blob = self._create_blob()
    new_sequence_number = 6

    # Act
    response = page_blob.set_sequence_number(
        SequenceNumberAction.Update, new_sequence_number)

    # Assert: the response carries every key this test relies on
    for key in ('etag', 'last_modified', 'blob_sequence_number'):
        self.assertIsNotNone(response.get(key))

    # Assert: the new value is visible through get_blob_properties()
    properties = page_blob.get_blob_properties()
    self.assertIsInstance(properties, BlobProperties)
    self.assertEqual(properties.page_blob_sequence_number, new_sequence_number)
def test_create_blob_from_bytes(self):
    # Uploads LARGE_BLOB_SIZE random bytes as a page blob and checks the
    # stored content plus the etag/last_modified returned by the upload.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    page_blob = self._get_blob_reference()
    payload = self.get_random_bytes(LARGE_BLOB_SIZE)

    # Act
    upload_result = page_blob.upload_blob(payload, blob_type=BlobType.PageBlob)
    stored = page_blob.get_blob_properties()

    # Assert
    self.assertBlobEqual(self.container_name, page_blob.blob_name, payload)
    for field in ('etag', 'last_modified'):
        self.assertEqual(getattr(stored, field), upload_result.get(field))
def test_create_blob_from_bytes_with_index(self):
    # Uploads the tail of a random buffer (everything from `index` onward)
    # as a page blob and checks the stored content equals that same slice.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    blob = self._get_blob_reference()
    data = self.get_random_bytes(LARGE_BLOB_SIZE)
    index = 1024
    expected = data[index:]

    # Act
    blob.upload_blob(expected, blob_type=BlobType.PageBlob)

    # Assert
    # Compare against the slice that was actually uploaded instead of
    # repeating the offset as a literal, so the two cannot drift apart.
    self.assertBlobEqual(self.container_name, blob.blob_name, expected)
def test_create_blob_from_stream(self):
    # Writes random bytes to FILE_PATH, uploads that file as a page blob
    # through an open binary stream with an explicit length, then compares
    # content and etag/last_modified against the upload response.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    page_blob = self._get_blob_reference()
    payload = self.get_random_bytes(LARGE_BLOB_SIZE)
    with open(FILE_PATH, 'wb') as out_file:
        out_file.write(payload)

    # Act
    upload_length = len(payload)
    with open(FILE_PATH, 'rb') as in_file:
        upload_result = page_blob.upload_blob(
            in_file,
            length=upload_length,
            blob_type=BlobType.PageBlob)
    stored = page_blob.get_blob_properties()

    # Assert
    self.assertBlobEqual(
        self.container_name, page_blob.blob_name, payload[:upload_length])
    for field in ('etag', 'last_modified'):
        self.assertEqual(getattr(stored, field), upload_result.get(field))
def test_create_blob_from_stream_non_seekable(self):
    # Uploads a page blob from a file stream wrapped in NonSeekableFile,
    # using a single connection, and checks the stored content.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    page_blob = self._get_blob_reference()
    payload = self.get_random_bytes(LARGE_BLOB_SIZE)
    with open(FILE_PATH, 'wb') as out_file:
        out_file.write(payload)

    # Act
    upload_length = len(payload)
    with open(FILE_PATH, 'rb') as in_file:
        page_blob.upload_blob(
            StoragePageBlobTest.NonSeekableFile(in_file),
            length=upload_length,
            max_connections=1,
            blob_type=BlobType.PageBlob)

    # Assert
    self.assertBlobEqual(
        self.container_name, page_blob.blob_name, payload[:upload_length])
def test_create_blob_from_stream_truncated(self):
    # Uploads from the file at FILE_PATH with a length 512 bytes shorter
    # than the file, then checks that only that prefix was stored.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    page_blob = self._get_blob_reference()
    payload = self.get_random_bytes(LARGE_BLOB_SIZE)
    with open(FILE_PATH, 'wb') as out_file:
        out_file.write(payload)

    # Act
    truncated_length = len(payload) - 512
    with open(FILE_PATH, 'rb') as in_file:
        page_blob.upload_blob(
            in_file,
            length=truncated_length,
            blob_type=BlobType.PageBlob)

    # Assert
    self.assertBlobEqual(
        self.container_name, page_blob.blob_name, payload[:truncated_length])
@record
def test_blob_tier_on_create(self):
    # Creates page blobs in a premium-account container through three
    # upload paths (create_page_blob, upload_blob from bytes, upload_blob
    # from a file stream) and checks each reports the explicitly requested
    # tier with blob_tier_inferred set to False.
    url = self._get_premium_account_url()
    credential = self._get_premium_shared_key_credential()
    pbs = BlobServiceClient(url, credential=credential)

    # Bind the container client before entering the try block so the
    # finally clause can never hit an unbound `container` and mask the
    # original error with a NameError.
    container_name = self.get_resource_name('utpremiumcontainer')
    container = pbs.get_container_client(container_name)

    try:
        if not self.is_playback():
            # Tolerate a container left over from an earlier run, matching
            # the other premium-tier tests in this class.
            try:
                container.create_container()
            except ResourceExistsError:
                pass

        # test create_blob API
        blob = self._get_blob_reference()
        pblob = pbs.get_blob_client(container_name, blob.blob_name)
        pblob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P4)

        props = pblob.get_blob_properties()
        self.assertEqual(props.blob_tier, PremiumPageBlobTier.P4)
        self.assertFalse(props.blob_tier_inferred)

        # test create_blob_from_bytes API
        blob2 = self._get_blob_reference()
        pblob2 = pbs.get_blob_client(container_name, blob2.blob_name)
        byte_data = self.get_random_bytes(1024)
        pblob2.upload_blob(
            byte_data,
            premium_page_blob_tier=PremiumPageBlobTier.P6,
            blob_type=BlobType.PageBlob)

        props2 = pblob2.get_blob_properties()
        self.assertEqual(props2.blob_tier, PremiumPageBlobTier.P6)
        self.assertFalse(props2.blob_tier_inferred)

        # test create_blob_from_path API
        blob3 = self._get_blob_reference()
        pblob3 = pbs.get_blob_client(container_name, blob3.blob_name)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(byte_data)
        with open(FILE_PATH, 'rb') as stream:
            pblob3.upload_blob(
                stream,
                blob_type=BlobType.PageBlob,
                premium_page_blob_tier=PremiumPageBlobTier.P10)

        props3 = pblob3.get_blob_properties()
        self.assertEqual(props3.blob_tier, PremiumPageBlobTier.P10)
        self.assertFalse(props3.blob_tier_inferred)
    finally:
        container.delete_container()
@record
def test_blob_tier_copy_blob(self):
    # Copies page blobs inside a premium-account container and checks the
    # tier of each copy: an explicit tier on the copy request is honoured,
    # and a copy made without one reports P10 with blob_tier_inferred True.
    url = self._get_premium_account_url()
    credential = self._get_premium_shared_key_credential()
    pbs = BlobServiceClient(url, credential=credential)

    # Bind the container client before entering the try block so the
    # finally clause can never hit an unbound `container` and mask the
    # original error with a NameError.
    container_name = self.get_resource_name('utpremiumcontainer')
    container = pbs.get_container_client(container_name)

    try:
        if not self.is_playback():
            try:
                container.create_container()
            except ResourceExistsError:
                pass

        # Arrange
        source_blob = pbs.get_blob_client(
            container_name,
            self.get_resource_name(TEST_BLOB_PREFIX))
        source_blob.create_page_blob(1024, premium_page_blob_tier=PremiumPageBlobTier.P10)

        # Act
        source_blob_url = '{0}/{1}/{2}'.format(
            self._get_premium_account_url(), container_name, source_blob.blob_name)

        copy_blob = pbs.get_blob_client(container_name, 'blob1copy')
        copy = copy_blob.copy_blob_from_url(source_blob_url, premium_page_blob_tier=PremiumPageBlobTier.P30)

        # Assert
        self.assertIsNotNone(copy)
        self.assertEqual(copy.status(), 'success')
        self.assertIsNotNone(copy.copy_id())

        copy_ref = copy_blob.get_blob_properties()
        self.assertEqual(copy_ref.blob_tier, PremiumPageBlobTier.P30)

        # Second source is created without an explicit tier
        source_blob2 = pbs.get_blob_client(
            container_name,
            self.get_resource_name(TEST_BLOB_PREFIX))

        source_blob2.create_page_blob(1024)
        source_blob2_url = '{0}/{1}/{2}'.format(
            self._get_premium_account_url(), source_blob2.container_name, source_blob2.blob_name)

        # Copy with an explicit tier: the tier is reported as not inferred
        copy_blob2 = pbs.get_blob_client(container_name, 'blob2copy')
        copy2 = copy_blob2.copy_blob_from_url(source_blob2_url, premium_page_blob_tier=PremiumPageBlobTier.P60)
        self.assertIsNotNone(copy2)
        self.assertEqual(copy2.status(), 'success')
        self.assertIsNotNone(copy2.copy_id())

        copy_ref2 = copy_blob2.get_blob_properties()
        self.assertEqual(copy_ref2.blob_tier, PremiumPageBlobTier.P60)
        self.assertFalse(copy_ref2.blob_tier_inferred)

        # Copy without a tier: the tier is reported as inferred
        copy_blob3 = pbs.get_blob_client(container_name, 'blob3copy')
        copy3 = copy_blob3.copy_blob_from_url(source_blob2_url)
        self.assertIsNotNone(copy3)
        self.assertEqual(copy3.status(), 'success')
        self.assertIsNotNone(copy3.copy_id())

        copy_ref3 = copy_blob3.get_blob_properties()
        self.assertEqual(copy_ref3.blob_tier, PremiumPageBlobTier.P10)
        self.assertTrue(copy_ref3.blob_tier_inferred)
    finally:
        container.delete_container()
def tearDown(self):
    # Best-effort cleanup: deletes the test container (skipped in playback)
    # and the scratch file at FILE_PATH, then defers to the base class.
    if not self.is_playback():
        try:
            self.bsc.delete_container(self.container_name)
        except Exception:
            # Cleanup must not fail the test, but KeyboardInterrupt and
            # SystemExit should still propagate, hence no bare except.
            pass

    if os.path.isfile(FILE_PATH):
        try:
            os.remove(FILE_PATH)
        except OSError:
            # The file could not be removed; leave it behind.
            pass

    return super(StorageGetBlobTest, self).tearDown()
@record
def test_get_blob_no_content(self):
    # Round-trips an empty payload: uploads b'' and checks that the
    # download returns empty bytes and reports a size of 0.

    # Arrange
    empty_payload = b''
    client = self.bsc.get_blob_client(
        self.container_name, self._get_blob_reference())
    client.upload_blob(empty_payload)

    # Act
    downloaded = client.download_blob()

    # Assert
    self.assertEqual(empty_payload, downloaded.content_as_bytes())
    self.assertEqual(0, downloaded.properties.size)
def test_get_blob_to_bytes_snapshot(self):
    # Takes a snapshot of the shared byte blob, overwrites the base blob,
    # then downloads the snapshot with two connections and compares it to
    # the original bytes.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    base_client = self.bsc.get_blob_client(self.container_name, self.byte_blob)
    snapshot_id = base_client.create_snapshot()
    snapshot_client = self.bsc.get_blob_client(
        self.container_name,
        self.byte_blob,
        snapshot=snapshot_id)

    # Modify the blob so the Etag no longer matches
    base_client.upload_blob(self.byte_data, overwrite=True)

    # Act
    downloader = snapshot_client.download_blob()
    downloaded = downloader.content_as_bytes(max_connections=2)

    # Assert
    self.assertEqual(self.byte_data, downloaded)
def test_get_blob_to_stream(self):
    # Downloads the shared byte blob into the file at FILE_PATH using two
    # connections, then reads the file back and compares it to the source.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    client = self.bsc.get_blob_client(self.container_name, self.byte_blob)

    # Act
    with open(FILE_PATH, 'wb') as out_file:
        result = client.download_blob().download_to_stream(
            out_file, max_connections=2)

    # Assert
    self.assertIsInstance(result, BlobProperties)
    with open(FILE_PATH, 'rb') as in_file:
        written = in_file.read()
    self.assertEqual(self.byte_data, written)
@record
def test_get_blob_to_stream_small(self):
    # Uploads a 1024-byte blob, downloads it into the file at FILE_PATH
    # while recording progress through a response hook, and checks both
    # the file content and the recorded progress.

    # Arrange
    payload = self.get_random_bytes(1024)
    client = self.bsc.get_blob_client(
        self.container_name, self._get_blob_reference())
    client.upload_blob(payload)

    seen = []

    def record_progress(response):
        # Collect (current, total) pairs exposed on the response context
        context = response.context
        seen.append(
            (context['download_stream_current'], context['data_stream_total']))

    # Act
    with open(FILE_PATH, 'wb') as out_file:
        result = client.download_blob(
            raw_response_hook=record_progress
        ).download_to_stream(out_file, max_connections=2)

    # Assert
    self.assertIsInstance(result, BlobProperties)
    with open(FILE_PATH, 'rb') as in_file:
        written = in_file.read()
    self.assertEqual(payload, written)
    settings = self.config.blob_settings
    self.assert_download_progress(
        len(payload),
        settings.max_chunk_get_size,
        settings.max_single_get_size,
        seen)
def test_ranged_get_blob_to_path_with_progress(self):
    # Downloads a range of the shared byte blob into the file at FILE_PATH
    # with two connections, recording progress through a response hook,
    # then checks the bytes written and the recorded progress.

    # parallel tests introduce random order of requests, can only run live
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    seen = []
    client = self.bsc.get_blob_client(self.container_name, self.byte_blob)

    def record_progress(response):
        # Collect (current, total) pairs exposed on the response context
        context = response.context
        seen.append(
            (context['download_stream_current'], context['data_stream_total']))

    # Act
    first = 3
    last = self.config.blob_settings.max_single_get_size + 1024
    with open(FILE_PATH, 'wb') as out_file:
        ranged = client.download_blob(
            offset=first,
            length=last,
            raw_response_hook=record_progress)
        result = ranged.download_to_stream(out_file, max_connections=2)

    # Assert
    self.assertIsInstance(result, BlobProperties)
    with open(FILE_PATH, 'rb') as in_file:
        written = in_file.read()
    self.assertEqual(self.byte_data[first:last + 1], written)
    self.assert_download_progress(
        last - first + 1,
        self.config.blob_settings.max_chunk_get_size,
        self.config.blob_settings.max_single_get_size,
        seen)
self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(self.byte_data[1:5], actual) @record def test_ranged_get_blob_to_path_non_parallel(self): # Arrange blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) # Act with open(FILE_PATH, 'wb') as stream: downloader = blob.download_blob(offset=1, length=3) properties = downloader.download_to_stream(stream, max_connections=1) # Assert self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(self.byte_data[1:4], actual) @record def test_ranged_get_blob_to_path_invalid_range_parallel(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_size = self.config.blob_settings.max_single_get_size + 1 blob_data = self.get_random_bytes(blob_size) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(blob_data) # Act end_range = 2 * self.config.blob_settings.max_single_get_size with open(FILE_PATH, 'wb') as stream: downloader = blob.download_blob(offset=1, length=end_range) properties = downloader.download_to_stream(stream, max_connections=2) # Assert self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(blob_data[1:blob_size], actual) @record def test_ranged_get_blob_to_path_invalid_range_non_parallel(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_size = 1024 blob_data = self.get_random_bytes(blob_size) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(blob_data) # Act end_range = 2 * self.config.blob_settings.max_single_get_size with open(FILE_PATH, 'wb') as stream: 
downloader = blob.download_blob(offset=1, length=end_range) properties = downloader.download_to_stream(stream, max_connections=2) # Assert self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(blob_data[1:blob_size], actual) # Assert def test_get_blob_to_text(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange text_blob = self.get_resource_name('textblob') text_data = self.get_random_text_data( self.config.blob_settings.max_single_get_size + 1) blob = self.bsc.get_blob_client(self.container_name, text_blob) blob.upload_blob(text_data) # Act content = blob.download_blob().content_as_text(max_connections=2) # Assert self.assertEqual(text_data, content) def test_get_blob_to_text_with_progress(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange text_blob = self.get_resource_name('textblob') text_data = self.get_random_text_data( self.config.blob_settings.max_single_get_size + 1) blob = self.bsc.get_blob_client(self.container_name, text_blob) blob.upload_blob(text_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act content = blob.download_blob( raw_response_hook=callback).content_as_text(max_connections=2) # Assert self.assertEqual(text_data, content) self.assert_download_progress( len(text_data.encode('utf-8')), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) @record def test_get_blob_to_text_non_parallel(self): # Arrange text_blob = self._get_blob_reference() text_data = self.get_random_text_data( self.config.blob_settings.max_single_get_size + 1) blob = self.bsc.get_blob_client(self.container_name, text_blob) 
blob.upload_blob(text_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act content = blob.download_blob( raw_response_hook=callback).content_as_text(max_connections=1) # Assert self.assertEqual(text_data, content) self.assert_download_progress( len(text_data), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) @record def test_get_blob_to_text_small(self): # Arrange blob_data = self.get_random_text_data(1024) blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(blob_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act content = blob.download_blob( raw_response_hook=callback).content_as_text() # Assert self.assertEqual(blob_data, content) self.assert_download_progress( len(blob_data), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) @record def test_get_blob_to_text_with_encoding(self): # Arrange text = u'hello 啊齄丂狛狜 world' blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(text, encoding='utf-16') # Act content = blob.download_blob().content_as_text(encoding='utf-16') # Assert self.assertEqual(text, content) @record def test_get_blob_to_text_with_encoding_and_progress(self): # Arrange text = u'hello 啊齄丂狛狜 world' blob_name = self._get_blob_reference() blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(text, encoding='utf-16') # Act progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) content = blob.download_blob( 
raw_response_hook=callback).content_as_text(encoding='utf-16') # Assert self.assertEqual(text, content) self.assert_download_progress( len(text.encode('utf-8')), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) @record def test_get_blob_non_seekable(self): # Arrange blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) # Act with open(FILE_PATH, 'wb') as stream: non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream) downloader = blob.download_blob() properties = downloader.download_to_stream(non_seekable_stream, max_connections=1) # Assert self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(self.byte_data, actual) def test_get_blob_non_seekable_parallel(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) # Act with open(FILE_PATH, 'wb') as stream: non_seekable_stream = StorageGetBlobTest.NonSeekableFile(stream) with self.assertRaises(ValueError): downloader = blob.download_blob() properties = downloader.download_to_stream(non_seekable_stream, max_connections=2) @record def test_get_blob_to_stream_exact_get_size(self): # Arrange blob_name = self._get_blob_reference() byte_data = self.get_random_bytes( self.config.blob_settings.max_single_get_size) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(byte_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act with open(FILE_PATH, 'wb') as stream: downloader = blob.download_blob(raw_response_hook=callback) properties = downloader.download_to_stream(stream, max_connections=2) # Assert with open(FILE_PATH, 'rb') as stream: actual = stream.read() 
self.assertEqual(byte_data, actual) self.assert_download_progress( len(byte_data), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) @record def test_get_blob_exact_get_size(self): # Arrange blob_name = self._get_blob_reference() byte_data = self.get_random_bytes( self.config.blob_settings.max_single_get_size) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(byte_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act content = blob.download_blob( raw_response_hook=callback).content_as_bytes() # Assert self.assertEqual(byte_data, content) self.assert_download_progress( len(byte_data), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) def test_get_blob_exact_chunk_size(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob_name = self._get_blob_reference() byte_data = self.get_random_bytes( self.config.blob_settings.max_single_get_size + self.config.blob_settings.max_chunk_get_size) blob = self.bsc.get_blob_client(self.container_name, blob_name) blob.upload_blob(byte_data) progress = [] def callback(response): current = response.context['download_stream_current'] total = response.context['data_stream_total'] progress.append((current, total)) # Act content = blob.download_blob( raw_response_hook=callback).content_as_bytes() # Assert self.assertEqual(byte_data, content) self.assert_download_progress( len(byte_data), self.config.blob_settings.max_chunk_get_size, self.config.blob_settings.max_single_get_size, progress) def test_get_blob_to_stream_with_md5(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = 
self.bsc.get_blob_client(self.container_name, self.byte_blob) # Act with open(FILE_PATH, 'wb') as stream: downloader = blob.download_blob(validate_content=True) properties = downloader.download_to_stream(stream, max_connections=2) # Assert self.assertIsInstance(properties, BlobProperties) with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(self.byte_data, actual) def test_get_blob_with_md5(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) # Act content = blob.download_blob(validate_content=True).content_as_bytes( max_connections=2) # Assert self.assertEqual(self.byte_data, content) def test_get_blob_range_to_stream_with_overall_md5(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return # Arrange blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) props = blob.get_blob_properties() props.content_settings.content_md5 = b'MDAwMDAwMDA=' blob.set_http_headers(props.content_settings) # Act with open(FILE_PATH, 'wb') as stream: downloader = blob.download_blob(offset=0, length=1024, validate_content=True) properties = downloader.download_to_stream(stream, max_connections=2) # Assert self.assertIsInstance(properties, BlobProperties) self.assertEqual(b'MDAwMDAwMDA=', properties.content_settings.content_md5) def test_get_blob_range_with_overall_md5(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) content = blob.download_blob(offset=0, length=1024, validate_content=True) # Arrange props = blob.get_blob_properties() props.content_settings.content_md5 = b'MDAwMDAwMDA=' blob.set_http_headers(props.content_settings) # Act content = 
blob.download_blob(offset=0, length=1024, validate_content=True) # Assert self.assertEqual(b'MDAwMDAwMDA=', content.properties.content_settings.content_md5) def test_get_blob_range_with_range_md5(self): # parallel tests introduce random order of requests, can only run live if TestMode.need_recording_file(self.test_mode): return blob = self.bsc.get_blob_client(self.container_name, self.byte_blob) content = blob.download_blob(offset=0, length=1024, validate_content=True) # Arrange props = blob.get_blob_properties() props.content_settings.content_md5 = None blob.set_http_headers(props.content_settings) # Act content = blob.download_blob(offset=0, length=1024, validate_content=True) # Assert self.assertIsNotNone(content.properties.content_settings.content_type) self.assertIsNone(content.properties.content_settings.content_md5)
class AzureBlobStorage(object):
    """
    Instantiate AzureBlobStorage Class for a given Azure storage account.

    `Args:`
        account_name: str
            The name of the Azure storage account to use. Not required if ``AZURE_ACCOUNT_NAME``
            environment variable is set, or if ``account_url`` is supplied.
        credential: str
            An account shared access key with access to the Azure storage account, an SAS token
            string, or an instance of a TokenCredentials class. Not required if
            ``AZURE_CREDENTIAL`` environment variable is set.
        account_domain: str
            The domain of the Azure storage account, defaults to "blob.core.windows.net".
            Not required if ``AZURE_ACCOUNT_DOMAIN`` environment variable is set or if
            ``account_url`` is supplied.
        account_url: str
            The account URL for the Azure storage account including the account name and domain.
            Not required if ``AZURE_ACCOUNT_URL`` environment variable is set.
    `Returns:`
        `AzureBlobStorage`
    """

    def __init__(self, account_name=None, credential=None, account_domain='blob.core.windows.net',
                 account_url=None):
        # NOTE: ``os.getenv`` returns the environment value whenever it is set, so
        # ``AZURE_ACCOUNT_URL`` wins over the ``account_url`` argument here.
        self.account_url = os.getenv('AZURE_ACCOUNT_URL', account_url)
        self.credential = check_env.check('AZURE_CREDENTIAL', credential)
        if not self.account_url:
            self.account_name = check_env.check('AZURE_ACCOUNT_NAME', account_name)
            self.account_domain = check_env.check('AZURE_ACCOUNT_DOMAIN', account_domain)
            self.account_url = f'https://{self.account_name}.{self.account_domain}/'
        else:
            if not self.account_url.startswith('http'):
                self.account_url = f'https://{self.account_url}'
            # Update the account name and domain if a URL is supplied:
            # the first dotted label of the host is the account name, the rest the domain.
            parsed_url = urlparse(self.account_url)
            self.account_name, _, self.account_domain = parsed_url.netloc.partition('.')
        self.client = BlobServiceClient(account_url=self.account_url, credential=self.credential)

    def list_containers(self):
        """
        Returns a list of container names for the storage account

        `Returns:`
            list[str]
                List of container names
        """

        container_names = [container.name for container in self.client.list_containers()]
        logger.info(f'Found {len(container_names)} containers.')
        return container_names

    def container_exists(self, container_name):
        """
        Verify that a container exists within the storage account

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            bool
        """

        container_client = self.get_container(container_name)
        try:
            container_client.get_container_properties()
        except ResourceNotFoundError:
            logger.info(f'{container_name} does not exist.')
            return False
        logger.info(f'{container_name} exists.')
        return True

    def get_container(self, container_name):
        """
        Returns a container client

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            `ContainerClient`
        """

        logger.info(f'Returning {container_name} container client')
        return self.client.get_container_client(container_name)

    def create_container(self, container_name, metadata=None, public_access=None, **kwargs):
        """
        Create a container

        `Args:`
            container_name: str
                The name of the container
            metadata: Optional[dict[str, str]]
                A dict with metadata to associated with the container.
            public_access: Optional[Union[PublicAccess, str]]
                Settings for public access on the container, can be 'container' or 'blob' if not
                ``None``
            kwargs:
                Additional arguments to be supplied to the Azure Blob Storage API. See `Azure Blob Storage
                SDK documentation <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobserviceclient?view=azure-python#create-container-name--metadata-none--public-access-none----kwargs->`_
                for more info.
        `Returns:`
            `ContainerClient`
        """  # noqa

        container_client = self.client.create_container(
            container_name, metadata=metadata, public_access=public_access, **kwargs
        )
        logger.info(f'Created {container_name} container.')
        return container_client

    def delete_container(self, container_name):
        """
        Delete a container.

        `Args:`
            container_name: str
                The name of the container
        `Returns:`
            ``None``
        """

        self.client.delete_container(container_name)
        logger.info(f'{container_name} container deleted.')

    def list_blobs(self, container_name, name_starts_with=None):
        """
        List the blobs in a container

        `Args:`
            container_name: str
                The name of the container
            name_starts_with: Optional[str]
                A prefix to filter blob names
        `Returns:`
            list
                The items yielded by ``ContainerClient.list_blobs``, collected into a list.
                These are the SDK's listing objects rather than bare name strings
                (NOTE(review): presumably ``BlobProperties``, whose ``name`` attribute holds
                the blob name -- confirm against the installed SDK version).
        """

        container_client = self.get_container(container_name)
        blobs = list(container_client.list_blobs(name_starts_with=name_starts_with))
        logger.info(f'Found {len(blobs)} blobs in {container_name} container.')
        return blobs

    def blob_exists(self, container_name, blob_name):
        """
        Verify that a blob exists in the specified container

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            bool
        """

        blob_client = self.get_blob(container_name, blob_name)
        try:
            blob_client.get_blob_properties()
        except ResourceNotFoundError:
            logger.info(f'{blob_name} does not exist in {container_name} container.')
            return False
        logger.info(f'{blob_name} exists in {container_name} container.')
        return True

    def get_blob(self, container_name, blob_name):
        """
        Get a blob object

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            `BlobClient`
        """

        blob_client = self.client.get_blob_client(container_name, blob_name)
        logger.info(f'Got {blob_name} blob from {container_name} container.')
        return blob_client

    @staticmethod
    def _build_sas_url(account_url, container_name, blob_name, sas):
        """
        Assemble a blob URL carrying a shared access signature

        `Args:`
            account_url: str
                The storage account URL, with or without a trailing slash
            container_name: str
                The container name
            blob_name: str
                The blob name
            sas: str
                The SAS token, already formatted as a query string
        `Returns:`
            str
                ``<account_url>/<container_name>/<blob_name>?<sas>``
        """

        # The SAS token *is* the query string; wrapping it in a ``sas=`` parameter would
        # swallow its first field. Strip the trailing slash to avoid ``//`` in the path.
        base_url = account_url.rstrip('/')
        return f'{base_url}/{container_name}/{blob_name}?{sas}'

    def get_blob_url(self, container_name, blob_name, account_key=None, permission=None,
                     expiry=None, start=None):
        """
        Get a URL with a shared access signature for a blob

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
            account_key: Optional[str]
                An account shared access key for the storage account. Will default to the key used
                on initialization if one was provided as the credential, but required if it was not.
            permission: Optional[Union[BlobSasPermissions, str]]
                Permissions associated with the blob URL. Can be either a BlobSasPermissions object
                or a string where 'r', 'a', 'c', 'w', and 'd' correspond to read, add, create,
                write, and delete permissions respectively.
            expiry: Optional[Union[datetime, str]]
                The datetime when the URL should expire. Defaults to UTC.
            start: Optional[Union[datetime, str]]
                The datetime when the URL should become valid. Defaults to UTC. If it is ``None``,
                the URL becomes active when it is first created.
        `Returns:`
            str
                URL with shared access signature for blob
        `Raises:`
            ValueError
                If no ``account_key`` is given and no credential was stored on initialization
        """

        if not account_key:
            if not self.credential:
                raise ValueError(
                    'An account shared access key must be provided if it was not on initialization'
                )
            account_key = self.credential

        sas = generate_blob_sas(
            self.account_name,
            container_name,
            blob_name,
            account_key=account_key,
            permission=permission,
            expiry=expiry,
            start=start,
        )
        return self._build_sas_url(self.account_url, container_name, blob_name, sas)

    def _get_content_settings_from_dict(self, kwargs_dict):
        """
        Removes any keys for ``ContentSettings`` from a dict and returns a tuple of the generated
        settings or ``None`` and a dict with the settings keys removed.

        `Args:`
            kwargs_dict: dict
                A dict which should be processed and may have keys for ``ContentSettings``
        `Returns:`
            Tuple[Optional[ContentSettings], dict]
                Any created settings or ``None`` and the dict with settings keys removed
        """

        content_settings_keys = [
            'content_type', 'content_encoding', 'content_language', 'content_disposition',
            'cache_control', 'content_md5'
        ]

        # Work on a shallow copy so the caller's dict is left untouched.
        remaining = dict(kwargs_dict)
        settings_kwargs = {
            key: remaining.pop(key) for key in content_settings_keys if key in remaining
        }
        content_settings = ContentSettings(**settings_kwargs) if settings_kwargs else None
        return content_settings, remaining

    def put_blob(self, container_name, blob_name, local_path, **kwargs):
        """
        Puts a blob (aka file) in a bucket

        `Args:`
            container_name: str
                The name of the container to store the blob
            blob_name: str
                The name of the blob to be stored
            local_path: str
                The local path of the file to upload
            kwargs:
                Additional arguments to be supplied to the Azure Blob Storage API. See `Azure Blob Storage
                SDK documentation <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobclient?view=azure-python#upload-blob-data--blob-type--blobtype-blockblob---blockblob----length-none--metadata-none----kwargs->`_
                for more info. Any keys that belong to the ``ContentSettings`` object will be
                provided to that class directly.
        `Returns:`
            `BlobClient`
        """  # noqa

        blob_client = self.get_blob(container_name, blob_name)

        # Move all content_settings keys into a ContentSettings object
        content_settings, kwargs_dict = self._get_content_settings_from_dict(kwargs)

        # Hand the open file to the SDK rather than reading it fully into memory first.
        with open(local_path, 'rb') as f:
            blob_client.upload_blob(
                f,
                overwrite=True,
                content_settings=content_settings,
                **kwargs_dict,
            )
        logger.info(f'{blob_name} blob put in {container_name} container')

        # Return refreshed BlobClient object
        return self.get_blob(container_name, blob_name)

    def download_blob(self, container_name, blob_name, local_path=None):
        """
        Downloads a blob from a container into the specified file path or a temporary file path

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
            local_path: Optional[str]
                The local path where the file will be downloaded. If not specified, a temporary
                file will be created and returned, and that file will be removed automatically
                when the script is done running.
        `Returns:`
            str
                The path of the downloaded file
        """

        if not local_path:
            local_path = files.create_temp_file_for_path('TEMPFILEAZURE')

        blob_client = self.get_blob(container_name, blob_name)

        logger.info(f'Downloading {blob_name} blob from {container_name} container.')
        with open(local_path, 'wb') as f:
            blob_client.download_blob().readinto(f)
        logger.info(f'{blob_name} blob saved to {local_path}.')

        return local_path

    def delete_blob(self, container_name, blob_name):
        """
        Delete a blob in a specified container.

        `Args:`
            container_name: str
                The container name
            blob_name: str
                The blob name
        `Returns:`
            ``None``
        """

        blob_client = self.get_blob(container_name, blob_name)
        blob_client.delete_blob()
        logger.info(f'{blob_name} blob in {container_name} container deleted.')

    def upload_table(self, table, container_name, blob_name, data_type='csv', **kwargs):
        """
        Load the data from a Parsons table into a blob.

        `Args:`
            table: obj
                A :ref:`parsons-table`
            container_name: str
                The container name to upload the data into
            blob_name: str
                The blob name to upload the data into
            data_type: str
                The file format to use when writing the data. One of: `csv` or `json`
            kwargs:
                Additional keyword arguments to supply to ``put_blob``
        `Returns:`
            `BlobClient`
        `Raises:`
            ValueError
                If ``data_type`` is neither `csv` nor `json`
        """

        if data_type == 'csv':
            local_path = table.to_csv()
            content_type = 'text/csv'
        elif data_type == 'json':
            local_path = table.to_json()
            content_type = 'application/json'
        else:
            raise ValueError(f'Unknown data_type value ({data_type}): must be one of: csv or json')

        return self.put_blob(
            container_name, blob_name, local_path, content_type=content_type, **kwargs
        )
class StorageLargeBlockBlobTest(StorageTestCase):
    """Block-blob tests using large payloads (``LARGE_BLOCK_SIZE`` / ``LARGE_BLOB_SIZE``).

    Tests guarded by ``TestMode.need_recording_file`` return early without
    doing anything when that check is true; their comments state they can
    only run live.
    """

    def setUp(self):
        """Create the service client with reduced chunk limits and a test container."""
        super(StorageLargeBlockBlobTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        # test chunking functionality by reducing the threshold
        # for chunking and the size of each chunk, otherwise
        # the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=credential,
            max_single_put_size=32 * 1024,
            max_block_size=2 * 1024 * 1024,
            min_large_block_upload_threshold=1 * 1024 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        """Best-effort removal of the test container and the local scratch file."""
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                # Cleanup is best-effort; a failed delete must not fail the test.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit through.
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except OSError:
                # Best-effort: a leftover scratch file is not a test failure.
                pass

        return super(StorageLargeBlockBlobTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        """Return a fresh blob name derived from ``TEST_BLOB_PREFIX``."""
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        """Upload an empty blob under a fresh name and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def _prepare_large_file(self):
        """Write ``LARGE_BLOB_SIZE`` random bytes to ``FILE_PATH`` for a fresh blob.

        Returns a ``(blob_name, blob_client, data)`` tuple where ``data`` is the
        bytearray that was written to the file.
        """
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(os.urandom(LARGE_BLOB_SIZE))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)
        return blob_name, blob, data

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        """Download the named blob and assert its content equals ``expected_data``."""
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(list(actual_data)), expected_data)

    # --Test cases for block blobs --------------------------------------------

    def test_put_block_bytes_large(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act / Assert: every staged block call returns None
        for i in range(5):
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                os.urandom(LARGE_BLOCK_SIZE))
            self.assertIsNone(resp)

    def test_put_block_bytes_large_with_md5(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act / Assert
        for i in range(5):
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                os.urandom(LARGE_BLOCK_SIZE),
                validate_content=True)
            self.assertIsNone(resp)

    def test_put_block_stream_large(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act / Assert
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE)
            self.assertIsNone(resp)

    def test_put_block_stream_large_with_md5(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()

        # Act / Assert
        for i in range(5):
            stream = BytesIO(bytearray(LARGE_BLOCK_SIZE))
            resp = blob.stage_block(
                'block {0}'.format(i).encode('utf-8'),
                stream,
                length=LARGE_BLOCK_SIZE,
                validate_content=True)
            self.assertIsNone(resp)

    def test_create_large_blob_from_path(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_with_md5(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, validate_content=True, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_non_parallel(self):
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = bytearray(self.get_random_bytes(100))
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_path_with_progress(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_connections=2, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    def test_create_large_blob_from_path_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    def test_create_large_blob_from_stream_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_large_blob_from_stream_with_progress_chunked_upload(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, max_connections=2, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    def test_create_large_blob_from_stream_chunked_upload_with_count(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act: upload only a prefix of the file by passing an explicit length
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_large_blob_from_stream_chunked_upload_with_count_and_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(
                stream, length=blob_size, content_settings=content_settings, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    def test_create_large_blob_from_stream_chunked_upload_with_properties(self):
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name, blob, data = self._prepare_large_file()

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings, max_connections=2)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)
class StorageBlockBlobTest(StorageTestCase):
    """Tests for block blobs: staging/committing blocks and ``upload_blob``.

    Tests decorated with ``@record`` go through the recording decorator.
    Tests that start with a ``TestMode.need_recording_file`` guard return
    early when a recording would be required, because parallel uploads
    produce a random request order.
    """

    def setUp(self):
        """Create the service client and, when not in playback, the container."""
        super(StorageBlockBlobTest, self).setUp()

        url = self._get_account_url()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            url,
            credential=self.settings.STORAGE_ACCOUNT_KEY,
            connection_data_block_size=4 * 1024,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            self.bsc.create_container(self.container_name)

    def tearDown(self):
        """Best-effort removal of the test container and the scratch file."""
        if not self.is_playback():
            try:
                self.bsc.delete_container(self.container_name)
            except Exception:
                # Cleanup is best-effort and must not fail the test run, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                pass

        if os.path.isfile(FILE_PATH):
            try:
                os.remove(FILE_PATH)
            except OSError:
                # The file may be locked or already gone; ignore.
                pass

        return super(StorageBlockBlobTest, self).tearDown()

    #--Helpers-----------------------------------------------------------------
    def _get_blob_reference(self):
        """Return a resource name built from ``TEST_BLOB_PREFIX``."""
        return self.get_resource_name(TEST_BLOB_PREFIX)

    def _create_blob(self):
        """Upload an empty blob under a fresh name and return its client."""
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.upload_blob(b'')
        return blob

    def assertBlobEqual(self, container_name, blob_name, expected_data):
        """Download the blob and assert its joined content equals ``expected_data``."""
        blob = self.bsc.get_blob_client(container_name, blob_name)
        actual_data = blob.download_blob()
        self.assertEqual(b"".join(actual_data), expected_data)

    class NonSeekableFile(object):
        """File wrapper that exposes only ``read`` and ``write`` (no ``seek``/``tell``)."""

        def __init__(self, wrapped_file):
            self.wrapped_file = wrapped_file

        def write(self, data):
            self.wrapped_file.write(data)

        def read(self, count):
            return self.wrapped_file.read(count)

    #--Test cases for block blobs --------------------------------------------
    @record
    def test_put_block(self):
        """Stage five blocks with integer ids; each call returns ``None``."""
        # Arrange
        blob = self._create_blob()

        # Act
        for i in range(5):
            resp = blob.stage_block(i, 'block {0}'.format(i).encode('utf-8'))
            self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_unicode(self):
        """Stage a block whose payload is a unicode text string."""
        # Arrange
        blob = self._create_blob()

        # Act
        resp = blob.stage_block('1', u'啊齄丂狛狜')
        self.assertIsNone(resp)

        # Assert

    @record
    def test_put_block_with_md5(self):
        """Stage a block with ``validate_content=True``."""
        # Arrange
        blob = self._create_blob()

        # Act
        blob.stage_block(1, b'block', validate_content=True)

        # Assert

    @record
    def test_put_block_list(self):
        """Commit three staged blocks and verify content, etag and last_modified."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3'),
        ]
        put_block_list_resp = blob.commit_block_list(block_list)

        # Assert
        content = blob.download_blob()
        self.assertEqual(b"".join(content), b'AAABBBCCC')
        self.assertEqual(content.properties.etag, put_block_list_resp.get('etag'))
        self.assertEqual(content.properties.last_modified, put_block_list_resp.get('last_modified'))

    @record
    def test_put_block_list_invalid_block_id(self):
        """Committing a block id that was never staged raises ``HttpResponseError``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='4'),  # never staged
        ]
        with self.assertRaises(HttpResponseError) as raised:
            blob.commit_block_list(block_list)

        # Assert
        self.assertIn('specified block list is invalid', str(raised.exception))

    @record
    def test_put_block_list_with_md5(self):
        """Commit a block list with ``validate_content=True``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3'),
        ]
        blob.commit_block_list(block_list, validate_content=True)

        # Assert

    @record
    def test_get_block_list_no_blocks(self):
        """An empty blob reports two empty block lists."""
        # Arrange
        blob = self._create_blob()

        # Act
        block_list = blob.get_block_list('all')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 0)

    @record
    def test_get_block_list_uncommitted_blocks(self):
        """Staged-only blocks appear in the second list with their ids and sizes."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        # Act
        block_list = blob.get_block_list('uncommitted')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 3)
        self.assertEqual(len(block_list[0]), 0)
        self.assertEqual(block_list[1][0].id, '1')
        self.assertEqual(block_list[1][0].size, 3)
        self.assertEqual(block_list[1][1].id, '2')
        self.assertEqual(block_list[1][1].size, 3)
        self.assertEqual(block_list[1][2].id, '3')
        self.assertEqual(block_list[1][2].size, 3)

    @record
    def test_get_block_list_committed_blocks(self):
        """Committed blocks appear in the first list with their ids and sizes."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        blob.stage_block('1', b'AAA')
        blob.stage_block('2', b'BBB')
        blob.stage_block('3', b'CCC')

        block_list = [
            BlobBlock(block_id='1'),
            BlobBlock(block_id='2'),
            BlobBlock(block_id='3'),
        ]
        blob.commit_block_list(block_list)

        # Act
        block_list = blob.get_block_list('committed')

        # Assert
        self.assertIsNotNone(block_list)
        self.assertEqual(len(block_list), 2)
        self.assertEqual(len(block_list[1]), 0)
        self.assertEqual(len(block_list[0]), 3)
        self.assertEqual(block_list[0][0].id, '1')
        self.assertEqual(block_list[0][0].size, 3)
        self.assertEqual(block_list[0][1].id, '2')
        self.assertEqual(block_list[0][1].size, 3)
        self.assertEqual(block_list[0][2].id, '3')
        self.assertEqual(block_list[0][2].size, 3)

    @record
    def test_create_small_block_blob_with_no_overwrite(self):
        """A second small upload with ``overwrite=False`` raises and keeps the first data."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_small_block_blob_with_overwrite(self):
        """A second small upload with ``overwrite=True`` replaces the first data."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = b'hello world'
        data2 = b'hello second world'

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True)
        update_resp = blob.upload_blob(data2, overwrite=True)

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)

    @record
    def test_create_large_block_blob_with_no_overwrite(self):
        """A second large upload with ``overwrite=False`` raises; data and metadata stay."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True, metadata={'BlobData': 'Data1'})

        with self.assertRaises(ResourceExistsError):
            blob.upload_blob(data2, overwrite=False, metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data1)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data1'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE)

    @record
    def test_create_large_block_blob_with_overwrite(self):
        """A second large upload with ``overwrite=True`` replaces data, metadata and size."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data1 = self.get_random_bytes(LARGE_BLOB_SIZE)
        data2 = self.get_random_bytes(LARGE_BLOB_SIZE + 512)

        # Act
        create_resp = blob.upload_blob(data1, overwrite=True, metadata={'BlobData': 'Data1'})
        update_resp = blob.upload_blob(data2, overwrite=True, metadata={'BlobData': 'Data2'})

        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data2)
        self.assertEqual(props.etag, update_resp.get('etag'))
        self.assertEqual(props.last_modified, update_resp.get('last_modified'))
        self.assertEqual(props.blob_type, BlobType.BlockBlob)
        self.assertEqual(props.metadata, {'BlobData': 'Data2'})
        self.assertEqual(props.size, LARGE_BLOB_SIZE + 512)

    @record
    def test_create_blob_from_bytes_single_put(self):
        """Upload a short bytes payload and verify content, etag and last_modified."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_0_bytes(self):
        """Upload an empty bytes payload."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b''

        # Act
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_from_bytes_blob_unicode(self):
        """Upload a text string; the stored content is its UTF-8 encoding."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)

        # Act
        data = u'hello world'
        create_resp = blob.upload_blob(data)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data.encode('utf-8'))
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_from_bytes_blob_with_lease_id(self):
        """Upload and download a large payload while holding a lease on the blob."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob = self._create_blob()
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        lease = blob.acquire_lease()

        # Act
        create_resp = blob.upload_blob(data, lease=lease)

        # Assert
        output = blob.download_blob(lease=lease)
        self.assertEqual(b"".join(output), data)
        self.assertEqual(output.properties.etag, create_resp.get('etag'))
        self.assertEqual(output.properties.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_metadata(self):
        """Metadata passed to ``upload_blob`` is returned by ``get_blob_properties``."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        blob.upload_blob(data, metadata=metadata)

        # Assert
        md = blob.get_blob_properties().metadata
        self.assertDictEqual(md, metadata)

    def test_create_blob_from_bytes_with_properties(self):
        """Content settings passed to ``upload_blob`` are stored on the blob."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        blob.upload_blob(data, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    def test_create_blob_from_bytes_with_progress(self):
        """Collect progress via ``raw_response_hook`` during a large bytes upload."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        create_resp = blob.upload_blob(data, raw_response_hook=callback)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_bytes_with_index(self):
        """Upload a slice of the payload that skips the first three bytes."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:])

        # Assert
        self.assertEqual(data[3:], b"".join(blob.download_blob()))

    @record
    def test_create_blob_from_bytes_with_index_and_count(self):
        """Upload with ``length=5`` from a slice; the blob holds ``data[3:8]``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data[3:], length=5)

        # Assert
        self.assertEqual(data[3:8], b"".join(blob.download_blob()))

    @record
    def test_create_blob_from_bytes_with_index_and_count_and_properties(self):
        """Upload with ``length=5`` and content settings; verify both."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        blob.upload_blob(data[3:], length=5, content_settings=content_settings)

        # Assert
        self.assertEqual(data[3:8], b"".join(blob.download_blob()))
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @record
    def test_create_blob_from_bytes_non_parallel(self):
        """Upload a large payload with ``max_connections=1``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, length=LARGE_BLOB_SIZE, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob.blob_name, data)

    def test_create_blob_from_path(self):
        """Upload a large payload from a file opened at ``FILE_PATH``."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_path_non_parallel(self):
        """Upload a 100-byte file with ``max_connections=1``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(100)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream, length=100, max_connections=1)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_path_with_progress(self):
        """Collect progress via ``raw_response_hook`` during a file upload."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    def test_create_blob_from_path_with_properties(self):
        """Upload a file with content settings and verify they are stored."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload(self):
        """Upload a large payload from an open file stream."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            create_resp = blob.upload_blob(stream)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    def test_create_blob_from_stream_non_seekable_chunked_upload_known_size(self):
        """Upload from a ``NonSeekableFile`` with an explicit ``length``."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        blob_size = len(data) - 66
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, length=blob_size, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_non_seekable_chunked_upload_unknown_size(self):
        """Upload from a ``NonSeekableFile`` without passing ``length``."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        with open(FILE_PATH, 'rb') as stream:
            non_seekable_file = StorageBlockBlobTest.NonSeekableFile(stream)
            blob.upload_blob(non_seekable_file, max_connections=1)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    def test_create_blob_from_stream_with_progress_chunked_upload(self):
        """Collect progress via ``raw_response_hook`` during a stream upload."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    def test_create_blob_from_stream_chunked_upload_with_count(self):
        """Upload only the first ``len(data) - 301`` bytes of a file stream."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])

    def test_create_blob_from_stream_chunked_upload_with_count_and_properties(self):
        """Upload a length-limited stream with content settings; verify both."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        blob_size = len(data) - 301
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, length=blob_size, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data[:blob_size])
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    def test_create_blob_from_stream_chunked_upload_with_properties(self):
        """Upload a full stream with content settings and verify they are stored."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)
        with open(FILE_PATH, 'wb') as stream:
            stream.write(data)

        # Act
        content_settings = ContentSettings(
            content_type='image/png',
            content_language='spanish')
        with open(FILE_PATH, 'rb') as stream:
            blob.upload_blob(stream, content_settings=content_settings)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        properties = blob.get_blob_properties()
        self.assertEqual(properties.content_settings.content_type, content_settings.content_type)
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @record
    def test_create_blob_from_text(self):
        """Upload non-ASCII text; the stored content is its UTF-8 encoding."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-8')

        # Act
        create_resp = blob.upload_blob(text)
        props = blob.get_blob_properties()

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assertEqual(props.etag, create_resp.get('etag'))
        self.assertEqual(props.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_blob_from_text_with_encoding(self):
        """Upload text with ``encoding='utf-16'``; the stored content is UTF-16."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        blob.upload_blob(text, encoding='utf-16')

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)

    @record
    def test_create_blob_from_text_with_encoding_and_progress(self):
        """Upload UTF-16 text while collecting progress via ``raw_response_hook``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        text = u'hello 啊齄丂狛狜 world'
        data = text.encode('utf-16')

        # Act
        progress = []

        def callback(response):
            current = response.context['upload_stream_current']
            total = response.context['data_stream_total']
            if current is not None:
                progress.append((current, total))

        blob.upload_blob(text, encoding='utf-16', raw_response_hook=callback)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, data)
        self.assert_upload_progress(len(data), self.config.max_block_size, progress)

    def test_create_blob_from_text_chunked_upload(self):
        """Upload large random text; the stored content is its UTF-8 encoding."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_text_data(LARGE_BLOB_SIZE)
        encoded_data = data.encode('utf-8')

        # Act
        blob.upload_blob(data)

        # Assert
        self.assertBlobEqual(self.container_name, blob_name, encoded_data)

    @record
    def test_create_blob_with_md5(self):
        """Upload a short payload with ``validate_content=True``."""
        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = b'hello world'

        # Act
        blob.upload_blob(data, validate_content=True)

        # Assert

    def test_create_blob_with_md5_chunked(self):
        """Upload a large payload with ``validate_content=True``."""
        # parallel tests introduce random order of requests, can only run live
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._get_blob_reference()
        blob = self.bsc.get_blob_client(self.container_name, blob_name)
        data = self.get_random_bytes(LARGE_BLOB_SIZE)

        # Act
        blob.upload_blob(data, validate_content=True)
class StorageBlobRetryTest(StorageTestCase):
    """Tests for ``stage_block`` when the first response status is overridden.

    Each test overrides the first 201 response with a 408 through a
    ``ResponseCallback`` hook, on a client configured with an
    ``ExponentialRetry`` policy.
    """

    def setUp(self):
        super(StorageBlobRetryTest, self).setUp()

        account_url = self._get_account_url()
        shared_key = self._get_shared_key_credential()
        self.bs = BlobServiceClient(
            account_url,
            credential=shared_key,
            retry_policy=ExponentialRetry(
                initial_backoff=1,
                increment_base=2,
                retry_total=3))
        self.container_name = self.get_resource_name('utcontainer')

        if not self.is_playback():
            try:
                self.bs.create_container(self.container_name)
            except ResourceExistsError:
                # A container left over from an earlier run is reused.
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.bs.delete_container(self.container_name)
            except HttpResponseError:
                # Failing to delete the container must not fail the test.
                pass

        return super(StorageBlobRetryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    class NonSeekableStream(object):
        """Stream wrapper that forwards read/write/tell but raises on ``seek``."""

        def __init__(self, wrapped_stream):
            self.wrapped_stream = wrapped_stream

        def write(self, data):
            self.wrapped_stream.write(data)

        def read(self, count):
            return self.wrapped_stream.read(count)

        def seek(self, *args, **kwargs):
            raise UnsupportedOperation("boom!")

        def tell(self):
            return self.wrapped_stream.tell()

    @record
    def test_retry_put_block_with_seekable_stream(self):
        """Stage a block from a ``BytesIO`` stream with the first response rigged."""
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        name = self.get_resource_name('blob')
        payload = self.get_random_bytes(PUT_BLOCK_SIZE)
        stream = BytesIO(payload)

        # rig the response so that it fails for a single time
        rigged = ResponseCallback(status=201, new_status=408)
        hook = rigged.override_first_status

        # Act
        client = self.bs.get_blob_client(self.container_name, name)
        client.stage_block(1, stream, raw_response_hook=hook)

        # Assert: exactly one uncommitted block of the expected size
        _, pending = client.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=hook)
        self.assertEqual(len(pending), 1)
        self.assertEqual(pending[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        client.commit_block_list(['1'], raw_response_hook=hook)

        # Assert
        downloaded = client.download_blob().readall()
        self.assertEqual(downloaded, payload)

    @record
    def test_retry_put_block_with_non_seekable_stream(self):
        """Stage a block from a non-seekable stream with the first response rigged."""
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        name = self.get_resource_name('blob')
        payload = self.get_random_bytes(PUT_BLOCK_SIZE)
        stream = self.NonSeekableStream(BytesIO(payload))

        # rig the response so that it fails for a single time
        rigged = ResponseCallback(status=201, new_status=408)
        hook = rigged.override_first_status

        # Act
        client = self.bs.get_blob_client(self.container_name, name)
        # Note: put_block transforms non-seekable streams into byte arrays
        # before handing it off to the executor
        client.stage_block(1, stream, raw_response_hook=hook)

        # Assert: exactly one uncommitted block of the expected size
        _, pending = client.get_block_list(
            block_list_type="uncommitted",
            raw_response_hook=hook)
        self.assertEqual(len(pending), 1)
        self.assertEqual(pending[0].size, PUT_BLOCK_SIZE)

        # Commit block and verify content
        client.commit_block_list(['1'], raw_response_hook=hook)

        # Assert
        downloaded = client.download_blob().readall()
        self.assertEqual(downloaded, payload)

    @record
    def test_retry_put_block_with_non_seekable_stream_fail(self):
        """With an explicit ``length``, the rigged 408 is raised to the caller."""
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        name = self.get_resource_name('blob')
        payload = self.get_random_bytes(PUT_BLOCK_SIZE)
        stream = self.NonSeekableStream(BytesIO(payload))

        # rig the response so that it fails for a single time
        rigged = ResponseCallback(status=201, new_status=408)

        # Act
        client = self.bs.get_blob_client(self.container_name, name)

        with self.assertRaises(HttpResponseError) as error:
            client.stage_block(
                1,
                stream,
                length=PUT_BLOCK_SIZE,
                raw_response_hook=rigged.override_first_status)

        # Assert
        self.assertEqual(error.exception.response.status_code, 408)
class StorageBlockBlobTest(StorageTestCase):
    """Tests that stage blocks from, or copy, a source blob addressed by a SAS URL."""

    def setUp(self):
        """Build a small-chunk service client and a SAS-readable source blob."""
        super(StorageBlockBlobTest, self).setUp()

        # test chunking functionality by reducing the size of each chunk,
        # otherwise the tests would take too long to execute
        self.bsc = BlobServiceClient(
            self._get_account_url(),
            credential=self._get_shared_key_credential(),
            connection_data_block_size=4 * 1024,
            max_single_put_size=32 * 1024,
            max_block_size=4 * 1024)
        self.config = self.bsc._config
        self.container_name = self.get_resource_name('utcontainer')

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_data = self.get_random_bytes(SOURCE_BLOB_SIZE)
        source = self.bsc.get_blob_client(self.container_name, self.source_blob_name)

        # Service-side resources are only created when not replaying recordings.
        if not self.is_playback():
            self.bsc.create_container(self.container_name)
            source.upload_blob(self.source_blob_data)

        # generate a SAS so that it is accessible with a URL
        one_hour_from_now = datetime.utcnow() + timedelta(hours=1)
        read_sas = source.generate_shared_access_signature(
            permission=BlobPermissions.READ,
            expiry=one_hour_from_now,
        )
        sas_client = BlobClient(source.url, credential=read_sas)
        self.source_blob_url = sas_client.url

    def tearDown(self):
        """Delete the test container outside playback, then run the base tearDown."""
        touches_service = not self.is_playback()
        if touches_service:
            try:
                self.bsc.delete_container(self.container_name)
            except HttpResponseError:
                # Cleanup is best-effort; a failed delete must not fail the test.
                pass

        return super(StorageBlockBlobTest, self).tearDown()

    @record
    def test_put_block_from_url_and_commit(self):
        # Stage two blocks from the source URL, check both are uncommitted,
        # commit them, and compare the result with the source data.

        # Arrange
        target_name = self.get_resource_name('destblob')
        target = self.bsc.get_blob_client(self.container_name, target_name)

        # Act part 1: make put block from url calls
        staged_ranges = (
            (1, 0, 4 * 1024 - 1),
            (2, 4 * 1024, 8 * 1024),
        )
        for block_id, offset, length in staged_ranges:
            target.stage_block_from_url(
                block_id=block_id,
                source_url=self.source_blob_url,
                source_offset=offset,
                source_length=length)

        # Assert blocks
        done, pending = target.get_block_list('all')
        self.assertEqual(len(pending), 2)
        self.assertEqual(len(done), 0)

        # Act part 2: commit the blocks
        target.commit_block_list(['1', '2'])

        # Assert destination blob has right content
        downloaded = target.download_blob().content_as_bytes()
        self.assertEqual(downloaded, self.source_blob_data)

    @record
    def test_put_block_from_url_and_validate_content_md5(self):
        # A matching source MD5 lets the block be staged; a mismatching one
        # must raise md5_mismatch and leave the block list unchanged.

        # Arrange
        target_name = self.get_resource_name('destblob')
        target = self.bsc.get_blob_client(self.container_name, target_name)
        good_md5 = StorageContentValidation.get_content_md5(self.source_blob_data)

        # Act part 1: put block from url with md5 validation
        target.stage_block_from_url(
            block_id=1,
            source_url=self.source_blob_url,
            source_content_md5=good_md5,
            source_offset=0,
            source_length=8 * 1024)

        # Assert block was staged
        done, pending = target.get_block_list('all')
        self.assertEqual(len(pending), 1)
        self.assertEqual(len(done), 0)

        # Act part 2: put block from url with wrong md5
        bad_md5 = StorageContentValidation.get_content_md5(b"POTATO")
        with self.assertRaises(HttpResponseError) as raised:
            target.stage_block_from_url(
                block_id=2,
                source_url=self.source_blob_url,
                source_content_md5=bad_md5,
                source_offset=0,
                source_length=8 * 1024)
        self.assertEqual(raised.exception.error_code, StorageErrorCode.md5_mismatch)

        # Assert block was not staged
        done, pending = target.get_block_list('all')
        self.assertEqual(len(pending), 1)
        self.assertEqual(len(done), 0)

    @record
    def test_copy_blob_sync(self):
        # Synchronously copy the source blob and verify the reported copy
        # properties and the copied content.

        # Arrange
        target_name = self.get_resource_name('destblob')
        target = self.bsc.get_blob_client(self.container_name, target_name)

        # Act
        props = target.copy_blob_from_url(self.source_blob_url, requires_sync=True)

        # Assert
        self.assertIsNotNone(props)
        self.assertIsNotNone(props.copy_id())
        self.assertEqual('success', props.status())

        # Verify content
        downloaded = target.download_blob().content_as_bytes()
        self.assertEqual(self.source_blob_data, downloaded)
print(" Success! All tasks reached the 'Completed' state within the " "specified timeout period.") # Print the stdout.txt and stderr.txt files for each task to the console print_task_output(batch_client, config.JOB_ID) # Print out some timing info end_time = datetime.datetime.now().replace(microsecond=0) print() print(f'Sample end: {end_time}') elapsed_time = end_time - start_time print(f'Elapsed time: {elapsed_time}') print() input('Press ENTER to exit...') except batchmodels.BatchErrorException as err: print_batch_exception(err) raise finally: # Clean up storage resources print(f'Deleting container [{input_container_name}]...') blob_service_client.delete_container(input_container_name) # Clean up Batch resources (if the user so chooses). if query_yes_no('Delete job?') == 'yes': batch_client.job.delete(config.JOB_ID) if query_yes_no('Delete pool?') == 'yes': batch_client.pool.delete(config.POOL_ID)
class StorageLoggingTest(StorageTestCase):
    """Tests that shared-key credentials and SAS signatures are scrubbed from logs.

    Each test performs one request with ``logging_enable=True`` while the log
    output is captured, then asserts that the sensitive value is absent while
    the header / query-parameter name itself is still present.
    """

    def setUp(self):
        """Create the service client, the test container and a source blob,
        and derive a SAS URL through which the source blob can be read."""
        super(StorageLoggingTest, self).setUp()

        url = self._get_account_url()
        credential = self._get_shared_key_credential()

        self.bsc = BlobServiceClient(url, credential=credential)
        self.container_name = self.get_resource_name('utcontainer')

        # create source blob to be copied from
        self.source_blob_name = self.get_resource_name('srcblob')
        self.source_blob_data = self.get_random_bytes(4 * 1024)
        source_blob = self.bsc.get_blob_client(self.container_name, self.source_blob_name)

        # Service-side resources are only created when not replaying recordings.
        if not self.is_playback():
            self.bsc.create_container(self.container_name)
            source_blob.upload_blob(self.source_blob_data)

        # generate a SAS so that it is accessible with a URL
        sas_token = source_blob.generate_shared_access_signature(
            permission=BlobPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        sas_source = BlobClient(source_blob.url, credential=sas_token)
        self.source_blob_url = sas_source.url

    def tearDown(self):
        """Delete the test container outside playback, then run the base tearDown."""
        if not self.is_playback():
            self.bsc.delete_container(self.container_name)

        return super(StorageLoggingTest, self).tearDown()

    @record
    def test_authorization_is_scrubbed_off(self):
        # The Authorization header name must be logged without its value.

        # Arrange
        container = self.bsc.get_container_client(self.container_name)

        # Act
        with LogCaptured(self) as log_captured:
            container.get_container_properties(logging_enable=True)
            log_as_str = log_captured.getvalue()

            # Assert
            # make sure authorization header is logged, but its value is not
            # the keyword SharedKey is present in the authorization header's value
            self.assertTrue(_AUTHORIZATION_HEADER_NAME in log_as_str)
            self.assertFalse('SharedKey' in log_as_str)

    @record
    def test_sas_signature_is_scrubbed_off(self):
        # The 'sig' query parameter must be logged without its value when a
        # container client authenticates with a SAS token.

        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        container = self.bsc.get_container_client(self.container_name)
        token = container.generate_shared_access_signature(
            permission=ContainerPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        # parse out the signed signature
        token_components = parse_qs(token)
        # parse_qs decodes the value; re-quote it so it is compared in the
        # percent-encoded form.
        signed_signature = quote(
            token_components[QueryStringConstants.SIGNED_SIGNATURE][0])

        sas_service = ContainerClient(container.url, credential=token)

        # Act
        with LogCaptured(self) as log_captured:
            sas_service.get_account_information(logging_enable=True)
            log_as_str = log_captured.getvalue()

            # Assert
            # make sure the query parameter 'sig' is logged, but its value is not
            self.assertTrue(
                QueryStringConstants.SIGNED_SIGNATURE in log_as_str)
            self.assertFalse(signed_signature in log_as_str)

    @record
    def test_copy_source_sas_is_scrubbed_off(self):
        # Neither the SAS signature embedded in the copy-source URL nor the
        # shared-key Authorization value may show up in the log.

        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        dest_blob_name = self.get_resource_name('destblob')
        dest_blob = self.bsc.get_blob_client(self.container_name, dest_blob_name)

        # parse out the signed signature
        # parse_qs expects a bare query string. Given the whole URL it would
        # fuse the scheme/host/path onto the first parameter's key, so the
        # lookup below would only work when the signature is not the first
        # parameter. Parse just the part after '?'.
        source_query = self.source_blob_url.partition('?')[2]
        token_components = parse_qs(source_query)
        signed_signature = quote(
            token_components[QueryStringConstants.SIGNED_SIGNATURE][0])

        # Act
        with LogCaptured(self) as log_captured:
            dest_blob.start_copy_from_url(
                self.source_blob_url,
                requires_sync=True,
                logging_enable=True)
            log_as_str = log_captured.getvalue()

            # Assert
            # make sure the query parameter 'sig' is logged, but its value is not
            self.assertTrue(
                QueryStringConstants.SIGNED_SIGNATURE in log_as_str)
            self.assertFalse(signed_signature in log_as_str)

            # make sure authorization header is logged, but its value is not
            # the keyword SharedKey is present in the authorization header's value
            self.assertTrue(_AUTHORIZATION_HEADER_NAME in log_as_str)
            self.assertFalse('SharedKey' in log_as_str)
def delete_container(self, account_name, account_key, container_name):
    """Delete ``container_name`` from the storage account ``account_name``.

    A new ``BlobServiceClient`` is built for
    ``https://<account_name>.blob.core.windows.net`` using ``account_key`` as
    the credential, and the delete request is issued through that client.
    Any exception raised by the client propagates to the caller.
    """
    endpoint = "https://{}.blob.core.windows.net".format(account_name)
    service = BlobServiceClient(account_url=endpoint, credential=account_key)
    service.delete_container(container_name)