def test_account_sas(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return

    file_name = self._get_file_reference()
    # create a file under root directory
    self._create_file_and_return_client(file=file_name)

    # generate a token with file level read permission
    token = generate_account_sas(
        self.dsc.account_name,
        self.dsc.credential.account_key,
        ResourceTypes(file_system=True, object=True),
        AccountSasPermissions(read=True),
        datetime.utcnow() + timedelta(hours=1),
    )

    # read the created file which is under root directory
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
    properties = file_client.get_file_properties()

    # make sure we can read the file properties
    self.assertIsNotNone(properties)

    # try to write to the created file with the token
    with self.assertRaises(StorageErrorException):
        file_client.append_data(b"abcd", 0, 4)

def test_account_sas(self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    # SAS URL is calculated from storage key, so this test runs live only
    file_name = self._get_file_reference()
    # create a file under root directory
    self._create_file_and_return_client(file=file_name)

    # generate a token with file level read permission
    token = generate_account_sas(
        self.dsc.account_name,
        self.dsc.credential.account_key,
        ResourceTypes(file_system=True, object=True),
        AccountSasPermissions(read=True),
        datetime.utcnow() + timedelta(hours=1),
    )

    for credential in [token, AzureSasCredential(token)]:
        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                         credential=credential)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(HttpResponseError):
            file_client.append_data(b"abcd", 0, 4)

def test_read_file_with_user_delegation_key(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return

    # Create file
    file_client = self._create_file_and_return_client()
    data = self.get_random_bytes(1024)

    # Upload data to file
    file_client.append_data(data, 0, len(data))
    file_client.flush_data(len(data))

    # Get user delegation key
    token_credential = self.generate_oauth_token()
    service_client = DataLakeServiceClient(self._get_oauth_account_url(), credential=token_credential)
    user_delegation_key = service_client.get_user_delegation_key(datetime.utcnow(),
                                                                 datetime.utcnow() + timedelta(hours=1))

    sas_token = generate_file_sas(file_client.account_name,
                                  file_client.file_system_name,
                                  None,
                                  file_client.path_name,
                                  user_delegation_key,
                                  permission=FileSasPermissions(read=True, create=True,
                                                                write=True, delete=True),
                                  expiry=datetime.utcnow() + timedelta(hours=1),
                                  )

    # download the data and make sure it is the same as the uploaded data
    new_file_client = DataLakeFileClient(self._get_account_url(),
                                         file_client.file_system_name,
                                         file_client.path_name,
                                         credential=sas_token)
    downloaded_data = new_file_client.download_file().readall()
    self.assertEqual(data, downloaded_data)

def test_rename_file_with_file_system_sas(self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    # sas token is calculated from storage key, so live only
    token = generate_file_system_sas(
        self.dsc.account_name,
        self.file_system_name,
        self.dsc.credential.account_key,
        FileSystemSasPermissions(write=True, read=True, delete=True),
        datetime.utcnow() + timedelta(hours=1),
    )

    # create a file under the root directory with the token
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile", credential=token)
    file_client.create_file()
    data_bytes = b"abc"
    file_client.append_data(data_bytes, 0, 3)
    file_client.flush_data(3)
    new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')

    data = new_client.download_file().readall()
    self.assertEqual(data, data_bytes)
    self.assertEqual(new_client.path_name, "newname")

def test_rename_file_with_account_sas(self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    pytest.skip("service bug")
    token = generate_account_sas(
        self.dsc.account_name,
        self.dsc.credential.account_key,
        ResourceTypes(object=True),
        AccountSasPermissions(write=True, read=True, create=True, delete=True),
        datetime.utcnow() + timedelta(hours=5),
    )

    # create a file under the root directory with the token
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile", credential=token)
    file_client.create_file()
    data_bytes = b"abc"
    file_client.append_data(data_bytes, 0, 3)
    file_client.flush_data(3)
    new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname')

    data = new_client.download_file().readall()
    self.assertEqual(data, data_bytes)
    self.assertEqual(new_client.path_name, "newname")

def test_rename_file_with_file_sas(self):
    # SAS URL is calculated from storage key, so this test runs live only
    if TestMode.need_recording_file(self.test_mode):
        return
    token = generate_file_sas(self.dsc.account_name,
                              self.file_system_name,
                              None,
                              "oldfile",
                              self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
                              permission=FileSasPermissions(read=True, create=True,
                                                            write=True, delete=True),
                              expiry=datetime.utcnow() + timedelta(hours=1),
                              )
    new_token = generate_file_sas(self.dsc.account_name,
                                  self.file_system_name,
                                  None,
                                  "newname",
                                  self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
                                  permission=FileSasPermissions(read=True, create=True,
                                                                write=True, delete=True),
                                  expiry=datetime.utcnow() + timedelta(hours=1),
                                  )

    # create a file under the root directory with the token
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile", credential=token)
    file_client.create_file()
    data_bytes = b"abc"
    file_client.append_data(data_bytes, 0, 3)
    file_client.flush_data(3)
    # rename the file; the destination path carries its own SAS token
    new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname' + '?' + new_token)

    data = new_client.download_file().readall()
    self.assertEqual(data, data_bytes)
    self.assertEqual(new_client.path_name, "newname")

def blob_delete(self, storage_account_id: str, container: str, key: str):
    # The resource ID has the form
    # /subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Storage/storageAccounts/<name>
    tokens = storage_account_id.split('/')
    subscription_id = tokens[2]
    resource_group_name = tokens[4]
    storage_account_name = tokens[8]
    storage_client = self.get_storage_client(subscription_id)
    storage_account = storage_client.storage_accounts.get_properties(resource_group_name,
                                                                     storage_account_name)
    if storage_account.is_hns_enabled:
        # hierarchical-namespace accounts are addressed through the DFS endpoint
        account_url = f'https://{storage_account_name}.dfs.core.windows.net/'
        file_client = DataLakeFileClient(account_url, container, key, credential=self.credential)
        file_client.delete_file()

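# blob_delete above only covers hierarchical-namespace (HNS) accounts. For
# flat-namespace accounts, a delete through the blob endpoint is the usual
# counterpart. A minimal sketch, assuming azure-storage-blob is installed;
# blob_delete_flat is a hypothetical helper, not part of the source.
from azure.storage.blob import BlobClient

def blob_delete_flat(storage_account_name: str, container: str, key: str, credential) -> None:
    # flat-namespace accounts are addressed through the blob endpoint
    account_url = f'https://{storage_account_name}.blob.core.windows.net/'
    blob_client = BlobClient(account_url, container, key, credential=credential)
    blob_client.delete_blob()
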
def test_file_sas_only_applies_to_file_level(self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    # SAS URL is calculated from storage key, so this test runs live only
    file_name = self._get_file_reference()
    directory_name = self._get_directory_reference()
    self._create_file_and_return_client(directory=directory_name, file=file_name)

    # generate a token with file level read and write permissions
    token = generate_file_sas(
        self.dsc.account_name,
        self.file_system_name,
        directory_name,
        file_name,
        self.dsc.credential.account_key,
        permission=FileSasPermissions(read=True, write=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    # read the created file which is under root directory
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name,
                                     directory_name + '/' + file_name, credential=token)
    properties = file_client.get_file_properties()

    # make sure we can read the file properties
    self.assertIsNotNone(properties)

    # try to write to the created file with the token
    response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
    self.assertIsNotNone(response)

    # the token is for file level, so users are not supposed to have access to file system level operations
    file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
    with self.assertRaises(ClientAuthenticationError):
        file_system_client.get_file_system_properties()

    # the token is for file level, so users are not supposed to have access to directory level operations
    directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name, directory_name,
                                               credential=token)
    with self.assertRaises(ClientAuthenticationError):
        directory_client.get_directory_properties()

def test_create_file_using_oauth_token_credential(self):
    # Arrange
    file_name = self._get_file_reference()
    token_credential = self.generate_oauth_token()

    # Create a file client authenticated with the OAuth token credential
    file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                     credential=token_credential)
    response = file_client.create_file()

    # Assert
    self.assertIsNotNone(response)

def test_preauthorize_user_with_user_delegation_key(
        self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    # SAS URL is calculated from storage key, so this test runs live only
    # Create file
    file_client = self._create_file_and_return_client()
    data = self.get_random_bytes(1024)

    # Upload data to file
    file_client.append_data(data, 0, len(data))
    file_client.flush_data(len(data))
    file_client.set_access_control(owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777')
    acl = file_client.get_access_control()

    # Get user delegation key
    token_credential = self.generate_oauth_token()
    service_client = DataLakeServiceClient(self._get_account_url(datalake_storage_account_name),
                                           credential=token_credential)
    user_delegation_key = service_client.get_user_delegation_key(datetime.utcnow(),
                                                                 datetime.utcnow() + timedelta(hours=1))

    sas_token = generate_file_sas(file_client.account_name,
                                  file_client.file_system_name,
                                  None,
                                  file_client.path_name,
                                  user_delegation_key,
                                  permission=FileSasPermissions(read=True, write=True,
                                                                manage_access_control=True,
                                                                manage_ownership=True),
                                  expiry=datetime.utcnow() + timedelta(hours=1),
                                  preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc"
                                  )

    # the SAS preauthorizes the owner's object id, so the new client can change permissions
    new_file_client = DataLakeFileClient(self._get_account_url(datalake_storage_account_name),
                                         file_client.file_system_name,
                                         file_client.path_name,
                                         credential=sas_token)
    acl = new_file_client.set_access_control(permissions='0777')
    self.assertIsNotNone(acl)

def blob_upload(self, storage_account_id: str, file: io.BytesIO, container: str, key: str, tags: dict):
    log.debug(f'Storage account for upload is {storage_account_id}')
    log.debug(f'Upload key is {key}')
    tokens = storage_account_id.split('/')
    subscription_id = tokens[2]
    resource_group_name = tokens[4]
    storage_account_name = tokens[8]
    storage_client = self.get_storage_client(subscription_id)
    storage_account = storage_client.storage_accounts.get_properties(resource_group_name,
                                                                     storage_account_name)
    log.debug(f'Storage account kind is {storage_account.kind}')
    log.debug(f'Hierarchical namespace enabled: {storage_account.is_hns_enabled}')
    if storage_account.is_hns_enabled:
        account_url = f'https://{storage_account_name}.dfs.core.windows.net/'
        file_client = DataLakeFileClient(account_url, container, key, credential=self.credential)
        log.debug(f'file_client pointing to {file_client.url}')
        file_client.upload_data(file, overwrite=True, metadata=tags)

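# A hypothetical call site for blob_upload; the resource ID, container, key
# and tags below are placeholders, and `uploader` stands in for whatever
# object exposes blob_upload and get_storage_client.
import io

storage_account_id = (
    '/subscriptions/<sub-id>/resourceGroups/<rg>/providers/'
    'Microsoft.Storage/storageAccounts/<account>'
)
with open('report.json', 'rb') as fh:
    uploader.blob_upload(
        storage_account_id,
        io.BytesIO(fh.read()),
        container='raw',
        key='reports/report.json',
        tags={'source': 'nightly-job'},
    )
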
def test_set_acl_with_user_delegation_key(self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    # SAS URL is calculated from storage key, so this test runs live only
    # Create file
    file_client = self._create_file_and_return_client()
    data = self.get_random_bytes(1024)

    # Upload data to file
    file_client.append_data(data, 0, len(data))
    file_client.flush_data(len(data))

    # Get user delegation key
    token_credential = self.generate_oauth_token()
    service_client = DataLakeServiceClient(self._get_account_url(datalake_storage_account_name),
                                           credential=token_credential)
    user_delegation_key = service_client.get_user_delegation_key(datetime.utcnow(),
                                                                 datetime.utcnow() + timedelta(hours=1))

    sas_token = generate_file_sas(file_client.account_name,
                                  file_client.file_system_name,
                                  None,
                                  file_client.path_name,
                                  user_delegation_key,
                                  permission=FileSasPermissions(execute=True,
                                                                manage_access_control=True,
                                                                manage_ownership=True),
                                  expiry=datetime.utcnow() + timedelta(hours=1),
                                  )

    # use the SAS token to set the ACL and owner on the file
    new_file_client = DataLakeFileClient(self._get_account_url(datalake_storage_account_name),
                                         file_client.file_system_name,
                                         file_client.path_name,
                                         credential=sas_token)
    acl = 'user::rwx,group::r-x,other::rwx'
    owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
    new_file_client.set_access_control(acl=acl, owner=owner)

    access_control = new_file_client.get_access_control()
    self.assertEqual(acl, access_control['acl'])
    self.assertEqual(owner, access_control['owner'])

def _get_file_client(self, storage_account_url: str, file_system: str, file_path: str,
                     credential: Union[DefaultAzureCredential, str]) -> DataLakeFileClient:
    return DataLakeFileClient(storage_account_url, file_system, file_path, credential=credential)

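# Minimal usage sketch for _get_file_client; the URL, file system and path
# are placeholders, `helper` is the hypothetical owning object, and
# DefaultAzureCredential assumes azure-identity is installed.
from azure.identity import DefaultAzureCredential

file_client = helper._get_file_client(
    'https://myaccount.dfs.core.windows.net/',
    'myfilesystem',
    'mydirectory/myfile.json',
    DefaultAzureCredential(),
)
print(file_client.get_file_properties().size)
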
def test_account_sas_raises_if_sas_already_in_uri(
        self, datalake_storage_account_name, datalake_storage_account_key):
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)
    with self.assertRaises(ValueError):
        DataLakeFileClient(self.dsc.url + "?sig=foo",
                           self.file_system_name,
                           "foo",
                           credential=AzureSasCredential("?foo=bar"))

def __get_validation_schema(file_client: DataLakeFileClient) -> Dict:
    try:
        file_client.get_file_properties()
    except ResourceNotFoundError as error:
        message = f'({type(error).__name__}) The expected JSON Schema does not exist: {error}'
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=message) from error

    try:
        stream = file_client.download_file()
    except HttpResponseError as error:
        message = f'({type(error).__name__}) Schema could not be retrieved for validation: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error

    try:
        schema = json.loads(stream.readall().decode())
    except json.JSONDecodeError as error:
        message = f'({type(error).__name__}) Malformed schema JSON: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error

    return schema

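# A hypothetical FastAPI route built on __get_validation_schema; the route,
# file-system name and path are placeholders, and build_file_client stands in
# for however the application constructs its DataLakeFileClient.
from fastapi import FastAPI

app = FastAPI()

@app.get('/schemas/{name}')
def read_schema(name: str) -> Dict:
    file_client = build_file_client('schemas', f'{name}.json')  # hypothetical factory
    return __get_validation_schema(file_client)
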
def instantiate_file_client_from_conn_str():
    # [START instantiate_file_client_from_conn_str]
    from azure.storage.filedatalake import DataLakeFileClient
    # connection_string is assumed to be defined elsewhere in the sample,
    # e.g. read from an environment variable
    DataLakeFileClient.from_connection_string(connection_string, "myfilesystem", "mydirectory", "myfile")
    # [END instantiate_file_client_from_conn_str]
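
# The connection string above is a placeholder; a typical Azure Storage
# connection string (with placeholder values) looks like:
connection_string = (
    'DefaultEndpointsProtocol=https;'
    'AccountName=myaccount;'
    'AccountKey=<base64-account-key>;'
    'EndpointSuffix=core.windows.net'
)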