def __init__(self, datalake_dir, credential=None):
    """Bind this instance to an ADLS Gen2 directory given a combined URI.

    :param datalake_dir: location string of the form
        ``<scheme>://<filesystem>@<account-host>/<sub>/<dirs...>`` — it is
        tokenized on ``/`` and ``@``, so token[1] is the file system name and
        token[2] is the account host. Trailing path segments (if any) name a
        sub-directory which is created on the service.
    :param credential: optional credential; defaults to
        ``DefaultAzureCredential()`` when omitted.
    """
    # Tokenize the URI and drop empty fragments produced by '//' or a
    # trailing slash.
    parts = [segment for segment in re.split('/|@', datalake_dir) if segment]
    # Need at least scheme, file system and account host to proceed.
    assert len(parts) >= 3
    if credential is None:
        credential = DefaultAzureCredential()
    service = DataLakeServiceClient(
        credential=credential, account_url="https://" + parts[2])
    client = service.get_file_system_client(parts[1])
    if len(parts) > 3:
        # A sub-directory path was supplied: scope the client to it and make
        # sure the directory exists on the service.
        client = client.get_directory_client('/'.join(parts[3:]))
        client.create_directory()
    # Normalize the stored location so it always ends with a slash.
    if datalake_dir[-1] != '/':
        self.datalake_dir = datalake_dir + '/'
    else:
        self.datalake_dir = datalake_dir
    self.dir_client = client
class FileTest(StorageTestCase):
    """Live/recorded tests for ``DataLakeFileClient`` operations: create,
    append/flush, upload, download, SAS access, ACLs, properties and rename.

    Each test provisions its own file system via ``_setUp`` and tears it
    down in ``tearDown``.
    """

    def _setUp(self, account_name, account_key):
        # Build a service client for the test account and create a fresh,
        # uniquely named file system to isolate this test run.
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        # Best-effort cleanup of the per-test file system; only meaningful
        # when running live.
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Cleanup is best-effort, but don't swallow system-exiting
                # exceptions (KeyboardInterrupt/SystemExit) like a bare
                # ``except:`` would.
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a unique directory name for this test."""
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return a unique file name for this test."""
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        """Create (and return a client for) a directory; generates a name
        when none is supplied."""
        directory_name = directory if directory else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file (optionally under a new directory) and return its
        client. Note: with no directory the path gets a leading '/', which
        the service treats as the file system root."""
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_file_exists(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client1 = directory_client.get_file_client('filename')
        file_client2 = directory_client.get_file_client('nonexistentfile')
        file_client1.create_file()

        self.assertTrue(file_client1.exists())
        self.assertFalse(file_client2.exists())

    @DataLakePreparer()
    def test_create_file_using_oauth_token_credential(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_create_file_with_existing_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_create_file_with_lease_id(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')

        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @DataLakePreparer()
    def test_create_file_under_root_directory(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_data(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_empty_data(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        # Act
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        # A zero-byte flush must leave a zero-length file.
        # (Was ``assertIsNotNone(file_props['size'], 0)``, where the 0 was
        # silently interpreted as the assertion *message*.)
        self.assertEqual(file_props['size'], 0)

    @DataLakePreparer()
    def test_flush_data(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @DataLakePreparer()
    def test_flush_data_with_match_condition(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(
            3, etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(
                6, etag=resp['etag'],
                match_condition=MatchConditions.IfNotModified)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_to_none_existing_file(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)

        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_in_substreams(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Get 16MB data
        data = self.get_random_bytes(16 * 1024 * 1024)
        # Ensure chunk size is greater than threshold (8MB > 4MB) - for optimized upload
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True,
                                max_concurrency=3)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

        # Run on single thread
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file(self,
                                          datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file
        data = self.get_random_bytes(100)
        # Without overwrite=True the upload must refuse to clobber the file.
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_content_settings(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        file_client.upload_data(data,
                                max_concurrency=5,
                                content_settings=content_settings,
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_permission_and_umask(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @DataLakePreparer()
    def test_read_file(self, datalake_storage_account_name,
                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_read_file_with_user_delegation_key(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential,
            logging_enable=True)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # download the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token,
            logging_enable=True)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_set_acl_with_user_delegation_key(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(execute=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Set ACL through the SAS-authenticated client and verify it stuck.
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)
        acl = 'user::rwx,group::r-x,other::rwx'
        owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
        new_file_client.set_access_control(acl=acl, owner=owner)
        access_control = new_file_client.get_access_control()

        self.assertEqual(acl, access_control['acl'])
        self.assertEqual(owner, access_control['owner'])

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_preauthorize_user_with_user_delegation_key(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))
        file_client.set_access_control(
            owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777')
        acl = file_client.get_access_control()

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          write=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc"
        )

        # The preauthorized agent should be allowed to change permissions.
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)

        acl = new_file_client.set_access_control(permissions='0777')

        self.assertIsNotNone(acl)

    @DataLakePreparer()
    def test_read_file_into_file(self, datalake_storage_account_name,
                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @DataLakePreparer()
    def test_read_file_to_text(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(
            max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_account_sas(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        for credential in [token, AzureSasCredential(token)]:
            # read the created file which is under root directory
            file_client = DataLakeFileClient(self.dsc.url,
                                             self.file_system_name,
                                             file_name,
                                             credential=credential)
            properties = file_client.get_file_properties()

            # make sure we can read the file properties
            self.assertIsNotNone(properties)

            # try to write to the created file with the token
            with self.assertRaises(HttpResponseError):
                file_client.append_data(b"abcd", 0, 4)

    @DataLakePreparer()
    def test_account_sas_raises_if_sas_already_in_uri(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Supplying a SAS credential on top of a URL that already carries a
        # signature must be rejected.
        with self.assertRaises(ValueError):
            DataLakeFileClient(self.dsc.url + "?sig=foo",
                               self.file_system_name,
                               "foo",
                               credential=AzureSasCredential("?foo=bar"))

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_file_sas_only_applies_to_file_level(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @DataLakePreparer()
    def test_delete_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_delete_file_with_if_unmodified_since(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_set_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_with_match_conditions(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(
                permissions='0777', match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_get_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_get_access_control_with_if_modified_since(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_recursive(self,
                                          datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.set_access_control_recursive(acl=acl)

        # Assert: a single file target, so exactly one file touched.
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_update_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.update_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_remove_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = "mask," + "default:user,default:group," + \
              "user:ec3595d6-2c17-4696-8caa-7e139758d24a,group:ec3595d6-2c17-4696-8caa-7e139758d24a," + \
              "default:user:ec3595d6-2c17-4696-8caa-7e139758d24a,default:group:ec3595d6-2c17-4696-8caa-7e139758d24a"
        file_client = self._create_file_and_return_client()
        summary = file_client.remove_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)

    @DataLakePreparer()
    def test_get_properties(self, datalake_storage_account_name,
                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_set_expiry(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()
        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        expires_on = datetime.utcnow() + timedelta(hours=1)
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.set_file_expiry("Absolute", expires_on=expires_on)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.expiry_time)

    @DataLakePreparer()
    def test_rename_file_with_non_used_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_system_sas(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # sas token is calculated from storage key, so live only
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_sas(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "oldfile",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        new_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "newname",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # The rename destination carries its own SAS token.
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname' + '?' +
                                             new_token)

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_with_account_sas(self,
                                          datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        pytest.skip("service bug")
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            AccountSasPermissions(write=True,
                                  read=True,
                                  create=True,
                                  delete=True),
            datetime.utcnow() + timedelta(hours=5),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_to_existing_file(self,
                                          datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        data = new_client.download_file().readall()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @DataLakePreparer()
    def test_rename_file_will_not_change_existing_directory(
            self, datalake_storage_account_name,
            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create non-empty directory (with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another non-empty directory (with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        # the source of the rename must be gone
        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
class DirectoryTest(StorageTestCase):
    """Recorded tests for DataLake directory operations (create/delete, ACLs,
    sub-directories, rename, SAS and OAuth access).

    Each test provisions a uniquely named file system in ``setUp`` (live runs
    only) and ``tearDown`` best-effort deletes everything it created.
    """

    def setUp(self):
        super(DirectoryTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            # Best-effort cleanup: some tests create extra file systems, so sweep
            # them all.  Catch Exception (not bare except) so Ctrl-C / SystemExit
            # still propagate.
            try:
                self.dsc.delete_file_system(self.file_system_name)
                for file_system in self.dsc.list_file_systems():
                    self.dsc.delete_file_system(file_system.name)
            except Exception:
                pass
        return super(DirectoryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a unique directory name derived from *prefix*."""
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _create_directory_and_get_directory_client(self, directory_name=None):
        """Create (and return a client for) *directory_name*, generating a
        unique name when none is given."""
        directory_name = directory_name if directory_name else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_system(self):
        # NOTE(review): _get_file_system_reference is not defined in this class —
        # presumably inherited or dead code; confirm before relying on this helper.
        return self.dsc.create_file_system(self._get_file_system_reference())

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_using_oauth_token_credential_to_create_directory(self):
        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token_credential = self.generate_oauth_token()
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token_credential)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)

    @record
    def test_create_directory_with_match_conditions(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Act: IfMissing means "only create when the directory does not exist yet"
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            match_condition=MatchConditions.IfMissing)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_permission(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Act: umask 0000 so the requested permissions are applied verbatim
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(permissions="rwxr--r--",
                                                    umask="0000")
        prop = directory_client.get_access_control()

        # Assert
        self.assertTrue(created)
        self.assertEqual(prop['permissions'], 'rwxr--r--')

    @record
    def test_create_directory_with_content_settings(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_metadata(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}

        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(metadata=metadata)
        properties = directory_client.get_directory_properties()

        # Assert: metadata set at creation time must round-trip
        self.assertTrue(created)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])

    @record
    def test_delete_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.delete_directory()

        # Assert: delete_directory returns None on success
        self.assertIsNone(response)

    @record
    def test_delete_directory_with_if_modified_since(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        prop = directory_client.get_directory_properties()

        # Deleting with if_modified_since == its own last_modified must fail:
        # the directory has not been modified since then.
        with self.assertRaises(ResourceModifiedError):
            directory_client.delete_directory(
                if_modified_since=prop['last_modified'])

    @record
    def test_create_sub_directory_and_delete_sub_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}

        # Create a directory first, to prepare for creating sub directory
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        # Create sub directory from the current directory
        sub_directory_name = 'subdir'
        sub_directory_created = directory_client.create_sub_directory(
            sub_directory_name)

        # to make sure the sub directory was indeed created by get sub_directory
        # properties from sub directory client
        sub_directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name + '/' + sub_directory_name)
        sub_properties = sub_directory_client.get_directory_properties()

        # Assert
        self.assertTrue(sub_directory_created)
        self.assertTrue(sub_properties)

        # Act: after deletion the sub directory must be gone
        directory_client.delete_sub_directory(sub_directory_name)
        with self.assertRaises(ResourceNotFoundError):
            sub_directory_client.get_directory_properties()

    @record
    def test_set_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_acl(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()

        # Assert: the ACL string must round-trip unchanged
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @record
    def test_set_access_control_if_none_modified(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory()

        # Conditional on the creation etag: succeeds because nothing changed since
        response = directory_client.set_access_control(
            permissions='0777',
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata,
                                          permissions='0777')

        # Act
        response = directory_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_match_conditions(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory(permissions='0777',
                                                 umask='0000')

        # Act
        response = directory_client.get_access_control(
            etag=resp['etag'], match_condition=MatchConditions.IfNotModified)

        # Assert: octal 0777 renders as the symbolic form
        self.assertIsNotNone(response)
        self.assertEqual(response['permissions'], 'rwxrwxrwx')

    @record
    def test_rename_from(self):
        metadata = {'hello': 'world', 'number': '42'}
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        new_name = "newname"
        new_directory_client = self.dsc.get_directory_client(
            self.file_system_name, new_name)

        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name, metadata=metadata)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_shorter_directory_to_longer_directory(self):
        # TODO: investigate why rename shorter path to a longer one does not work
        pytest.skip("")
        directory_name = self._get_directory_reference()
        self._create_directory_and_get_directory_client(directory_name="old")

        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")

        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        old_file_system_name = "oldfilesystem"
        old_dir_name = "olddir"
        old_client = self.dsc.get_file_system_client(old_file_system_name)
        old_client.create_file_system()
        old_client.create_directory(old_dir_name)

        # create a dir2 under file system2
        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")

        # rename dir1 under file system1 to dir2 under file system2
        new_directory_client._rename_path('/' + old_file_system_name + '/' +
                                          old_dir_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)
        self.dsc.delete_file_system(old_file_system_name)

    @record
    def test_rename_to_an_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = "destfilesystem"
        destination_dir_name = "destdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        destination_directory_client = fs_client.create_directory(
            destination_dir_name)

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + destination_dir_name)

        # the source directory has been renamed to destination directory, so it
        # cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEqual(res.url, destination_directory_client.url)

    @record
    def test_rename_with_none_existing_destination_condition_and_source_unmodified_condition(
            self):
        non_existing_dir_name = "nonexistingdir"

        # create a file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to a non existing directory under file
        # system1, when dir1 does not exist and dir2 wasn't modified
        etag = source_directory_client.get_directory_properties()['etag']
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name,
            match_condition=MatchConditions.IfMissing,
            source_etag=etag,
            source_match_condition=MatchConditions.IfNotModified)

        # the source directory has been renamed to destination directory, so it
        # cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEqual(non_existing_dir_name, res.path_name)

    @record
    def test_rename_to_an_non_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        non_existing_dir_name = "nonexistingdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name)

        # the source directory has been renamed to destination directory, so it
        # cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEqual(non_existing_dir_name, res.path_name)

    @record
    def test_rename_directory_to_non_empty_directory(self):
        # TODO: investigate why rename non empty dir doesn't work
        pytest.skip("")
        dir1 = self._create_directory_and_get_directory_client("dir1")
        dir1.create_sub_directory("subdir")

        dir2 = self._create_directory_and_get_directory_client("dir2")
        dir2.rename_directory(dir1.file_system_name + '/' + dir1.path_name)

        with self.assertRaises(HttpResponseError):
            dir2.get_directory_properties()

    @record
    def test_get_properties(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()

        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.metadata)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])

    @record
    def test_using_directory_sas_to_read(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        client = self._create_directory_and_get_directory_client()
        directory_name = client.path_name

        # generate a token with directory level read permission
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)

        access_control = directory_client.get_access_control()

        self.assertIsNotNone(access_control)

    @record
    def test_using_directory_sas_to_create(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
def data_lake_service_sample(self):
    """Walk through the DataLakeServiceClient surface: construction (connection
    string and AAD), user delegation keys, file-system CRUD and listing, and
    obtaining directory/file clients.  Requires a live storage account."""

    # Build a service client from a connection string.
    # [START create_datalake_service_client]
    from azure.storage.filedatalake import DataLakeServiceClient
    datalake_service_client = DataLakeServiceClient.from_connection_string(
        self.connection_string)
    # [END create_datalake_service_client]

    # Build a service client from Azure Identity (AAD) credentials instead.
    # [START create_datalake_service_client_oauth]
    from azure.identity import ClientSecretCredential
    token_credential = ClientSecretCredential(
        self.active_directory_tenant_id,
        self.active_directory_application_id,
        self.active_directory_application_secret,
    )
    datalake_service_client = DataLakeServiceClient(
        "https://{}.dfs.core.windows.net".format(self.account_name),
        credential=token_credential)
    # [END create_datalake_service_client_oauth]

    # Obtain a user delegation key valid for one hour.
    # [START get_user_delegation_key]
    from datetime import datetime, timedelta
    user_delegation_key = datalake_service_client.get_user_delegation_key(
        datetime.utcnow(),
        datetime.utcnow() + timedelta(hours=1))
    # [END get_user_delegation_key]

    # Create two file systems; the second call returns a FileSystemClient.
    # [START create_file_system_from_service_client]
    datalake_service_client.create_file_system("filesystem")
    # [END create_file_system_from_service_client]
    file_system_client = datalake_service_client.create_file_system(
        "anotherfilesystem")

    # Enumerate every file system in the account.
    # [START list_file_systems]
    file_systems = datalake_service_client.list_file_systems()
    for file_system in file_systems:
        print(file_system.name)
    # [END list_file_systems]

    # Derive narrower clients from the service client.
    file_system_client = datalake_service_client.get_file_system_client(
        file_system_client.file_system_name)
    # [START get_directory_client_from_service_client]
    directory_client = datalake_service_client.get_directory_client(
        file_system_client.file_system_name, "mydirectory")
    # [END get_directory_client_from_service_client]
    # [START get_file_client_from_service_client]
    file_client = datalake_service_client.get_file_client(
        file_system_client.file_system_name, "myfile")
    # [END get_file_client_from_service_client]

    # Create a file, then attach content settings and metadata to it.
    metadata = {'hello': 'world', 'number': '42'}
    from azure.storage.filedatalake import ContentSettings
    content_settings = ContentSettings(content_language='spanish',
                                       content_disposition='inline')
    file_client.create_file(content_settings=content_settings)
    file_client.set_metadata(metadata=metadata)
    file_props = file_client.get_file_properties()
    print(file_props.metadata)

    # Same for a directory: properties can be supplied at creation time.
    directory_client.create_directory(content_settings=content_settings,
                                      metadata=metadata)
    dir_props = directory_client.get_directory_properties()
    print(dir_props.metadata)

    # Tear down both file systems — from the service client and from the
    # file-system client itself.
    # [START delete_file_system_from_service_client]
    datalake_service_client.delete_file_system("filesystem")
    # [END delete_file_system_from_service_client]
    file_system_client.delete_file_system()
# NOTE(review): whitespace-mangled source — entire class body collapsed onto a few
# physical lines; kept byte-identical, comments added only.  This is a second
# FileTest variant (record/playback style setUp) distinct from the @DataLakePreparer
# FileTest at the top of the file.  The final method on the last line below
# (test_get_access_control_with_if_modified_since) continues beyond this chunk.
#
# Next line: class header, setUp (creates a uniquely named file system on live runs),
# tearDown (best-effort cleanup — NOTE(review): bare `except:` also swallows
# KeyboardInterrupt/SystemExit; `except Exception:` would be safer), naming/creation
# helpers, and test_create_file (create a directory, then a file under it).
class FileTest(StorageTestCase): def setUp(self): super(FileTest, self).setUp() url = self._get_account_url() self.dsc = DataLakeServiceClient( url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY, logging_enable=True) self.config = self.dsc._config self.file_system_name = self.get_resource_name('filesystem') if not self.is_playback(): file_system = self.dsc.get_file_system_client( self.file_system_name) try: file_system.create_file_system(timeout=5) except ResourceExistsError: pass def tearDown(self): if not self.is_playback(): try: self.dsc.delete_file_system(self.file_system_name) except: pass return super(FileTest, self).tearDown() # --Helpers----------------------------------------------------------------- def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX): directory_name = self.get_resource_name(prefix) return directory_name def _get_file_reference(self, prefix=TEST_FILE_PREFIX): file_name = self.get_resource_name(prefix) return file_name def _create_file_system(self): return self.dsc.create_file_system(self._get_file_system_reference()) def _create_directory_and_return_client(self, directory=None): directory_name = directory if directory else self._get_directory_reference( ) directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() return directory_client def _create_file_and_return_client(self, directory="", file=None): if directory: self._create_directory_and_return_client(directory) if not file: file = self._get_file_reference() file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file) file_client.create_file() return file_client # --Helpers----------------------------------------------------------------- @record def test_create_file(self): # Arrange directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) 
# Next line: end of test_create_file, then OAuth-credential file creation,
# create-on-existing-file (expects ResourceExistsError with IfMissing), create with
# an acquired lease (verifies etag/last_modified against properties), create under
# the root directory, and the start of test_append_data.
directory_client.create_directory() file_client = directory_client.get_file_client('filename') response = file_client.create_file() # Assert self.assertIsNotNone(response) @record def test_create_file_using_oauth_token_credential(self): # Arrange file_name = self._get_file_reference() token_credential = self.generate_oauth_token() # Create a directory to put the file under that file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token_credential) response = file_client.create_file() # Assert self.assertIsNotNone(response) @record def test_create_file_with_existing_name(self): # Arrange file_client = self._create_file_and_return_client() with self.assertRaises(ResourceExistsError): # if the file exists then throw error # if_none_match='*' is to make sure no existing file file_client.create_file(match_condition=MatchConditions.IfMissing) @record def test_create_file_with_lease_id(self): # Arrange directory_name = self._get_directory_reference() directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') # Act file_client.create_file() lease = file_client.acquire_lease() create_resp = file_client.create_file(lease=lease) # Assert file_properties = file_client.get_file_properties() self.assertIsNotNone(file_properties) self.assertEqual(file_properties.etag, create_resp.get('etag')) self.assertEqual(file_properties.last_modified, create_resp.get('last_modified')) @record def test_create_file_under_root_directory(self): # Arrange # get a file client to interact with the file under root directory file_client = self.dsc.get_file_client(self.file_system_name, "filename") response = file_client.create_file() # Assert self.assertIsNotNone(response) @record def test_append_data(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = 
# Next line: append/flush tests.
# NOTE(review): test_append_empty_data calls `self.assertIsNotNone(file_props['size'], 0)`
# — assertIsNotNone's second argument is the failure *message*, so the 0 is inert;
# this almost certainly meant `self.assertEqual(file_props['size'], 0)`.  TODO confirm
# and fix.  Also covers flush with etag/IfNotModified match conditions (second flush
# after extra appends must raise ResourceModifiedError).
self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') file_client.create_file() # Act response = file_client.append_data(b'abc', 0, 3) self.assertIsNotNone(response) @record def test_append_empty_data(self): file_client = self._create_file_and_return_client() # Act file_client.flush_data(0) file_props = file_client.get_file_properties() self.assertIsNotNone(file_props['size'], 0) @record def test_flush_data(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') file_client.create_file() # Act file_client.append_data(b'abc', 0, 3) response = file_client.flush_data(3) # Assert prop = file_client.get_file_properties() self.assertIsNotNone(response) self.assertEqual(prop['size'], 3) @record def test_flush_data_with_match_condition(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') resp = file_client.create_file() # Act file_client.append_data(b'abc', 0, 3) # flush is successful because it isn't touched response = file_client.flush_data( 3, etag=resp['etag'], match_condition=MatchConditions.IfNotModified) file_client.append_data(b'abc', 3, 3) with self.assertRaises(ResourceModifiedError): # flush is unsuccessful because extra data were appended. 
# Next line: upload_data tests — to a non-existing file (parallel upload, live only),
# to an existing file (overwrite required, otherwise HttpResponseError), and the start
# of upload with content settings.
file_client.flush_data( 6, etag=resp['etag'], match_condition=MatchConditions.IfNotModified) def test_upload_data_to_none_existing_file(self): # parallel upload cannot be recorded if TestMode.need_recording_file(self.test_mode): return directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() file_client = directory_client.get_file_client('filename') data = self.get_random_bytes(200 * 1024) file_client.upload_data(data, overwrite=True, max_concurrency=3) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @record def test_upload_data_to_existing_file(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') file_client.create_file() file_client.append_data(b"abc", 0) file_client.flush_data(3) # to override the existing file data = self.get_random_bytes(100) with self.assertRaises(HttpResponseError): file_client.upload_data(data, max_concurrency=5) file_client.upload_data(data, overwrite=True, max_concurrency=5) downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @record def test_upload_data_to_existing_file_with_content_settings(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') etag = file_client.create_file()['etag'] # to override the existing file data = self.get_random_bytes(100) content_settings = 
# Next line: upload with content settings / permission+umask (verifies round-trip of
# content_language and 'rwxrwxrwx'), plain read-back test, and the start of the
# user-delegation-key read test.
ContentSettings(content_language='spanish', content_disposition='inline') file_client.upload_data(data, max_concurrency=5, content_settings=content_settings, etag=etag, match_condition=MatchConditions.IfNotModified) downloaded_data = file_client.download_file().readall() properties = file_client.get_file_properties() self.assertEqual(data, downloaded_data) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @record def test_upload_data_to_existing_file_with_permission_and_umask(self): directory_name = self._get_directory_reference() # Create a directory to put the file under that directory_client = self.dsc.get_directory_client( self.file_system_name, directory_name) directory_client.create_directory() # create an existing file file_client = directory_client.get_file_client('filename') etag = file_client.create_file()['etag'] # to override the existing file data = self.get_random_bytes(100) file_client.upload_data(data, overwrite=True, max_concurrency=5, permissions='0777', umask="0000", etag=etag, match_condition=MatchConditions.IfNotModified) downloaded_data = file_client.download_file().readall() prop = file_client.get_access_control() # Assert self.assertEqual(data, downloaded_data) self.assertEqual(prop['permissions'], 'rwxrwxrwx') @record def test_read_file(self): file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the data and make sure it is the same as uploaded data downloaded_data = file_client.download_file().readall() self.assertEqual(data, downloaded_data) @record def test_read_file_with_user_delegation_key(self): # SAS URL is calculated from storage key, so this test runs live only if TestMode.need_recording_file(self.test_mode): return # Create file file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # Upload data to file 
# Next line: user-delegation-key SAS read, read into a local file (FILE_PATH),
# read as utf-8 text, and the start of test_account_sas.
file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # Get user delegation key token_credential = self.generate_oauth_token() service_client = DataLakeServiceClient(self._get_oauth_account_url(), credential=token_credential) user_delegation_key = service_client.get_user_delegation_key( datetime.utcnow(), datetime.utcnow() + timedelta(hours=1)) sas_token = generate_file_sas( file_client.account_name, file_client.file_system_name, None, file_client.path_name, user_delegation_key, permission=FileSasPermissions(read=True, create=True, write=True, delete=True), expiry=datetime.utcnow() + timedelta(hours=1), ) # doanload the data and make sure it is the same as uploaded data new_file_client = DataLakeFileClient(self._get_account_url(), file_client.file_system_name, file_client.path_name, credential=sas_token) downloaded_data = new_file_client.download_file().readall() self.assertEqual(data, downloaded_data) @record def test_read_file_into_file(self): file_client = self._create_file_and_return_client() data = self.get_random_bytes(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the data into a file and make sure it is the same as uploaded data with open(FILE_PATH, 'wb') as stream: download = file_client.download_file(max_concurrency=2) download.readinto(stream) # Assert with open(FILE_PATH, 'rb') as stream: actual = stream.read() self.assertEqual(data, actual) @record def test_read_file_to_text(self): file_client = self._create_file_and_return_client() data = self.get_random_text_data(1024) # upload data to file file_client.append_data(data, 0, len(data)) file_client.flush_data(len(data)) # doanload the text data and make sure it is the same as uploaded data downloaded_data = file_client.download_file( max_concurrency=2, encoding="utf-8").readall() # Assert self.assertEqual(data, downloaded_data) @record def test_account_sas(self): # SAS URL is calculated from storage key, so 
# Next line: account SAS test (read-only token can read properties but append raises
# HttpResponseError) and the file-level SAS scope test (read+write on the file works).
this test runs live only if TestMode.need_recording_file(self.test_mode): return file_name = self._get_file_reference() # create a file under root directory self._create_file_and_return_client(file=file_name) # generate a token with file level read permission token = generate_account_sas( self.dsc.account_name, self.dsc.credential.account_key, ResourceTypes(file_system=True, object=True), AccountSasPermissions(read=True), datetime.utcnow() + timedelta(hours=1), ) # read the created file which is under root directory file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token) properties = file_client.get_file_properties() # make sure we can read the file properties self.assertIsNotNone(properties) # try to write to the created file with the token with self.assertRaises(HttpResponseError): file_client.append_data(b"abcd", 0, 4) @record def test_file_sas_only_applies_to_file_level(self): # SAS URL is calculated from storage key, so this test runs live only if TestMode.need_recording_file(self.test_mode): return file_name = self._get_file_reference() directory_name = self._get_directory_reference() self._create_file_and_return_client(directory=directory_name, file=file_name) # generate a token with file level read and write permissions token = generate_file_sas( self.dsc.account_name, self.file_system_name, directory_name, file_name, self.dsc.credential.account_key, permission=FileSasPermissions(read=True, write=True), expiry=datetime.utcnow() + timedelta(hours=1), ) # read the created file which is under root directory file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, directory_name + '/' + file_name, credential=token) properties = file_client.get_file_properties() # make sure we can read the file properties self.assertIsNotNone(properties) # try to write to the created file with the token response = file_client.append_data(b"abcd", 0, 4, validate_content=True) self.assertIsNotNone(response) # the token is for 
# Next line: file-level SAS must NOT grant file-system or directory access
# (ClientAuthenticationError expected), delete-file tests, set/get ACL tests.
# The final method (test_get_access_control_with_if_modified_since) is truncated here
# and continues past this chunk — do not edit its visible head in isolation.
file level, so users are not supposed to have access to file system level operations file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token) with self.assertRaises(ClientAuthenticationError): file_system_client.get_file_system_properties() # the token is for file level, so users are not supposed to have access to directory level operations directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name, directory_name, credential=token) with self.assertRaises(ClientAuthenticationError): directory_client.get_directory_properties() @record def test_delete_file(self): # Arrange file_client = self._create_file_and_return_client() file_client.delete_file() with self.assertRaises(ResourceNotFoundError): file_client.get_file_properties() @record def test_delete_file_with_if_unmodified_since(self): # Arrange file_client = self._create_file_and_return_client() prop = file_client.get_file_properties() file_client.delete_file(if_unmodified_since=prop['last_modified']) # Make sure the file was deleted with self.assertRaises(ResourceNotFoundError): file_client.get_file_properties() @record def test_set_access_control(self): file_client = self._create_file_and_return_client() response = file_client.set_access_control(permissions='0777') # Assert self.assertIsNotNone(response) @record def test_set_access_control_with_match_conditions(self): file_client = self._create_file_and_return_client() with self.assertRaises(ResourceModifiedError): file_client.set_access_control( permissions='0777', match_condition=MatchConditions.IfMissing) @record def test_get_access_control(self): file_client = self._create_file_and_return_client() file_client.set_access_control(permissions='0777') # Act response = file_client.get_access_control() # Assert self.assertIsNotNone(response) @record def test_get_access_control_with_if_modified_since(self): file_client = self._create_file_and_return_client() file_client.set_access_control(permissions='0777') 
prop = file_client.get_file_properties() # Act response = file_client.get_access_control( if_modified_since=prop['last_modified'] - timedelta(minutes=15)) # Assert self.assertIsNotNone(response) @record def test_get_properties(self): # Arrange directory_client = self._create_directory_and_return_client() metadata = {'hello': 'world', 'number': '42'} content_settings = ContentSettings(content_language='spanish', content_disposition='inline') file_client = directory_client.create_file( "newfile", metadata=metadata, content_settings=content_settings) file_client.append_data(b"abc", 0, 3) file_client.flush_data(3) properties = file_client.get_file_properties() # Assert self.assertTrue(properties) self.assertEqual(properties.size, 3) self.assertEqual(properties.metadata['hello'], metadata['hello']) self.assertEqual(properties.content_settings.content_language, content_settings.content_language) @record def test_rename_file_with_non_used_name(self): file_client = self._create_file_and_return_client() data_bytes = b"abc" file_client.append_data(data_bytes, 0, 3) file_client.flush_data(3) new_client = file_client.rename_file(file_client.file_system_name + '/' + 'newname') data = new_client.download_file().readall() self.assertEqual(data, data_bytes) self.assertEqual(new_client.path_name, "newname") @record def test_rename_file_to_existing_file(self): # create the existing file existing_file_client = self._create_file_and_return_client( file="existingfile") existing_file_client.append_data(b"a", 0, 1) existing_file_client.flush_data(1) old_url = existing_file_client.url # prepare to rename the file to the existing file file_client = self._create_file_and_return_client() data_bytes = b"abc" file_client.append_data(data_bytes, 0, 3) file_client.flush_data(3) new_client = file_client.rename_file(file_client.file_system_name + '/' + existing_file_client.path_name) new_url = file_client.url data = new_client.download_file().readall() # the existing file was overridden 
self.assertEqual(data, data_bytes) @record def test_rename_file_will_not_change_existing_directory(self): # create none empty directory(with 2 files) dir1 = self._create_directory_and_return_client(directory="dir1") f1 = dir1.create_file("file1") f1.append_data(b"file1", 0, 5) f1.flush_data(5) f2 = dir1.create_file("file2") f2.append_data(b"file2", 0, 5) f2.flush_data(5) # create another none empty directory(with 2 files) dir2 = self._create_directory_and_return_client(directory="dir2") f3 = dir2.create_file("file3") f3.append_data(b"file3", 0, 5) f3.flush_data(5) f4 = dir2.create_file("file4") f4.append_data(b"file4", 0, 5) f4.flush_data(5) new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name) self.assertEqual(new_client.download_file().readall(), b"file3") # make sure the data in file2 and file4 weren't touched f2_data = f2.download_file().readall() self.assertEqual(f2_data, b"file2") f4_data = f4.download_file().readall() self.assertEqual(f4_data, b"file4") with self.assertRaises(HttpResponseError): f3.download_file().readall()
class LargeFileTest(StorageTestCase):
    """Tests append/upload of very large streams without real network payloads.

    A ``PayloadDroppingPolicy`` is injected into the client pipeline so the
    (huge) request bodies are discarded while still recording how many
    ``append_data`` calls were made and with what sizes.
    """

    def _setUp(self, account_name, account_key):
        """Build a DataLakeServiceClient whose pipeline drops append payloads."""
        url = self.account_url(account_name, 'dfs')
        self.payload_dropping_policy = PayloadDroppingPolicy()
        # NOTE(review): the shared-key policy is appended *after* the dropping
        # policy — presumably so the request is signed after the body is
        # replaced; confirm against PayloadDroppingPolicy's implementation.
        credential_policy = _format_shared_key_credential(
            account_name, account_key)
        self.dsc = DataLakeServiceClient(
            url,
            credential=account_key,
            logging_enable=True,
            _additional_pipeline_policies=[
                self.payload_dropping_policy, credential_policy
            ])
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # Leftover file system from a previous run — reuse it.
                pass

    def tearDown(self):
        """Best-effort cleanup of the test file system."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            # Fixed: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; cleanup stays best-effort.
            except Exception:
                pass
        return super(LargeFileTest, self).tearDown()

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_append_large_stream_without_network(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        data = LargeStream(LARGEST_BLOCK_SIZE)

        # Act
        response = file_client.append_data(data, 0, LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        # A single append of exactly one max-size block.
        self.assertEqual(self.payload_dropping_policy.append_counter, 1)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_large_stream_without_network(
            self, datalake_storage_account_name, datalake_storage_account_key):
        pytest.skip(
            "Pypy3 on Linux failed somehow, skip for now to investigate")
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        length = 2 * LARGEST_BLOCK_SIZE
        data = LargeStream(length)

        # Act
        response = file_client.upload_data(data,
                                           length,
                                           overwrite=True,
                                           chunk_size=LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        # Two chunks of exactly one max-size block each.
        self.assertEqual(self.payload_dropping_policy.append_counter, 2)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)
        self.assertEqual(self.payload_dropping_policy.append_sizes[1],
                         LARGEST_BLOCK_SIZE)
class FileTest(StorageTestCase):
    """File-level tests for the Data Lake file client (create/append/flush,
    SAS scoping, ACLs, properties and rename)."""

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # Leftover file system from a previous run — reuse it.
                pass

    def tearDown(self):
        """Best-effort cleanup of the test file system."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            # Fixed: was a bare `except:` which also swallowed
            # SystemExit/KeyboardInterrupt; cleanup stays best-effort.
            except Exception:
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a unique directory name for this test run."""
        return self.get_resource_name(prefix)

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return a unique file name for this test run."""
        return self.get_resource_name(prefix)

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        """Create (and return a client for) a directory; name auto-generated
        when *directory* is falsy."""
        directory_name = directory if directory \
            else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file (under *directory* if given, else under root) and
        return its client."""
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')

        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        # re-create under the lease; must succeed when the lease is supplied
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @record
    def test_flush_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        self.assertIsNotNone(response)

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.read_file()
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token)
        properties = file_client.get_file_properties()
        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token — read-only SAS
        # must be rejected
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            account_key=self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()
        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have
        # access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have
        # access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        # Fixed: original had a stray trailing `\` line-continuation here,
        # fusing this statement with the comment/assert below.
        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # rename target is "<file system>/<new path>"
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.read_file()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        new_url = file_client.url
        data = new_client.read_file()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.read_file(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.read_file()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.read_file()
        self.assertEqual(f4_data, b"file4")

        # the renamed source path must be gone
        with self.assertRaises(HttpResponseError):
            f3.read_file()
class AzDataLakeProject(object):
    """A project directory inside an Azure Data Lake container.

    Wraps a ``DataLakeServiceClient``/directory client pair for the directory
    at *path* inside the container identified by *container_url*, looking up
    the account key from ``AzStorageConfig``.
    """

    def __init__(self, container_url, path):
        storage_account, file_system_name = get_details_from_container_url(
            container_url)
        self.container_url = container_url
        self.path = path
        self.storage_account = storage_account
        self.file_system_name = file_system_name
        storage_config = AzStorageConfig.objects.get(
            storage_account=storage_account, container_name=file_system_name)
        self.service = DataLakeServiceClient(
            f"https://{storage_account}.dfs.core.windows.net/",
            credential=storage_config.storage_account_key)
        self.directory_client = self.service.get_directory_client(
            file_system_name, path)

    def exists(self):
        """Return True if the project directory exists."""
        return self.directory_client.exists()

    def ensure_parent_directory(self):
        """Create the parent directory of ``self.path`` if it is missing."""
        parent_directory_path = os.path.dirname(self.path)
        directory_client = self.service.get_directory_client(
            self.file_system_name, parent_directory_path)
        if not directory_client.exists():
            directory_client.create_directory()

    def move(self, destination_container_url, destination_path):
        """Move this directory to *destination_path*.

        Within the same container this is a server-side rename; across
        containers it shells out to azcopy and then deletes the source.

        Raises:
            subprocess.CalledProcessError: if azcopy exits non-zero.
        """
        if destination_container_url == self.container_url:
            # Within the same container simply rename the directory.
            # rename_directory expects "<file system>/<new path>".
            # (Renamed local: was misleadingly called `filesystem_name`.)
            new_name = "{}/{}".format(self.file_system_name, destination_path)
            self.directory_client.rename_directory(new_name)
        else:
            # Copy to the destination with azcopy (list form, no shell).
            from_url = "{}/{}".format(self.container_url, self.path)
            to_url = "{}/{}".format(destination_container_url,
                                    destination_path)
            copy_command = [
                settings.AZCOPY_COMMAND, "copy", '--recursive', from_url,
                to_url
            ]
            subprocess.run(copy_command, check=True)
            # Delete our copy
            self.directory_client.delete_directory()

    def get_file_system_client(self):
        """Return a client for the container holding this project."""
        return self.service.get_file_system_client(self.file_system_name)

    def get_paths(self):
        """Iterate the path entries under the project directory."""
        file_system_client = self.get_file_system_client()
        return file_system_client.get_paths(self.path)

    def get_file_manifest(self):
        """Return a list describing every entry under the project directory.

        Directories are returned as-is; files are expanded into their full
        property dicts (minus the transient ``lease``), with nested content
        settings flattened to ``content_type`` + hex ``content_md5``.
        """
        file_system_client = self.get_file_system_client()
        results = []
        for file_metadata in file_system_client.get_paths(self.path):
            if file_metadata.is_directory:
                results.append(file_metadata)
            else:
                file_client = file_system_client.get_file_client(
                    file_metadata.name)
                file_properties = dict(file_client.get_file_properties())
                del file_properties['lease']
                # Fix nested content settings
                content_settings = file_properties["content_settings"]
                content_md5 = content_settings["content_md5"]
                file_properties["content_settings"] = {
                    "content_type": content_settings["content_type"],
                    # Fixed: content_md5 may be unset (e.g. file uploaded
                    # without an MD5) — the original crashed on None.hex().
                    "content_md5":
                        content_md5.hex() if content_md5 else None,
                }
                results.append(file_properties)
        return results

    def add_download_user(self, azure_user_id):
        """Grant *azure_user_id* recursive read+execute on the directory."""
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        acl = make_acl(azure_user_id, permissions='r-x')
        directory_client = file_system_client.get_directory_client(self.path)
        directory_client.update_access_control_recursive(acl=acl)

    def set_owner(self, azure_user_id):
        """Set *azure_user_id* as owner of the directory and everything in it."""
        file_paths = [self.path]
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        for file_metadata in file_system_client.get_paths(self.path):
            file_paths.append(file_metadata.name)
        for file_path in file_paths:
            # get_file_client works for directories here too — they are paths
            # in the same namespace; set_access_control applies to either.
            file_client = file_system_client.get_file_client(file_path)
            file_client.set_access_control(owner=azure_user_id)