def download_everything():
    """Download every file currently stored under ``iot_name`` in the lake.

    Authenticates with the account key, lists all paths below ``iot_name`` in
    the file system ``lake_name``, and downloads each one concurrently on a
    small thread pool via ``download_one``.  A failure in one download is
    printed but does not abort the others.
    """
    service_client = DataLakeServiceClient(storage_endpoint,
                                           credential=storage_account_key)
    fs_client = service_client.get_file_system_client(lake_name)

    # The context manager guarantees the pool is shut down and all workers
    # are joined even if listing or downloading raises.
    with ThreadPoolExecutor(8) as pool:
        futures = [
            pool.submit(download_one, fs_client.get_file_client(f.name))
            for f in fs_client.get_paths(iot_name)
        ]
        # result() blocks until the download finishes and re-raises any
        # worker exception, which we report without stopping the rest.
        for fut in tqdm.tqdm(futures):
            try:
                fut.result()
            except Exception as e:
                print(e)
def create_from_credential(
    cls,
    account_name: str,
    file_system_name: str,
    credential: Any,
    use_thread_local_transport: bool = True,
    **kwargs,
) -> "ADLGen2FileSystem":
    """
    Build an ADL Gen2 file system wrapper from an azure.identity credential.

    Parameters
    ----------
    account_name: str
        Azure account name
    file_system_name: str
        Container name
    credential: object
        azure.identity credential
    use_thread_local_transport: bool
        Use ``ThreadLocalRequestTransport`` as HTTP transport

    Returns
    -------
    ADLGen2FileSystem
    """
    # Only inject the custom transport when requested; otherwise let the
    # service client pick its default HTTP transport.
    extra_client_kwargs = (
        {"transport": ThreadLocalRequestTransport()}
        if use_thread_local_transport
        else {}
    )
    service_client = DataLakeServiceClient(
        account_url="https://%s.dfs.core.windows.net" % account_name,
        credential=credential,
        **extra_client_kwargs,
    )
    fs_client = service_client.get_file_system_client(
        file_system=file_system_name)
    return cls(fs_client, account_name, file_system_name, **kwargs)
def test_using_directory_sas_to_read(self):
    """Verify that a directory-scoped SAS token allows listing that directory."""
    target_directory = "demo-folder-allowed"
    print("\nGenerating SAS for directory: {}".format(target_directory))
    print("\nReading contents...")

    # Directory-level SAS carrying read/write/delete/list permissions,
    # valid for one hour from now.
    sas_token = generate_directory_sas(
        self.ACCOUNT_NAME,
        self.STORAGE_FILESYSTEM,
        target_directory,
        self.ACCOUNT_KEY,
        permission=FileSystemSasPermissions(
            read=True, write=True, delete=True, list=True),
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    # Authenticate with the SAS token only and enumerate the directory.
    service_client = DataLakeServiceClient(self.STORAGE_URL,
                                           credential=sas_token)
    fs_client = service_client.get_file_system_client(
        self.STORAGE_FILESYSTEM)
    listed_paths = list(fs_client.get_paths(target_directory))
    for entry in listed_paths:
        print(entry.name)
def data_lake_service_sample(self):
    """Tour of DataLakeServiceClient: construction (connection string and
    AAD), user delegation keys, file system create/list/delete, and the
    file/directory client accessors.

    NOTE: the ``[START ...]``/``[END ...]`` markers are consumed by the docs
    build to extract snippets — keep them intact.
    """
    # Instantiate a DataLakeServiceClient using a connection string
    # [START create_datalake_service_client]
    from azure.storage.filedatalake import DataLakeServiceClient
    datalake_service_client = DataLakeServiceClient.from_connection_string(
        self.connection_string)
    # [END create_datalake_service_client]

    # Instantiate a DataLakeServiceClient using Azure Identity credentials.
    # [START create_datalake_service_client_oauth]
    from azure.identity import ClientSecretCredential
    token_credential = ClientSecretCredential(
        self.active_directory_tenant_id,
        self.active_directory_application_id,
        self.active_directory_application_secret,
    )
    datalake_service_client = DataLakeServiceClient(
        "https://{}.dfs.core.windows.net".format(self.account_name),
        credential=token_credential)
    # [END create_datalake_service_client_oauth]

    # get user delegation key (valid for one hour)
    # [START get_user_delegation_key]
    from datetime import datetime, timedelta
    user_delegation_key = datalake_service_client.get_user_delegation_key(
        datetime.utcnow(),
        datetime.utcnow() + timedelta(hours=1))
    # [END get_user_delegation_key]

    # Create file systems
    # [START create_file_system_from_service_client]
    datalake_service_client.create_file_system("filesystem")
    # [END create_file_system_from_service_client]
    file_system_client = datalake_service_client.create_file_system(
        "anotherfilesystem")

    # List file systems
    # [START list_file_systems]
    file_systems = datalake_service_client.list_file_systems()
    for file_system in file_systems:
        print(file_system.name)
    # [END list_file_systems]

    # Get Clients from DataLakeServiceClient
    file_system_client = datalake_service_client.get_file_system_client(
        file_system_client.file_system_name)
    # [START get_directory_client_from_service_client]
    directory_client = datalake_service_client.get_directory_client(
        file_system_client.file_system_name, "mydirectory")
    # [END get_directory_client_from_service_client]
    # [START get_file_client_from_service_client]
    file_client = datalake_service_client.get_file_client(
        file_system_client.file_system_name, "myfile")
    # [END get_file_client_from_service_client]

    # Create file and set properties (metadata plus content settings)
    metadata = {'hello': 'world', 'number': '42'}
    from azure.storage.filedatalake import ContentSettings
    content_settings = ContentSettings(content_language='spanish',
                                       content_disposition='inline')
    file_client.create_file(content_settings=content_settings)
    file_client.set_metadata(metadata=metadata)
    file_props = file_client.get_file_properties()
    print(file_props.metadata)

    # Create file/directory and set properties
    directory_client.create_directory(content_settings=content_settings,
                                      metadata=metadata)
    dir_props = directory_client.get_directory_properties()
    print(dir_props.metadata)

    # Delete File Systems
    # [START delete_file_system_from_service_client]
    datalake_service_client.delete_file_system("filesystem")
    # [END delete_file_system_from_service_client]
    file_system_client.delete_file_system()
def filesystem(self):
    """Build a fresh service client and return a client for ``self.file_system``."""
    service = DataLakeServiceClient(account_url=self.account_url,
                                    credential=self.credential)
    return service.get_file_system_client(self.file_system)
class AzureDataLake(AbstractDataLake):
    """ADLS Gen2-backed data lake rooted at ``/<app_name>`` inside a container.

    All public paths are interpreted relative to the application root
    directory ``/<app_name>`` in the given container; the container is
    created on construction if it does not already exist.
    """

    def __init__(self, storage_account_name, storage_account_key,
                 container_name, app_name):
        self.storage_account_name = storage_account_name
        self.storage_account_key = storage_account_key
        self.app_name = app_name
        self._connect()
        self._create_file_system(container_name)
        self._container_name = container_name

    def _connect(self):
        """Create the account-level service client (shared-key auth)."""
        url = f"https://{self.storage_account_name}.dfs.core.windows.net"
        self.service_client = DataLakeServiceClient(
            account_url=url, credential=self.storage_account_key)

    def _create_file_system(self, container_name):
        """Create (or attach to) the container and cache the app root dir client."""
        try:
            self.file_system_client = self.service_client.create_file_system(
                file_system=container_name)
        except ResourceExistsError:
            # Container already exists — just get a client for it.
            self.file_system_client = self.service_client.get_file_system_client(
                file_system=container_name)
        self._ROOT_FOLDER = self.file_system_client.get_directory_client(
            f"/{self.app_name}")

    def mkdir(self, path: str):
        """Create a directory at ``path`` (relative to the app root)."""
        self.file_system_client.create_directory(f"/{self.app_name}{path}")

    def rmdir(self, path: str, recursive=True):
        """Delete the directory at ``path`` under the app root."""
        if path.startswith("/"):
            path = path[1:]
        self._ROOT_FOLDER.delete_sub_directory(path)

    def store(self, serialized_json_content: str, filename: str,
              overwrite=False):
        """Upload serialized content to ``filename`` under the app root."""
        if filename.startswith("/"):
            filename = filename[1:]
        file_client = self._ROOT_FOLDER.create_file(filename)
        file_client.upload_data(serialized_json_content, overwrite=overwrite)

    def retrieve(self, filename: str):
        """Download and return the full contents of ``filename``."""
        if filename.startswith("/"):
            filename = filename[1:]
        file_client = self._ROOT_FOLDER.get_file_client(filename)
        download = file_client.download_file()
        return download.readall()

    def rm(self, filename: str):
        """Delete ``filename`` under the app root."""
        if filename.startswith("/"):
            filename = filename[1:]
        self._ROOT_FOLDER.get_file_client(filename).delete_file()

    def ls(self, path: str) -> list:
        """List paths under ``path`` (app-root relative), stripping the app prefix.

        BUGFIX: the return annotation was ``[str]`` (a list literal, not a
        valid type hint) and the loop variable shadowed the ``path``
        parameter; both fixed without changing behavior.
        """
        all_paths = []
        for entry in self.file_system_client.get_paths(
                path=f"/{self.app_name}{path}"):
            # NOTE(review): splitting on app_name assumes the app name does
            # not reappear inside entry names — confirm for production data.
            all_paths.append(entry.name.split(self.app_name)[1])
        return all_paths

    def mvdir(self, dirname: str, new_dirname: str):
        """Rename/move a directory under the app root."""
        if dirname.startswith("/"):
            dirname = dirname[1:]
        if new_dirname.startswith("/"):
            new_dirname = new_dirname[1:]
        directory_client = self.file_system_client.get_directory_client(
            f"{self.app_name}/{dirname}")
        # rename_directory expects "<filesystem>/<full new path>".
        directory_client.rename_directory(
            new_name=
            f"{directory_client.file_system_name}/{self.app_name}/{new_dirname}"
        )

    def mvfile(self, filepath: str, new_filepath: str):
        """Rename/move a file under the app root."""
        if filepath.startswith("/"):
            filepath = filepath[1:]
        if new_filepath.startswith("/"):
            new_filepath = new_filepath[1:]
        fc = self._ROOT_FOLDER.get_file_client(filepath)
        # rename_file expects "<filesystem>/<full new path>".
        fc.rename_file(
            f"{self._container_name}/{self.app_name}/{new_filepath}")
class FileTest(StorageTestCase):
    """Integration tests for DataLake Gen2 file-level operations
    (create/append/flush/upload/read/SAS/ACL/rename/delete)."""

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url,
            credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
            logging_enable=True)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')

        # Only touch the live service outside playback mode.
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except Exception:
                # Best-effort cleanup; the file system may already be gone.
                # (Was a bare `except:`, which also swallowed SystemExit and
                # KeyboardInterrupt.)
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        directory_name = directory if directory else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_using_oauth_token_credential(self):
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token_credential)
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_existing_name(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')

        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @record
    def test_append_empty_data(self):
        file_client = self._create_file_and_return_client()

        # Act
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        # BUGFIX: was `assertIsNotNone(file_props['size'], 0)`, which treats
        # 0 as the failure *message* and never checks the size.  A freshly
        # flushed empty file must have size 0.
        self.assertEqual(file_props['size'], 0)

    @record
    def test_flush_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @record
    def test_flush_data_with_match_condition(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(
            3,
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(
                6,
                etag=resp['etag'],
                match_condition=MatchConditions.IfNotModified)

    def test_upload_data_to_none_existing_file(self):
        # parallel upload cannot be recorded
        if TestMode.need_recording_file(self.test_mode):
            return
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)

        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file
        data = self.get_random_bytes(100)
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file_with_content_settings(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        file_client.upload_data(data,
                                max_concurrency=5,
                                content_settings=content_settings,
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_upload_data_to_existing_file_with_permission_and_umask(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_with_user_delegation_key(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(self._get_oauth_account_url(),
                                               credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # download the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(self._get_account_url(),
                                             file_client.file_system_name,
                                             file_client.path_name,
                                             credential=sas_token)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_into_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @record
    def test_read_file_to_text(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(
            max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(HttpResponseError):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have
        # access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have
        # access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_delete_file_with_if_unmodified_since(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_match_conditions(self):
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(
                permissions='0777',
                match_condition=MatchConditions.IfMissing)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_if_modified_since(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)

        # prepare to rename the file to the existing file
        # (removed unused `old_url`/`new_url` locals)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        data = new_client.download_file().readall()

        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        # the source of the rename no longer exists
        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
if __name__ == '__main__':
    # Recursively propagate a directory's ACLs to all of its children.
    # Usage: adls-acl.py <STORAGE_ACCT_NAME> <FILE_SYSTEM_NAME> <PATH>
    if len(sys.argv) != 4:
        print('Please use the following syntax to call the script:')
        print('\tadls-acl.py <STORAGE_ACCT_NAME> <FILE_SYSTEM_NAME> <PATH>')
        print('Example:')
        print(
            '\tadls-acl.py mystorageaccountname rawdata folder1/subfolder1/subfolder1-2'
        )
        # BUGFIX: exit with a non-zero status on a usage error so shell
        # scripts/CI can detect the failure (bare sys.exit() exits with 0).
        sys.exit(1)

    ACCOUNT_NAME, FILE_SYSTEM, TARGET_DIR = sys.argv[1:]

    # Clients: authenticate via the ambient Azure identity (env vars,
    # managed identity, CLI login, ...).
    credential = DefaultAzureCredential()
    service = DataLakeServiceClient(
        account_url=f'https://{ACCOUNT_NAME}.dfs.core.windows.net/',
        credential=credential)
    filesystem = service.get_file_system_client(file_system=FILE_SYSTEM)

    print('*' * 20)
    print(f'Storage Account Name: {ACCOUNT_NAME}')
    print(f'File System Name: {FILE_SYSTEM}')
    print('*' * 20)
    print(
        f'Running: Setting ACLs for all child paths (subdirectories and files) in {TARGET_DIR} to match parent.'
    )

    total_start = time.time()  # Start Timing
    asyncio.run(main(TARGET_DIR, filesystem))
    total_end = time.time()  # End Timing
    print("Complete: Recursive ACL configuration took {} seconds.".format(
        str(round(total_end - total_start, 2))))
class FileTest(StorageTestCase):
    """Tests for DataLakeFileClient against a live (or recorded) ADLS Gen2
    account: create/append/flush/upload/download, SAS-scoped access,
    user-delegation keys, ACLs (including recursive), properties, expiry,
    and rename semantics.
    """

    def _setUp(self, account_name, account_key):
        # Build the service client for this run and lazily create a uniquely
        # named file system (skipped entirely in recorded playback).
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                # A leftover file system from a prior run is fine to reuse.
                pass

    def tearDown(self):
        # Best-effort cleanup of the per-run file system (live runs only).
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        # Unique, recording-stable directory name.
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        # Unique, recording-stable file name.
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        # Create (or reuse the given name for) a directory and hand back its
        # client for further calls.
        directory_name = directory if directory else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        # Create an (optionally directory-nested) empty file and return its
        # client. With directory="" the path starts with '/', i.e. the file
        # lands under the file system root.
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_file_exists(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()

        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client1 = directory_client.get_file_client('filename')
        file_client2 = directory_client.get_file_client('nonexistentfile')
        file_client1.create_file()

        self.assertTrue(file_client1.exists())
        self.assertFalse(file_client2.exists())

    @DataLakePreparer()
    def test_create_file_using_oauth_token_credential(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Build a file client authenticated with an OAuth token instead of the
        # shared account key.
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_create_file_with_existing_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_create_file_with_lease_id(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')

        # Act: re-creating a leased file requires passing the lease.
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @DataLakePreparer()
    def test_create_file_under_root_directory(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_data(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_empty_data(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        # Act: flushing 0 bytes on a fresh file is a no-op that must succeed.
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        self.assertIsNotNone(file_props['size'], 0)

    @DataLakePreparer()
    def test_flush_data(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act: appended data only becomes part of the file after flush.
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @DataLakePreparer()
    def test_flush_data_with_match_condition(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(
            3,
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(
                6,
                etag=resp['etag'],
                match_condition=MatchConditions.IfNotModified)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_to_none_existing_file(self,
                                               datalake_storage_account_name,
                                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)
        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_in_substreams(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Get 16MB data
        data = self.get_random_bytes(16 * 1024 * 1024)
        # Ensure chunk size is greater than threshold (8MB > 4MB) - for optimized upload
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True,
                                max_concurrency=3)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

        # Run on single thread
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file: upload without overwrite must fail,
        # with overwrite=True it must succeed.
        data = self.get_random_bytes(100)
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_content_settings(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        file_client.upload_data(data,
                                max_concurrency=5,
                                content_settings=content_settings,
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_permission_and_umask(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert: octal 0777 with umask 0000 renders as rwxrwxrwx.
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @DataLakePreparer()
    def test_read_file(self, datalake_storage_account_name,
                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_read_file_with_user_delegation_key(self,
                                                datalake_storage_account_name,
                                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential,
            logging_enable=True)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # download the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token,
            logging_enable=True)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_set_acl_with_user_delegation_key(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(execute=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Set and read back ACL/owner through the SAS-authenticated client.
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)
        acl = 'user::rwx,group::r-x,other::rwx'
        owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
        new_file_client.set_access_control(acl=acl, owner=owner)
        access_control = new_file_client.get_access_control()
        self.assertEqual(acl, access_control['acl'])
        self.assertEqual(owner, access_control['owner'])

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_preauthorize_user_with_user_delegation_key(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))
        file_client.set_access_control(
            owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777')
        acl = file_client.get_access_control()

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        # SAS preauthorized for the owner object id set above.
        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          write=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc"
        )

        # The preauthorized agent may set access control through the SAS.
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)

        acl = new_file_client.set_access_control(permissions='0777')
        self.assertIsNotNone(acl)

    @DataLakePreparer()
    def test_read_file_into_file(self, datalake_storage_account_name,
                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @DataLakePreparer()
    def test_read_file_to_text(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # download the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(
            max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_account_sas(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # The raw token string and an AzureSasCredential must behave the same.
        for credential in [token, AzureSasCredential(token)]:
            # read the created file which is under root directory
            file_client = DataLakeFileClient(self.dsc.url,
                                             self.file_system_name,
                                             file_name,
                                             credential=credential)
            properties = file_client.get_file_properties()

            # make sure we can read the file properties
            self.assertIsNotNone(properties)

            # try to write to the created file with the token
            with self.assertRaises(HttpResponseError):
                file_client.append_data(b"abcd", 0, 4)

    @DataLakePreparer()
    def test_account_sas_raises_if_sas_already_in_uri(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Passing both a SAS in the URL and an AzureSasCredential is ambiguous
        # and must be rejected.
        with self.assertRaises(ValueError):
            DataLakeFileClient(self.dsc.url + "?sig=foo",
                               self.file_system_name,
                               "foo",
                               credential=AzureSasCredential("?foo=bar"))

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_file_sas_only_applies_to_file_level(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @DataLakePreparer()
    def test_delete_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_delete_file_with_if_unmodified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_set_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_with_match_conditions(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        # IfMissing on an existing file must fail the precondition.
        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(
                permissions='0777', match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_get_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_get_access_control_with_if_modified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_recursive(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.set_access_control_recursive(acl=acl)

        # Assert: recursion over a single file touches exactly that file.
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_update_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.update_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_remove_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = "mask," + "default:user,default:group," + \
              "user:ec3595d6-2c17-4696-8caa-7e139758d24a,group:ec3595d6-2c17-4696-8caa-7e139758d24a," + \
              "default:user:ec3595d6-2c17-4696-8caa-7e139758d24a,default:group:ec3595d6-2c17-4696-8caa-7e139758d24a"
        file_client = self._create_file_and_return_client()
        summary = file_client.remove_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)

    @DataLakePreparer()
    def test_get_properties(self, datalake_storage_account_name,
                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_set_expiry(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        expires_on = datetime.utcnow() + timedelta(hours=1)
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.set_file_expiry("Absolute", expires_on=expires_on)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.expiry_time)

    @DataLakePreparer()
    def test_rename_file_with_non_used_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # Rename target is "<file system>/<new path>".
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_system_sas(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # sas token is calculated from storage key, so live only
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_sas(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "oldfile",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # A separate SAS scoped to the rename destination.
        new_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "newname",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        # The destination SAS rides along as a query string on the new path.
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname' + '?' +
                                             new_token)

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_with_account_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Skipped until the service-side rename-with-account-SAS bug is fixed.
        pytest.skip("service bug")
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            AccountSasPermissions(write=True,
                                  read=True,
                                  create=True,
                                  delete=True),
            datetime.utcnow() + timedelta(hours=5),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        new_url = file_client.url
        data = new_client.download_file().readall()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @DataLakePreparer()
    def test_rename_file_will_not_change_existing_directory(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create non-empty directory (with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another non-empty directory (with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        # rename dir2/file3 over dir1/file1
        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        # the source path no longer exists after the rename
        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
class FileSystemTest(StorageTestCase):
    """Integration tests for file-system (container) level operations on
    ``DataLakeServiceClient`` / ``FileSystemClient``: create/delete, metadata,
    access policy, listing, rename, soft-delete restore and path listing.
    """

    def _setUp(self, account_name, account_key):
        # Called explicitly by each test (not unittest's setUp) because the
        # account name/key are injected per-test by @DataLakePreparer().
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url, account_key)
        self.config = self.dsc._config
        # Names registered here are deleted in tearDown (live runs only).
        self.test_file_systems = []

    def tearDown(self):
        if not self.is_playback():
            try:
                for file_system in self.test_file_systems:
                    self.dsc.delete_file_system(file_system)
            except:
                # best-effort cleanup; leftover filesystems are tolerated
                pass
        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Generate a unique filesystem name and register it for cleanup."""
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create (and register) a filesystem; returns its FileSystemClient."""
        return self.dsc.create_file_system(self._get_file_system_reference(prefix=file_system_prefix))

    # --Helpers-----------------------------------------------------------------
    @DataLakePreparer()
    def test_create_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """A new filesystem can be created via its FileSystemClient."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()
        # Assert
        self.assertTrue(created)

    @DataLakePreparer()
    def test_file_system_exists(self, datalake_storage_account_name, datalake_storage_account_key):
        """exists() is True for a created filesystem and False otherwise."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        # Act
        file_system_client1 = self.dsc.get_file_system_client(file_system_name)
        file_system_client2 = self.dsc.get_file_system_client("nonexistentfs")
        file_system_client1.create_file_system()
        self.assertTrue(file_system_client1.exists())
        self.assertFalse(file_system_client2.exists())

    @DataLakePreparer()
    def test_create_file_system_with_metadata(self, datalake_storage_account_name, datalake_storage_account_key):
        """Metadata passed at creation time is returned by get_file_system_properties()."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system_name = self._get_file_system_reference()
        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system(metadata=metadata)
        # Assert
        meta = file_system_client.get_file_system_properties().metadata
        self.assertTrue(created)
        self.assertDictEqual(meta, metadata)

    @DataLakePreparer()
    def test_set_file_system_acl(self, datalake_storage_account_name, datalake_storage_account_key):
        """Access policy round-trips; omitting public_access resets it to None."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Act
        file_system = self._create_file_system()
        access_policy = AccessPolicy(permission=FileSystemSasPermissions(read=True),
                                     expiry=datetime.utcnow() + timedelta(hours=1),
                                     start=datetime.utcnow())
        signed_identifier1 = {'testid': access_policy}
        response = file_system.set_file_system_access_policy(signed_identifier1,
                                                             public_access=PublicAccess.FileSystem)
        self.assertIsNotNone(response.get('etag'))
        self.assertIsNotNone(response.get('last_modified'))
        acl1 = file_system.get_file_system_access_policy()
        self.assertIsNotNone(acl1['public_access'])
        self.assertEqual(len(acl1['signed_identifiers']), 1)
        # If set signed identifier without specifying the access policy then it will be default to None
        signed_identifier2 = {'testid': access_policy, 'test2': access_policy}
        file_system.set_file_system_access_policy(signed_identifier2)
        acl2 = file_system.get_file_system_access_policy()
        self.assertIsNone(acl2['public_access'])
        self.assertEqual(len(acl2['signed_identifiers']), 2)

    @DataLakePreparer()
    def test_list_file_systemss(self, datalake_storage_account_name, datalake_storage_account_key):
        """list_file_systems() includes a freshly created filesystem.

        NOTE(review): trailing "ss" in the method name looks like a typo, but
        renaming would change test discovery, so it is left as-is.
        """
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)
        # Act
        file_systems = list(self.dsc.list_file_systems())
        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @DataLakePreparer()
    def test_rename_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """_rename_file_system succeeds once and fails for taken/missing names."""
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name1 = self._get_file_system_reference(prefix="oldcontainer1")
        old_name2 = self._get_file_system_reference(prefix="oldcontainer2")
        new_name = self._get_file_system_reference(prefix="newcontainer")
        filesystem1 = self.dsc.create_file_system(old_name1)
        self.dsc.create_file_system(old_name2)
        new_filesystem = self.dsc._rename_file_system(name=old_name1, new_name=new_name)
        # renaming a second filesystem onto the same (now taken) name fails
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name2, new_name=new_name)
        # the old client now points at a nonexistent filesystem
        with self.assertRaises(HttpResponseError):
            filesystem1.get_file_system_properties()
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name="badfilesystem", new_name="filesystem")
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_file_system_client(self, datalake_storage_account_name,
                                                        datalake_storage_account_key):
        """Same rename semantics, driven from FileSystemClient instead of the service client."""
        pytest.skip("Feature not yet enabled. Make sure to record this test once enabled.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name1 = self._get_file_system_reference(prefix="oldcontainer1")
        old_name2 = self._get_file_system_reference(prefix="oldcontainer2")
        new_name = self._get_file_system_reference(prefix="newcontainer")
        bad_name = self._get_file_system_reference(prefix="badcontainer")
        filesystem1 = self.dsc.create_file_system(old_name1)
        file_system2 = self.dsc.create_file_system(old_name2)
        bad_file_system = self.dsc.get_file_system_client(bad_name)
        new_filesystem = filesystem1._rename_file_system(new_name=new_name)
        with self.assertRaises(HttpResponseError):
            file_system2._rename_file_system(new_name=new_name)
        with self.assertRaises(HttpResponseError):
            filesystem1.get_file_system_properties()
        with self.assertRaises(HttpResponseError):
            bad_file_system._rename_file_system(new_name="filesystem")
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_rename_file_system_with_source_lease(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """Renaming a leased filesystem requires passing the correct lease."""
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        old_name = self._get_file_system_reference(prefix="old")
        new_name = self._get_file_system_reference(prefix="new")
        filesystem = self.dsc.create_file_system(old_name)
        filesystem_lease_id = filesystem.acquire_lease()
        # no lease / wrong lease -> rejected
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name, new_name=new_name)
        with self.assertRaises(HttpResponseError):
            self.dsc._rename_file_system(name=old_name, new_name=new_name, lease="bad_id")
        new_filesystem = self.dsc._rename_file_system(name=old_name, new_name=new_name,
                                                      lease=filesystem_lease_id)
        self.assertEqual(new_name, new_filesystem.get_file_system_properties().name)

    @DataLakePreparer()
    def test_undelete_file_system(self, datalake_storage_account_name, datalake_storage_account_key):
        """A soft-deleted filesystem can be restored under a new name."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        name = self._get_file_system_reference()
        filesystem_client = self.dsc.create_file_system(name)
        # Act
        filesystem_client.delete_file_system()
        # to make sure the filesystem deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()
        filesystem_list = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)
        restored_version = 0
        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                restored_fs_client = self.dsc.undelete_file_system(
                    filesystem.name, filesystem.deleted_version,
                    new_name="restored" + name + str(restored_version))
                restored_version += 1
                # to make sure the deleted filesystem is restored
                props = restored_fs_client.get_file_system_properties()
                self.assertIsNotNone(props)

    @DataLakePreparer()
    def test_restore_to_existing_file_system(self, datalake_storage_account_name,
                                             datalake_storage_account_key):
        """Restoring onto an existing filesystem name must fail."""
        # Needs soft delete enabled account.
        if not self.is_playback():
            return
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # get an existing filesystem
        existing_name = self._get_file_system_reference(prefix="existing2")
        name = self._get_file_system_reference(prefix="filesystem2")
        existing_filesystem_client = self.dsc.create_file_system(existing_name)
        filesystem_client = self.dsc.create_file_system(name)
        # Act
        filesystem_client.delete_file_system()
        # to make sure the filesystem deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()
        filesystem_list = list(self.dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)
        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                with self.assertRaises(HttpResponseError):
                    self.dsc.undelete_file_system(
                        filesystem.name, filesystem.deleted_version,
                        new_name=existing_filesystem_client.file_system_name)

    @DataLakePreparer()
    def test_restore_file_system_with_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Soft-delete restore also works when authenticated via account SAS."""
        pytest.skip(
            "We are generating a SAS token therefore play only live but we also need a soft delete enabled account.")
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(service=True, file_system=True),
            AccountSasPermissions(read=True, write=True, list=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        dsc = DataLakeServiceClient(self.dsc.url, token)
        name = self._get_file_system_reference(prefix="filesystem")
        filesystem_client = dsc.create_file_system(name)
        filesystem_client.delete_file_system()
        # to make sure the filesystem is deleted
        with self.assertRaises(ResourceNotFoundError):
            filesystem_client.get_file_system_properties()
        filesystem_list = list(dsc.list_file_systems(include_deleted=True))
        self.assertTrue(len(filesystem_list) >= 1)
        restored_version = 0
        for filesystem in filesystem_list:
            # find the deleted filesystem and restore it
            if filesystem.deleted and filesystem.name == filesystem_client.file_system_name:
                restored_fs_client = dsc.undelete_file_system(
                    filesystem.name, filesystem.deleted_version,
                    new_name="restored" + name + str(restored_version))
                restored_version += 1
                # to make sure the deleted filesystem is restored
                props = restored_fs_client.get_file_system_properties()
                self.assertIsNotNone(props)

    @DataLakePreparer()
    def test_delete_file_system_with_existing_file_system(self, datalake_storage_account_name,
                                                          datalake_storage_account_key):
        """delete_file_system() returns None on success."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        # Act
        deleted = file_system.delete_file_system()
        # Assert
        self.assertIsNone(deleted)

    @DataLakePreparer()
    def test_delete_none_existing_file_system(self, datalake_storage_account_name,
                                              datalake_storage_account_key):
        """Deleting a nonexistent filesystem raises ResourceNotFoundError."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        fake_file_system_client = self.dsc.get_file_system_client("fakeclient")
        # Act
        with self.assertRaises(ResourceNotFoundError):
            fake_file_system_client.delete_file_system(match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_list_file_systems_with_include_metadata(self, datalake_storage_account_name,
                                                     datalake_storage_account_key):
        """include_metadata=True surfaces filesystem metadata in listings."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)
        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))
        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @DataLakePreparer()
    def test_list_file_systems_by_page(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """by_page() with results_per_page=3 yields at least one 3-item page."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        for i in range(0, 6):
            self._create_file_system(file_system_prefix="filesystem{}".format(i))
        # Act
        file_systems = list(next(self.dsc.list_file_systems(
            results_per_page=3,
            name_starts_with="file",
            include_metadata=True).by_page()))
        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @DataLakePreparer()
    def test_list_file_systems_with_public_access(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """public_access="blob" maps to PublicAccess.File in listing results."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)
        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))
        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems, file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        self.assertTrue(file_systems[0].public_access is PublicAccess.File)

    @DataLakePreparer()
    def test_get_file_system_properties(self, datalake_storage_account_name,
                                        datalake_storage_account_key):
        """Properties include metadata and immutability/legal-hold flags."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)
        # Act
        props = file_system.get_file_system_properties()
        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @DataLakePreparer()
    def test_service_client_session_closes_after_filesystem_creation(
            self, datalake_storage_account_name, datalake_storage_account_key):
        """A second client stays usable after a with-block client closes its session."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        dsc2 = DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key)
        with DataLakeServiceClient(self.dsc.url, credential=datalake_storage_account_key) as ds_client:
            fs1 = ds_client.create_file_system(self._get_file_system_reference(prefix="fs1"))
            fs1.delete_file_system()
        dsc2.create_file_system(self._get_file_system_reference(prefix="fs2"))
        dsc2.close()

    @DataLakePreparer()
    def test_list_paths(self, datalake_storage_account_name, datalake_storage_account_key):
        """get_paths() lists every top-level directory."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
        paths = list(file_system.get_paths(upn=True))
        self.assertEqual(len(paths), 6)
        self.assertTrue(isinstance(paths[0].last_modified, datetime))

    @DataLakePreparer()
    def test_list_paths_which_are_all_files(self, datalake_storage_account_name,
                                            datalake_storage_account_key):
        """get_paths() lists files as well as directories."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))
        paths = list(file_system.get_paths(upn=True))
        self.assertEqual(len(paths), 6)

    @DataLakePreparer()
    def test_list_paths_with_max_per_page(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        """Paging continues correctly from a continuation token."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))
        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))
        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @DataLakePreparer()
    def test_list_paths_under_specific_path(self, datalake_storage_account_name,
                                            datalake_storage_account_key):
        """get_paths(path=...) scopes the listing to one subdirectory."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client("dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")
            # create a file under the current directory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)
        generator1 = file_system.get_paths(path="dir10/subdir", max_results=2, upn=True).by_page()
        paths = list(next(generator1))
        self.assertEqual(len(paths), 2)
        self.assertEqual(paths[0].content_length, 5)

    @DataLakePreparer()
    def test_list_paths_recursively(self, datalake_storage_account_name,
                                    datalake_storage_account_key):
        """recursive=True walks the whole tree (6 dirs x 4 descendants = 24)."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client("dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")
            # create a file under the current directory
            subdir.create_file("file")
        paths = list(file_system.get_paths(recursive=True, upn=True))
        # there are 24 subpaths in total
        self.assertEqual(len(paths), 24)

    @DataLakePreparer()
    def test_list_paths_pages_correctly(self, datalake_storage_account_name,
                                        datalake_storage_account_key):
        """12 paths at 6 per page yields exactly two pages, then StopIteration."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system(file_system_prefix="fs1")
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))
        generator = file_system.get_paths(max_results=6, upn=True).by_page()
        paths1 = list(next(generator))
        paths2 = list(next(generator))
        with self.assertRaises(StopIteration):
            list(next(generator))
        self.assertEqual(len(paths1), 6)
        self.assertEqual(len(paths2), 6)

    @DataLakePreparer()
    def test_create_directory_from_file_system_client(self, datalake_storage_account_name,
                                                      datalake_storage_account_key):
        """Non-recursive listing shows only the top-level directory."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        file_system.create_directory("dir1/dir2")
        paths = list(file_system.get_paths(recursive=False, upn=True))
        self.assertEqual(len(paths), 1)
        self.assertEqual(paths[0].name, "dir1")

    @DataLakePreparer()
    def test_create_file_from_file_system_client(self, datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """Creating a nested file implicitly creates its parent directories."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system = self._create_file_system()
        file_system.create_file("dir1/dir2/file")
        paths = list(file_system.get_paths(recursive=True, upn=True))
        self.assertEqual(len(paths), 3)
        self.assertEqual(paths[0].name, "dir1")
        self.assertEqual(paths[2].is_directory, False)

    @DataLakePreparer()
    def test_get_root_directory_client(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        """ACLs set on the root directory client round-trip."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        file_system = self._create_file_system()
        directory_client = file_system._get_root_directory_client()
        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_file_system_sessions_closes_properly(self, datalake_storage_account_name,
                                                  datalake_storage_account_key):
        """Nested client context managers close without breaking the parent session."""
        self._setUp(datalake_storage_account_name, datalake_storage_account_key)
        # Arrange
        file_system_client = self._create_file_system("fenrhxsbfvsdvdsvdsadb")
        with file_system_client as fs_client:
            with fs_client.get_file_client("file1.txt") as f_client:
                f_client.create_file()
            with fs_client.get_file_client("file2.txt") as f_client:
                f_client.create_file()
            with fs_client.get_directory_client("file1") as f_client:
                f_client.create_directory()
            with fs_client.get_directory_client("file2") as f_client:
                f_client.create_directory()
class FileTest(StorageTestCase):
    """Integration tests for DataLakeFileClient: create/append/flush/read,
    SAS scoping, deletion, access control, properties and rename.
    """

    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        if not self.is_playback():
            # live runs need the backing filesystem to actually exist
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                # best-effort cleanup
                pass
        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a unique directory name for this test run."""
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        """Return a unique file name for this test run."""
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        """Create (optionally named) directory and return its client."""
        directory_name = directory if directory else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        """Create a file (under `directory` if given) and return its client."""
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------
    @record
    def test_create_file(self):
        """A file can be created under a directory."""
        # Arrange
        directory_name = self._get_directory_reference()
        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_lease_id(self):
        """Re-creating a leased file works when the lease is supplied."""
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')
        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)
        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        """A file can be created directly under the filesystem root."""
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name, "filename")
        response = file_client.create_file()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        """append_data stages bytes at the given offset."""
        directory_name = self._get_directory_reference()
        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @record
    def test_flush_data(self):
        """flush_data commits previously appended bytes."""
        directory_name = self._get_directory_reference()
        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)
        self.assertIsNotNone(response)

    @record
    def test_read_file(self):
        """Uploaded bytes read back identically."""
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))
        # download the data and make sure it is the same as uploaded data
        downloaded_data = file_client.read_file()
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        """A read-only account SAS allows reads but rejects writes."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)
        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )
        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
        properties = file_client.get_file_properties()
        # make sure we can read the file properties
        self.assertIsNotNone(properties)
        # try to write to the created file with the token
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        """A file-level SAS grants file access but not filesystem/directory access."""
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)
        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            account_key=self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name,
                                         directory_name+'/'+file_name, credential=token)
        properties = file_client.get_file_properties()
        # make sure we can read the file properties
        self.assertIsNotNone(properties)
        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)
        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()
        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name,
                                                   directory_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        """A deleted file's properties are no longer retrievable."""
        # Arrange
        file_client = self._create_file_and_return_client()
        file_client.delete_file()
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        """Octal permissions can be set on a file."""
        file_client = self._create_file_and_return_client()
        response = file_client.set_access_control(permissions='0777')
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        """Access control set on a file can be read back."""
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')
        # Act
        response = file_client.get_access_control()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        """Size, metadata and content settings are reflected in properties."""
        # Arrange
        directory_client = self._create_directory_and_return_client()
        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(
            content_language='spanish',
            content_disposition='inline')
        file_client = directory_client.create_file("newfile", metadata=metadata,
                                                   content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()
        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        """Renaming to a fresh name moves the data to the new path."""
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+'newname')
        data = new_client.read_file()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        """Renaming onto an existing file overwrites its content."""
        # create the existing file
        existing_file_client = self._create_file_and_return_client(file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url
        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+existing_file_client.path_name)
        new_url = file_client.url
        data = new_client.read_file()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        """Renaming across directories leaves sibling files untouched."""
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)
        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)
        new_client = f3.rename_file(f1.file_system_name+'/'+f1.path_name)
        self.assertEqual(new_client.read_file(), b"file3")
        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.read_file()
        self.assertEqual(f2_data, b"file2")
        f4_data = f4.read_file()
        self.assertEqual(f4_data, b"file4")
        # the source path is gone after the rename
        with self.assertRaises(HttpResponseError):
            f3.read_file()
class Adl(object):
    """Thin convenience wrapper around ``DataLakeServiceClient``.

    Connection settings come from the environment variables
    ``AZURE_ADL_ACCOUNT``, ``AZURE_ADL_KEY`` and
    ``AZURE_ADL_CONNECTION_STRING``.
    """

    def __init__(self):
        url = self.datalake_account_url()
        key = self.datalake_account_key()
        print('url: {}'.format(url))
        # SECURITY: the account key is a secret; the original code echoed it
        # to stdout ("key: ..."), which leaks credentials into logs — removed.
        self.service_client = DataLakeServiceClient(account_url=url, credential=key)
        print(self.service_client)

    def create_fs(self, fsname):
        """Create file system *fsname*; silently succeed if it already exists."""
        try:
            fs_client = self.service_client.get_file_system_client(fsname)
            fs_client.create_file_system()
            print('create_fs: {}'.format(fsname))
        except ResourceExistsError:
            pass

    def delete_fs(self, fsname):
        """Delete file system *fsname*; silently succeed if it does not exist."""
        try:
            fs_client = self.service_client.get_file_system_client(fsname)
            fs_client.delete_file_system()
        except ResourceNotFoundError:
            pass

    def filesystem_list(self):
        """Return an iterator over all file systems in the account."""
        return self.service_client.list_file_systems()

    def create_dir(self, fsname, dirname):
        """Create directory *dirname* in file system *fsname*, best-effort."""
        try:
            fs_client = self.service_client.get_file_system_client(fsname)
            fs_client.create_directory(dirname)
            print('create_dir: {} in fs: {}'.format(dirname, fsname))
        except Exception:
            # keep the original swallow-all behavior, but avoid a bare
            # `except:` which would also trap KeyboardInterrupt/SystemExit
            pass

    def file_list(self, fsname, dirname):
        """Return paths under *dirname*, or an empty list on any error."""
        try:
            fsc = self.service_client.get_file_system_client(
                file_system=fsname)
            return fsc.get_paths(path=dirname)
        except Exception:
            return []

    def directory_client(self, fsname, dirname):
        """Return a directory client for *dirname*, or ``None`` on any error."""
        try:
            fs_client = self.service_client.get_file_system_client(fsname)
            return fs_client.get_directory_client(dirname)
        except Exception:
            return None

    def upload_file(self, dir_client, local_path, remote_name, opts=None):
        """Upload *local_path* as *remote_name* under *dir_client*.

        ``opts`` is an optional dict of ``ContentSettings`` keyword
        arguments (the original mutable-default ``opts={}`` is replaced by
        the ``None`` sentinel; behavior for existing callers is unchanged).
        """
        opts = {} if opts is None else opts
        file_client = dir_client.create_file(remote_name)
        # context manager closes the handle (the original leaked it)
        with open(local_path, 'r') as local_file:
            file_contents = local_file.read()
        print('upload_file, opts: {}'.format(opts))
        # https://docs.microsoft.com/en-us/python/api/azure-storage-file-datalake/azure.storage.filedatalake.contentsettings?view=azure-python
        cs = ContentSettings(**opts)
        file_client.upload_data(file_contents, overwrite=True, content_settings=cs)
        print('upload_file; {} -> {}'.format(local_path, remote_name))

    def download_file(self, dir_client, remote_name, local_path):
        """Download *remote_name* from *dir_client* into local *local_path*."""
        file_client = dir_client.get_file_client(remote_name)
        download = file_client.download_file()
        downloaded_bytes = download.readall()
        with open(local_path, 'wb') as local_file:
            local_file.write(downloaded_bytes)
        print('download_file; {} -> {}'.format(remote_name, local_path))

    def datalake_account_url(self):
        """Build the dfs endpoint URL from ``AZURE_ADL_ACCOUNT``."""
        storage_account_name = os.environ['AZURE_ADL_ACCOUNT']
        return "{}://{}.dfs.core.windows.net".format("https", storage_account_name)

    def datalake_account_key(self):
        """Return the storage account key from ``AZURE_ADL_KEY``."""
        return os.environ['AZURE_ADL_KEY']

    def datalake_account_conn_string(self):
        """Return the connection string from ``AZURE_ADL_CONNECTION_STRING``."""
        return os.environ['AZURE_ADL_CONNECTION_STRING']
class AzDataLakeProject(object):
    """A project directory inside an ADLS Gen2 container.

    Credentials are looked up from the Django ``AzStorageConfig`` model by
    (storage_account, container_name); cross-container moves shell out to
    ``azcopy`` (``settings.AZCOPY_COMMAND``).
    """

    def __init__(self, container_url, path):
        # container_url -> (account, filesystem); helper defined elsewhere
        storage_account, file_system_name = get_details_from_container_url(
            container_url)
        self.container_url = container_url
        self.path = path
        self.storage_account = storage_account
        self.file_system_name = file_system_name
        storage_config = AzStorageConfig.objects.get(
            storage_account=storage_account, container_name=file_system_name)
        self.service = DataLakeServiceClient(
            f"https://{storage_account}.dfs.core.windows.net/",
            credential=storage_config.storage_account_key)
        self.directory_client = self.service.get_directory_client(
            file_system_name, path)

    def exists(self):
        """Return True if the project directory exists in the container."""
        return self.directory_client.exists()

    def ensure_parent_directory(self):
        """Create the parent directory of ``self.path`` if it is missing."""
        parent_directory_path = os.path.dirname(self.path)
        directory_client = self.service.get_directory_client(
            self.file_system_name, parent_directory_path)
        if not directory_client.exists():
            directory_client.create_directory()

    def move(self, destination_container_url, destination_path):
        """Move this directory, in-place via rename or cross-container via azcopy."""
        if destination_container_url == self.container_url:
            # Within the same container simply rename the directory
            # (rename target format is "<filesystem>/<new path>")
            filesystem_name = "{}/{}".format(self.file_system_name,
                                             destination_path)
            self.directory_client.rename_directory(filesystem_name)
        else:
            # Copy to the destination
            from_url = "{}/{}".format(self.container_url, self.path)
            to_url = "{}/{}".format(destination_container_url,
                                    destination_path)
            copy_command = [
                settings.AZCOPY_COMMAND, "copy", '--recursive', from_url,
                to_url
            ]
            # check=True: abort (and keep our copy) if azcopy fails
            subprocess.run(copy_command, check=True)
            # Delete our copy
            self.directory_client.delete_directory()

    def get_file_system_client(self):
        """Return a client for this project's file system (container)."""
        return self.service.get_file_system_client(self.file_system_name)

    def get_paths(self):
        """Return the (recursive) path listing under the project directory."""
        file_system_client = self.get_file_system_client()
        return file_system_client.get_paths(self.path)

    def get_file_manifest(self):
        """Build a manifest of everything under the project path.

        Directories are appended as the raw path metadata objects; files are
        appended as property dicts with the (non-serializable) lease removed
        and content settings reduced to content_type + hex content_md5.
        """
        file_system_client = self.get_file_system_client()
        results = []
        for file_metadata in file_system_client.get_paths(self.path):
            if file_metadata.is_directory:
                results.append(file_metadata)
            else:
                file_client = file_system_client.get_file_client(
                    file_metadata.name)
                file_properties = dict(file_client.get_file_properties())
                del file_properties['lease']
                # Fix nested content settings
                content_settings = file_properties["content_settings"]
                # NOTE(review): assumes content_md5 is always set (bytes);
                # a file stored without an MD5 would raise here — confirm.
                file_properties["content_settings"] = {
                    "content_type": content_settings["content_type"],
                    "content_md5": content_settings["content_md5"].hex(),
                }
                results.append(file_properties)
        return results

    def add_download_user(self, azure_user_id):
        """Grant *azure_user_id* recursive r-x access to the project directory."""
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        acl = make_acl(azure_user_id, permissions='r-x')
        directory_client = file_system_client.get_directory_client(self.path)
        directory_client.update_access_control_recursive(acl=acl)

    def set_owner(self, azure_user_id):
        """Set *azure_user_id* as owner of the project path and everything below it."""
        file_paths = [self.path]
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        for file_metadata in file_system_client.get_paths(self.path):
            file_paths.append(file_metadata.name)
        for file_path in file_paths:
            file_client = file_system_client.get_file_client(file_path)
            file_client.set_access_control(owner=azure_user_id)
def run():
    """Provision a per-user ADLS Gen2 directory and a matching PBIDS file.

    Steps: locate an admin-created filesystem, create a directory named after
    ``userName`` (with metadata), set POSIX ACLs for that user, upload every
    file found in ``sourceDir``, then write ``<userName>.PBIDS`` — a Power BI
    data-source file pointing at the uploaded folder.

    Fill in the account/filesystem/user/source constants below before use.
    """
    account_name = ""
    account_key = ""
    account_url = "{}://{}.dfs.core.windows.net".format("https", account_name)
    fs_name = ""
    userName = ""
    sourceDir = ""
    service_client = DataLakeServiceClient(account_url, credential=account_key)
    print()
    print()

    # Using existing file system (Admin created)
    print(" - Finding a filesystem named '{}'.".format(fs_name))
    filesystem_client = service_client.get_file_system_client(
        file_system=fs_name)

    # Creating a folder based on user name
    print(" - Creating a directory named '{}'.".format(userName))
    directory_client = filesystem_client.create_directory(
        userName,
        content_settings=None,
        metadata={
            'Source': 'rail_data',
            'sourceUrl': 'http://127.0.0.1?13456789'
        })

    # Set permissions on folder for XID
    acl = "user::rwx,user:{}@company.com:rwx,group::r-x,mask::rwx,other::---,default:user::rwx,default:user:{}@company.com:rwx,default:group::r-x,default:mask::rwx,default:other::---".format(
        userName, userName)
    print(" - Setting permissions on named '{}'.".format(userName))
    directory_client.set_access_control(owner=None,
                                        group=None,
                                        permissions=None,
                                        acl=acl)

    # uploading all files in a directory
    print(" - Uploading all files in directory")
    for file_name in os.listdir(sourceDir):
        print(" - Opening a file named '{}'.".format(file_name))
        # FIX: read as bytes inside a context manager.  The original opened
        # in text mode and never closed the handle; in text mode len(data)
        # counts characters, not bytes, so append_data/flush_data could
        # commit the wrong length for non-ASCII files.  os.path.join also
        # replaces the Windows-only "{}\\{}" separator.
        with open(os.path.join(sourceDir, file_name), "rb") as source_file:
            data = source_file.read()
        print(" - Uploading a file named '{}'.".format(file_name))
        file_client = directory_client.create_file(
            file_name,
            content_settings=None,
            metadata={'SourceFileName': file_name})
        file_client.append_data(data, offset=0, length=len(data))
        file_client.flush_data(len(data))
        print(" - Finished uploading '{}'.".format(file_name))
    print(" - Finished uploading all files in directory")

    # create a PBIDS file
    data = {"version": "0.1"}
    data['connections'] = []
    data['connections'].append({
        'details': {
            'protocol': 'azure-data-lake-storage',
            "address": {
                'server': '',
                "path": ''
            }
        },
        "options": {},
        "mode": "Import"
    })
    data['connections'][0]['details']['address'].update(
        {'server': account_url})
    data['connections'][0]['details']['address'].update(
        {'path': "/{}/{}".format(fs_name, userName)})
    print(" - Creating PBIDS file: '{}'.".format(userName + '.PBIDS'))
    with open(userName + '.PBIDS', 'w') as outfile:
        json.dump(data, outfile)
# NOTE(review): this `return` is the tail of a helper (presumably
# ``getADLSfile(filesystem_client, path)``) whose ``def`` line is outside
# this chunk; it returns the requested file's raw bytes.
    return file_client.read_file()


# ---- script: read a CDM entity's CSV out of a Power BI dataflows container ----
# Set URL and Entity(table) in PBI dataflows
account_url = "https://{}.dfs.core.windows.net/".format(
    Credentials.accountName)
powerbi_url = "https://{}.dfs.core.windows.net/{}".format(
    Credentials.accountName, Credentials.dataflowContainer)
entity_name = "AmesHousingData"

# Make connection to Data Lake
datalake_service = DataLakeServiceClient(account_url=account_url,
                                         credential=Credentials.credential)

# Make a client to Power BI dataflows blob
filesystem_client = datalake_service.get_file_system_client(
    Credentials.dataflowContainer)

# Read CDM Model definition from model.json file for CDM folder
# Location is '<Workspace>/<Dataflow Name>/model.json'
cdm_model_file = 'Ames Housing/Housing Data/model.json'
cdm_model_json = getADLSfile(filesystem_client, cdm_model_file).decode('utf-8')
cdm_model = CdmModel.Model.fromJson(cdm_model_json)

# Set name of Entity (table) you want to read
ames_housing_entitiy = cdm_model.entities[entity_name]

# Get path to CSV file for Entity(table)
# The partition location is a full URL; strip the container prefix so the
# remainder is a path relative to the filesystem client.
csv_path = ames_housing_entitiy.partitions[0].location
csv_path = urllib.parse.unquote(csv_path).replace(powerbi_url, '')
csv_bytes = getADLSfile(filesystem_client, csv_path)
import os
import pyodbc
import pandas as pd
import io
import sys
from azure.storage.filedatalake import DataLakeServiceClient, DelimitedTextDialect

# ---- script: pull tpc-h/nation.csv from ADLS Gen2, filter with pandas, ----
# ---- and prepare an output file for the Excel result                   ----
storage_account_name = "mgrhdstddl2"
storage_account_key = os.environ['storage_account_key']
container_name = "khd-datalake"
directory_name = "tpc-h"

service_client = DataLakeServiceClient(account_url="{}://{}.dfs.core.windows.net".format(
    "https", storage_account_name), credential=storage_account_key)
file_system_client = service_client.get_file_system_client(file_system=container_name)

# create_directory() is idempotent-by-intent here: ensures tpc-h exists
dir_client = file_system_client.get_directory_client(directory_name)
dir_client.create_directory()

# download nation.csv into an in-memory buffer and parse it ('|' delimited)
file_client = dir_client.get_file_client("nation.csv")
csv_stream = io.BytesIO()
file_client.download_file().readinto(csv_stream)
csv_stream.seek(0)  # rewind before pandas reads the buffer
df5 = pd.read_csv(csv_stream, delimiter='|', header='infer')

# NOTE(review): DataFrame.where keeps the frame's shape and sets every
# non-BRAZIL row to NaN; if the intent was to *drop* non-matching rows,
# boolean indexing (df5[df5['N_NAME'] == 'BRAZIL']) would be needed — confirm.
df5.where(df5['N_NAME'] == 'BRAZIL', inplace = True)

# prepare the output location for the Excel export
excel_stream = io.BytesIO()
dir_client_save = file_system_client.get_directory_client("test-python-output")
dir_client_save.create_directory()
file_client_save = dir_client_save.create_file("Brazil_description.xlsx")
class DirectoryTest(StorageTestCase):
    """Recorded (record/playback) tests for ADLS Gen2 directory operations:
    create/delete, sub-directories, access control, rename, and directory SAS.
    """

    def setUp(self):
        """Create the service client and, when running live, the test filesystem."""
        super(DirectoryTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        """Best-effort cleanup of every filesystem when running live."""
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
                for file_system in self.dsc.list_file_systems():
                    self.dsc.delete_file_system(file_system.name)
            except:
                pass
        return super(DirectoryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        """Return a fresh (recording-stable) directory name."""
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _create_directory_and_get_directory_client(self, directory_name=None):
        """Create a directory (auto-named unless given) and return its client."""
        directory_name = directory_name if directory_name else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    # --Helpers-----------------------------------------------------------------
    @record
    def test_create_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)
        # Assert
        self.assertTrue(created)

    @record
    def test_using_oauth_token_credential_to_create_directory(self):
        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token_credential = self.generate_oauth_token()
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token_credential)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)

    @record
    def test_create_directory_with_match_conditions(self):
        # Arrange
        directory_name = self._get_directory_reference()
        # Act: IfMissing means "only create when it does not already exist"
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            match_condition=MatchConditions.IfMissing)
        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_permission(self):
        # Arrange
        directory_name = self._get_directory_reference()
        # Act: umask "0000" means the requested permissions apply unmasked
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(permissions="rwxr--r--",
                                                    umask="0000")
        prop = directory_client.get_access_control()
        # Assert
        self.assertTrue(created)
        self.assertEqual(prop['permissions'], 'rwxr--r--')

    @record
    def test_create_directory_with_content_settings(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)
        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_metadata(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(metadata=metadata)
        properties = directory_client.get_directory_properties()
        # Assert
        self.assertTrue(created)

    @record
    def test_delete_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)
        response = directory_client.delete_directory()
        # Assert
        self.assertIsNone(response)

    @record
    def test_delete_directory_with_if_modified_since(self):
        # Arrange: if_modified_since equal to last_modified must be rejected
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        prop = directory_client.get_directory_properties()
        with self.assertRaises(ResourceModifiedError):
            directory_client.delete_directory(
                if_modified_since=prop['last_modified'])

    @record
    def test_create_sub_directory_and_delete_sub_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        # Create a directory first, to prepare for creating sub directory
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)
        # Create sub directory from the current directory
        sub_directory_name = 'subdir'
        sub_directory_created = directory_client.create_sub_directory(
            sub_directory_name)
        # to make sure the sub directory was indeed created by get sub_directory properties from sub directory client
        sub_directory_client = self.dsc.get_directory_client(
            self.file_system_name,
            directory_name + '/' + sub_directory_name)
        sub_properties = sub_directory_client.get_directory_properties()
        # Assert
        self.assertTrue(sub_directory_created)
        self.assertTrue(sub_properties)
        # Act
        directory_client.delete_sub_directory(sub_directory_name)
        with self.assertRaises(ResourceNotFoundError):
            sub_directory_client.get_directory_properties()

    @record
    def test_set_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)
        response = directory_client.set_access_control(permissions='0777')
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_acl(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)
        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()
        # Assert: the ACL we set round-trips unchanged
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @record
    def test_set_access_control_if_none_modified(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory()
        # conditional on the etag from creation (directory unmodified since)
        response = directory_client.set_access_control(
            permissions='0777',
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata,
                                          permissions='0777')
        # Act
        response = directory_client.get_access_control()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_match_conditions(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory(permissions='0777',
                                                 umask='0000')
        # Act
        response = directory_client.get_access_control(
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)
        # Assert: octal 0777 is reported back in symbolic form
        self.assertIsNotNone(response)
        self.assertEquals(response['permissions'], 'rwxrwxrwx')

    @record
    def test_rename_from(self):
        """Rename via the destination client's internal _rename_path."""
        metadata = {'hello': 'world', 'number': '42'}
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        new_name = "newname"
        new_directory_client = self.dsc.get_directory_client(
            self.file_system_name, new_name)
        # source path format: "/<filesystem>/<directory>"
        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name,
                                          metadata=metadata)
        properties = new_directory_client.get_directory_properties()
        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_shorter_directory_to_longer_directory(self):
        # TODO: investigate why rename shorter path to a longer one does not work
        pytest.skip("")
        directory_name = self._get_directory_reference()
        self._create_directory_and_get_directory_client(directory_name="old")
        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")
        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name)
        properties = new_directory_client.get_directory_properties()
        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        old_file_system_name = "oldfilesystem"
        old_dir_name = "olddir"
        old_client = self.dsc.get_file_system_client(old_file_system_name)
        old_client.create_file_system()
        old_client.create_directory(old_dir_name)
        # create a dir2 under file system2
        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")
        # rename dir1 under file system1 to dir2 under file system2
        new_directory_client._rename_path('/' + old_file_system_name + '/' +
                                          old_dir_name)
        properties = new_directory_client.get_directory_properties()
        self.assertIsNotNone(properties)
        self.dsc.delete_file_system(old_file_system_name)

    @record
    def test_rename_to_an_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = "destfilesystem"
        destination_dir_name = "destdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        destination_directory_client = fs_client.create_directory(
            destination_dir_name)
        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")
        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + destination_dir_name)
        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()
        self.assertEquals(res.url, destination_directory_client.url)

    @record
    def test_rename_with_none_existing_destination_condition_and_source_unmodified_condition(
            self):
        non_existing_dir_name = "nonexistingdir"
        # create a file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")
        # rename dir2 under file system2 to a non existing directory under file system1,
        # when dir1 does not exist and dir2 wasn't modified
        etag = source_directory_client.get_directory_properties()['etag']
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name,
            match_condition=MatchConditions.IfMissing,
            source_etag=etag,
            source_match_condition=MatchConditions.IfNotModified)
        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()
        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_to_an_non_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        non_existing_dir_name = "nonexistingdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")
        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name)
        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()
        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_directory_to_non_empty_directory(self):
        # TODO: investigate why rename non empty dir doesn't work
        pytest.skip("")
        dir1 = self._create_directory_and_get_directory_client("dir1")
        dir1.create_sub_directory("subdir")
        dir2 = self._create_directory_and_get_directory_client("dir2")
        dir2.rename_directory(dir1.file_system_name + '/' + dir1.path_name)
        with self.assertRaises(HttpResponseError):
            dir2.get_directory_properties()

    @record
    def test_get_properties(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)
        properties = directory_client.get_directory_properties()
        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.metadata)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])

    @record
    def test_using_directory_sas_to_read(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        client = self._create_directory_and_get_directory_client()
        directory_name = client.path_name
        # generate a token with directory level read permission
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        access_control = directory_client.get_access_control()
        self.assertIsNotNone(access_control)

    @record
    def test_using_directory_sas_to_create(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
class DataLakeG2:
    """Small helper around ADLS Gen2 that tracks the directories it creates.

    ``dict_of_directory`` maps directory name -> directory client;
    ``dict_inh`` maps directory name -> list of its sub-directory names.
    """
    file_system_client: FileSystemClient
    current_directory: DataLakeDirectoryClient
    directory: DataLakeDirectoryClient
    dict_of_directory: dict
    dict_inh: dict

    # NOTE: configuring logging at class-definition time is a module-import
    # side effect; kept as-is for backward compatibility.
    logging.basicConfig(format='%(levelname)s - %(asctime)s - %(message)s',
                        datefmt='%d-%b-%y %H:%M:%S',
                        level=logging.INFO,
                        filename='DataLakeG2.log',
                        filemode='w')

    def __init__(self,
                 connection_string=os.getenv("AZURE_DT_2"),
                 container_name_="container06",
                 file_name_=""):
        """Build the service client from STORAGE_ACCOUNT_NAME/KEY env vars.

        FIX: ``file_name_`` is a new keyword parameter (default "") — the
        original body assigned ``self.file_name = file_name_`` where
        ``file_name_`` was undefined, so every instantiation raised
        NameError.  Existing callers are unaffected by the added default.
        """
        account_name = os.getenv('STORAGE_ACCOUNT_NAME', "")
        account_key = os.getenv('STORAGE_ACCOUNT_KEY', "")
        # set up the service client with the credentials from the environment variables
        self.service_client = DataLakeServiceClient(
            account_url="{}://{}.dfs.core.windows.net".format(
                "https", account_name),
            credential=account_key)
        self.file_system_name = container_name_
        self.file_name = file_name_
        self.dict_inh = {}
        self.dict_of_directory = {}

    @logging_name_function
    def create_file_system(self):
        """ Create file system(Container) """
        try:
            self.file_system_client = self.service_client.create_file_system(
                file_system=self.file_system_name)
            logging.info("Create_file_system - DONE")
        except Exception as ex:
            logging.error("Exception occurred in create_file_system",
                          exc_info=True)
            # fall back to a client for the (already existing) container
            self.file_system_client = self.service_client.get_file_system_client(
                file_system=self.file_system_name)

    @logging_name_function
    def create_directory(self, name_directory):
        """Create a top-level directory and register it in the bookkeeping dicts."""
        try:
            directory = self.file_system_client.create_directory(
                name_directory)
            self.dict_of_directory[name_directory] = directory
            self.dict_inh[name_directory] = []
        except Exception as ex:
            logging.error("Exception occurred in create_directory",
                          exc_info=True)

    @logging_name_function
    def create_subdirectory(self, name_directory, name_subdirectory):
        """Create *name_subdirectory* under the registered *name_directory*."""
        try:
            self.dict_of_directory[name_subdirectory] = self.dict_of_directory[name_directory].\
                create_sub_directory(name_subdirectory)
            logging.info('get_sub_directory client DONE')
            self.dict_inh[name_directory].append(name_subdirectory)
            self.dict_inh[name_subdirectory] = []
        except Exception as ex:
            logging.error("Exception occurred in create_subdirectory",
                          exc_info=True)

    @logging_name_function
    def upload_file_to_the_directory(self, file_name, directory_name):
        """Upload local file *file_name* into the registered *directory_name*."""
        try:
            file_client = self.dict_of_directory[directory_name].create_file(
                file_name)
            # context manager closes the handle (the original leaked it)
            with open(file_name, 'rb') as local_file:
                file_contents = local_file.read()
            file_client.append_data(data=file_contents,
                                    offset=0,
                                    length=len(file_contents))
            file_client.flush_data(len(file_contents))
        except Exception as ex:
            logging.error("Exception occurred in upload_file_to_the_directory",
                          exc_info=True)

    @logging_name_function
    def show_directory(self):
        """Print the directory bookkeeping dicts."""
        print(f'All directory: {self.dict_of_directory}')
        print(f'Subdirectory {self.dict_inh}')
class FileSystemTest(StorageTestCase):
    """Live-service tests for DataLake Gen2 file-system (container) operations.

    Each test records created file-system names so tearDown can delete them
    when running against a live account (playback runs skip cleanup).
    """

    def setUp(self):
        super(FileSystemTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        # Names of file systems created by the test, for tearDown cleanup.
        self.test_file_systems = []

    def tearDown(self):
        # Cleanup is best-effort and live-only; failures are swallowed so a
        # broken delete never masks the real test outcome.
        if not self.is_playback():
            try:
                for file_system in self.test_file_systems:
                    self.dsc.delete_file_system(file_system)
            except:
                pass
        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a unique file-system name and remember it for cleanup."""
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create (and register for cleanup) a fresh file system client."""
        return self.dsc.create_file_system(
            self._get_file_system_reference(prefix=file_system_prefix))

    # --Helpers-----------------------------------------------------------------
    @record
    def test_create_file_system(self):
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()

        # Assert
        self.assertTrue(created)

    @record
    def test_create_file_system_with_metadata(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system(metadata=metadata)

        # Assert: metadata round-trips through the service unchanged.
        meta = file_system_client.get_file_system_properties().metadata
        self.assertTrue(created)
        self.assertDictEqual(meta, metadata)

    @record
    def test_set_file_system_acl(self):
        # Act
        file_system = self._create_file_system()
        access_policy = AccessPolicy(
            permission=FileSystemSasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            start=datetime.utcnow())
        signed_identifier1 = {'testid': access_policy}
        response = file_system.set_file_system_access_policy(
            signed_identifier1, public_access=PublicAccess.FileSystem)
        self.assertIsNotNone(response.get('etag'))
        self.assertIsNotNone(response.get('last_modified'))
        acl1 = file_system.get_file_system_access_policy()
        self.assertIsNotNone(acl1['public_access'])
        self.assertEqual(len(acl1['signed_identifiers']), 1)

        # If set signed identifier without specifying the access policy then it will be default to None
        signed_identifier2 = {'testid': access_policy, 'test2': access_policy}
        file_system.set_file_system_access_policy(signed_identifier2)
        acl2 = file_system.get_file_system_access_policy()
        self.assertIsNone(acl2['public_access'])
        self.assertEqual(len(acl2['signed_identifiers']), 2)

    # NOTE(review): trailing double-s in the name looks like a typo; kept
    # unchanged because the name is tied to the recorded session file.
    @record
    def test_list_file_systemss(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)

        # Act
        file_systems = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @record
    def test_delete_file_system_with_existing_file_system(self):
        # Arrange
        file_system = self._create_file_system()

        # Act
        deleted = file_system.delete_file_system()

        # Assert: delete returns None on success.
        self.assertIsNone(deleted)

    @record
    def test_delete_none_existing_file_system(self):
        fake_file_system_client = self.dsc.get_file_system_client("fakeclient")

        # Act: IfMissing precondition against a nonexistent container raises.
        with self.assertRaises(ResourceNotFoundError):
            fake_file_system_client.delete_file_system(
                match_condition=MatchConditions.IfMissing)

    @record
    def test_list_file_systems_with_include_metadata(self):
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @record
    def test_list_file_systems_by_page(self):
        # Arrange: six containers so a 3-per-page listing is meaningful.
        for i in range(0, 6):
            self._create_file_system(
                file_system_prefix="filesystem{}".format(i))

        # Act: take only the first page.
        file_systems = list(
            next(
                self.dsc.list_file_systems(results_per_page=3,
                                           name_starts_with="file",
                                           include_metadata=True).by_page()))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @record
    def test_list_file_systems_with_public_access(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        # Blob-level public access maps to PublicAccess.File in the listing.
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(
            self.dsc.list_file_systems(
                name_starts_with=file_system.file_system_name,
                include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        self.assertTrue(file_systems[0].public_access is PublicAccess.File)

    @record
    def test_get_file_system_properties(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)

        # Act
        props = file_system.get_file_system_properties()

        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @record
    def test_list_paths(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_which_are_all_files(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_file("file{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_with_max_per_page(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))

        # Resume listing from the first page's continuation token.
        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))

        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @record
    def test_list_paths_under_specific_path(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)

        # Act: list only under dir10/subdir (its "subsub" dir and "file").
        generator1 = file_system.get_paths(path="dir10/subdir",
                                           max_results=2,
                                           upn=True).by_page()
        paths = list(next(generator1))

        self.assertEqual(len(paths), 2)
        self.assertEqual(paths[0].content_length, 5)

    @record
    def test_list_paths_recursively(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            subdir.create_file("file")

        paths = list(file_system.get_paths(recursive=True, upn=True))

        # there are 24 subpaths in total
        self.assertEqual(len(paths), 24)

    @record
    def test_create_directory_from_file_system_client(self):
        # Arrange
        file_system = self._create_file_system()
        file_system.create_directory("dir1/dir2")

        # Non-recursive listing shows only the top-level directory.
        paths = list(file_system.get_paths(recursive=False, upn=True))

        self.assertEqual(len(paths), 1)
        self.assertEqual(paths[0].name, "dir1")

    @record
    def test_create_file_from_file_system_client(self):
        # Arrange
        file_system = self._create_file_system()
        file_system.create_file("dir1/dir2/file")

        # Recursive listing shows dir1, dir1/dir2 and the file itself.
        paths = list(file_system.get_paths(recursive=True, upn=True))

        self.assertEqual(len(paths), 3)
        self.assertEqual(paths[0].name, "dir1")
        self.assertEqual(paths[2].is_directory, False)
# Create aad group
group = GroupCreateParameters(display_name=group_name,
                              mail_nickname="GroupMail-at-microsoft.com")
graphrbac_client.groups.create(group)

# Change permissions of the bash script used to retrieve aad group ID
os.chmod('./script.sh', 0o755)
rc = subprocess.call("./script.sh")

# Retrieve the aad group ID from file.
# BUG FIX: the original left the file handle open and kept readline()'s
# trailing newline, which would have been interpolated into the ACL string
# below ("default:group:<id>\n:r-x") and corrupted it.
with open("groupid.txt") as f:
    group_id = f.readline().strip()

# Create storage account credentials
storage_creds = ClientSecretCredential(tenant_id=tenant_id,
                                       client_id=client_id,
                                       client_secret=client_secret)

# Perform the data lake tasks: create a directory named after the group and
# grant that group default read+execute access on it.
dl_service_client = DataLakeServiceClient(
    account_url="https://bluesofttaskdd.dfs.core.windows.net/",
    credential=storage_creds)
file_system_client = dl_service_client.get_file_system_client(
    file_system='file-system')
file_system_client.create_directory(group_name)
directory_client = file_system_client.get_directory_client(group_name)
directory_client.set_access_control(acl=f"default:group:{group_id}:r-x")
class LargeFileTest(StorageTestCase):
    """Tests large append/upload paths without real network payloads.

    A PayloadDroppingPolicy is injected into the client pipeline so the
    (huge) request bodies are discarded while their sizes/counts are still
    recorded for assertion.
    """

    def _setUp(self, account_name, account_key):
        """Build the service client with the payload-dropping pipeline policy."""
        url = self.account_url(account_name, 'dfs')
        self.payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            account_name, account_key)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True,
                                         _additional_pipeline_policies=[
                                             self.payload_dropping_policy,
                                             credential_policy
                                         ])
        self.config = self.dsc._config
        self.file_system_name = self.get_resource_name('filesystem')
        # Live runs need the container to exist; tolerate reruns.
        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        # Best-effort live-only cleanup; never fail the test run on cleanup.
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass
        return super(LargeFileTest, self).tearDown()

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_append_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """A single append of LARGEST_BLOCK_SIZE is sent as exactly one request."""
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        data = LargeStream(LARGEST_BLOCK_SIZE)

        # Act
        response = file_client.append_data(data, 0, LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 1)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        """upload_data of 2x LARGEST_BLOCK_SIZE chunks into exactly two appends."""
        pytest.skip(
            "Pypy3 on Linux failed somehow, skip for now to investigate")
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        length = 2 * LARGEST_BLOCK_SIZE
        data = LargeStream(length)

        # Act
        response = file_client.upload_data(data,
                                           length,
                                           overwrite=True,
                                           chunk_size=LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 2)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)
        self.assertEqual(self.payload_dropping_policy.append_sizes[1],
                         LARGEST_BLOCK_SIZE)
class FileSystemTest(StorageTestCase):
    """Live-service tests for DataLake Gen2 file-system operations.

    NOTE(review): this appears to be a near-duplicate of an earlier
    FileSystemTest class in this file — confirm whether one copy can be
    removed. Created file systems are tracked for live-only tearDown cleanup.
    """

    def setUp(self):
        super(FileSystemTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config
        # Names of file systems created by the test, for tearDown cleanup.
        self.test_file_systems = []

    def tearDown(self):
        # Best-effort live-only cleanup; failures are deliberately swallowed.
        if not self.is_playback():
            try:
                for file_system in self.test_file_systems:
                    self.dsc.delete_file_system(file_system)
            except:
                pass
        return super(FileSystemTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_file_system_reference(self, prefix=TEST_FILE_SYSTEM_PREFIX):
        """Return a unique file-system name and remember it for cleanup."""
        file_system_name = self.get_resource_name(prefix)
        self.test_file_systems.append(file_system_name)
        return file_system_name

    def _create_file_system(self, file_system_prefix=TEST_FILE_SYSTEM_PREFIX):
        """Create (and register for cleanup) a fresh file system client."""
        return self.dsc.create_file_system(
            self._get_file_system_reference(prefix=file_system_prefix))

    # --Helpers-----------------------------------------------------------------
    @record
    def test_create_file_system(self):
        # Arrange
        file_system_name = self._get_file_system_reference()

        # Act
        file_system_client = self.dsc.get_file_system_client(file_system_name)
        created = file_system_client.create_file_system()

        # Assert
        self.assertTrue(created)

    # NOTE(review): trailing double-s looks like a typo; name kept because it
    # is tied to the recorded session file.
    @record
    def test_list_file_systemss(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.create_file_system(file_system_name)

        # Act
        file_systems = list(self.dsc.list_file_systems())

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertIsNotNone(file_systems[0].has_immutability_policy)
        self.assertIsNotNone(file_systems[0].has_legal_hold)

    @record
    def test_delete_file_system_with_existing_file_system(self):
        # Arrange
        file_system = self._create_file_system()

        # Act
        deleted = file_system.delete_file_system()

        # Assert: delete returns None on success.
        self.assertIsNone(deleted)

    @record
    def test_list_file_systems_with_include_metadata(self):
        # Arrange
        file_system = self._create_file_system()
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)

    @record
    def test_list_file_systems_by_page(self):
        # Arrange: six containers so a 3-per-page listing is meaningful.
        for i in range(0, 6):
            self._create_file_system(
                file_system_prefix="filesystem{}".format(i))

        # Act: take only the first page.
        file_systems = list(next(self.dsc.list_file_systems(
            results_per_page=3, name_starts_with="file",
            include_metadata=True).by_page()))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 3)

    @record
    def test_list_file_systems_with_public_access(self):
        # Arrange
        file_system_name = self._get_file_system_reference()
        file_system = self.dsc.get_file_system_client(file_system_name)
        # Blob-level public access maps to PublicAccess.File in the listing.
        file_system.create_file_system(public_access="blob")
        metadata = {'hello': 'world', 'number': '42'}
        resp = file_system.set_file_system_metadata(metadata)

        # Act
        file_systems = list(self.dsc.list_file_systems(
            name_starts_with=file_system.file_system_name,
            include_metadata=True))

        # Assert
        self.assertIsNotNone(file_systems)
        self.assertGreaterEqual(len(file_systems), 1)
        self.assertIsNotNone(file_systems[0])
        self.assertNamedItemInContainer(file_systems,
                                        file_system.file_system_name)
        self.assertDictEqual(file_systems[0].metadata, metadata)
        self.assertTrue(file_systems[0].public_access is PublicAccess.File)

    @record
    def test_get_file_system_properties(self):
        # Arrange
        metadata = {'hello': 'world', 'number': '42'}
        file_system = self._create_file_system()
        file_system.set_file_system_metadata(metadata)

        # Act
        props = file_system.get_file_system_properties()

        # Assert
        self.assertIsNotNone(props)
        self.assertDictEqual(props.metadata, metadata)
        self.assertIsNotNone(props.has_immutability_policy)
        self.assertIsNotNone(props.has_legal_hold)

    @record
    def test_list_paths(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        paths = list(file_system.get_paths(upn=True))

        self.assertEqual(len(paths), 6)

    @record
    def test_list_paths_with_max_per_page(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

        generator1 = file_system.get_paths(max_results=2, upn=True).by_page()
        paths1 = list(next(generator1))

        # Resume listing from the first page's continuation token.
        generator2 = file_system.get_paths(max_results=4, upn=True)\
            .by_page(continuation_token=generator1.continuation_token)
        paths2 = list(next(generator2))

        self.assertEqual(len(paths1), 2)
        self.assertEqual(len(paths2), 4)

    @record
    def test_list_paths_under_specific_path(self):
        # Arrange
        file_system = self._create_file_system()
        for i in range(0, 6):
            file_system.create_directory("dir1{}".format(i))

            # create a subdirectory under the current directory
            subdir = file_system.get_directory_client(
                "dir1{}".format(i)).create_sub_directory("subdir")
            subdir.create_sub_directory("subsub")

            # create a file under the current directory
            file_client = subdir.create_file("file")
            file_client.append_data(b"abced", 0, 5)
            file_client.flush_data(5)

        # Act: list only under dir10/subdir (its "subsub" dir and "file").
        generator1 = file_system.get_paths(path="dir10/subdir",
                                           max_results=2, upn=True).by_page()
        paths = list(next(generator1))

        self.assertEqual(len(paths), 2)
        self.assertEqual(paths[0].content_length, 5)