示例#1
0
    def __init__(self, datalake_dir, credential=None):
        datalake = list(filter(None, re.split('/|@', datalake_dir)))
        assert len(datalake) >= 3

        if credential is None:
            credential = DefaultAzureCredential()

        account_url = "https://" + datalake[2]
        datalake_client = DataLakeServiceClient(
            credential=credential,
            account_url=account_url).get_file_system_client(datalake[1])

        if len(datalake) > 3:
            datalake_client = datalake_client.get_directory_client('/'.join(
                datalake[3:]))
            datalake_client.create_directory()

        self.datalake_dir = datalake_dir + '/' if datalake_dir[
            -1] != '/' else datalake_dir
        self.dir_client = datalake_client
示例#2
0
class FileTest(StorageTestCase):
    def _setUp(self, account_name, account_key):
        url = self._get_account_url(account_name)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        directory_name = directory if directory else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @DataLakePreparer()
    def test_create_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_file_exists(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()

        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client1 = directory_client.get_file_client('filename')
        file_client2 = directory_client.get_file_client('nonexistentfile')
        file_client1.create_file()

        self.assertTrue(file_client1.exists())
        self.assertFalse(file_client2.exists())

    @DataLakePreparer()
    def test_create_file_using_oauth_token_credential(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_create_file_with_existing_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_create_file_with_lease_id(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @DataLakePreparer()
    def test_create_file_under_root_directory(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_data(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_append_empty_data(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        # Act
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        self.assertIsNotNone(file_props['size'], 0)

    @DataLakePreparer()
    def test_flush_data(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @DataLakePreparer()
    def test_flush_data_with_match_condition(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(
            3,
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(
                6,
                etag=resp['etag'],
                match_condition=MatchConditions.IfNotModified)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_to_none_existing_file(self,
                                               datalake_storage_account_name,
                                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded

        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)
        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_data_in_substreams(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # parallel upload cannot be recorded
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Get 16MB data
        data = self.get_random_bytes(16 * 1024 * 1024)
        # Ensure chunk size is greater than threshold (8MB > 4MB) - for optimized upload
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True,
                                max_concurrency=3)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

        # Run on single thread
        file_client.upload_data(data,
                                chunk_size=8 * 1024 * 1024,
                                overwrite=True)
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file
        data = self.get_random_bytes(100)
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_content_settings(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        file_client.upload_data(data,
                                max_concurrency=5,
                                content_settings=content_settings,
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_upload_data_to_existing_file_with_permission_and_umask(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @DataLakePreparer()
    def test_read_file(self, datalake_storage_account_name,
                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_read_file_with_user_delegation_key(self,
                                                datalake_storage_account_name,
                                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential,
            logging_enable=True)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # doanload the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token,
            logging_enable=True)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_set_acl_with_user_delegation_key(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(execute=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # doanload the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)
        acl = 'user::rwx,group::r-x,other::rwx'
        owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
        new_file_client.set_access_control(acl=acl, owner=owner)
        access_control = new_file_client.get_access_control()
        self.assertEqual(acl, access_control['acl'])
        self.assertEqual(owner, access_control['owner'])

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_preauthorize_user_with_user_delegation_key(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))
        file_client.set_access_control(
            owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777')
        acl = file_client.get_access_control()

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          write=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc"
        )

        # doanload the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)

        acl = new_file_client.set_access_control(permissions='0777')
        self.assertIsNotNone(acl)

    @DataLakePreparer()
    def test_read_file_into_file(self, datalake_storage_account_name,
                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @DataLakePreparer()
    def test_read_file_to_text(self, datalake_storage_account_name,
                               datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(
            max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_account_sas(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        for credential in [token, AzureSasCredential(token)]:
            # read the created file which is under root directory
            file_client = DataLakeFileClient(self.dsc.url,
                                             self.file_system_name,
                                             file_name,
                                             credential=credential)
            properties = file_client.get_file_properties()

            # make sure we can read the file properties
            self.assertIsNotNone(properties)

            # try to write to the created file with the token
            with self.assertRaises(HttpResponseError):
                file_client.append_data(b"abcd", 0, 4)

    @DataLakePreparer()
    def test_account_sas_raises_if_sas_already_in_uri(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        with self.assertRaises(ValueError):
            DataLakeFileClient(self.dsc.url + "?sig=foo",
                               self.file_system_name,
                               "foo",
                               credential=AzureSasCredential("?foo=bar"))

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_file_sas_only_applies_to_file_level(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @DataLakePreparer()
    def test_delete_file(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_delete_file_with_if_unmodified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @DataLakePreparer()
    def test_set_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_with_match_conditions(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(
                permissions='0777', match_condition=MatchConditions.IfMissing)

    @DataLakePreparer()
    def test_get_access_control(self, datalake_storage_account_name,
                                datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_get_access_control_with_if_modified_since(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @DataLakePreparer()
    def test_set_access_control_recursive(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.set_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_update_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = 'user::rwx,group::r-x,other::rwx'
        file_client = self._create_file_and_return_client()

        summary = file_client.update_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)
        access_control = file_client.get_access_control()
        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @DataLakePreparer()
    def test_remove_access_control_recursive(self,
                                             datalake_storage_account_name,
                                             datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        acl = "mask," + "default:user,default:group," + \
             "user:ec3595d6-2c17-4696-8caa-7e139758d24a,group:ec3595d6-2c17-4696-8caa-7e139758d24a," + \
             "default:user:ec3595d6-2c17-4696-8caa-7e139758d24a,default:group:ec3595d6-2c17-4696-8caa-7e139758d24a"
        file_client = self._create_file_and_return_client()
        summary = file_client.remove_access_control_recursive(acl=acl)

        # Assert
        self.assertEqual(summary.counters.directories_successful, 0)
        self.assertEqual(summary.counters.files_successful, 1)
        self.assertEqual(summary.counters.failure_count, 0)

    @DataLakePreparer()
    def test_get_properties(self, datalake_storage_account_name,
                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @DataLakePreparer()
    def test_set_expiry(self, datalake_storage_account_name,
                        datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        expires_on = datetime.utcnow() + timedelta(hours=1)
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.set_file_expiry("Absolute", expires_on=expires_on)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.expiry_time)

    @DataLakePreparer()
    def test_rename_file_with_non_used_name(self,
                                            datalake_storage_account_name,
                                            datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_system_sas(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # sas token is calculated from storage key, so live only
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_rename_file_with_file_sas(self, datalake_storage_account_name,
                                       datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "oldfile",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        new_token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            None,
            "newname",
            datalake_storage_account_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname' + '?' + new_token)

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_with_account_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        pytest.skip("service bug")
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            AccountSasPermissions(write=True,
                                  read=True,
                                  create=True,
                                  delete=True),
            datetime.utcnow() + timedelta(hours=5),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @DataLakePreparer()
    def test_rename_file_to_existing_file(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        new_url = file_client.url

        data = new_client.download_file().readall()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @DataLakePreparer()
    def test_rename_file_will_not_change_existing_directory(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
示例#3
0
class DirectoryTest(StorageTestCase):
    def setUp(self):
        super(DirectoryTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
                for file_system in self.dsc.list_file_systems():
                    self.dsc.delete_file_system(file_system.name)
            except:
                pass

        return super(DirectoryTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _create_directory_and_get_directory_client(self, directory_name=None):
        directory_name = directory_name if directory_name else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_using_oauth_token_credential_to_create_directory(self):
        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token_credential = self.generate_oauth_token()
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token_credential)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)

    @record
    def test_create_directory_with_match_conditions(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            match_condition=MatchConditions.IfMissing)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_permission(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(permissions="rwxr--r--",
                                                    umask="0000")

        prop = directory_client.get_access_control()

        # Assert
        self.assertTrue(created)
        self.assertEqual(prop['permissions'], 'rwxr--r--')

    @record
    def test_create_directory_with_content_settings(self):
        # Arrange
        directory_name = self._get_directory_reference()
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(
            content_settings=content_settings)

        # Assert
        self.assertTrue(created)

    @record
    def test_create_directory_with_metadata(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        # Act
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        created = directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()

        # Assert
        self.assertTrue(created)

    @record
    def test_delete_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.delete_directory()
        # Assert
        self.assertIsNone(response)

    @record
    def test_delete_directory_with_if_modified_since(self):
        # Arrange
        directory_name = self._get_directory_reference()

        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        prop = directory_client.get_directory_properties()

        with self.assertRaises(ResourceModifiedError):
            directory_client.delete_directory(
                if_modified_since=prop['last_modified'])

    @record
    def test_create_sub_directory_and_delete_sub_directory(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}

        # Create a directory first, to prepare for creating sub directory
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        # Create sub directory from the current directory
        sub_directory_name = 'subdir'
        sub_directory_created = directory_client.create_sub_directory(
            sub_directory_name)

        # to make sure the sub directory was indeed created by get sub_directory properties from sub directory client
        sub_directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name + '/' + sub_directory_name)
        sub_properties = sub_directory_client.get_directory_properties()

        # Assert
        self.assertTrue(sub_directory_created)
        self.assertTrue(sub_properties)

        # Act
        directory_client.delete_sub_directory(sub_directory_name)
        with self.assertRaises(ResourceNotFoundError):
            sub_directory_client.get_directory_properties()

    @record
    def test_set_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        response = directory_client.set_access_control(permissions='0777')
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_acl(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        acl = 'user::rwx,group::r-x,other::rwx'
        directory_client.set_access_control(acl=acl)
        access_control = directory_client.get_access_control()

        # Assert

        self.assertIsNotNone(access_control)
        self.assertEqual(acl, access_control['acl'])

    @record
    def test_set_access_control_if_none_modified(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory()

        response = directory_client.set_access_control(
            permissions='0777',
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata,
                                          permissions='0777')

        # Act
        response = directory_client.get_access_control()
        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_match_conditions(self):
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        resp = directory_client.create_directory(permissions='0777',
                                                 umask='0000')

        # Act
        response = directory_client.get_access_control(
            etag=resp['etag'], match_condition=MatchConditions.IfNotModified)
        # Assert
        self.assertIsNotNone(response)
        self.assertEquals(response['permissions'], 'rwxrwxrwx')

    @record
    def test_rename_from(self):
        metadata = {'hello': 'world', 'number': '42'}
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        new_name = "newname"

        new_directory_client = self.dsc.get_directory_client(
            self.file_system_name, new_name)

        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name,
                                          metadata=metadata)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_shorter_directory_to_longer_directory(self):
        # TODO: investigate why rename shorter path to a longer one does not work
        pytest.skip("")
        directory_name = self._get_directory_reference()
        self._create_directory_and_get_directory_client(directory_name="old")

        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")

        new_directory_client._rename_path('/' + self.file_system_name + '/' +
                                          directory_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)

    @record
    def test_rename_from_a_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        old_file_system_name = "oldfilesystem"
        old_dir_name = "olddir"
        old_client = self.dsc.get_file_system_client(old_file_system_name)
        old_client.create_file_system()
        old_client.create_directory(old_dir_name)

        # create a dir2 under file system2
        new_name = "newname"
        new_directory_client = self._create_directory_and_get_directory_client(
            directory_name=new_name)
        new_directory_client = new_directory_client.create_sub_directory(
            "newsub")

        # rename dir1 under file system1 to dir2 under file system2
        new_directory_client._rename_path('/' + old_file_system_name + '/' +
                                          old_dir_name)
        properties = new_directory_client.get_directory_properties()

        self.assertIsNotNone(properties)
        self.dsc.delete_file_system(old_file_system_name)

    @record
    def test_rename_to_an_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = "destfilesystem"
        destination_dir_name = "destdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()
        destination_directory_client = fs_client.create_directory(
            destination_dir_name)

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + destination_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEquals(res.url, destination_directory_client.url)

    @record
    def test_rename_with_none_existing_destination_condition_and_source_unmodified_condition(
            self):
        non_existing_dir_name = "nonexistingdir"

        # create a file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to a non existing directory under file system1,
        # when dir1 does not exist and dir2 wasn't modified
        etag = source_directory_client.get_directory_properties()['etag']
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name,
            match_condition=MatchConditions.IfMissing,
            source_etag=etag,
            source_match_condition=MatchConditions.IfNotModified)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_to_an_non_existing_directory_in_another_file_system(self):
        # create a file dir1 under file system1
        destination_file_system_name = self._get_directory_reference(
            "destfilesystem")
        non_existing_dir_name = "nonexistingdir"
        fs_client = self.dsc.get_file_system_client(
            destination_file_system_name)
        fs_client.create_file_system()

        # create a dir2 under file system2
        source_name = "source"
        source_directory_client = self._create_directory_and_get_directory_client(
            directory_name=source_name)
        source_directory_client = source_directory_client.create_sub_directory(
            "subdir")

        # rename dir2 under file system2 to dir1 under file system1
        res = source_directory_client.rename_directory(
            '/' + destination_file_system_name + '/' + non_existing_dir_name)

        # the source directory has been renamed to destination directory, so it cannot be found
        with self.assertRaises(HttpResponseError):
            source_directory_client.get_directory_properties()

        self.assertEquals(non_existing_dir_name, res.path_name)

    @record
    def test_rename_directory_to_non_empty_directory(self):
        # TODO: investigate why rename non empty dir doesn't work
        pytest.skip("")
        dir1 = self._create_directory_and_get_directory_client("dir1")
        dir1.create_sub_directory("subdir")

        dir2 = self._create_directory_and_get_directory_client("dir2")
        dir2.rename_directory(dir1.file_system_name + '/' + dir1.path_name)

        with self.assertRaises(HttpResponseError):
            dir2.get_directory_properties()

    @record
    def test_get_properties(self):
        # Arrange
        directory_name = self._get_directory_reference()
        metadata = {'hello': 'world', 'number': '42'}
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory(metadata=metadata)

        properties = directory_client.get_directory_properties()
        # Assert
        self.assertTrue(properties)
        self.assertIsNotNone(properties.metadata)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])

    @record
    def test_using_directory_sas_to_read(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        client = self._create_directory_and_get_directory_client()
        directory_name = client.path_name

        # generate a token with directory level read permission
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(read=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        access_control = directory_client.get_access_control()

        self.assertIsNotNone(access_control)

    @record
    def test_using_directory_sas_to_create(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # generate a token with directory level create permission
        directory_name = self._get_directory_reference()
        token = generate_directory_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            account_key=self.dsc.credential.account_key,
            permission=DirectorySasPermissions(create=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        response = directory_client.create_directory()
        self.assertIsNotNone(response)
示例#4
0
    def data_lake_service_sample(self):

        # Instantiate a DataLakeServiceClient using a connection string
        # [START create_datalake_service_client]
        from azure.storage.filedatalake import DataLakeServiceClient
        datalake_service_client = DataLakeServiceClient.from_connection_string(
            self.connection_string)
        # [END create_datalake_service_client]

        # Instantiate a DataLakeServiceClient Azure Identity credentials.
        # [START create_datalake_service_client_oauth]
        from azure.identity import ClientSecretCredential
        token_credential = ClientSecretCredential(
            self.active_directory_tenant_id,
            self.active_directory_application_id,
            self.active_directory_application_secret,
        )
        datalake_service_client = DataLakeServiceClient(
            "https://{}.dfs.core.windows.net".format(self.account_name),
            credential=token_credential)
        # [END create_datalake_service_client_oauth]

        # get user delegation key
        # [START get_user_delegation_key]
        from datetime import datetime, timedelta
        user_delegation_key = datalake_service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))
        # [END get_user_delegation_key]

        # Create file systems
        # [START create_file_system_from_service_client]
        datalake_service_client.create_file_system("filesystem")
        # [END create_file_system_from_service_client]
        file_system_client = datalake_service_client.create_file_system(
            "anotherfilesystem")

        # List file systems
        # [START list_file_systems]
        file_systems = datalake_service_client.list_file_systems()
        for file_system in file_systems:
            print(file_system.name)
        # [END list_file_systems]

        # Get Clients from DataLakeServiceClient
        file_system_client = datalake_service_client.get_file_system_client(
            file_system_client.file_system_name)
        # [START get_directory_client_from_service_client]
        directory_client = datalake_service_client.get_directory_client(
            file_system_client.file_system_name, "mydirectory")
        # [END get_directory_client_from_service_client]
        # [START get_file_client_from_service_client]
        file_client = datalake_service_client.get_file_client(
            file_system_client.file_system_name, "myfile")
        # [END get_file_client_from_service_client]

        # Create file and set properties
        metadata = {'hello': 'world', 'number': '42'}
        from azure.storage.filedatalake import ContentSettings
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client.create_file(content_settings=content_settings)
        file_client.set_metadata(metadata=metadata)
        file_props = file_client.get_file_properties()
        print(file_props.metadata)

        # Create file/directory and set properties
        directory_client.create_directory(content_settings=content_settings,
                                          metadata=metadata)
        dir_props = directory_client.get_directory_properties()
        print(dir_props.metadata)

        # Delete File Systems
        # [START delete_file_system_from_service_client]
        datalake_service_client.delete_file_system("filesystem")
        # [END delete_file_system_from_service_client]
        file_system_client.delete_file_system()
示例#5
0
class FileTest(StorageTestCase):
    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(
            url,
            credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
            logging_enable=True)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        directory_name = directory if directory else self._get_directory_reference(
        )
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_using_oauth_token_credential(self):
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_existing_name(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceExistsError):
            # if the file exists then throw error
            # if_none_match='*' is to make sure no existing file
            file_client.create_file(match_condition=MatchConditions.IfMissing)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified,
                         create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name,
                                               "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)

        self.assertIsNotNone(response)

    @record
    def test_append_empty_data(self):
        file_client = self._create_file_and_return_client()

        # Act
        file_client.flush_data(0)
        file_props = file_client.get_file_properties()

        self.assertIsNotNone(file_props['size'], 0)

    @record
    def test_flush_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        # Assert
        prop = file_client.get_file_properties()
        self.assertIsNotNone(response)
        self.assertEqual(prop['size'], 3)

    @record
    def test_flush_data_with_match_condition(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        resp = file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)

        # flush is successful because it isn't touched
        response = file_client.flush_data(
            3,
            etag=resp['etag'],
            match_condition=MatchConditions.IfNotModified)

        file_client.append_data(b'abc', 3, 3)
        with self.assertRaises(ResourceModifiedError):
            # flush is unsuccessful because extra data were appended.
            file_client.flush_data(
                6,
                etag=resp['etag'],
                match_condition=MatchConditions.IfNotModified)

    def test_upload_data_to_none_existing_file(self):
        # parallel upload cannot be recorded
        if TestMode.need_recording_file(self.test_mode):
            return

        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        data = self.get_random_bytes(200 * 1024)
        file_client.upload_data(data, overwrite=True, max_concurrency=3)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        file_client.create_file()
        file_client.append_data(b"abc", 0)
        file_client.flush_data(3)

        # to override the existing file
        data = self.get_random_bytes(100)
        with self.assertRaises(HttpResponseError):
            file_client.upload_data(data, max_concurrency=5)
        file_client.upload_data(data, overwrite=True, max_concurrency=5)

        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_upload_data_to_existing_file_with_content_settings(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')

        file_client.upload_data(data,
                                max_concurrency=5,
                                content_settings=content_settings,
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        properties = file_client.get_file_properties()

        self.assertEqual(data, downloaded_data)
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_upload_data_to_existing_file_with_permission_and_umask(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        # create an existing file
        file_client = directory_client.get_file_client('filename')
        etag = file_client.create_file()['etag']

        # to override the existing file
        data = self.get_random_bytes(100)

        file_client.upload_data(data,
                                overwrite=True,
                                max_concurrency=5,
                                permissions='0777',
                                umask="0000",
                                etag=etag,
                                match_condition=MatchConditions.IfNotModified)

        downloaded_data = file_client.download_file().readall()
        prop = file_client.get_access_control()

        # Assert
        self.assertEqual(data, downloaded_data)
        self.assertEqual(prop['permissions'], 'rwxrwxrwx')

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_with_user_delegation_key(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(self._get_oauth_account_url(),
                                               credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          create=True,
                                          write=True,
                                          delete=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # doanload the data and make sure it is the same as uploaded data
        new_file_client = DataLakeFileClient(self._get_account_url(),
                                             file_client.file_system_name,
                                             file_client.path_name,
                                             credential=sas_token)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)

    @record
    def test_read_file_into_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the data into a file and make sure it is the same as uploaded data
        with open(FILE_PATH, 'wb') as stream:
            download = file_client.download_file(max_concurrency=2)
            download.readinto(stream)

        # Assert
        with open(FILE_PATH, 'rb') as stream:
            actual = stream.read()
            self.assertEqual(data, actual)

    @record
    def test_read_file_to_text(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_text_data(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the text data and make sure it is the same as uploaded data
        downloaded_data = file_client.download_file(
            max_concurrency=2, encoding="utf-8").readall()

        # Assert
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(HttpResponseError):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_delete_file_with_if_unmodified_since(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        prop = file_client.get_file_properties()
        file_client.delete_file(if_unmodified_since=prop['last_modified'])

        # Make sure the file was deleted
        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_set_access_control_with_match_conditions(self):
        file_client = self._create_file_and_return_client()

        with self.assertRaises(ResourceModifiedError):
            file_client.set_access_control(
                permissions='0777', match_condition=MatchConditions.IfMissing)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control_with_if_modified_since(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        prop = file_client.get_file_properties()

        # Act
        response = file_client.get_access_control(
            if_modified_since=prop['last_modified'] - timedelta(minutes=15))

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(content_language='spanish',
                                           content_disposition='inline')
        file_client = directory_client.create_file(
            "newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language,
                         content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(
            file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' +
                                             existing_file_client.path_name)
        new_url = file_client.url

        data = new_client.download_file().readall()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name + '/' + f1.path_name)

        self.assertEqual(new_client.download_file().readall(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.download_file().readall()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.download_file().readall()
        self.assertEqual(f4_data, b"file4")

        with self.assertRaises(HttpResponseError):
            f3.download_file().readall()
class LargeFileTest(StorageTestCase):
    def _setUp(self, account_name, account_key):
        url = self.account_url(account_name, 'dfs')
        self.payload_dropping_policy = PayloadDroppingPolicy()
        credential_policy = _format_shared_key_credential(
            account_name, account_key)
        self.dsc = DataLakeServiceClient(url,
                                         credential=account_key,
                                         logging_enable=True,
                                         _additional_pipeline_policies=[
                                             self.payload_dropping_policy,
                                             credential_policy
                                         ])
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(
                self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass

        return super(LargeFileTest, self).tearDown()

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_append_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        data = LargeStream(LARGEST_BLOCK_SIZE)

        # Act
        response = file_client.append_data(data, 0, LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 1)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)

    @pytest.mark.live_test_only
    @DataLakePreparer()
    def test_upload_large_stream_without_network(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        pytest.skip(
            "Pypy3 on Linux failed somehow, skip for now to investigate")

        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)

        directory_name = self.get_resource_name(TEST_DIRECTORY_PREFIX)

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(
            self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        length = 2 * LARGEST_BLOCK_SIZE
        data = LargeStream(length)

        # Act
        response = file_client.upload_data(data,
                                           length,
                                           overwrite=True,
                                           chunk_size=LARGEST_BLOCK_SIZE)

        self.assertIsNotNone(response)
        self.assertEqual(self.payload_dropping_policy.append_counter, 2)
        self.assertEqual(self.payload_dropping_policy.append_sizes[0],
                         LARGEST_BLOCK_SIZE)
        self.assertEqual(self.payload_dropping_policy.append_sizes[1],
                         LARGEST_BLOCK_SIZE)
class FileTest(StorageTestCase):
    def setUp(self):
        super(FileTest, self).setUp()
        url = self._get_account_url()
        self.dsc = DataLakeServiceClient(url, credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
        self.config = self.dsc._config

        self.file_system_name = self.get_resource_name('filesystem')

        if not self.is_playback():
            file_system = self.dsc.get_file_system_client(self.file_system_name)
            try:
                file_system.create_file_system(timeout=5)
            except ResourceExistsError:
                pass

    def tearDown(self):
        if not self.is_playback():
            try:
                self.dsc.delete_file_system(self.file_system_name)
            except:
                pass

        return super(FileTest, self).tearDown()

    # --Helpers-----------------------------------------------------------------
    def _get_directory_reference(self, prefix=TEST_DIRECTORY_PREFIX):
        directory_name = self.get_resource_name(prefix)
        return directory_name

    def _get_file_reference(self, prefix=TEST_FILE_PREFIX):
        file_name = self.get_resource_name(prefix)
        return file_name

    def _create_file_system(self):
        return self.dsc.create_file_system(self._get_file_system_reference())

    def _create_directory_and_return_client(self, directory=None):
        directory_name = directory if directory else self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()
        return directory_client

    def _create_file_and_return_client(self, directory="", file=None):
        if directory:
            self._create_directory_and_return_client(directory)
        if not file:
            file = self._get_file_reference()
        file_client = self.dsc.get_file_client(self.file_system_name, directory + '/' + file)
        file_client.create_file()
        return file_client

    # --Helpers-----------------------------------------------------------------

    @record
    def test_create_file(self):
        # Arrange
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_create_file_with_lease_id(self):
        # Arrange
        directory_name = self._get_directory_reference()
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        # Act
        file_client.create_file()
        lease = file_client.acquire_lease()
        create_resp = file_client.create_file(lease=lease)

        # Assert
        file_properties = file_client.get_file_properties()
        self.assertIsNotNone(file_properties)
        self.assertEqual(file_properties.etag, create_resp.get('etag'))
        self.assertEqual(file_properties.last_modified, create_resp.get('last_modified'))

    @record
    def test_create_file_under_root_directory(self):
        # Arrange
        # get a file client to interact with the file under root directory
        file_client = self.dsc.get_file_client(self.file_system_name, "filename")

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_append_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        response = file_client.append_data(b'abc', 0, 3)

        self.assertIsNotNone(response)

    @record
    def test_flush_data(self):
        directory_name = self._get_directory_reference()

        # Create a directory to put the file under that
        directory_client = self.dsc.get_directory_client(self.file_system_name, directory_name)
        directory_client.create_directory()

        file_client = directory_client.get_file_client('filename')
        file_client.create_file()

        # Act
        file_client.append_data(b'abc', 0, 3)
        response = file_client.flush_data(3)

        self.assertIsNotNone(response)

    @record
    def test_read_file(self):
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)

        # upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # doanload the data and make sure it is the same as uploaded data
        downloaded_data = file_client.read_file()
        self.assertEqual(data, downloaded_data)

    @record
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)

    @record
    def test_file_sas_only_applies_to_file_level(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name, file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            account_key=self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, directory_name+'/'+file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd", 0, 4, validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url, self.file_system_name, credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url, self.file_system_name, directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()

    @record
    def test_delete_file(self):
        # Arrange
        file_client = self._create_file_and_return_client()

        file_client.delete_file()

        with self.assertRaises(ResourceNotFoundError):
            file_client.get_file_properties()

    @record
    def test_set_access_control(self):
        file_client = self._create_file_and_return_client()

        response = file_client.set_access_control(permissions='0777')\

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_access_control(self):
        file_client = self._create_file_and_return_client()
        file_client.set_access_control(permissions='0777')

        # Act
        response = file_client.get_access_control()

        # Assert
        self.assertIsNotNone(response)

    @record
    def test_get_properties(self):
        # Arrange
        directory_client = self._create_directory_and_return_client()

        metadata = {'hello': 'world', 'number': '42'}
        content_settings = ContentSettings(
            content_language='spanish',
            content_disposition='inline')
        file_client = directory_client.create_file("newfile", metadata=metadata, content_settings=content_settings)
        file_client.append_data(b"abc", 0, 3)
        file_client.flush_data(3)
        properties = file_client.get_file_properties()

        # Assert
        self.assertTrue(properties)
        self.assertEqual(properties.size, 3)
        self.assertEqual(properties.metadata['hello'], metadata['hello'])
        self.assertEqual(properties.content_settings.content_language, content_settings.content_language)

    @record
    def test_rename_file_with_non_used_name(self):
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+'newname')

        data = new_client.read_file()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")

    @record
    def test_rename_file_to_existing_file(self):
        # create the existing file
        existing_file_client = self._create_file_and_return_client(file="existingfile")
        existing_file_client.append_data(b"a", 0, 1)
        existing_file_client.flush_data(1)
        old_url = existing_file_client.url

        # prepare to rename the file to the existing file
        file_client = self._create_file_and_return_client()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+existing_file_client.path_name)
        new_url = file_client.url

        data = new_client.read_file()
        # the existing file was overridden
        self.assertEqual(data, data_bytes)

    @record
    def test_rename_file_will_not_change_existing_directory(self):
        # create none empty directory(with 2 files)
        dir1 = self._create_directory_and_return_client(directory="dir1")
        f1 = dir1.create_file("file1")
        f1.append_data(b"file1", 0, 5)
        f1.flush_data(5)
        f2 = dir1.create_file("file2")
        f2.append_data(b"file2", 0, 5)
        f2.flush_data(5)

        # create another none empty directory(with 2 files)
        dir2 = self._create_directory_and_return_client(directory="dir2")
        f3 = dir2.create_file("file3")
        f3.append_data(b"file3", 0, 5)
        f3.flush_data(5)
        f4 = dir2.create_file("file4")
        f4.append_data(b"file4", 0, 5)
        f4.flush_data(5)

        new_client = f3.rename_file(f1.file_system_name+'/'+f1.path_name)

        self.assertEqual(new_client.read_file(), b"file3")

        # make sure the data in file2 and file4 weren't touched
        f2_data = f2.read_file()
        self.assertEqual(f2_data, b"file2")

        f4_data = f4.read_file()
        self.assertEqual(f4_data, b"file4")

        with self.assertRaises(HttpResponseError):
            f3.read_file()
示例#8
0
class AzDataLakeProject(object):
    def __init__(self, container_url, path):
        storage_account, file_system_name = get_details_from_container_url(
            container_url)
        self.container_url = container_url
        self.path = path
        self.storage_account = storage_account
        self.file_system_name = file_system_name
        storage_config = AzStorageConfig.objects.get(
            storage_account=storage_account, container_name=file_system_name)
        self.service = DataLakeServiceClient(
            f"https://{storage_account}.dfs.core.windows.net/",
            credential=storage_config.storage_account_key)
        self.directory_client = self.service.get_directory_client(
            file_system_name, path)

    def exists(self):
        return self.directory_client.exists()

    def ensure_parent_directory(self):
        parent_directory_path = os.path.dirname(self.path)
        directory_client = self.service.get_directory_client(
            self.file_system_name, parent_directory_path)
        if not directory_client.exists():
            directory_client.create_directory()

    def move(self, destination_container_url, destination_path):
        if destination_container_url == self.container_url:
            # Within the same container simply rename the directory
            filesystem_name = "{}/{}".format(self.file_system_name,
                                             destination_path)
            self.directory_client.rename_directory(filesystem_name)
        else:
            # Copy to the destination
            from_url = "{}/{}".format(self.container_url, self.path)
            to_url = "{}/{}".format(destination_container_url,
                                    destination_path)
            copy_command = [
                settings.AZCOPY_COMMAND, "copy", '--recursive', from_url,
                to_url
            ]
            subprocess.run(copy_command, check=True)

            # Delete our copy
            self.directory_client.delete_directory()

    def get_file_system_client(self):
        return self.service.get_file_system_client(self.file_system_name)

    def get_paths(self):
        file_system_client = self.get_file_system_client()
        return file_system_client.get_paths(self.path)

    def get_file_manifest(self):
        file_system_client = self.get_file_system_client()
        results = []
        for file_metadata in file_system_client.get_paths(self.path):
            if file_metadata.is_directory:
                results.append(file_metadata)
            else:
                file_client = file_system_client.get_file_client(
                    file_metadata.name)
                file_properties = dict(file_client.get_file_properties())
                del file_properties['lease']
                # Fix nested content settings
                content_settings = file_properties["content_settings"]
                file_properties["content_settings"] = {
                    "content_type": content_settings["content_type"],
                    "content_md5": content_settings["content_md5"].hex(),
                }
                results.append(file_properties)
        return results

    def add_download_user(self, azure_user_id):
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        acl = make_acl(azure_user_id, permissions='r-x')
        directory_client = file_system_client.get_directory_client(self.path)
        directory_client.update_access_control_recursive(acl=acl)

    def set_owner(self, azure_user_id):
        file_paths = [self.path]
        file_system_client = self.service.get_file_system_client(
            self.file_system_name)
        for file_metadata in file_system_client.get_paths(self.path):
            file_paths.append(file_metadata.name)
        for file_path in file_paths:
            file_client = file_system_client.get_file_client(file_path)
            file_client.set_access_control(owner=azure_user_id)