Example No. 1
    def test_account_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name, credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        with self.assertRaises(StorageErrorException):
            file_client.append_data(b"abcd", 0, 4)
Example No. 2
    def test_account_sas(self, datalake_storage_account_name,
                         datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        file_name = self._get_file_reference()
        # create a file under root directory
        self._create_file_and_return_client(file=file_name)

        # generate a token with file level read permission
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(file_system=True, object=True),
            AccountSasPermissions(read=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        for credential in [token, AzureSasCredential(token)]:
            # read the created file which is under root directory
            file_client = DataLakeFileClient(self.dsc.url,
                                             self.file_system_name,
                                             file_name,
                                             credential=credential)
            properties = file_client.get_file_properties()

            # make sure we can read the file properties
            self.assertIsNotNone(properties)

            # try to write to the created file with the token
            with self.assertRaises(HttpResponseError):
                file_client.append_data(b"abcd", 0, 4)
Example No. 3
    def test_read_file_with_user_delegation_key(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(self._get_oauth_account_url(), credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(datetime.utcnow(),
                                                                     datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(file_client.account_name,
                                      file_client.file_system_name,
                                      None,
                                      file_client.path_name,
                                      user_delegation_key,
                                      permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
                                      expiry=datetime.utcnow() + timedelta(hours=1),
                                      )

        # download the data and make sure it is the same as the uploaded data
        new_file_client = DataLakeFileClient(self._get_account_url(),
                                             file_client.file_system_name,
                                             file_client.path_name,
                                             credential=sas_token)
        downloaded_data = new_file_client.download_file().readall()
        self.assertEqual(data, downloaded_data)
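Outside the test harness, the same user-delegation flow can be sketched with azure.identity; the account URL, filesystem and file names below are illustrative placeholders, and the signed-in identity is assumed to be permitted to request a user delegation key:

from datetime import datetime, timedelta

from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import (DataLakeFileClient, DataLakeServiceClient,
                                        FileSasPermissions, generate_file_sas)

account_url = "https://myaccount.dfs.core.windows.net/"  # placeholder account

# Obtain a user delegation key with an AAD token instead of the account key.
service_client = DataLakeServiceClient(account_url, credential=DefaultAzureCredential())
delegation_key = service_client.get_user_delegation_key(datetime.utcnow(),
                                                        datetime.utcnow() + timedelta(hours=1))

# Sign a read-only file SAS with the delegation key.
sas_token = generate_file_sas("myaccount", "myfilesystem", None, "myfile",
                              delegation_key,
                              permission=FileSasPermissions(read=True),
                              expiry=datetime.utcnow() + timedelta(hours=1))

# Read the file back using only the SAS token as the credential.
file_client = DataLakeFileClient(account_url, "myfilesystem", "myfile", credential=sas_token)
downloaded = file_client.download_file().readall()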
Example No. 4
    def test_rename_file_with_file_system_sas(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # sas token is calculated from storage key, so live only
        token = generate_file_system_sas(
            self.dsc.account_name,
            self.file_system_name,
            self.dsc.credential.account_key,
            FileSystemSasPermissions(write=True, read=True, delete=True),
            datetime.utcnow() + timedelta(hours=1),
        )

        # create a file under the root directory with the sas token
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")
Example No. 5
    def test_rename_file_with_account_sas(self, datalake_storage_account_name,
                                          datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        pytest.skip("service bug")
        token = generate_account_sas(
            self.dsc.account_name,
            self.dsc.credential.account_key,
            ResourceTypes(object=True),
            AccountSasPermissions(write=True,
                                  read=True,
                                  create=True,
                                  delete=True),
            datetime.utcnow() + timedelta(hours=5),
        )

        # create a file under the root directory with the sas token
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         "oldfile",
                                         credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name +
                                             '/' + 'newname')

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")
Example No. 6
    def test_rename_file_with_file_sas(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return
        token = generate_file_sas(self.dsc.account_name,
                                  self.file_system_name,
                                  None,
                                  "oldfile",
                                  self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
                                  permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
                                  expiry=datetime.utcnow() + timedelta(hours=1),
                                  )

        new_token = generate_file_sas(self.dsc.account_name,
                                      self.file_system_name,
                                      None,
                                      "newname",
                                      self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
                                      permission=FileSasPermissions(read=True, create=True, write=True, delete=True),
                                      expiry=datetime.utcnow() + timedelta(hours=1),
                                      )

        # create a file under the root directory with the sas token
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, "oldfile", credential=token)
        file_client.create_file()
        data_bytes = b"abc"
        file_client.append_data(data_bytes, 0, 3)
        file_client.flush_data(3)
        new_client = file_client.rename_file(file_client.file_system_name+'/'+'newname'+'?'+new_token)

        data = new_client.download_file().readall()
        self.assertEqual(data, data_bytes)
        self.assertEqual(new_client.path_name, "newname")
Example No. 7
    def blob_delete(self, storage_account_id: str, container: str, key: str):
        # An ARM storage account ID has the form
        # /subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Storage/storageAccounts/<name>,
        # so splitting on '/' yields the subscription, resource group and account name.
        tokens = storage_account_id.split('/')
        subscription_id = tokens[2]
        resource_group_name = tokens[4]
        storage_account_name = tokens[8]
        storage_client = self.get_storage_client(subscription_id)
        storage_account = storage_client.storage_accounts.get_properties(resource_group_name, storage_account_name)
        if storage_account.is_hns_enabled:
            # Hierarchical namespace is enabled, so delete through the Data Lake (dfs) endpoint.
            account_url = f'https://{storage_account_name}.dfs.core.windows.net/'
            file_client = DataLakeFileClient(account_url, container, key, credential=self.credential)
            file_client.delete_file()
Example No. 8
    def test_file_sas_only_applies_to_file_level(self,
                                                 datalake_storage_account_name,
                                                 datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only
        file_name = self._get_file_reference()
        directory_name = self._get_directory_reference()
        self._create_file_and_return_client(directory=directory_name,
                                            file=file_name)

        # generate a token with file level read and write permissions
        token = generate_file_sas(
            self.dsc.account_name,
            self.file_system_name,
            directory_name,
            file_name,
            self.dsc.credential.account_key,
            permission=FileSasPermissions(read=True, write=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # read the created file which is under root directory
        file_client = DataLakeFileClient(self.dsc.url,
                                         self.file_system_name,
                                         directory_name + '/' + file_name,
                                         credential=token)
        properties = file_client.get_file_properties()

        # make sure we can read the file properties
        self.assertIsNotNone(properties)

        # try to write to the created file with the token
        response = file_client.append_data(b"abcd",
                                           0,
                                           4,
                                           validate_content=True)
        self.assertIsNotNone(response)

        # the token is for file level, so users are not supposed to have access to file system level operations
        file_system_client = FileSystemClient(self.dsc.url,
                                              self.file_system_name,
                                              credential=token)
        with self.assertRaises(ClientAuthenticationError):
            file_system_client.get_file_system_properties()

        # the token is for file level, so users are not supposed to have access to directory level operations
        directory_client = DataLakeDirectoryClient(self.dsc.url,
                                                   self.file_system_name,
                                                   directory_name,
                                                   credential=token)
        with self.assertRaises(ClientAuthenticationError):
            directory_client.get_directory_properties()
Example No. 9
    def test_create_file_using_oauth_token_credential(self):
        # Arrange
        file_name = self._get_file_reference()
        token_credential = self.generate_oauth_token()

        # Create a directory to put the file under that
        file_client = DataLakeFileClient(self.dsc.url, self.file_system_name, file_name,
                                         credential=token_credential)

        response = file_client.create_file()

        # Assert
        self.assertIsNotNone(response)
Example No. 10
    def test_preauthorize_user_with_user_delegation_key(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))
        file_client.set_access_control(
            owner="68390a19-a643-458b-b726-408abf67b4fc", permissions='0777')
        acl = file_client.get_access_control()

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(read=True,
                                          write=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
            preauthorized_agent_object_id="68390a19-a643-458b-b726-408abf67b4fc"
        )

        # use the sas token, preauthorized for the object id above, to update the file's access control
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)

        acl = new_file_client.set_access_control(permissions='0777')
        self.assertIsNotNone(acl)
Example No. 11
    def blob_upload(self, storage_account_id: str, file: io.BytesIO, container: str, key: str, tags: dict):
        log.debug(f'Storage account for upload is {storage_account_id}')
        log.debug(f'Upload key is {key}')
        # Split the ARM resource ID into its subscription, resource group and account name segments.
        tokens = storage_account_id.split('/')
        subscription_id = tokens[2]
        resource_group_name = tokens[4]
        storage_account_name = tokens[8]
        storage_client = self.get_storage_client(subscription_id)
        storage_account = storage_client.storage_accounts.get_properties(resource_group_name, storage_account_name)
        log.debug(f'Storage account kind is {storage_account.kind}')
        log.debug(f'Hierarchical namespace enabled: {storage_account.is_hns_enabled}')
        if storage_account.is_hns_enabled:
            # Upload through the Data Lake (dfs) endpoint when hierarchical namespace is enabled.
            account_url = f'https://{storage_account_name}.dfs.core.windows.net/'
            file_client = DataLakeFileClient(account_url, container, key, credential=self.credential)
            log.debug(f'file_client pointing to {file_client.url}')
            file_client.upload_data(file, overwrite=True, metadata=tags)
Example No. 12
    def test_set_acl_with_user_delegation_key(self,
                                              datalake_storage_account_name,
                                              datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        # SAS URL is calculated from storage key, so this test runs live only

        # Create file
        file_client = self._create_file_and_return_client()
        data = self.get_random_bytes(1024)
        # Upload data to file
        file_client.append_data(data, 0, len(data))
        file_client.flush_data(len(data))

        # Get user delegation key
        token_credential = self.generate_oauth_token()
        service_client = DataLakeServiceClient(
            self._get_account_url(datalake_storage_account_name),
            credential=token_credential)
        user_delegation_key = service_client.get_user_delegation_key(
            datetime.utcnow(),
            datetime.utcnow() + timedelta(hours=1))

        sas_token = generate_file_sas(
            file_client.account_name,
            file_client.file_system_name,
            None,
            file_client.path_name,
            user_delegation_key,
            permission=FileSasPermissions(execute=True,
                                          manage_access_control=True,
                                          manage_ownership=True),
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # use the sas token to set the access control and verify it was applied
        new_file_client = DataLakeFileClient(
            self._get_account_url(datalake_storage_account_name),
            file_client.file_system_name,
            file_client.path_name,
            credential=sas_token)
        acl = 'user::rwx,group::r-x,other::rwx'
        owner = "dc140949-53b7-44af-b1e9-cd994951fb86"
        new_file_client.set_access_control(acl=acl, owner=owner)
        access_control = new_file_client.get_access_control()
        self.assertEqual(acl, access_control['acl'])
        self.assertEqual(owner, access_control['owner'])
Example No. 13
    def _get_file_client(self, storage_account_url: str, file_system: str,
                         file_path: str,
                         credential: Union[DefaultAzureCredential, str]):
        file_client = DataLakeFileClient(storage_account_url,
                                         file_system,
                                         file_path,
                                         credential=credential)
        return file_client
Example No. 14
    def test_account_sas_raises_if_sas_already_in_uri(
            self, datalake_storage_account_name, datalake_storage_account_key):
        self._setUp(datalake_storage_account_name,
                    datalake_storage_account_key)
        with self.assertRaises(ValueError):
            DataLakeFileClient(self.dsc.url + "?sig=foo",
                               self.file_system_name,
                               "foo",
                               credential=AzureSasCredential("?foo=bar"))
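The ValueError above is the guard against supplying a SAS twice; a minimal sketch of the two valid ways to pass it (URL and token values are placeholders):

from azure.core.credentials import AzureSasCredential
from azure.storage.filedatalake import DataLakeFileClient

account_url = "https://myaccount.dfs.core.windows.net/"  # placeholder

# Either pass the SAS once, via the credential argument...
client = DataLakeFileClient(account_url, "myfilesystem", "myfile",
                            credential=AzureSasCredential("<sas-token>"))

# ...or once, already appended to the account URL, but never both.
client = DataLakeFileClient(account_url + "?<sas-token>", "myfilesystem", "myfile")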
Example No. 15
def __get_validation_schema(file_client: DataLakeFileClient) -> Dict:
    try:
        file_client.get_file_properties()
    except ResourceNotFoundError as error:
        message = f'({type(error).__name__}) The expected JSON Schema does not exist: {error}'
        raise HTTPException(status_code=HTTPStatus.NOT_FOUND, detail=message) from error

    try:
        stream = file_client.download_file()
    except HttpResponseError as error:
        message = f'({type(error).__name__}) Schema could not be retrieved for validation: {error}'
        raise HTTPException(status_code=error.status_code, detail=message) from error

    try:
        schema = json.loads(stream.readall().decode())
    except json.JSONDecodeError as error:
        message = f'({type(error).__name__}) Malformed schema JSON: {error}'
        raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=message) from error

    return schema
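A minimal sketch of how this helper might be called, assuming a hypothetical account URL, filesystem and schema path and an identity resolved by DefaultAzureCredential:

from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeFileClient

# All names below are placeholders for illustration only.
schema_client = DataLakeFileClient("https://myaccount.dfs.core.windows.net/",
                                   "schemas",                     # filesystem holding the JSON Schemas
                                   "validation/my-dataset.json",  # path to the expected schema file
                                   credential=DefaultAzureCredential())
schema = __get_validation_schema(schema_client)  # raises HTTPException if missing or malformed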
def instantiate_file_client_from_conn_str():
    import os
    # The connection string is assumed to be read from the environment for this sketch.
    connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]

    # [START instantiate_file_client_from_conn_str]
    from azure.storage.filedatalake import DataLakeFileClient
    file_client = DataLakeFileClient.from_connection_string(connection_string,
                                                            "myfilesystem",
                                                            "mydirectory/myfile")
    # [END instantiate_file_client_from_conn_str]