Exemplo n.º 1
0
    def test_user_delegation_sas_for_container(self):
        """Read a blob through a container SAS signed with a user delegation key.

        The unconditional ``pytest.skip`` call at the top ends the test
        immediately, so none of the steps after it currently execute.
        """
        pytest.skip("Current Framework Cannot Support OAUTH")
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange: an OAuth-authenticated service client and a delegation key
        # valid from now until one hour from now.
        oauth_credential = self.generate_oauth_token()
        oauth_service = BlobServiceClient(
            self._get_oauth_account_url(), credential=oauth_credential)
        delegation_key = oauth_service.get_user_delegation_key(
            datetime.utcnow(), datetime.utcnow() + timedelta(hours=1))

        # Arrange: a container and a read-only SAS for it signed with that key.
        container = oauth_service.create_container(
            self.get_resource_name('oauthcontainer'))
        sas_token = container.generate_shared_access_signature(
            expiry=datetime.utcnow() + timedelta(hours=1),
            permission=ContainerPermissions.READ,
            user_delegation_key=delegation_key,
            account_name='emilydevtest')

        # Arrange: upload random text as the blob to read back.
        uploader = container.get_blob_client(self.get_resource_name('oauthblob'))
        expected_text = self.get_random_text_data(1024)
        uploader.upload_blob(expected_text, length=len(expected_text))

        # Act: download the blob using only the SAS token as credential.
        sas_reader = BlobClient(uploader.url, credential=sas_token)
        downloaded = sas_reader.download_blob()

        # Assert: the downloaded chunks decode back to the uploaded text.
        actual_text = b"".join(downloaded).decode('utf-8')
        self.assertEqual(expected_text, actual_text)
def getUserDelegationKey():
    """Fetch a user delegation key for the STORAGE_ACCOUNT blob endpoint.

    Authenticates with ``DefaultAzureCredential`` and asks the Blob service
    for a key that starts at the current UTC time and expires two hours
    after the current UTC time.

    :return: The value returned by ``BlobServiceClient.get_user_delegation_key``.
    """
    # The service principal's identity and credentials are supplied through
    # these environment variables. ``setdefault`` keeps any value that is
    # already present and stores an empty string only when one is missing.
    for variable in ('AZURE_TENANT_ID', 'AZURE_CLIENT_ID', 'AZURE_CLIENT_SECRET'):
        os.environ.setdefault(variable, '')

    credential = DefaultAzureCredential()
    endpoint = "https://{0}.blob.core.windows.net".format(STORAGE_ACCOUNT)
    service = BlobServiceClient(account_url=endpoint, credential=credential)

    # Request the user delegation key for the two-hour window.
    valid_from = datetime.utcnow()
    valid_until = datetime.utcnow() + timedelta(hours=2)
    return service.get_user_delegation_key(
        key_start_time=valid_from, key_expiry_time=valid_until)
Exemplo n.º 3
0
class DataLakeServiceClient(StorageAccountHostsMixin):
    """A client to interact with the DataLake Service at the account level.

    This client provides operations to retrieve and configure the account properties
    as well as list, create and delete file systems within the account.
    For operations relating to a specific file system, directory or file, clients for those entities
    can also be retrieved using the `get_file_system_client`, `get_directory_client` and
    `get_file_client` methods.

    :ivar str url:
        The full endpoint URL to the datalake service endpoint.
    :ivar str primary_endpoint:
        The full primary endpoint URL.
    :ivar str primary_hostname:
        The hostname of the primary endpoint.
    :param str account_url:
        The URL to the DataLake storage account. Any other entities included
        in the URL path (e.g. file system or file) will be discarded. This URL can be optionally
        authenticated with a SAS token.
    :param credential:
        The credentials with which to authenticate. This is optional if the
        account URL already has a SAS token. The value can be a SAS token string,
        an instance of an AzureSasCredential from azure.core.credentials, an account
        shared access key, or an instance of a TokenCredentials class from azure.identity.
        If the resource URI already contains a SAS token, this will be ignored in favor of an explicit credential
        - except in the case of AzureSasCredential, where the conflicting SAS tokens will raise a ValueError.

    .. admonition:: Example:

        .. literalinclude:: ../samples/datalake_samples_service.py
            :start-after: [START create_datalake_service_client]
            :end-before: [END create_datalake_service_client]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient from connection string.

        .. literalinclude:: ../samples/datalake_samples_service.py
            :start-after: [START create_datalake_service_client_oauth]
            :end-before: [END create_datalake_service_client_oauth]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient with Azure Identity credentials.
    """
    def __init__(
            self,
            account_url,  # type: str
            credential=None,  # type: Optional[Any]
            **kwargs  # type: Any
    ):
        # type: (...) -> None
        # Prefix "https://" when the URL does not start with "http". A value
        # without a ``lower`` method (i.e. not string-like) is rejected with
        # ValueError instead of surfacing the AttributeError.
        try:
            if not account_url.lower().startswith('http'):
                account_url = "https://" + account_url
        except AttributeError:
            raise ValueError("Account URL must be a string.")
        # Parse with trailing slashes removed; a URL without a network
        # location cannot address an account.
        parsed_url = urlparse(account_url.rstrip('/'))
        if not parsed_url.netloc:
            raise ValueError("Invalid URL: {}".format(account_url))

        # Several operations on this client are delegated to a
        # BlobServiceClient built from the converted URL with the same
        # credential and keyword arguments. Its secondary host is blanked too.
        blob_account_url = convert_dfs_url_to_blob_url(account_url)
        self._blob_account_url = blob_account_url
        self._blob_service_client = BlobServiceClient(blob_account_url,
                                                      credential, **kwargs)
        self._blob_service_client._hosts[LocationMode.SECONDARY] = ""  #pylint: disable=protected-access

        # The second value from parse_query is treated as the SAS token found
        # in the URL query. _format_query_string is not defined in this class
        # body (presumably inherited from the mixin - confirm); its results
        # are stored as the query string and the credential handed on below.
        _, sas_token = parse_query(parsed_url.query)
        self._query_str, self._raw_credential = self._format_query_string(
            sas_token, credential)

        # Initialise the base class for the 'dfs' service with the parsed URL.
        super(DataLakeServiceClient,
              self).__init__(parsed_url,
                             service='dfs',
                             credential=self._raw_credential,
                             **kwargs)
        # ADLS doesn't support secondary endpoint, make sure it's empty
        self._hosts[LocationMode.SECONDARY] = ""

    def __enter__(self):
        """Enter the wrapped blob service client's context and return this client."""
        blob_service = self._blob_service_client
        blob_service.__enter__()
        return self

    def __exit__(self, *args):
        """Close the wrapped blob service client when the ``with`` block ends.

        The exception details received in ``args`` are not used and nothing is
        returned, so an exception raised inside the block keeps propagating.
        """
        blob_service = self._blob_service_client
        blob_service.close()

    def close(self):
        # type: () -> None
        """Close the sockets opened by the client.

        Calling this is not required when the client is used as a context manager.
        """
        blob_service = self._blob_service_client
        blob_service.close()

    def _format_url(self, hostname):
        """Build the endpoint URL for the given hostname.

        :param str hostname:
            The host to place after the client's scheme.
        :returns: ``<scheme>://<hostname>/`` followed by the stored query string.
        :rtype: str
        """
        url_parts = (self.scheme, hostname, self._query_str)
        return "{}://{}/{}".format(*url_parts)

    @classmethod
    def from_connection_string(
        cls,
        conn_str,  # type: str
        credential=None,  # type: Optional[Any]
        **kwargs  # type: Any
    ):  # type: (...) -> DataLakeServiceClient
        """
        Build a DataLakeServiceClient from a connection string.

        :param str conn_str:
            A connection string to an Azure Storage account.
        :param credential:
            The credentials with which to authenticate. Optional when the
            account URL already carries a SAS token or the connection string
            already contains shared access key values. Accepted values are a
            SAS token string, an instance of an AzureSasCredential from
            azure.core.credentials, an account shared access key, or an
            instance of a TokenCredentials class from azure.identity.
            Credentials given here take precedence over those in the
            connection string.
        :return: a DataLakeServiceClient
        :rtype: ~azure.storage.filedatalake.DataLakeServiceClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_file_system.py
                :start-after: [START create_data_lake_service_client_from_conn_str]
                :end-before: [END create_data_lake_service_client_from_conn_str]
                :language: python
                :dedent: 8
                :caption: Creating the DataLakeServiceClient from a connection string.
        """
        # parse_connection_str yields three values; the middle one is not
        # needed to construct the client.
        endpoint, _unused, resolved_credential = parse_connection_str(
            conn_str, credential, 'dfs')
        return cls(endpoint, credential=resolved_credential, **kwargs)

    def get_user_delegation_key(
            self,
            key_start_time,  # type: datetime
            key_expiry_time,  # type: datetime
            **kwargs  # type: Any
    ):
        # type: (...) -> UserDelegationKey
        """
        Request a user delegation key that can be used to sign SAS tokens.

        The service object must hold a token credential for this request to succeed.

        :param ~datetime.datetime key_start_time:
            A DateTime value. The moment the key becomes valid.
        :param ~datetime.datetime key_expiry_time:
            A DateTime value. The moment the key stops being valid.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: The user delegation key.
        :rtype: ~azure.storage.filedatalake.UserDelegationKey

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_user_delegation_key]
                :end-before: [END get_user_delegation_key]
                :language: python
                :dedent: 8
                :caption: Get user delegation key from datalake service client.
        """
        # The wrapped blob service client performs the request; its result is
        # then converted into this package's UserDelegationKey.
        blob_service = self._blob_service_client
        generated_key = blob_service.get_user_delegation_key(
            key_start_time=key_start_time,
            key_expiry_time=key_expiry_time,
            **kwargs)
        return UserDelegationKey._from_generated(generated_key)  # pylint: disable=protected-access

    def list_file_systems(
            self,
            name_starts_with=None,  # type: Optional[str]
            include_metadata=None,  # type: Optional[bool]
            **kwargs):
        # type: (...) -> ItemPaged[FileSystemProperties]
        """Return a generator that lists the file systems under the account.

        The generator lazily follows the continuation tokens returned by the
        service and stops once every file system has been returned.

        :param str name_starts_with:
            Restricts the results to file systems whose names begin with
            this prefix.
        :param bool include_metadata:
            Specifies that file system metadata be returned in the response.
            The default value is `False`.
        :keyword int results_per_page:
            The maximum number of file system names to retrieve per API
            call. If the request does not specify it, the server will return
            up to 5,000 items per page.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :keyword bool include_deleted:
            Specifies that deleted file systems be returned in the response.
            This applies to accounts with file system restore enabled.
            The default value is `False`.

            .. versionadded:: 12.3.0
        :returns: An iterable (auto-paging) of FileSystemProperties.
        :rtype: ~azure.core.paging.ItemPaged[~azure.storage.filedatalake.FileSystemProperties]

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START list_file_systems]
                :end-before: [END list_file_systems]
                :language: python
                :dedent: 8
                :caption: Listing the file systems in the datalake service.
        """
        blob_service = self._blob_service_client
        containers = blob_service.list_containers(
            name_starts_with=name_starts_with,
            include_metadata=include_metadata,
            **kwargs)
        # Replace the page iterator class on the container listing with the
        # file system specific one before handing the pager to the caller.
        containers._page_iterator_class = FileSystemPropertiesPaged  # pylint: disable=protected-access
        return containers

    def create_file_system(
            self,
            file_system,  # type: Union[FileSystemProperties, str]
            metadata=None,  # type: Optional[Dict[str, str]]
            public_access=None,  # type: Optional[PublicAccess]
            **kwargs):
        # type: (...) -> FileSystemClient
        """Create a new file system under the account and return a client for it.

        A ResourceExistsError is raised if a file system with the same name
        already exists. The returned client can be used to interact with the
        newly created file system.

        :param str file_system:
            The name of the file system to create.
        :param metadata:
            A dict of name-value pairs to associate with the file system as
            metadata. Example: `{'Category':'test'}`
        :type metadata: dict(str, str)
        :param public_access:
            Possible values include: file system, file.
        :type public_access: ~azure.storage.filedatalake.PublicAccess
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START create_file_system_from_service_client]
                :end-before: [END create_file_system_from_service_client]
                :language: python
                :dedent: 8
                :caption: Creating a file system in the datalake service.
        """
        # Build the client first, then issue the create request through it.
        new_client = self.get_file_system_client(file_system)
        new_client.create_file_system(
            metadata=metadata, public_access=public_access, **kwargs)
        return new_client

    def _rename_file_system(self, name, new_name, **kwargs):
        # type: (str, str, **Any) -> FileSystemClient
        """Rename a filesystem and return a client for the new name.

        The operation succeeds only if the source filesystem exists.

        :param str name:
            The name of the filesystem to rename.
        :param str new_name:
            The name the filesystem should be renamed to.
        :keyword lease:
            Specify this to perform the operation only if the given lease ID
            matches the active lease ID of the source filesystem.
        :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient
        """
        # The rename itself is carried out by the wrapped blob service client.
        blob_service = self._blob_service_client
        blob_service._rename_container(name, new_name, **kwargs)  # pylint: disable=protected-access
        return self.get_file_system_client(new_name)

    def undelete_file_system(self, name, deleted_version, **kwargs):
        # type: (str, str, **Any) -> FileSystemClient
        """Restore a soft-deleted filesystem.

        The operation only succeeds when used within the number of days set
        in the delete retention policy.

        .. versionadded:: 12.3.0
            This operation was introduced in API version '2019-12-12'.

        :param str name:
            Specifies the name of the deleted filesystem to restore.
        :param str deleted_version:
            Specifies the version of the deleted filesystem to restore.
        :keyword str new_name:
            Optional. When given, it is forwarded to the blob service's
            ``undelete_container`` call and the returned client points at
            this name instead of ``name``.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient
        """
        restored_name = kwargs.pop('new_name', None)
        # The client targets the new name when one was given, else the original.
        target_name = restored_name or name
        restored_client = self.get_file_system_client(target_name)
        blob_service = self._blob_service_client
        blob_service.undelete_container(
            name, deleted_version, new_name=restored_name, **kwargs)
        return restored_client

    def delete_file_system(
            self,
            file_system,  # type: Union[FileSystemProperties, str]
            **kwargs):
        # type: (...) -> FileSystemClient
        """Marks the specified file system for deletion.

        The file system and any files contained within it are later deleted during garbage collection.
        If the file system is not found, a ResourceNotFoundError will be raised.

        :param file_system:
            The file system to delete. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :keyword lease:
            If specified, delete_file_system only succeeds if the
            file system's lease is active and matches this ID.
            Required if the file system has an active lease.
        :paramtype lease: ~azure.storage.filedatalake.DataLakeLeaseClient or str
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :returns: The client that was used to issue the delete request for the file system.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START delete_file_system_from_service_client]
                :end-before: [END delete_file_system_from_service_client]
                :language: python
                :dedent: 8
                :caption: Deleting a file system in the datalake service.
        """
        # The delete request is issued through a client for the file system;
        # that same client is returned to the caller.
        file_system_client = self.get_file_system_client(file_system)
        file_system_client.delete_file_system(**kwargs)
        return file_system_client

    def get_file_system_client(
            self,
            file_system  # type: Union[FileSystemProperties, str]
    ):
        # type: (...) -> FileSystemClient
        """Return a client for interacting with the given file system.

        The file system does not have to exist yet.

        :param file_system:
            The file system. Either the name of the file system or an
            instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :returns: A FileSystemClient.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_file_system.py
                :start-after: [START create_file_system_client_from_service]
                :end-before: [END create_file_system_client_from_service]
                :language: python
                :dedent: 8
                :caption: Getting the file system client to interact with a specific file system.
        """
        # Objects exposing ``name`` contribute that attribute; anything else
        # is used as the name itself.
        file_system_name = getattr(file_system, 'name', file_system)

        # The new client gets its own Pipeline built from this client's
        # transport (wrapped in TransportWrapper) and policies.
        child_pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport),  # pylint: disable = protected-access
            policies=self._pipeline._impl_policies  # pylint: disable = protected-access
        )
        return FileSystemClient(
            self.url,
            file_system_name,
            credential=self._raw_credential,
            _configuration=self._config,
            _pipeline=child_pipeline,
            _hosts=self._hosts,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def get_directory_client(
        self,
        file_system,  # type: Union[FileSystemProperties, str]
        directory  # type: Union[DirectoryProperties, str]
    ):
        # type: (...) -> DataLakeDirectoryClient
        """Return a client for interacting with the given directory.

        The directory does not have to exist yet.

        :param file_system:
            The file system that the directory is in. Either the name of the
            file system or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param directory:
            The directory with which to interact. Either the name of the
            directory or an instance of DirectoryProperties.
        :type directory: str or ~azure.storage.filedatalake.DirectoryProperties
        :returns: A DataLakeDirectoryClient.
        :rtype: ~azure.storage.filedatalake.DataLakeDirectoryClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_directory_client_from_service_client]
                :end-before: [END get_directory_client_from_service_client]
                :language: python
                :dedent: 8
                :caption: Getting the directory client to interact with a specific directory.
        """
        # Objects exposing ``name`` contribute that attribute; anything else
        # is used as the name itself.
        file_system_name = getattr(file_system, 'name', file_system)
        directory_name = getattr(directory, 'name', directory)

        # The new client gets its own Pipeline built from this client's
        # transport (wrapped in TransportWrapper) and policies.
        child_pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport),  # pylint: disable = protected-access
            policies=self._pipeline._impl_policies  # pylint: disable = protected-access
        )
        return DataLakeDirectoryClient(
            self.url,
            file_system_name,
            directory_name=directory_name,
            credential=self._raw_credential,
            _configuration=self._config,
            _pipeline=child_pipeline,
            _hosts=self._hosts,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def get_file_client(
        self,
        file_system,  # type: Union[FileSystemProperties, str]
        file_path  # type: Union[FileProperties, str]
    ):
        # type: (...) -> DataLakeFileClient
        """Return a client for interacting with the given file.

        The file does not have to exist yet.

        :param file_system:
            The file system that the file is in. Either the name of the file
            system or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param file_path:
            The file with which to interact. Either the full path of the file
            (from the root directory) or an instance of FileProperties,
            e.g. directory/subdirectory/file
        :type file_path: str or ~azure.storage.filedatalake.FileProperties
        :returns: A DataLakeFileClient.
        :rtype: ~azure.storage.filedatalake.DataLakeFileClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/datalake_samples_service.py
                :start-after: [START get_file_client_from_service_client]
                :end-before: [END get_file_client_from_service_client]
                :language: python
                :dedent: 8
                :caption: Getting the file client to interact with a specific file.
        """
        # Objects exposing ``name`` contribute that attribute; anything else
        # is used unchanged.
        file_system_name = getattr(file_system, 'name', file_system)
        file_path = getattr(file_path, 'name', file_path)

        # The new client gets its own Pipeline built from this client's
        # transport (wrapped in TransportWrapper) and policies.
        child_pipeline = Pipeline(
            transport=TransportWrapper(self._pipeline._transport),  # pylint: disable = protected-access
            policies=self._pipeline._impl_policies  # pylint: disable = protected-access
        )
        return DataLakeFileClient(
            self.url,
            file_system_name,
            file_path=file_path,
            credential=self._raw_credential,
            _hosts=self._hosts,
            _configuration=self._config,
            _pipeline=child_pipeline,
            require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)

    def set_service_properties(self, **kwargs):
        # type: (**Any) -> None
        """Set the properties of a storage account's Datalake service,
        including Azure Storage Analytics.

        .. versionadded:: 12.4.0
            This operation was introduced in API version '2020-06-12'.

        When an element (e.g. analytics_logging) is left as None, the
        settings already on the service for that functionality are kept.

        :keyword analytics_logging:
            Groups the Azure Analytics Logging settings.
        :type analytics_logging: ~azure.storage.filedatalake.AnalyticsLogging
        :keyword hour_metrics:
            The hour metrics settings provide a summary of request
            statistics grouped by API in hourly aggregates.
        :type hour_metrics: ~azure.storage.filedatalake.Metrics
        :keyword minute_metrics:
            The minute metrics settings provide request statistics
            for each minute.
        :type minute_metrics: ~azure.storage.filedatalake.Metrics
        :keyword cors:
            Up to five CorsRule elements may be included in the list.
            If an empty list is specified, all CORS rules will be deleted,
            and CORS will be disabled for the service.
        :type cors: list[~azure.storage.filedatalake.CorsRule]
        :keyword str target_version:
            Indicates the default version to use for requests if an incoming
            request's version is not specified.
        :keyword delete_retention_policy:
            The delete retention policy specifies whether to retain deleted
            files/directories. It also specifies the number of days and
            versions of file/directory to keep.
        :type delete_retention_policy: ~azure.storage.filedatalake.RetentionPolicy
        :keyword static_website:
            Specifies whether the static website feature is enabled,
            and if yes, indicates the index document and 404 error document to use.
        :type static_website: ~azure.storage.filedatalake.StaticWebsite
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: None
        """
        # All keywords are forwarded unchanged to the wrapped blob service
        # client, and its result is passed straight back.
        blob_service = self._blob_service_client
        return blob_service.set_service_properties(**kwargs)

    def get_service_properties(self, **kwargs):
        # type: (**Any) -> Dict[str, Any]
        """Get the properties of a storage account's datalake service,
        including Azure Storage Analytics.

        .. versionadded:: 12.4.0
            This operation was introduced in API version '2020-06-12'.

        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :returns: An object containing datalake service properties such as
            analytics logging, hour/minute metrics, cors rules, etc.
        :rtype: Dict[str, Any]
        """
        # Fetch the properties through the wrapped blob service client, then
        # convert them with get_datalake_service_properties.
        blob_service = self._blob_service_client
        blob_properties = blob_service.get_service_properties(**kwargs)
        return get_datalake_service_properties(blob_properties)
Exemplo n.º 4
0
class DataLakeServiceClient(StorageAccountHostsMixin):
    """A client to interact with the DataLake Service at the account level.

    This client provides operations to retrieve and configure the account properties
    as well as list, create and delete file systems within the account.
    For operations relating to a specific file system, directory or file, clients for those entities
    can also be retrieved using the `get_client` functions.

    :ivar str url:
        The full endpoint URL to the datalake service endpoint. This could be either the
        primary endpoint, or the secondary endpoint depending on the current `location_mode`.
    :ivar str primary_endpoint:
        The full primary endpoint URL.
    :ivar str primary_hostname:
        The hostname of the primary endpoint.
    :param str account_url:
        The URL to the DataLake storage account. Any other entities included
        in the URL path (e.g. file system or file) will be discarded. This URL can be optionally
        authenticated with a SAS token.
    :param credential:
        The credentials with which to authenticate. This is optional if the
        account URL already has a SAS token. The value can be a SAS token string, an account
        shared access key, or an instance of a TokenCredentials class from azure.identity.
        If the URL already has a SAS token, specifying an explicit credential will take priority.

    .. admonition:: Example:

        .. literalinclude:: ../samples/test_datalake_authentication_samples.py
            :start-after: [START create_datalake_service_client]
            :end-before: [END create_datalake_service_client]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient with account url and credential.

        .. literalinclude:: ../samples/test_datalake_authentication_samples.py
            :start-after: [START create_datalake_service_client_oauth]
            :end-before: [END create_datalake_service_client_oauth]
            :language: python
            :dedent: 8
            :caption: Creating the DataLakeServiceClient with Azure Identity credentials.
    """

    def __init__(
            self, account_url,  # type: str
            credential=None,  # type: Optional[Any]
            **kwargs  # type: Any
    ):
        # type: (...) -> None
        # Accept bare host names by defaulting to https; a non-string
        # account_url has no .lower() and is reported as a ValueError.
        try:
            if not account_url.lower().startswith('http'):
                account_url = "https://" + account_url
        except AttributeError:
            raise ValueError("Account URL must be a string.")
        parsed_url = urlparse(account_url.rstrip('/'))
        if not parsed_url.netloc:
            raise ValueError("Invalid URL: {}".format(account_url))

        # Account-level operations are delegated to a blob service client
        # built from the blob-endpoint form of the same account URL.
        blob_account_url = convert_dfs_url_to_blob_url(account_url)
        self._blob_service_client = BlobServiceClient(blob_account_url, credential, **kwargs)

        _, sas_token = parse_query(parsed_url.query)
        self._query_str, self._raw_credential = self._format_query_string(sas_token, credential)

        super(DataLakeServiceClient, self).__init__(parsed_url, service='dfs',
                                                    credential=self._raw_credential, **kwargs)

    def _format_url(self, hostname):
        """Format the endpoint URL according to the current location
        mode hostname.

        :param str hostname: The hostname to build the URL for.
        :returns: ``<scheme>://<hostname>/<query string>``.
        :rtype: str
        """
        return "{}://{}/{}".format(self.scheme, hostname, self._query_str)

    @classmethod
    def from_connection_string(
            cls, conn_str,  # type: str
            credential=None,  # type: Optional[Any]
            **kwargs  # type: Any
        ):  # type: (...) -> DataLakeServiceClient
        """
        Create DataLakeServiceClient from a Connection String.

        :param str conn_str:
            A connection string to an Azure Storage account.
        :param credential:
            The credentials with which to authenticate. This is optional if the
            account URL already has a SAS token, or the connection string already has shared
            access key values. The value can be a SAS token string, an account shared access
            key, or an instance of a TokenCredentials class from azure.identity.
            Credentials provided here will take precedence over those in the connection string.
        :return: A DataLakeServiceClient.
        :rtype: ~azure.storage.filedatalake.DataLakeServiceClient
        """
        account_url, secondary, credential = parse_connection_str(conn_str, credential, 'dfs')
        # An explicitly supplied secondary_hostname wins over the parsed one.
        if 'secondary_hostname' not in kwargs:
            kwargs['secondary_hostname'] = secondary
        return cls(account_url, credential=credential, **kwargs)

    def get_user_delegation_key(self, key_start_time,  # type: datetime
                                key_expiry_time,  # type: datetime
                                **kwargs  # type: Any
                                ):
        # type: (...) -> UserDelegationKey
        """
        Obtain a user delegation key for the purpose of signing SAS tokens.
        A token credential must be present on the service object for this request to succeed.

        :param ~datetime.datetime key_start_time:
            A DateTime value. Indicates when the key becomes valid.
        :param ~datetime.datetime key_expiry_time:
            A DateTime value. Indicates when the key stops being valid.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: The user delegation key.
        :rtype: ~azure.storage.filedatalake.UserDelegationKey
        """
        delegation_key = self._blob_service_client.get_user_delegation_key(key_start_time=key_start_time,
                                                                           key_expiry_time=key_expiry_time,
                                                                           **kwargs)  # pylint: disable=protected-access
        # NOTE(review): this only sets a plain attribute named `_class_`; it does
        # not change the object's type. If the intent was to re-type the key as
        # the datalake UserDelegationKey (`__class__`), confirm and fix upstream.
        delegation_key._class_ = UserDelegationKey  # pylint: disable=protected-access
        return delegation_key

    def list_file_systems(self, name_starts_with=None,  # type: Optional[str]
                          include_metadata=None,  # type: Optional[bool]
                          **kwargs):
        # type: (...) -> ItemPaged[FileSystemProperties]
        """Returns a generator to list the file systems under the specified account.

        The generator will lazily follow the continuation tokens returned by
        the service and stop when all file systems have been returned.

        :param str name_starts_with:
            Filters the results to return only file systems whose names
            begin with the specified prefix.
        :param bool include_metadata:
            Specifies that file system metadata be returned in the response.
            The default value is `False`.
        :keyword int results_per_page:
            The maximum number of file system names to retrieve per API
            call. If the request does not specify the server will return up to 5,000 items per page.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :returns: An iterable (auto-paging) of FileSystemProperties.
        :rtype: ~azure.core.paging.ItemPaged[~azure.storage.filedatalake.FileSystemProperties]

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START dsc_list_file_systems]
                :end-before: [END dsc_list_file_systems]
                :language: python
                :dedent: 12
                :caption: Listing the file systems in the datalake service.
        """
        item_paged = self._blob_service_client.list_containers(name_starts_with=name_starts_with,
                                                               include_metadata=include_metadata,
                                                               **kwargs)  # pylint: disable=protected-access
        # Swap the page iterator so the container pager yields file system pages.
        item_paged._page_iterator_class = FileSystemPropertiesPaged  # pylint: disable=protected-access
        return item_paged

    def create_file_system(self, file_system,  # type: Union[FileSystemProperties, str]
                           metadata=None,  # type: Optional[Dict[str, str]]
                           public_access=None,  # type: Optional[PublicAccess]
                           **kwargs):
        # type: (...) -> FileSystemClient
        """Creates a new file system under the specified account.

        If the file system with the same name already exists, a ResourceExistsError will
        be raised. This method returns a client with which to interact with the newly
        created file system.

        :param str file_system: The name of the file system to create.
        :param metadata:
            A dict with name-value pairs to associate with the
            file system as metadata. Example: `{'Category':'test'}`
        :type metadata: dict(str, str)
        :param public_access:
            Possible values include: file system, file.
        :type public_access: ~azure.storage.filedatalake.PublicAccess
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START dsc_create_file_system]
                :end-before: [END dsc_create_file_system]
                :language: python
                :dedent: 12
                :caption: Creating a file system in the datalake service.
        """
        file_system_client = self.get_file_system_client(file_system)
        file_system_client.create_file_system(metadata=metadata, public_access=public_access, **kwargs)
        return file_system_client

    def delete_file_system(self, file_system,  # type: Union[FileSystemProperties, str]
                           **kwargs):
        # type: (...) -> FileSystemClient
        """Marks the specified file system for deletion.

        The file system and any files contained within it are later deleted during garbage collection.
        If the file system is not found, a ResourceNotFoundError will be raised.

        :param file_system:
            The file system to delete. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :keyword ~azure.storage.filedatalake.DataLakeLeaseClient lease:
            If specified, delete_file_system only succeeds if the
            file system's lease is active and matches this ID.
            Required if the file system has an active lease.
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :returns: The client that was used to issue the delete request.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START bsc_delete_file_system]
                :end-before: [END bsc_delete_file_system]
                :language: python
                :dedent: 12
                :caption: Deleting a file system in the datalake service.
        """
        file_system_client = self.get_file_system_client(file_system)
        file_system_client.delete_file_system(**kwargs)
        return file_system_client

    def get_file_system_client(self, file_system  # type: Union[FileSystemProperties, str]
                               ):
        # type: (...) -> FileSystemClient
        """Get a client to interact with the specified file system.

        The file system need not already exist.

        :param file_system:
            The file system. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :returns: A FileSystemClient.
        :rtype: ~azure.storage.filedatalake.FileSystemClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START bsc_get_file_system_client]
                :end-before: [END bsc_get_file_system_client]
                :language: python
                :dedent: 8
                :caption: Getting the file system client to interact with a specific file system.
        """
        # A properties object carries the name in `.name`; a plain string is
        # already the name.
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system

        return FileSystemClient(self.url, file_system_name, credential=self._raw_credential,
                                _configuration=self._config,
                                _pipeline=self._pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
                                require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
                                key_resolver_function=self.key_resolver_function)

    def get_directory_client(self, file_system,  # type: Union[FileSystemProperties, str]
                             directory  # type: Union[DirectoryProperties, str]
                             ):
        # type: (...) -> DataLakeDirectoryClient
        """Get a client to interact with the specified directory.

        The directory need not already exist.

        :param file_system:
            The file system that the directory is in. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param directory:
            The directory with which to interact. This can either be the name of the directory,
            or an instance of DirectoryProperties.
        :type directory: str or ~azure.storage.filedatalake.DirectoryProperties
        :returns: A DataLakeDirectoryClient.
        :rtype: ~azure.storage.filedatalake.DataLakeDirectoryClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START bsc_get_directory_client]
                :end-before: [END bsc_get_directory_client]
                :language: python
                :dedent: 12
                :caption: Getting the directory client to interact with a specific directory.
        """
        # Accept either names or properties objects for both arguments.
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system
        try:
            directory_name = directory.name
        except AttributeError:
            directory_name = directory

        return DataLakeDirectoryClient(self.url, file_system_name, directory_name=directory_name,
                                       credential=self._raw_credential,
                                       _configuration=self._config, _pipeline=self._pipeline,
                                       _location_mode=self._location_mode, _hosts=self._hosts,
                                       require_encryption=self.require_encryption,
                                       key_encryption_key=self.key_encryption_key,
                                       key_resolver_function=self.key_resolver_function
                                       )

    def get_file_client(self, file_system,  # type: Union[FileSystemProperties, str]
                        file_path  # type: Union[FileProperties, str]
                        ):
        # type: (...) -> DataLakeFileClient
        """Get a client to interact with the specified file.

        The file need not already exist.

        :param file_system:
            The file system that the file is in. This can either be the name of the file system,
            or an instance of FileSystemProperties.
        :type file_system: str or ~azure.storage.filedatalake.FileSystemProperties
        :param file_path:
            The file with which to interact. This can either be the full path of the file(from the root directory),
            or an instance of FileProperties. eg. directory/subdirectory/file
        :type file_path: str or ~azure.storage.filedatalake.FileProperties
        :returns: A DataLakeFileClient.
        :rtype: ~azure.storage.filedatalake.DataLakeFileClient

        .. admonition:: Example:

            .. literalinclude:: ../samples/test_datalake_service_samples.py
                :start-after: [START bsc_get_file_client]
                :end-before: [END bsc_get_file_client]
                :language: python
                :dedent: 12
                :caption: Getting the file client to interact with a specific file.
        """
        # Accept either names or properties objects for both arguments.
        try:
            file_system_name = file_system.name
        except AttributeError:
            file_system_name = file_system
        try:
            file_path = file_path.name
        except AttributeError:
            pass

        return DataLakeFileClient(
            self.url, file_system_name, file_path=file_path, credential=self._raw_credential,
            _hosts=self._hosts, _configuration=self._config, _pipeline=self._pipeline,
            _location_mode=self._location_mode, require_encryption=self.require_encryption,
            key_encryption_key=self.key_encryption_key,
            key_resolver_function=self.key_resolver_function)
Exemplo n.º 5
0
class AzureBlobStorage:
    """Class for interacting with Azure Blob Storage."""
    def __init__(self, abs_name: str, connect: bool = False):
        """
        Initialize connector for Azure Python SDK.

        Parameters
        ----------
        abs_name : str
            The storage account name; the blob endpoint host is derived from it.
        connect : bool, optional
            If True, authenticate immediately by calling `connect()`,
            by default False.

        """
        self.connected = False
        self.abs_site = f"{abs_name}.blob.core.windows.net"
        self.credentials: Optional[AzCredentials] = None
        self.abs_client: Optional[BlobServiceClient] = None
        if connect is True:
            self.connect()

    def connect(
        self,
        auth_methods: Optional[List] = None,
        silent: bool = False,
    ):
        """
        Authenticate with the SDK.

        Parameters
        ----------
        auth_methods : List, optional
            Authentication methods, passed through to `az_connect`.
        silent : bool, optional
            Passed through to `az_connect`, by default False.

        Raises
        ------
        CloudError
            If credentials or the Blob Storage client cannot be obtained.

        """
        self.credentials = az_connect(auth_methods=auth_methods, silent=silent)
        if not self.credentials:
            raise CloudError("Could not obtain credentials.")
        self.abs_client = BlobServiceClient(self.abs_site,
                                            self.credentials.modern)
        if not self.abs_client:
            raise CloudError("Could not create a Blob Storage client.")
        self.connected = True

    def containers(self) -> pd.DataFrame:
        """
        Return containers in the Azure Blob Storage Account.

        Returns
        -------
        pd.DataFrame
            Details of the containers.

        Raises
        ------
        CloudError
            If the storage account cannot be reached.

        """
        try:
            # list_containers() returns a lazy pager, so the service is only
            # contacted on iteration. Drain it inside the try so connection
            # failures are translated into CloudError here.
            container_list = list(
                self.abs_client.list_containers())  # type:ignore
        except ServiceRequestError as err:
            raise CloudError(
                "Unable to connect check the Azure Blob Store account name"
            ) from err
        return _parse_returned_items(
            container_list, remove_list=["lease", "encryption_scope"])

    def create_container(self, container_name: str, **kwargs) -> pd.DataFrame:
        """
        Create a new container within the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The name for the new container.
        Additional container parameters can be passed as kwargs

        Returns
        -------
        pd.DataFrame
            Details of the created container.

        Raises
        ------
        CloudError
            If a container with this name already exists.

        """
        try:
            new_container = self.abs_client.create_container(  # type: ignore
                container_name, **kwargs)  # type:ignore
        except ResourceExistsError as err:
            raise CloudError(
                f"Container {container_name} already exists.") from err
        properties = new_container.get_container_properties()
        container_df = _parse_returned_items([properties],
                                             ["encryption_scope", "lease"])
        return container_df

    def blobs(self, container_name: str) -> Optional[pd.DataFrame]:
        """
        Get a list of blobs in a container.

        Parameters
        ----------
        container_name : str
            The name of the container to get blobs from.

        Returns
        -------
        Optional[pd.DataFrame]
            Details of the blobs, or None if the container holds no blobs.

        """
        container_client = self.abs_client.get_container_client(
            container_name)  # type: ignore
        blobs = list(container_client.list_blobs())
        return _parse_returned_items(blobs) if blobs else None

    def upload_to_blob(self,
                       blob: Any,
                       container_name: str,
                       blob_name: str,
                       overwrite: bool = True) -> bool:
        """
        Upload a blob of data.

        Parameters
        ----------
        blob : Any
            The data to upload.
        container_name : str
            The name of the container to upload the blob to.
        blob_name : str
            The name to give the blob.
        overwrite : bool, optional
            Whether or not you want to overwrite the blob if it exists, by default True.

        Returns
        -------
        bool
            True if the upload completed.

        Raises
        ------
        CloudError
            If the container does not exist or the upload result reports
            an error code.

        """
        try:
            blob_client = self.abs_client.get_blob_client(  # type:ignore
                container=container_name, blob=blob_name)
            upload = blob_client.upload_blob(blob, overwrite=overwrite)
        except ResourceNotFoundError as err:
            raise CloudError(
                "Unknown container, check container name or create it first."
            ) from err
        # The result may not carry an "error_code" entry at all; treat a
        # missing key the same as an empty one instead of raising KeyError.
        error_code = upload.get("error_code")
        if error_code:
            raise CloudError(
                f"There was a problem uploading the blob: {error_code}"
            )
        print("Upload complete")
        return True

    def get_blob(self, container_name: str, blob_name: str) -> bytes:
        """
        Get a blob from the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The name of the container that holds the blob.
        blob_name : str
            The name of the blob to download.

        Returns
        -------
        bytes
            The content of the blob in bytes.

        Raises
        ------
        CloudError
            If the blob does not exist in the container.

        """
        blob_client = self.abs_client.get_blob_client(  # type: ignore
            container=container_name, blob=blob_name)
        if not blob_client.exists():
            raise CloudError(
                f"The blob {blob_name} does not exist in {container_name}")
        data_stream = blob_client.download_blob()
        return data_stream.content_as_bytes()

    def delete_blob(self, container_name: str, blob_name: str) -> bool:
        """
        Delete a blob from the Azure Blob Storage account.

        Parameters
        ----------
        container_name : str
            The container name that has the blob.
        blob_name : str
            The name of the blob to delete.
        Note deleting a blob also deletes associated snapshots.

        Returns
        -------
        bool
            True if blob successfully deleted

        Raises
        ------
        CloudError
            If the blob does not exist in the container.

        """
        blob_client = self.abs_client.get_blob_client(  # type: ignore
            container=container_name, blob=blob_name)
        if not blob_client.exists():
            raise CloudError(
                f"The blob {blob_name} does not exist in {container_name}")
        blob_client.delete_blob(delete_snapshots="include")
        return True

    def get_sas_token(
        self,
        container_name: str,
        blob_name: str,
        end: Optional[datetime.datetime] = None,
        permission: str = "r",
    ) -> str:
        """
        Generate a shared access string (SAS) token for a blob.

        Parameters
        ----------
        container_name : str
            The name of the Azure Blob Storage container that holds the blob.
        blob_name : str
            The name of the blob to generate the SAS token for.
        end : datetime.datetime, optional
            The datetime the SAS token should expire, by default this is 7 days from now.
        permission : str, optional
            The permissions to give the SAS token, by default 'r' for read.

        Returns
        -------
        str
            A URI of the blob with SAS token.

        """
        # Azure interprets datetimes without timezone info as UTC, so the
        # validity window must start from the current UTC time; a naive local
        # "now" would shift the window by the host's UTC offset.
        start = datetime.datetime.now(tz=datetime.timezone.utc)
        if not end:
            end = start + datetime.timedelta(days=7)
        key = self.abs_client.get_user_delegation_key(start,
                                                      end)  # type: ignore
        abs_name = self.abs_client.account_name  # type: ignore
        sast = generate_blob_sas(
            abs_name,
            container_name,
            blob_name,
            user_delegation_key=key,
            permission=permission,
            expiry=end,
            start=start,
        )
        full_path = f"https://{abs_name}.blob.core.windows.net/{container_name}/{blob_name}?{sast}"
        return full_path