Exemplo n.º 1
0
    def __init__(
            self, client, lease_id=None
    ):  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
        # type: (Union[FileSystemClient, DataLakeDirectoryClient, DataLakeFileClient], Optional[str]) -> None
        """Create a lease client that delegates to a ``BlobLeaseClient``.

        :param client:
            A client exposing either a ``_blob_client`` or a ``_container_client``
            attribute; that inner client is the one the lease is taken on.
        :param str lease_id:
            ID of an existing lease. When omitted, a fresh UUID string is generated.
        :raises TypeError:
            If ``client`` exposes neither ``_blob_client`` nor ``_container_client``.
        """
        self.id = lease_id or str(uuid.uuid4())
        self.last_modified = None
        self.etag = None

        # Prefer the blob-level client; fall back to the container-level one.
        if hasattr(client, '_blob_client'):
            _client = client._blob_client  # type: ignore # pylint: disable=protected-access
        elif hasattr(client, '_container_client'):
            _client = client._container_client  # type: ignore # pylint: disable=protected-access
        else:
            raise TypeError("Lease must use any of FileSystemClient DataLakeDirectoryClient, or DataLakeFileClient.")

        # Forward self.id (not the raw lease_id, which may be None) so that the
        # wrapped client does not generate a second, different ID of its own.
        self._blob_lease_client = BlobLeaseClient(_client, lease_id=self.id)
Exemplo n.º 2
0
 def tearDown(self):
     """Best-effort removal of the containers listed in ``self.test_containers``.

     Nothing is deleted when ``self.is_playback()`` is true. Cleanup failures
     are swallowed on purpose so that a failed delete never fails the test,
     but only ``Exception`` subclasses are swallowed: ``KeyboardInterrupt``
     and ``SystemExit`` still propagate.
     """
     if not self.is_playback():
         for container_name in self.test_containers:
             try:
                 container = self.bsc.get_container_client(container_name)
                 container.delete_container()
             except HttpResponseError:
                 # Presumably the delete was rejected because of an active
                 # lease: break it immediately (period 0) and retry once.
                 try:
                     lease = BlobLeaseClient(container)
                     lease.break_lease(0)
                     container.delete_container()
                 except Exception:  # pylint: disable=broad-except
                     pass
             except Exception:  # pylint: disable=broad-except
                 pass
     return super(StorageContainerTest, self).tearDown()
def copy_blob(storage_account_conn_str, source_container_name,
              source_blob_name, dest_container_name):
    """
    Copy a blob in a blob storage container to another blob storage container in a storage account.

    The destination blob keeps the source blob's name. The source blob is
    leased while the copy is started and its status is read; the lease is
    always broken afterwards, including when one of those steps fails.

    Any exception is caught and printed; the function never raises and
    always returns None.

    :param storage_account_conn_str: Connection string of the storage account.
    :param source_container_name: Name of the container holding the source blob.
    :param source_blob_name: Name of the blob to copy.
    :param dest_container_name: Name of the container receiving the copy.
    """
    try:
        # Create the blob object representing the source
        source_blob_client = BlobClient.from_connection_string(
            conn_str=storage_account_conn_str,
            container_name=source_container_name,
            blob_name=source_blob_name)

        # Create the blob object representing the destination
        dest_blob_client = BlobClient.from_connection_string(
            conn_str=storage_account_conn_str,
            container_name=dest_container_name,
            blob_name=source_blob_name)

        print('Copying a Blob from a Blob container to another one ... ')

        # Lease the source blob for the copy operation
        # to prevent another client from modifying it.
        lease = BlobLeaseClient(source_blob_client)
        lease.acquire()
        try:
            # Get the source blob's properties and display the lease state.
            source_props = source_blob_client.get_blob_properties()
            print("Lease state: " + str(source_props.lease.state))

            # Start the copy operation.
            dest_blob_client.start_copy_from_url(source_blob_client.url)

            # Get the destination blob's properties to check the copy status.
            properties = dest_blob_client.get_blob_properties()
            copy_props = properties.copy

            # Display the copy status. str() guards against non-string
            # values, which would make the concatenation raise.
            print("Copy status: " + str(copy_props["status"]))
            print("Copy progress: " + str(copy_props["progress"]))
            print("Completion time: " + str(copy_props["completion_time"]))
            print("Total bytes: " + str(properties.size))
        finally:
            # Always break the lease, otherwise a failure above would leave
            # the source blob leased.
            lease.break_lease()

        # Re-read the source blob's properties to show the lease state
        # after the break.
        source_props = source_blob_client.get_blob_properties()
        print("Lease state: " + str(source_props.lease.state))

        print('\nCopied')

    except Exception as e:  # pylint: disable=broad-except
        print("\nError:")
        print(e)
Exemplo n.º 4
0
    def blob_copy(self, container_name, blob_name):
        """Copy a blob to a new, uniquely named blob in the same container.

        The connection string is read from the ``AZURE_STORAGE_CONNECTION_STRING``
        environment variable. The source blob is leased while the copy is
        started and its status is printed; the lease is always broken
        afterwards, including when one of those steps fails.

        ``ResourceNotFoundError`` and ``ServiceRequestError`` are caught and
        printed; other exceptions propagate.

        :param container_name: Container holding the source blob; the copy is
            created in the same container.
        :param blob_name: Name of the blob to copy. The copy is named
            ``<uuid4>-<blob_name>``.
        """
        # Read the connection string once; it is used for both clients.
        connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")

        source_blob = BlobClient.from_connection_string(
            connection_string, container_name, blob_name
        )

        try:
            # Lease the source blob for the copy operation
            # to prevent another client from modifying it.
            lease = BlobLeaseClient(source_blob)
            lease.acquire()
            try:
                # Get the source blob's properties and display the lease state.
                source_props = source_blob.get_blob_properties()
                print("Lease state: " + source_props.lease.state)

                # Create a BlobClient representing the
                # destination blob with a unique name.
                dest_blob = BlobClient.from_connection_string(
                    connection_string,
                    container_name, str(uuid.uuid4()) + "-" + blob_name
                )

                # Start the copy operation.
                dest_blob.start_copy_from_url(source_blob.url)

                # Get the destination blob's properties to check the copy status.
                properties = dest_blob.get_blob_properties()
                copy_props = properties.copy

                # Display the copy status
                print("Copy status: " + copy_props["status"])
                print("Copy progress: " + copy_props["progress"])
                print("Completion time: " + str(copy_props["completion_time"]))
                print("Total bytes: " + str(properties.size))
            finally:
                # Always break the lease, otherwise a failure above would
                # leave the source blob leased.
                lease.break_lease()

            # Re-read the source blob's properties to show the lease state
            # after the break.
            source_props = source_blob.get_blob_properties()
            print("Lease state: " + source_props.lease.state)

        except ResourceNotFoundError as ex:
            print("ResourceNotFoundError: ", ex.message)

        except ServiceRequestError as ex:
            print("ServiceRequestError: ", ex.message)
Exemplo n.º 5
0
class DataLakeLeaseClient(object):
    """Creates a new DataLakeLeaseClient.

    This client provides lease operations on a FileSystemClient, DataLakeDirectoryClient or DataLakeFileClient.
    Every operation is delegated to an internal ``BlobLeaseClient``; after each
    delegated call except ``break_lease``, ``id``, ``last_modified`` and ``etag``
    are copied back from that client.

    :ivar str id:
        The ID of the lease currently being maintained. When no ``lease_id`` is
        passed to the constructor this is a freshly generated UUID string.
    :ivar str etag:
        The ETag of the lease currently being maintained. This will be `None` if no
        lease has yet been acquired or modified.
    :ivar ~datetime.datetime last_modified:
        The last modified timestamp of the lease currently being maintained.
        This will be `None` if no lease has yet been acquired or modified.

    :param client:
        The client of the file system, directory, or file to lease.
    :type client: ~azure.storage.filedatalake.FileSystemClient or
        ~azure.storage.filedatalake.DataLakeDirectoryClient or ~azure.storage.filedatalake.DataLakeFileClient
    :param str lease_id:
        A string representing the lease ID of an existing lease. This value does not
        need to be specified in order to acquire a new lease, or break one.
    """
    def __init__(
            self, client, lease_id=None
    ):  # pylint: disable=missing-client-constructor-parameter-credential,missing-client-constructor-parameter-kwargs
        # type: (Union[FileSystemClient, DataLakeDirectoryClient, DataLakeFileClient], Optional[str]) -> None
        self.id = lease_id or str(uuid.uuid4())
        self.last_modified = None
        self.etag = None

        # Prefer the blob-level client; fall back to the container-level one.
        if hasattr(client, '_blob_client'):
            _client = client._blob_client  # type: ignore # pylint: disable=protected-access
        elif hasattr(client, '_container_client'):
            _client = client._container_client  # type: ignore # pylint: disable=protected-access
        else:
            raise TypeError("Lease must use any of FileSystemClient DataLakeDirectoryClient, or DataLakeFileClient.")

        # Forward self.id (not the raw lease_id, which may be None) so that the
        # wrapped client does not generate a second, different ID of its own.
        self._blob_lease_client = BlobLeaseClient(_client, lease_id=self.id)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Leaving the ``with`` block always releases the lease.
        self.release()

    def acquire(self, lease_duration=-1, **kwargs):
        # type: (int, **Any) -> None
        """Requests a new lease.

        If the file/file system does not have an active lease, the DataLake service creates a
        lease on the file/file system and returns a new lease ID.

        :param int lease_duration:
            Specifies the duration of the lease, in seconds, or negative one
            (-1) for a lease that never expires. A non-infinite lease can be
            between 15 and 60 seconds. A lease duration cannot be changed
            using renew or change. Default is -1 (infinite lease).
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :rtype: None
        """
        self._blob_lease_client.acquire(lease_duration=lease_duration, **kwargs)
        self._update_lease_client_attributes()

    def renew(self, **kwargs):
        # type: (Any) -> None
        """Renews the lease.

        The lease can be renewed if the lease ID specified in the
        lease client matches that associated with the file system or file. Note that
        the lease may be renewed even if it has expired as long as the file system
        or file has not been leased again since the expiration of that lease. When you
        renew a lease, the lease duration clock resets.

        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.renew(**kwargs)
        self._update_lease_client_attributes()

    def release(self, **kwargs):
        # type: (Any) -> None
        """Release the lease.

        The lease may be released if the client lease id specified matches
        that associated with the file system or file. Releasing the lease allows another client
        to immediately acquire the lease for the file system or file as soon as the release is complete.

        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.release(**kwargs)
        self._update_lease_client_attributes()

    def change(self, proposed_lease_id, **kwargs):
        # type: (str, Any) -> None
        """Change the lease ID of an active lease.

        :param str proposed_lease_id:
            Proposed lease ID, in a GUID string format. The DataLake service returns 400
            (Invalid request) if the proposed lease ID is not in the correct format.
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword str etag:
            An ETag value, or the wildcard character (*). Used to check if the resource has changed,
            and act according to the condition specified by the `match_condition` parameter.
        :keyword ~azure.core.MatchConditions match_condition:
            The match condition to use upon the etag.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: None
        """
        self._blob_lease_client.change(proposed_lease_id=proposed_lease_id, **kwargs)
        self._update_lease_client_attributes()

    def break_lease(self, lease_break_period=None, **kwargs):
        # type: (Optional[int], Any) -> int
        """Break the lease, if the file system or file has an active lease.

        Once a lease is broken, it cannot be renewed. Any authorized request can break the lease;
        the request is not required to specify a matching lease ID. When a lease
        is broken, the lease break period is allowed to elapse, during which time
        no lease operation except break and release can be performed on the file system or file.
        When a lease is successfully broken, the response indicates the interval
        in seconds until a new lease can be acquired.

        :param int lease_break_period:
            This is the proposed duration of seconds that the lease
            should continue before it is broken, between 0 and 60 seconds. This
            break period is only used if it is shorter than the time remaining
            on the lease. If longer, the time remaining on the lease is used.
            A new lease will not be available before the break period has
            expired, but the lease may be held for longer than the break
            period. If this header does not appear with a break
            operation, a fixed-duration lease breaks after the remaining lease
            period elapses, and an infinite lease breaks immediately.
        :keyword ~datetime.datetime if_modified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only
            if the resource has been modified since the specified time.
        :keyword ~datetime.datetime if_unmodified_since:
            A DateTime value. Azure expects the date value passed in to be UTC.
            If timezone is included, any non-UTC datetimes will be converted to UTC.
            If a date is passed in without timezone info, it is assumed to be UTC.
            Specify this header to perform the operation only if
            the resource has not been modified since the specified date/time.
        :keyword int timeout:
            The timeout parameter is expressed in seconds.
        :return: Approximate time remaining in the lease period, in seconds.
        :rtype: int
        """
        # Hand the delegate's result back to the caller; it was previously
        # dropped, so this method returned None despite its documented int.
        return self._blob_lease_client.break_lease(lease_break_period=lease_break_period, **kwargs)

    def _update_lease_client_attributes(self):
        # Mirror the wrapped client's state onto this wrapper.
        self.id = self._blob_lease_client.id  # type: str
        self.last_modified = self._blob_lease_client.last_modified  # type: datetime
        self.etag = self._blob_lease_client.etag  # type: str
Exemplo n.º 6
0
    def update(self, instance, validated_data):
        """Replace file references on ``instance`` with newly created ``RelatedFile`` rows.

        For every field in ``validated_data`` a new ``RelatedFile`` is created
        from the referenced storage path. How the file content gets there
        depends on the attributes ``default_storage`` exposes:

        * ``bucket``: an object copy is issued on the stored file.
        * ``azure_container``: a blob copy is started while the source blob
          is leased.
        * otherwise: the file is opened directly from ``default_storage``.

        The previous value of each field, if truthy, is deleted after
        ``instance`` has been saved.

        :param instance: Object whose fields are updated and saved.
        :param validated_data: Mapping of field name to storage reference.
        :return: The updated ``instance``.
        """
        files_for_removal = []

        for field in validated_data:
            reference = validated_data[field]
            content_type = self.get_content_type(reference)
            # S3 storage - File copy needed
            if hasattr(default_storage, 'bucket'):
                fname = path.basename(reference)
                # Empty placeholder content; the real bytes arrive through
                # the copy issued below.
                new_file = ContentFile(b'')
                new_file.name = default_storage.get_alternative_name(fname, '')
                new_related_file = RelatedFile.objects.create(
                    file=new_file,
                    filename=fname,
                    content_type=content_type,
                    creator=self.context['request'].user,
                    store_as_filename=True,
                )
                bucket = default_storage.bucket
                stored_file = default_storage.open(new_related_file.file.name)
                stored_file.obj.copy({
                    "Bucket": bucket.name,
                    "Key": reference
                })
                stored_file.obj.wait_until_exists()

            elif hasattr(default_storage, 'azure_container'):
                # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
                fname = path.basename(reference)
                new_file_name = default_storage.get_alternative_name(
                    reference, '')
                new_blobname = path.basename(new_file_name)

                # Copies a blob asynchronously.
                source_blob = default_storage.client.get_blob_client(
                    reference)
                dest_blob = default_storage.client.get_blob_client(
                    new_file_name)

                # Acquire outside the try block: if creating or acquiring the
                # lease fails there is no lease to break, and the original
                # error must not be masked by a failing cleanup call.
                lease = BlobLeaseClient(source_blob)
                lease.acquire()
                try:
                    dest_blob.start_copy_from_url(source_blob.url)
                    wait_for_blob_copy(dest_blob)
                finally:
                    # Break the lease exactly once, on success and on failure;
                    # a copy failure still propagates to the caller.
                    lease.break_lease()

                stored_blob = default_storage.open(new_blobname)
                new_related_file = RelatedFile.objects.create(
                    file=File(stored_blob, name=new_blobname),
                    filename=fname,
                    content_type=content_type,
                    creator=self.context['request'].user,
                    store_as_filename=True,
                )

            # Shared-fs
            else:
                stored_file = default_storage.open(reference)
                new_file = File(stored_file, name=reference)
                new_related_file = RelatedFile.objects.create(
                    file=new_file,
                    filename=reference,
                    content_type=content_type,
                    creator=self.context['request'].user,
                    store_as_filename=True,
                )

            # Mark the previous reference for deletion if it exists
            prev_file = getattr(instance, field, None)
            if prev_file:
                files_for_removal.append(prev_file)

            # Set new file ref
            setattr(instance, field, new_related_file)

        # Update & Delete prev linked files
        instance.save(update_fields=list(validated_data))
        for f in files_for_removal:
            f.delete()
        return instance
Exemplo n.º 7
0
def store_file(reference, content_type, creator, required=True, filename=None):
    """ Returns a `RelatedFile` object to store

    The reference is handled by the first matching strategy: download from
    a URL, S3 object copy, Azure blob copy, or a plain shared-filesystem
    reference.

    :param reference: Storage reference of file (url or file path)
    :type  reference: string

    :param content_type: Mime type of file
    :type  content_type: string

    :param creator: Id of Django user
    :type  creator: int

    :param required: Allow for None returns if set to false
    :type  required: boolean

    :param filename: Optional file name to store under; when omitted the
                     name is derived from the reference
    :type  filename: string

    :return: Model Object holding a Django file
    :rtype: RelatedFile

    :raises TypeError: from the shared-filesystem path when ``required`` is true
    """

    # Download data from URL
    if is_valid_url(reference):
        response = urlopen(reference)
        try:
            fdata = response.read()
            disposition = response.headers.get('Content-Disposition', '')
        finally:
            # Release the connection as soon as the payload has been read.
            response.close()

        # Find file name: use the header only when it actually carries a
        # "filename=" part, dropping trailing parameters and quotes;
        # otherwise fall back to the last segment of the URL path.
        header_fname = ''
        if 'filename=' in disposition:
            header_fname = disposition.split('filename=')[-1].split(';', 1)[0].strip().strip('"\'')
        ref = header_fname if header_fname else os.path.basename(urlparse(reference).path)
        fname = filename if filename else ref
        logger.info('Store file: {}'.format(ref))

        # Create temp file, download content and store
        with TemporaryFile() as tmp_file:
            tmp_file.write(fdata)
            tmp_file.seek(0)
            return RelatedFile.objects.create(
                file=File(tmp_file, name=fname),
                filename=fname,
                content_type=content_type,
                creator=creator,
                store_as_filename=True,
            )

    # Issue S3 object Copy
    if is_in_bucket(reference):
        # Derive the fallback name here; this branch previously read a
        # variable that was only assigned in the URL branch above.
        fname = filename if filename else os.path.basename(reference)
        # Empty placeholder content; the real bytes arrive through the copy.
        new_file = ContentFile(b'')
        new_file.name = fname
        new_related_file = RelatedFile.objects.create(
            file=new_file,
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True,
        )
        stored_file = default_storage.open(new_related_file.file.name)
        stored_file.obj.copy({"Bucket": default_storage.bucket.name, "Key": reference})
        stored_file.obj.wait_until_exists()
        return new_related_file

    # Issue Azure object Copy
    if is_in_container(reference):
        new_filename = filename if filename else os.path.basename(reference)
        fname = default_storage._get_valid_path(new_filename)
        source_blob = default_storage.client.get_blob_client(reference)
        dest_blob = default_storage.client.get_blob_client(fname)

        # Acquire outside the try block: if creating or acquiring the lease
        # fails there is no lease to break, and the original error must not
        # be masked by a failing cleanup call.
        lease = BlobLeaseClient(source_blob)
        lease.acquire()
        try:
            dest_blob.start_copy_from_url(source_blob.url)
            wait_for_blob_copy(dest_blob)
        finally:
            # Break the lease exactly once, on success and on failure;
            # a copy failure still propagates to the caller.
            lease.break_lease()

        stored_blob = default_storage.open(os.path.basename(fname))
        new_related_file = RelatedFile.objects.create(
            file=File(stored_blob, name=fname),
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True)
        return new_related_file

    try:
        # Copy via shared FS
        ref = str(os.path.basename(reference))
        fname = filename if filename else ref
        return RelatedFile.objects.create(
            file=ref,
            filename=fname,
            content_type=content_type,
            creator=creator,
            store_as_filename=True,
        )
    except TypeError as e:
        if required:
            raise
        logger.warning(f'Failed to store file reference: {reference} - {e}')
        return None
    def copy_files(self, from_location, to_location, file_pattern):
        """Copy the blobs matching ``file_pattern`` to ``to_location``.

        Sources are obtained from ``self.list_files``. Each source blob is
        leased, copied within ``self.settings.storage_container`` to
        ``<to_location>/<basename>``, and the lease is broken after a
        successful copy.

        :param from_location: Location passed to ``self.list_files``.
        :param to_location: Prefix under which the copies are created.
        :param file_pattern: Pattern passed to ``self.list_files``.
        :return: A result message: the one from ``self.list_files`` when
            there is nothing to iterate, otherwise the outcome of the last
            file processed. Messages that carry a ``"reference"`` entry are
            copies of the ``messages.message`` templates.
        """
        result, sources, _source_names = self.list_files(location=from_location, file_pattern=file_pattern)
        if sources is None:
            print("no files found in source >" + from_location + "<.")
            # Copy the template so the shared messages.message entry does not
            # keep a stale "reference" for later callers.
            result = dict(messages.message["ok"])
            result["reference"] = "No files in source: " + from_location
            return result

        for src_file in sources:
            # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
            tgt_file = to_location + "/" + os.path.basename(src_file)
            src_blob = BlobClient(
                self.blob_service_client.url,
                container_name=self.settings.storage_container,
                blob_name=src_file,
                credential=self.sas_token
            )
            lease = BlobLeaseClient(src_blob)
            lease.acquire()
            source_props = src_blob.get_blob_properties()
            print("Lease state for source file %s: %s" % (src_file, source_props.lease.state))

            print("Copying %s.%s to %s using url %s"
                  % (self.settings.storage_container, src_file, tgt_file, src_blob.url))

            # Stored on self; presumably self.check_copy_status polls this
            # target blob - TODO confirm.
            self.tgt = self.blob_service_client.get_blob_client(self.settings.storage_container, tgt_file)

            try:
                self.tgt.start_copy_from_url(src_blob.url)
                self.wait_condition(condition=self.check_copy_status,
                                    timeout=self.max_wait_in_sec,
                                    granularity=self.recheck)

                properties = self.tgt.get_blob_properties()
                print("Total bytes: " + str(properties.size))
                copy_props = properties.copy
                if copy_props["status"] != "success":
                    self.tgt.abort_copy(copy_id=copy_props["id"])
                    print("Unable to copy blob %s to %s. Status: %s"
                          % (src_file, tgt_file, copy_props["status"]))
                    result = messages.message["copy_files_failed"]
                    # Note: We do not release the lease in case of errors
                    # NOTE(review): the source blob stays leased after a
                    # failed copy or a ResourceNotFoundError - confirm this
                    # is intended.
                    break

                if source_props.lease.state == "leased":
                    # Break the lease on the source blob.
                    lease.break_lease()
                    # Update the source blob's properties to check the lease state.
                    source_props = src_blob.get_blob_properties()
                    print("Lease state: " + source_props.lease.state)

                result = messages.message["ok"]
            except exceptions.ResourceNotFoundError as e:
                print("Azure reported a resource not found error: ", e)
                # Copy the template before attaching per-call details.
                result = dict(messages.message["resource_not_found"])
                result["reference"] = "source: %s, targte: %s" % (src_file, tgt_file)

        return result