Example #1
    def upload_chunks(
        self,
        file_path: Path,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)
            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch, upload.idx)
                except Exception:
                    log.debug(
                        "No associated batch found, restarting from zero",
                        exc_info=True,
                    )
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(batchId=upload.batch, service=self.uploads)
                    batch.upload_idx = upload.idx
                    chunk_size = upload.chunk_size

            if not batch:
                # Create a new batch and save it in the DB
                batch = self.uploads.batch()

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by
            # default is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            if not upload:
                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.uid,
                    idx=batch.upload_idx,
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Set these attributes as FileBlob does not have them;
            # they are required for step 2 of .upload()
            blob.batch_id = upload.batch
            blob.fileIdx = upload.idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )

            # Update the progress on chunked uploads only, as the first call to
            # action.progress would set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legitimate.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            log.debug(
                f"Upload progression is {action.get_percent():.2f}% "
                f"(data length is {sizeof_fmt(blob.size)}, "
                f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
            )

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # uploader.chunk_size may be unset for a one-shot upload, hence the fallback to 0
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
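
A minimal sketch of the chunking decision used above, assuming the default Options values (chunk_size = 20 and chunk_limit = 20, i.e. 20 MiB, with chunk_upload enabled); the should_chunk() helper is hypothetical and only illustrates the condition and the arithmetic:

MiB = 1024 * 1024

def should_chunk(blob_size: int, chunk_upload: bool = True, chunk_limit_mib: int = 20) -> bool:
    """Chunk only when chunked uploads are enabled and the blob is bigger
    than the chunk limit (expressed in MiB), mirroring the condition
    computed in the example."""
    return chunk_upload and blob_size > chunk_limit_mib * MiB

# A 25 MiB file is chunked (two chunks with the default 20 MiB chunk size),
# while a 10 MiB file is sent in one go.
assert should_chunk(25 * MiB)
assert not should_chunk(10 * MiB)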
Example #2
    def upload_chunks(
        self,
        file_path: Path,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
        **kwargs: Any,
    ) -> Tuple[FileBlob, Batch]:
        """Upload a blob by chunks or in one go."""

        engine_uid = kwargs.get("engine_uid", None)
        is_direct_edit = kwargs.pop("is_direct_edit", False)
        is_direct_transfer = kwargs.get("is_direct_transfer", False)
        remote_parent_path = kwargs.pop("remote_parent_path", "")
        remote_parent_ref = kwargs.pop("remote_parent_ref", "")

        blob = FileBlob(str(file_path))
        action = self.upload_action(
            file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
        )
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None

        # See if there is already a transfer for this file
        transfer = self.get_upload(file_path)

        try:
            if transfer:
                log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
                if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                    raise UploadPaused(transfer.uid or -1)

                # When checking for an existing batch, specifying the file index
                # is not possible for S3 as there is no blob at the current index
                # until the S3 upload itself is done and batch.complete() has
                # been called.
                file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

                # Check if the associated batch still exists server-side
                try:
                    self.remote.uploads.get(
                        transfer.batch["batchId"], file_idx=file_idx
                    )
                except HTTPError as exc:
                    if exc.status != 404:
                        raise
                    log.debug("No associated batch found, restarting from zero")
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.remote.uploads, **transfer.batch)
                    chunk_size = transfer.chunk_size

                    # The transfer was already completed on the third-party provider
                    if batch.etag:
                        return self._complete_upload(batch, blob)

            if not batch:
                # The .uploads.handlers() result is cached, so it is cheap to call it again here
                # in case the server did not answer correctly the previous time; otherwise S3 would
                # stay completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.remote.uploads.batch(handler=handler)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by
            # default is equal to Options.chunk_size.
            chunked = (
                Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
            )

            action.is_direct_transfer = is_direct_transfer

            try:
                uploader = batch.get_uploader(
                    blob,
                    chunked=chunked,
                    chunk_size=chunk_size,
                    callback=self.remote.upload_callback,
                )
            except ClientError as exc:
                if exc.response["Error"]["Code"] != "NoSuchUpload":
                    raise

                log.warning(
                    "Either the upload ID does not exist, or the upload was already completed."
                )
                return self._complete_upload(batch, blob)

            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not transfer:
                # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                transfer = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    filesize=blob.size,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                    is_direct_transfer=is_direct_transfer,
                    remote_parent_path=remote_parent_path,
                    remote_parent_ref=remote_parent_ref,
                )
                self.dao.save_upload(transfer)
            elif transfer.batch["batchId"] != batch.uid:
                # The upload was not a fresh one, but its batch ID had expired.
                # Before NXDRIVE-2183, the batch ID was not updated and so the second step
                # of the upload (attaching the blob to a document) was failing.
                transfer.batch["batchId"] = batch.uid
                self.dao.update_upload(transfer)

            if uploader.chunked:
                # Update the progress on chunked uploads only, as the first call to
                # action.progress would set the action.uploaded attr to True for
                # empty files. This is not what we want: empty files are legitimate.
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                if batch.is_s3():
                    self._patch_refresh_token(uploader, transfer)

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                    # Save the progression
                    transfer.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", transfer)

                    # Handle status changes every time a chunk is sent
                    _transfer = self.get_upload(file_path)
                    if _transfer and _transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size

                transfer.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload() that will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc.
                    uploader.upload()

                # Save the final ETag in the database to prevent future issues if
                # the FileManager throws an error
                transfer.batch = batch.as_dict()
                self.dao.update_upload(transfer)

            self._complete_upload(batch, blob)

            # Transfer is completed, update the status in the database
            transfer.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", transfer)

            return blob, batch
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if transfer:
                    transfer.progress = percent
                    self.dao.set_transfer_progress("upload", transfer)

            action.finish_action()

            if blob.fd:
                blob.fd.close()
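
When Example #2 resumes a transfer, it seeds action.progress with chunk_size * len(uploadedChunkIds) and then derives a percentage from it. A minimal sketch of that arithmetic, assuming get_percent() simply divides the progress by the file size; resume_percent() is a hypothetical helper written only for illustration:

from typing import Sequence

MiB = 1024 * 1024

def resume_percent(chunk_size: int, uploaded_chunk_ids: Sequence[int], file_size: int) -> float:
    """Estimate the resume percentage from the chunks the server already holds."""
    if not file_size:
        return 0.0
    progress = chunk_size * len(uploaded_chunk_ids)
    return min(progress / file_size * 100.0, 100.0)

# Resuming a 100 MiB upload with 3 of its 5 chunks (20 MiB each) already sent:
print(resume_percent(20 * MiB, [0, 1, 2], 100 * MiB))  # 60.0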
Example #3
    def upload_chunks(
        self,
        file_path: Path,
        filename: Optional[str] = None,
        mime_type: Optional[str] = None,
        **params: Any,
    ) -> FileBlob:
        """Upload a blob by chunks or in one go."""

        action = UploadAction(file_path, reporter=QApplication.instance())
        blob = FileBlob(str(file_path))
        if filename:
            blob.name = filename
        if mime_type:
            blob.mimetype = mime_type

        batch: Optional[Batch] = None
        chunk_size = None
        upload: Optional[Upload] = None

        try:
            # See if there is already a transfer for this file
            upload = self.dao.get_upload(path=file_path)

            if upload:
                log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
                if upload.status not in (TransferStatus.ONGOING,
                                         TransferStatus.DONE):
                    raise UploadPaused(upload.uid or -1)

                # When checking for an existing batch, specifying the file index
                # is not possible for S3 as there is no blob at the current index
                # until the S3 upload itself is done and batch.complete() has
                # been called.
                file_idx = (None if upload.batch.get("provider", "") == "s3"
                            else upload.batch["upload_idx"])

                # Check if the associated batch still exists server-side
                try:
                    self.uploads.get(upload.batch["batchId"],
                                     file_idx=file_idx)
                except Exception:
                    log.debug(
                        "No associated batch found, restarting from zero",
                        exc_info=True)
                else:
                    log.debug("Associated batch found, resuming the upload")
                    batch = Batch(service=self.uploads, **upload.batch)
                    chunk_size = upload.chunk_size

                    if batch.is_s3():
                        token_ttl = self._aws_token_ttl(
                            batch.extraInfo["expiration"] / 1000)
                        if token_ttl.total_seconds() < 1:
                            batch = None
                            upload = None
                            log.warning(
                                "AWS token has expired, restarting from zero")

            if not batch:
                # The .uploads.handlers() result is cached, so it is cheap to call it again here
                # in case the server did not answer correctly the previous time; otherwise S3 would
                # stay completely disabled because of a one-time server error.
                handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

                # Create a new batch and save it in the DB
                batch = self.uploads.batch(handler=handler)

                if batch.is_s3():
                    self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

            # By default, Options.chunk_size is 20, so chunks will be 20MiB.
            # It can be set to a value between 1 and 20 through the config.ini
            chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

            # For the upload to be chunked, Options.chunk_upload must be True
            # and the blob must be bigger than Options.chunk_limit, which by
            # default is equal to Options.chunk_size.
            chunked = (Options.chunk_upload
                       and blob.size > Options.chunk_limit * 1024 * 1024)

            engine_uid = params.pop("engine_uid", None)
            is_direct_edit = params.pop("is_direct_edit", False)

            # Set these attributes as FileBlob does not have them;
            # they are required for step 2 of .upload()
            blob.batch_id = batch.uid
            blob.fileIdx = batch.upload_idx

            uploader: Uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.upload_callback,
            )
            log.debug(f"Using {type(uploader).__name__!r} uploader")

            if not upload:
                # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
                self.dao.remove_transfer("upload", file_path)

                # Add an upload entry in the database
                upload = Upload(
                    None,
                    file_path,
                    TransferStatus.ONGOING,
                    engine=engine_uid,
                    is_direct_edit=is_direct_edit,
                    batch=batch.as_dict(),
                    chunk_size=chunk_size,
                )
                self.dao.save_upload(upload)

            # Update the progress on chunked uploads only, as the first call to
            # action.progress would set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legitimate.
            if uploader.chunked:
                action.progress = chunk_size * len(
                    uploader.blob.uploadedChunkIds)

            if action.get_percent() < 100.0 or not action.uploaded:
                if uploader.chunked:
                    # Store the chunk size and start time for later transfer speed computation
                    action.chunk_size = chunk_size
                    action.chunk_transfer_start_time_ns = monotonic_ns()

                    # If there is an UploadError, we catch it from the processor
                    for _ in uploader.iter_upload():
                        # uploader.chunk_size may be unset for a one-shot upload, hence the fallback to 0
                        action.progress += uploader.chunk_size or 0

                        # Save the progression
                        upload.progress = action.get_percent()
                        self.dao.set_transfer_progress("upload", upload)

                        # Handle status changes every time a chunk is sent
                        transfer = self.dao.get_upload(path=file_path)
                        if transfer and transfer.status not in (
                                TransferStatus.ONGOING,
                                TransferStatus.DONE,
                        ):
                            raise UploadPaused(transfer.uid or -1)
                else:
                    uploader.upload()

                    # For empty files, this will set action.uploaded to True,
                    # telling us that the file was correctly sent to the server.
                    action.progress += blob.size

                    upload.progress = action.get_percent()

            if batch.is_s3():
                if not batch.blobs:
                    # This may happen when resuming an upload with all parts sent.
                    # Trigger upload() that will complete the MPU and fill required
                    # attributes like the Batch ETag, blob index, etc.
                    uploader.upload()

                # Complete the S3 upload
                # (setting a big timeout to handle big files)
                batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

            # Transfer is completed, update the status in the database
            upload.status = TransferStatus.DONE
            self.dao.set_transfer_status("upload", upload)

            return blob
        finally:
            # In case of error, log the progression to help debugging
            percent = action.get_percent()
            if percent < 100.0 and not action.uploaded:
                log.debug(f"Upload progression stopped at {percent:.2f}%")

                # Save the progression
                if upload:
                    upload.progress = percent
                    self.dao.set_transfer_progress("upload", upload)

            UploadAction.finish_action()

            if blob.fd:
                blob.fd.close()
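
Example #3 drops the batch and restarts from zero when the AWS credentials attached to it are about to expire: batch.extraInfo["expiration"] is divided by 1000 before being passed to _aws_token_ttl(), and a TTL of less than one second triggers the restart. The actual _aws_token_ttl() implementation is not shown here; the sketch below is an assumption of the kind of computation it could perform, given an expiration expressed as an epoch timestamp in milliseconds:

from datetime import datetime, timedelta, timezone

def aws_token_ttl(expiration_epoch_s: float) -> timedelta:
    """Hypothetical sketch: time left before the AWS token expires,
    clamped to zero when the expiration is already in the past."""
    expires_at = datetime.fromtimestamp(expiration_epoch_s, tz=timezone.utc)
    return max(expires_at - datetime.now(timezone.utc), timedelta(0))

# A token expiring 30 minutes from now still has a usable TTL (well above 1 second).
in_30_min = datetime.now(timezone.utc) + timedelta(minutes=30)
assert aws_token_ttl(in_30_min.timestamp()).total_seconds() > 1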