def upload_chunks(
    self,
    file_path: Path,
    filename: Optional[str] = None,
    mime_type: Optional[str] = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""
    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch, upload.idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero",
                    exc_info=True,
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(batchId=upload.batch, service=self.uploads)
                batch.upload_idx = upload.idx
                chunk_size = upload.chunk_size

        if not batch:
            # Create a new batch and save it in the DB
            batch = self.uploads.batch()

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        if not upload:
            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.uid,
                idx=batch.upload_idx,
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = upload.batch
        blob.fileIdx = upload.idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )

        # Update the progress on chunked upload only as the first call to
        # action.progress will set the action.uploaded attr to True for
        # empty files. This is not what we want: empty files are legit.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        log.debug(
            f"Upload progression is {action.get_percent():.2f}% "
            f"(data length is {sizeof_fmt(blob.size)}, "
            f"chunked is {chunked}, chunk size is {sizeof_fmt(chunk_size)})"
        )

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size
                upload.progress = action.get_percent()

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()
        if blob.fd:
            blob.fd.close()
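
# --- Illustrative sketch, not part of the original source ---
# The chunking decision above boils down to simple arithmetic: Options.chunk_size
# and Options.chunk_limit are expressed in MiB, and a blob is chunked only when
# chunk uploads are enabled and the blob is larger than the limit. The helper and
# parameter names below are local stand-ins for the real Options attributes.

def should_chunk(
    blob_size: int,
    *,
    chunk_upload: bool = True,
    chunk_limit_mib: int = 20,
) -> bool:
    """Return True when the blob should be uploaded chunk by chunk."""
    return chunk_upload and blob_size > chunk_limit_mib * 1024 * 1024


# A 5 MiB file is sent in one request; a 100 MiB file is split into 20 MiB chunks.
assert not should_chunk(5 * 1024 * 1024)
assert should_chunk(100 * 1024 * 1024)
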
def upload_chunks(
    self,
    file_path: Path,
    filename: Optional[str] = None,
    mime_type: Optional[str] = None,
    **kwargs: Any,
) -> Tuple[FileBlob, Batch]:
    """Upload a blob by chunks or in one go."""
    engine_uid = kwargs.get("engine_uid", None)
    is_direct_edit = kwargs.pop("is_direct_edit", False)
    is_direct_transfer = kwargs.get("is_direct_transfer", False)
    remote_parent_path = kwargs.pop("remote_parent_path", "")
    remote_parent_ref = kwargs.pop("remote_parent_ref", "")

    blob = FileBlob(str(file_path))
    action = self.upload_action(
        file_path, blob.size, reporter=QApplication.instance(), engine=engine_uid
    )

    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None

    # See if there is already a transfer for this file
    transfer = self.get_upload(file_path)

    try:
        if transfer:
            log.debug(f"Retrieved transfer for {file_path!r}: {transfer}")
            if transfer.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(transfer.uid or -1)

            # When checking for an existing batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and batch.complete() has
            # been called.
            file_idx = None if transfer.batch.get("provider", "") == "s3" else 0

            # Check if the associated batch still exists server-side
            try:
                self.remote.uploads.get(
                    transfer.batch["batchId"], file_idx=file_idx
                )
            except HTTPError as exc:
                if exc.status != 404:
                    raise
                log.debug("No associated batch found, restarting from zero")
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.remote.uploads, **transfer.batch)
                chunk_size = transfer.chunk_size

                # The transfer was already completed on the third-party provider
                if batch.etag:
                    return self._complete_upload(batch, blob)

        if not batch:
            # The .uploads.handlers() result is cached, so it is cheap to call it
            # each time here, in case the server did not answer correctly the
            # previous time and S3 ended up completely disabled because of a
            # one-time server error.
            handler = "s3" if Feature.s3 and self.remote.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.remote.uploads.batch(handler=handler)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        action.is_direct_transfer = is_direct_transfer

        try:
            uploader = batch.get_uploader(
                blob,
                chunked=chunked,
                chunk_size=chunk_size,
                callback=self.remote.upload_callback,
            )
        except ClientError as exc:
            if exc.response["Error"]["Code"] != "NoSuchUpload":
                raise
            log.warning(
                "Either the upload ID does not exist or the upload was already completed."
            )
            return self._complete_upload(batch, blob)

        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not transfer:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            transfer = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                filesize=blob.size,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
                is_direct_transfer=is_direct_transfer,
                remote_parent_path=remote_parent_path,
                remote_parent_ref=remote_parent_ref,
            )
            self.dao.save_upload(transfer)
        elif transfer.batch["batchId"] != batch.uid:
            # The upload was not a fresh one but its batch ID had expired.
            # Before NXDRIVE-2183, the batch ID was not updated and so the second step
            # of the upload (attaching the blob to a document) was failing.
            transfer.batch["batchId"] = batch.uid
            self.dao.update_upload(transfer)

        if uploader.chunked:
            # Update the progress on chunked upload only as the first call to
            # action.progress will set the action.uploaded attr to True for
            # empty files. This is not what we want: empty files are legit.
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

            # Store the chunk size and start time for later transfer speed computation
            action.chunk_size = chunk_size
            action.chunk_transfer_start_time_ns = monotonic_ns()

            if batch.is_s3():
                self._patch_refresh_token(uploader, transfer)

            # If there is an UploadError, we catch it from the processor
            for _ in uploader.iter_upload():
                action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

                # Save the progression
                transfer.progress = action.get_percent()
                self.dao.set_transfer_progress("upload", transfer)

                # Handle status changes every time a chunk is sent
                _transfer = self.get_upload(file_path)
                if _transfer and _transfer.status not in (
                    TransferStatus.ONGOING,
                    TransferStatus.DONE,
                ):
                    raise UploadPaused(transfer.uid or -1)
        else:
            uploader.upload()

            # For empty files, this will set action.uploaded to True,
            # telling us that the file was correctly sent to the server.
            action.progress += blob.size
            transfer.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Save the final ETag in the database to prevent future issues if
            # the FileManager throws an error
            transfer.batch = batch.as_dict()
            self.dao.update_upload(transfer)

        self._complete_upload(batch, blob)

        # Transfer is completed, update the status in the database
        transfer.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", transfer)

        return blob, batch
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if transfer:
                transfer.progress = percent
                self.dao.set_transfer_progress("upload", transfer)

        action.finish_action()
        if blob.fd:
            blob.fd.close()
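
# --- Illustrative sketch, not part of the original source ---
# Approximates how the resume progress above is seeded: the bytes already on the
# server are estimated as chunk_size * len(uploadedChunkIds), and the percentage
# reported by action.get_percent() is derived from that against the file size.
# This is a simplified stand-in for the real Action class, not its actual code.

def resumed_percent(file_size: int, chunk_size: int, uploaded_chunk_count: int) -> float:
    """Rough percentage already transferred when resuming a chunked upload."""
    if file_size <= 0:
        return 0.0
    uploaded_bytes = chunk_size * uploaded_chunk_count
    return min(uploaded_bytes * 100.0 / file_size, 100.0)


# Example: 3 chunks of 20 MiB already sent for a 100 MiB file -> 60%.
print(resumed_percent(100 * 1024 * 1024, 20 * 1024 * 1024, 3))  # 60.0
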
def upload_chunks(
    self,
    file_path: Path,
    filename: Optional[str] = None,
    mime_type: Optional[str] = None,
    **params: Any,
) -> FileBlob:
    """Upload a blob by chunks or in one go."""
    action = UploadAction(file_path, reporter=QApplication.instance())
    blob = FileBlob(str(file_path))
    if filename:
        blob.name = filename
    if mime_type:
        blob.mimetype = mime_type

    batch: Optional[Batch] = None
    chunk_size = None
    upload: Optional[Upload] = None

    try:
        # See if there is already a transfer for this file
        upload = self.dao.get_upload(path=file_path)

        if upload:
            log.debug(f"Retrieved transfer for {file_path!r}: {upload}")
            if upload.status not in (TransferStatus.ONGOING, TransferStatus.DONE):
                raise UploadPaused(upload.uid or -1)

            # When checking for an existing batch, specifying the file index
            # is not possible for S3 as there is no blob at the current index
            # until the S3 upload itself is done and batch.complete() has
            # been called.
            file_idx = (
                None
                if upload.batch.get("provider", "") == "s3"
                else upload.batch["upload_idx"]
            )

            # Check if the associated batch still exists server-side
            try:
                self.uploads.get(upload.batch["batchId"], file_idx=file_idx)
            except Exception:
                log.debug(
                    "No associated batch found, restarting from zero",
                    exc_info=True,
                )
            else:
                log.debug("Associated batch found, resuming the upload")
                batch = Batch(service=self.uploads, **upload.batch)
                chunk_size = upload.chunk_size

                if batch.is_s3():
                    token_ttl = self._aws_token_ttl(
                        batch.extraInfo["expiration"] / 1000
                    )
                    if token_ttl.total_seconds() < 1:
                        batch = None
                        upload = None
                        log.warning("AWS token has expired, restarting from zero")

        if not batch:
            # The .uploads.handlers() result is cached, so it is cheap to call it
            # each time here, in case the server did not answer correctly the
            # previous time and S3 ended up completely disabled because of a
            # one-time server error.
            handler = "s3" if Feature.s3 and self.uploads.has_s3() else ""

            # Create a new batch and save it in the DB
            batch = self.uploads.batch(handler=handler)

            if batch.is_s3():
                self._aws_token_ttl(batch.extraInfo["expiration"] / 1000)

        # By default, Options.chunk_size is 20, so chunks will be 20 MiB.
        # It can be set to a value between 1 and 20 through the config.ini
        chunk_size = chunk_size or (Options.chunk_size * 1024 * 1024)

        # For the upload to be chunked, Options.chunk_upload must be True
        # and the blob must be bigger than Options.chunk_limit, which by default
        # is equal to Options.chunk_size.
        chunked = (
            Options.chunk_upload and blob.size > Options.chunk_limit * 1024 * 1024
        )

        engine_uid = params.pop("engine_uid", None)
        is_direct_edit = params.pop("is_direct_edit", False)

        # Set those attributes as FileBlob does not have them
        # and they are required for step 2 of .upload()
        blob.batch_id = batch.uid
        blob.fileIdx = batch.upload_idx

        uploader: Uploader = batch.get_uploader(
            blob,
            chunked=chunked,
            chunk_size=chunk_size,
            callback=self.upload_callback,
        )
        log.debug(f"Using {type(uploader).__name__!r} uploader")

        if not upload:
            # Remove any obsolete upload (it happens when an upload using S3 has invalid metadata)
            self.dao.remove_transfer("upload", file_path)

            # Add an upload entry in the database
            upload = Upload(
                None,
                file_path,
                TransferStatus.ONGOING,
                engine=engine_uid,
                is_direct_edit=is_direct_edit,
                batch=batch.as_dict(),
                chunk_size=chunk_size,
            )
            self.dao.save_upload(upload)

        # Update the progress on chunked upload only as the first call to
        # action.progress will set the action.uploaded attr to True for
        # empty files. This is not what we want: empty files are legit.
        if uploader.chunked:
            action.progress = chunk_size * len(uploader.blob.uploadedChunkIds)

        if action.get_percent() < 100.0 or not action.uploaded:
            if uploader.chunked:
                # Store the chunk size and start time for later transfer speed computation
                action.chunk_size = chunk_size
                action.chunk_transfer_start_time_ns = monotonic_ns()

                # If there is an UploadError, we catch it from the processor
                for _ in uploader.iter_upload():
                    # Here 0 may happen when doing a single upload
                    action.progress += uploader.chunk_size or 0

                    # Save the progression
                    upload.progress = action.get_percent()
                    self.dao.set_transfer_progress("upload", upload)

                    # Handle status changes every time a chunk is sent
                    transfer = self.dao.get_upload(path=file_path)
                    if transfer and transfer.status not in (
                        TransferStatus.ONGOING,
                        TransferStatus.DONE,
                    ):
                        raise UploadPaused(transfer.uid or -1)
            else:
                uploader.upload()

                # For empty files, this will set action.uploaded to True,
                # telling us that the file was correctly sent to the server.
                action.progress += blob.size
                upload.progress = action.get_percent()

        if batch.is_s3():
            if not batch.blobs:
                # This may happen when resuming an upload with all parts sent.
                # Trigger upload() that will complete the MPU and fill required
                # attributes like the Batch ETag, blob index, etc.
                uploader.upload()

            # Complete the S3 upload
            # (setting a big timeout to handle big files)
            batch.complete(timeout=(TX_TIMEOUT, TX_TIMEOUT))

        # Transfer is completed, update the status in the database
        upload.status = TransferStatus.DONE
        self.dao.set_transfer_status("upload", upload)

        return blob
    finally:
        # In case of error, log the progression to help debugging
        percent = action.get_percent()
        if percent < 100.0 and not action.uploaded:
            log.debug(f"Upload progression stopped at {percent:.2f}%")

            # Save the progression
            if upload:
                upload.progress = percent
                self.dao.set_transfer_progress("upload", upload)

        UploadAction.finish_action()
        if blob.fd:
            blob.fd.close()
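
# --- Illustrative sketch, not part of the original source ---
# Models the AWS token check above: batch.extraInfo["expiration"] holds the S3
# credentials expiration as a Unix timestamp in milliseconds, the caller divides
# it by 1000, and a resumed batch is discarded when the token has less than one
# second left. `aws_token_ttl` and `can_resume_s3_batch` are local stand-ins,
# not the real _aws_token_ttl implementation.

from datetime import datetime, timedelta, timezone


def aws_token_ttl(expiration_s: float) -> timedelta:
    """Remaining lifetime of the S3 token, given its expiration in seconds since the epoch."""
    expires_at = datetime.fromtimestamp(expiration_s, tz=timezone.utc)
    return expires_at - datetime.now(tz=timezone.utc)


def can_resume_s3_batch(expiration_ms: int) -> bool:
    """Only resume the S3 batch when the token still has at least one second to live."""
    return aws_token_ttl(expiration_ms / 1000).total_seconds() >= 1
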