def execute(self, task_status_queue=None): """Runs download to stream.""" progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, offset=0, length=self._source_resource.size, source_url=self._source_resource.storage_url, destination_url=self._download_stream.name, operation_name=task_status.OperationName.DOWNLOADING, process_id=os.getpid(), thread_id=threading.get_ident(), ) request_config = request_config_factory.get_request_config( self._source_resource.storage_url, decryption_key_hash=self._source_resource.decryption_key_hash, user_request_args=self._user_request_args, ) provider = self._source_resource.storage_url.scheme api_factory.get_api(provider).download_object( self._source_resource, self._download_stream, request_config, download_strategy=cloud_api.DownloadStrategy.ONE_SHOT, progress_callback=progress_callback) if self._print_created_message: log.status.Print('Created: {}'.format(self._download_stream.name))
def execute(self, task_status_queue=None): """Performs download.""" if self._source_resource.md5_hash and self._component_number is None: # Checks component_number to avoid hashing slices in sliced downloads. digesters = {util.HashAlgorithms.MD5: util.get_md5_hash()} else: digesters = {} progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, size=self._source_resource.size, source_url=self._source_resource.storage_url, destination_url=self._destination_resource.storage_url, component_number=self._component_number, total_components=self._total_components, operation_name=task_status.OperationName.DOWNLOADING, process_id=os.getpid(), thread_id=threading.get_ident(), ) if self._component_number is not None: self._perform_component_download() elif (self._source_resource.size and self._source_resource.size >= properties.VALUES.storage.resumable_threshold.GetInt()): self._perform_resumable_download(digesters, progress_callback) else: self._perform_one_shot_download(digesters, progress_callback)
def execute(self, task_status_queue=None): """Performs upload.""" progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, size=self._length, source_url=self._source_resource.storage_url, destination_url=self._destination_resource.storage_url, component_number=self._component_number, total_components=self._total_components, operation_name=task_status.OperationName.UPLOADING, process_id=os.getpid(), thread_id=threading.get_ident(), ) source_stream = files.BinaryFileReader( self._source_resource.storage_url.object_name) provider = self._destination_resource.storage_url.scheme with file_part.FilePart(source_stream, self._offset, self._length) as upload_stream: api_factory.get_api(provider).upload_object( upload_stream, self._destination_resource, request_config=cloud_api.RequestConfig( md5_hash=self._source_resource.md5_hash, size=self._length), progress_callback=progress_callback)
def execute(self, task_status_queue=None):
  api_client = api_factory.get_api(self._source_resource.storage_url.scheme)
  if copy_util.check_for_cloud_clobber(self._user_request_args, api_client,
                                       self._destination_resource):
    log.status.Print(
        copy_util.get_no_clobber_message(
            self._destination_resource.storage_url))
    if self._send_manifest_messages:
      manifest_util.send_skip_message(
          task_status_queue, self._source_resource,
          self._destination_resource,
          copy_util.get_no_clobber_message(
              self._destination_resource.storage_url))
    return

  progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
      status_queue=task_status_queue,
      offset=0,
      length=self._source_resource.size,
      source_url=self._source_resource.storage_url,
      destination_url=self._destination_resource.storage_url,
      operation_name=task_status.OperationName.INTRA_CLOUD_COPYING,
      process_id=os.getpid(),
      thread_id=threading.get_ident(),
  )
  request_config = request_config_factory.get_request_config(
      self._destination_resource.storage_url,
      decryption_key_hash=self._source_resource.decryption_key_hash,
      user_request_args=self._user_request_args)

  # TODO(b/161900052): Support all of copy_object's parameters.
  result_resource = api_client.copy_object(
      self._source_resource,
      self._destination_resource,
      request_config,
      progress_callback=progress_callback)

  if self._print_created_message:
    log.status.Print('Created: {}'.format(result_resource.storage_url))
  if self._send_manifest_messages:
    manifest_util.send_success_message(
        task_status_queue,
        self._source_resource,
        self._destination_resource,
        md5_hash=result_resource.md5_hash)
  if self._delete_source:
    return task.Output(
        additional_task_iterators=[[
            delete_object_task.DeleteObjectTask(
                self._source_resource.storage_url)
        ]],
        messages=None)
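Two control-flow details above are worth isolating: the copy is skipped entirely when the destination exists and clobbering is disallowed, and when the copy doubles as a move, the source is not deleted inline but returned as a follow-up task. A minimal sketch of that shape, with every name here (`object_exists`, `DeleteTask`, `copy_object`) hypothetical rather than the real gcloud API:

class DeleteTask:
  def __init__(self, url):
    self.url = url

def copy_object(source_url, destination_url, object_exists, no_clobber,
                delete_source):
  """Returns follow-up tasks, or None if the copy was skipped."""
  if no_clobber and object_exists(destination_url):
    print('Skipping existing object: {}'.format(destination_url))
    return None
  # ... perform the actual copy here ...
  if delete_source:
    # Deleting via a separate task lets the executor schedule and retry the
    # delete independently of the copy.
    return [DeleteTask(source_url)]
  return []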
def get_stream(source_resource,
               length,
               offset=0,
               digesters=None,
               task_status_queue=None,
               destination_resource=None,
               component_number=None,
               total_components=None):
  """Gets a stream to use for an upload.

  Args:
    source_resource (resource_reference.FileObjectResource): Contains a path
      to the source file.
    length (int): The total number of bytes to be uploaded.
    offset (int): The position of the first byte to be uploaded.
    digesters (dict[hash_util.HashAlgorithm, hash object]): Hash objects to
      be populated as bytes are read.
    task_status_queue (multiprocessing.Queue|None): Used for sending progress
      messages. If None, no messages will be generated or sent.
    destination_resource (resource_reference.ObjectResource): The upload
      destination. Used for progress reports, and should be specified if
      task_status_queue is.
    component_number (int|None): Identifies a component in composite uploads.
    total_components (int|None): The total number of components used in a
      composite upload.

  Returns:
    An UploadStream wrapping the file specified by source_resource.
  """
  if task_status_queue:
    progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
        status_queue=task_status_queue,
        offset=offset,
        length=length,
        source_url=source_resource.storage_url,
        destination_url=destination_resource.storage_url,
        component_number=component_number,
        total_components=total_components,
        operation_name=task_status.OperationName.UPLOADING,
        process_id=os.getpid(),
        thread_id=threading.get_ident(),
    )
  else:
    progress_callback = None

  source_stream = files.BinaryFileReader(
      source_resource.storage_url.object_name)
  return upload_stream.UploadStream(
      source_stream,
      offset,
      length,
      digesters=digesters,
      progress_callback=progress_callback)
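The `UploadStream` returned above is the piece that ties digesters and progress reporting together: as the API client reads from it, the wrapper feeds each chunk to the hash objects and reports how far the upload has gotten. The sketch below is a simplified analogue under the assumption that the wrapper tracks cumulative bytes read and invokes the callback with that count; the real `UploadStream` and callback signature may differ.

import hashlib
import io

class CountingStream(io.RawIOBase):
  """Wraps a stream; updates digesters and reports progress on each read."""

  def __init__(self, stream, digesters=None, progress_callback=None):
    self._stream = stream
    self._digesters = digesters or {}
    self._progress_callback = progress_callback
    self._bytes_read = 0

  def read(self, size=-1):
    data = self._stream.read(size)
    if data:
      self._bytes_read += len(data)
      for digester in self._digesters.values():
        digester.update(data)
      if self._progress_callback:
        self._progress_callback(self._bytes_read)
    return data

stream = CountingStream(
    io.BytesIO(b'payload'),
    digesters={'md5': hashlib.md5()},
    progress_callback=lambda n: print('bytes read:', n))
stream.read()  # Prints "bytes read: 7" and updates the MD5 digester.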
def execute(self, task_status_queue=None):
  progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
      status_queue=task_status_queue,
      size=self._source_resource.size,
      source_url=self._source_resource.storage_url,
      destination_url=self._destination_resource.storage_url,
      operation_name=task_status.OperationName.INTRA_CLOUD_COPYING,
      process_id=os.getpid(),
      thread_id=threading.get_ident(),
  )
  # TODO(b/161900052): Support all of copy_object's parameters.
  provider = self._source_resource.storage_url.scheme
  api_factory.get_api(provider).copy_object(
      self._source_resource,
      self._destination_resource,
      progress_callback=progress_callback)
def execute(self, task_status_queue=None): """Copies file by downloading and uploading in parallel.""" # TODO (b/168712813): Add option to use the Data Transfer component. daisy_chain_stream = QueuingStream(self._source_resource.size) # Perform download in a separate thread so that upload can be performed # simultaneously. download_thread = threading.Thread(target=self._run_download, args=(daisy_chain_stream, )) download_thread.start() destination_client = api_factory.get_api( self._destination_resource.storage_url.scheme) request_config = cloud_api.RequestConfig( size=self._source_resource.size) progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, size=self._source_resource.size, source_url=self._source_resource.storage_url, destination_url=self._destination_resource.storage_url, operation_name=task_status.OperationName.DAISY_CHAIN_COPYING, process_id=os.getpid(), thread_id=threading.get_ident(), ) try: destination_client.upload_object( daisy_chain_stream.readable_stream, self._destination_resource, request_config=request_config, progress_callback=progress_callback) except _AbruptShutdownError: # Not raising daisy_chain_stream.exception_raised here because we want # to wait for the download thread to finish. pass except Exception as e: # pylint: disable=broad-except # For all the other errors raised during upload, we want to to make # sure that the download thread is terminated before we re-reaise. # Hence we catch any exception and store it to be re-raised later. daisy_chain_stream.shutdown(e) download_thread.join() if daisy_chain_stream.exception_raised: raise daisy_chain_stream.exception_raised
def execute(self, task_status_queue=None): """Performs download.""" digesters = _get_digesters(self._component_number, self._source_resource) progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, offset=self._offset, length=self._length, source_url=self._source_resource.storage_url, destination_url=self._destination_resource.storage_url, component_number=self._component_number, total_components=self._total_components, operation_name=task_status.OperationName.DOWNLOADING, process_id=os.getpid(), thread_id=threading.get_ident(), ) request_config = request_config_factory.get_request_config( self._source_resource.storage_url, decryption_key_hash=self._source_resource.decryption_key_hash, user_request_args=self._user_request_args, ) if self._source_resource.size and self._component_number is not None: try: api_download_result = self._perform_component_download( request_config, progress_callback, digesters) # pylint:disable=broad-except except Exception as e: # pylint:enable=broad-except return task.Output( additional_task_iterators=None, messages=[task.Message(topic=task.Topic.ERROR, payload=e)]) elif self._strategy is cloud_api.DownloadStrategy.RESUMABLE: api_download_result = self._perform_resumable_download( request_config, progress_callback, digesters) else: api_download_result = self._perform_one_shot_download( request_config, progress_callback, digesters) return self._get_output(digesters, api_download_result)
def _get_upload_stream(self, digesters, task_status_queue):
  if task_status_queue:
    progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
        status_queue=task_status_queue,
        offset=self._offset,
        length=self._length,
        source_url=self._source_resource.storage_url,
        destination_url=self._destination_resource.storage_url,
        component_number=self._component_number,
        total_components=self._total_components,
        operation_name=task_status.OperationName.UPLOADING,
        process_id=os.getpid(),
        thread_id=threading.get_ident(),
    )
  else:
    progress_callback = None

  source_stream = files.BinaryFileReader(self._source_path)
  return upload_stream.UploadStream(
      source_stream,
      self._offset,
      self._length,
      digesters=digesters,
      progress_callback=progress_callback)
def execute(self, task_status_queue=None): """Copies file by downloading and uploading in parallel.""" # TODO (b/168712813): Add option to use the Data Transfer component. destination_client = api_factory.get_api( self._destination_resource.storage_url.scheme) if copy_util.check_for_cloud_clobber(self._user_request_args, destination_client, self._destination_resource): log.status.Print( copy_util.get_no_clobber_message( self._destination_resource.storage_url)) if self._send_manifest_messages: manifest_util.send_skip_message( task_status_queue, self._source_resource, self._destination_resource, copy_util.get_no_clobber_message( self._destination_resource.storage_url)) return progress_callback = progress_callbacks.FilesAndBytesProgressCallback( status_queue=task_status_queue, offset=0, length=self._source_resource.size, source_url=self._source_resource.storage_url, destination_url=self._destination_resource.storage_url, operation_name=task_status.OperationName.DAISY_CHAIN_COPYING, process_id=os.getpid(), thread_id=threading.get_ident(), ) buffer_controller = BufferController( self._source_resource, self._destination_resource.storage_url.scheme, self._user_request_args, progress_callback) # Perform download in a separate thread so that upload can be performed # simultaneously. buffer_controller.start_download_thread() content_type = (self._source_resource.content_type or request_config_factory.DEFAULT_CONTENT_TYPE) request_config = request_config_factory.get_request_config( self._destination_resource.storage_url, content_type=content_type, md5_hash=self._get_md5_hash(), size=self._source_resource.size, user_request_args=self._user_request_args) result_resource = None try: upload_strategy = upload_util.get_upload_strategy( api=destination_client, object_length=self._source_resource.size) result_resource = destination_client.upload_object( buffer_controller.readable_stream, self._destination_resource, request_config, source_resource=self._source_resource, upload_strategy=upload_strategy) except _AbruptShutdownError: # Not raising daisy_chain_stream.exception_raised here because we want # to wait for the download thread to finish. pass except Exception as e: # pylint: disable=broad-except # For all the other errors raised during upload, we want to to make # sure that the download thread is terminated before we re-reaise. # Hence we catch any exception and store it to be re-raised later. buffer_controller.shutdown(e) buffer_controller.wait_for_download_thread_to_terminate() buffer_controller.readable_stream.close() if buffer_controller.exception_raised: raise buffer_controller.exception_raised if result_resource: if self._print_created_message: log.status.Print('Created: {}'.format( result_resource.storage_url)) if self._send_manifest_messages: manifest_util.send_success_message( task_status_queue, self._source_resource, self._destination_resource, md5_hash=result_resource.md5_hash) if self._delete_source: return task.Output(additional_task_iterators=[[ delete_object_task.DeleteObjectTask( self._source_resource.storage_url) ]], messages=None)