def delete_batch(self, paths):
    """Delete a batch of GCS objects with one batch API request.

    An object that is reported as missing (HTTP 404) counts as successfully
    deleted, so repeating a delete is harmless.

    Args:
      paths: List of GCS file path patterns in the form
        gs://<bucket>/<name>, not to exceed MAX_BATCH_OPERATION_SIZE in
        length.

    Returns:
      List of (path, exception) tuples in the same order as ``paths``.
      ``exception`` is None when the delete succeeded, otherwise it is the
      exception reported for that path.
    """
    if not paths:
        return []

    batch = BatchApiRequest(
        batch_url=GCS_BATCH_ENDPOINT,
        retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES)
    for gcs_path in paths:
        bucket_name, object_name = parse_gcs_path(gcs_path)
        delete_request = storage.StorageObjectsDeleteRequest(
            bucket=bucket_name, object=object_name)
        batch.Add(self.client.objects, 'Delete', delete_request)
    responses = batch.Execute(self.client._http)  # pylint: disable=protected-access

    outcomes = []
    for position, call in enumerate(responses):
        # Responses are matched back to their path by position.
        target = paths[position]
        failure = call.exception if call.is_error else None
        # Return success when the file doesn't exist anymore for idempotency.
        if (failure is not None and isinstance(failure, HttpError)
                and failure.status_code == 404):
            failure = None
        outcomes.append((target, failure))
    return outcomes
def copy_batch(self, src_dest_pairs):
    """Copy a batch of GCS objects with one batch API request.

    Args:
      src_dest_pairs: list of (src, dest) tuples of gs://<bucket>/<name>
        file paths to copy from src to dest, not to exceed
        MAX_BATCH_OPERATION_SIZE in length.

    Returns:
      List of (src, dest, exception) tuples in the same order as
      ``src_dest_pairs``. ``exception`` is None when the copy succeeded,
      otherwise it is the exception for that pair; an HTTP 404 is reported
      as a GcsIOError with errno.ENOENT.
    """
    # NOTE(review): another ``copy_batch`` that takes extra keyword
    # arguments is defined later in this file. If both live in the same
    # class, that later definition replaces this one -- confirm which one
    # is meant to be kept.
    if not src_dest_pairs:
        return []

    batch = BatchApiRequest(
        batch_url=GCS_BATCH_ENDPOINT,
        retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES)
    for source, target in src_dest_pairs:
        source_bucket, source_object = parse_gcs_path(source)
        target_bucket, target_object = parse_gcs_path(target)
        copy_request = storage.StorageObjectsCopyRequest(
            sourceBucket=source_bucket,
            sourceObject=source_object,
            destinationBucket=target_bucket,
            destinationObject=target_object)
        batch.Add(self.client.objects, 'Copy', copy_request)
    responses = batch.Execute(self.client._http)  # pylint: disable=protected-access

    outcomes = []
    for position, call in enumerate(responses):
        # Responses are matched back to their pair by position.
        source, target = src_dest_pairs[position]
        failure = None
        if call.is_error:
            failure = call.exception
            missing_source = (
                isinstance(failure, HttpError) and failure.status_code == 404)
            if missing_source:
                # Translate 404 to the appropriate not found exception.
                failure = GcsIOError(
                    errno.ENOENT, 'Source file not found: %s' % source)
        outcomes.append((source, target, failure))
    return outcomes
def copy_batch(self, src_dest_pairs, dest_kms_key_name=None,
               max_bytes_rewritten_per_call=None):
    """Copies the given GCS objects from src to dest using rewrite requests.

    A rewrite may not finish in one call. Pairs whose response is not yet
    ``done`` are sent again, carrying the returned rewrite token, in further
    batch requests until every pair has either completed or failed.

    If ``self._rewrite_cb`` is not None it is called with the response of
    every individual call, including failed ones.

    Args:
      src_dest_pairs: iterable of (src, dest) pairs of gs://<bucket>/<name>
        file paths to copy from src to dest, not to exceed
        MAX_BATCH_OPERATION_SIZE in length. Pairs may be tuples or any other
        two-item sequence; they are converted to tuples internally.
      dest_kms_key_name: Experimental. No backwards compatibility guarantees.
        Encrypt dest with this Cloud KMS key. If None, will use dest bucket
        encryption defaults.
      max_bytes_rewritten_per_call: Experimental. No backwards compatibility
        guarantees. Each rewrite call will return after these many bytes.
        Used primarily for testing.

    Returns:
      List of tuples of (src, dest, exception) in the same order as the
      src_dest_pairs argument, where exception is None if the operation
      succeeded or the relevant exception if the operation failed. An HTTP
      404 is reported as a GcsIOError with errno.ENOENT.
    """
    if not src_dest_pairs:
        return []

    # Materialize the input once and coerce each pair to a tuple. The pairs
    # are used as dict keys and set members below, so list pairs would raise
    # TypeError, and a one-shot iterator would already be exhausted by the
    # time the batching loop and the final result list read it again.
    pairs = [tuple(pair) for pair in src_dest_pairs]
    if not pairs:
        return []

    pair_to_request = {}
    for pair in pairs:
        src_bucket, src_path = parse_gcs_path(pair[0])
        dest_bucket, dest_path = parse_gcs_path(pair[1])
        pair_to_request[pair] = storage.StorageObjectsRewriteRequest(
            sourceBucket=src_bucket,
            sourceObject=src_path,
            destinationBucket=dest_bucket,
            destinationObject=dest_path,
            destinationKmsKeyName=dest_kms_key_name,
            maxBytesRewrittenPerCall=max_bytes_rewritten_per_call)

    # A pair gets a status entry (None or an exception) only once it is
    # finished; pairs without an entry still need another rewrite call.
    pair_to_status = {}
    unique_pairs = set(pairs)  # Invariant across rounds, so build it once.
    while True:
        pairs_in_batch = list(unique_pairs.difference(pair_to_status))
        if not pairs_in_batch:
            break

        batch_request = BatchApiRequest(
            batch_url=GCS_BATCH_ENDPOINT,
            retryable_codes=retry.SERVER_ERROR_OR_TIMEOUT_CODES,
            response_encoding='utf-8')
        for pair in pairs_in_batch:
            batch_request.Add(
                self.client.objects, 'Rewrite', pair_to_request[pair])
        api_calls = batch_request.Execute(self.client._http)  # pylint: disable=protected-access

        for pair, api_call in zip(pairs_in_batch, api_calls):
            src, dest = pair
            response = api_call.response
            if self._rewrite_cb is not None:
                self._rewrite_cb(response)
            if api_call.is_error:
                exception = api_call.exception
                # Translate 404 to the appropriate not found exception.
                if (isinstance(exception, HttpError)
                        and exception.status_code == 404):
                    exception = GcsIOError(
                        errno.ENOENT, 'Source file not found: %s' % src)
                pair_to_status[pair] = exception
            elif not response.done:
                _LOGGER.debug(
                    'Rewrite progress: %d of %d bytes, %s to %s',
                    response.totalBytesRewritten, response.objectSize,
                    src, dest)
                # Continue this rewrite in the next round.
                pair_to_request[pair].rewriteToken = response.rewriteToken
            else:
                _LOGGER.debug('Rewrite done: %s to %s', src, dest)
                pair_to_status[pair] = None

    return [(pair[0], pair[1], pair_to_status[pair]) for pair in pairs]