def delete_objects(self, path):
    """
    Delete the objects listed under the received path.

    The path is split by ``self.parse_path`` into a bucket and a prefix, and
    the prefix is listed with ``list_objects_v2`` (``MaxKeys`` of 1000 per
    request). A page that comes back with a continuation token is handed to
    ``self.delete_objects_batch`` in a separate non-daemon process; a page
    without one is passed to ``self.delete_objects_batch`` directly in the
    calling process. Every started process is joined before returning.

    :param path: Path handed to ``self.parse_path`` to obtain bucket and prefix
    :return: None
    """
    bucket, path = self.parse_path(path=path)
    client = self._session.boto3_session.client(
        service_name="s3",
        config=self._session.botocore_config,
    )
    workers = []
    args = {"Bucket": bucket, "MaxKeys": 1000, "Prefix": path}
    logger.debug(f"Arguments: \n{args}")
    while True:
        page = client.list_objects_v2(**args)
        contents = page.get("Contents")
        if not contents:
            # Nothing (left) under the prefix.
            break
        keys = []
        for entry in contents:
            keys.append({"Key": entry.get("Key")})
        logger.debug(f"Number of listed keys: {len(keys)}")
        token = page.get("NextContinuationToken")
        if not token:
            logger.debug(f"Starting last delete call...")
            self.delete_objects_batch(self._session.primitives, bucket, keys)
            if token is None:
                break
            # A falsy token that is not None keeps the loop going.
            continue
        # More pages follow: remember where to resume and delete this page
        # in a child process.
        args["ContinuationToken"] = token
        worker = mp.Process(
            target=self.delete_objects_batch,
            args=(self._session.primitives, bucket, keys),
        )
        worker.daemon = False
        worker.start()
        workers.append(worker)
        if len(workers) == self._session.procs_io_bound:
            # NOTE(review): wait_process_release is defined elsewhere;
            # presumably it blocks until a tracked process finishes and
            # drops it from the list -- confirm.
            wait_process_release(workers)
    logger.debug(f"Waiting final processes...")
    for worker in workers:
        worker.join()
def delete_not_listed_objects(self, objects_paths: List[str], procs_io_bound: Optional[int] = None) -> None:
    """
    Delete all NOT listed objects.

    The received paths are grouped by parent prefix (the text before the
    last "/" plus a trailing "/"). Each group is handed to
    ``self._delete_not_listed_batch`` in a separate non-daemon process, and
    every started process is joined before returning.

    :param objects_paths: List of objects paths to be held.
    :param procs_io_bound: Number of processes to be used for I/O bound
        operations. When None, the session's ``procs_io_bound`` is used, and
        1 if that is None as well.
    :return: None
    """
    # Resolve the effective limit: explicit argument, then session, then 1.
    if procs_io_bound is None:
        session_limit = self._session.procs_io_bound
        procs_io_bound = session_limit if session_limit is not None else 1
    logger.debug(f"procs_io_bound: {procs_io_bound}")

    # Group the paths to keep by their parent prefix.
    partitions: Dict[str, List[str]] = {}
    for object_path in objects_paths:
        partition_path = f"{object_path.rsplit('/', 1)[0]}/"
        partitions.setdefault(partition_path, []).append(object_path)

    procs: List[mp.Process] = []
    for partition_path, batch in partitions.items():
        proc = mp.Process(
            target=self._delete_not_listed_batch,
            # The trailing 1 is forwarded unchanged; its meaning is defined
            # by _delete_not_listed_batch, which is not visible here.
            args=(self._session.primitives, partition_path, batch, 1),
        )
        proc.daemon = False
        proc.start()
        procs.append(proc)
        # Throttle on the resolved limit. Comparing against
        # self._session.procs_io_bound ignored the caller's argument and
        # never matched when the session value was None.
        if len(procs) == procs_io_bound:
            # NOTE(review): wait_process_release is defined elsewhere;
            # presumably it blocks until a tracked process finishes and
            # drops it from the list -- confirm.
            wait_process_release(procs)
    logger.debug("Waiting final processes...")
    for proc in procs:
        proc.join()
def delete_not_listed_objects(self, objects_paths, procs_io_bound=None):
    """
    Delete all NOT listed objects.

    The received paths are grouped by parent prefix (the text before the
    last "/" plus a trailing "/"). Each group is handed to
    ``self.delete_not_listed_batch`` in a separate non-daemon process, and
    every started process is joined before returning.

    :param objects_paths: Iterable of object paths; each group of paths is
        forwarded to ``self.delete_not_listed_batch`` together with its prefix
    :param procs_io_bound: Number of started processes at which
        ``wait_process_release`` is called before starting more. A falsy
        value (None, 0) falls back to the session's ``procs_io_bound``; if
        that is None too, no throttling takes place.
    :return: None
    """
    if not procs_io_bound:
        procs_io_bound = self._session.procs_io_bound
    logger.debug(f"procs_io_bound: {procs_io_bound}")

    # Group the paths by their parent prefix.
    partitions = {}
    for object_path in objects_paths:
        partition_path = f"{object_path.rsplit('/', 1)[0]}/"
        partitions.setdefault(partition_path, []).append(object_path)

    procs = []
    for partition_path, batch in partitions.items():
        proc = mp.Process(
            target=self.delete_not_listed_batch,
            # The trailing 1 is forwarded unchanged; its meaning is defined
            # by delete_not_listed_batch, which is not visible here.
            args=(self._session.primitives, partition_path, batch, 1),
        )
        proc.daemon = False
        proc.start()
        procs.append(proc)
        # Throttle on the resolved limit; the session value was compared
        # here before, so the caller's argument had no effect.
        if len(procs) == procs_io_bound:
            # NOTE(review): wait_process_release is defined elsewhere;
            # presumably it blocks until a tracked process finishes and
            # drops it from the list -- confirm.
            wait_process_release(procs)
    logger.debug("Waiting final processes...")
    for proc in procs:
        proc.join()
def delete_objects(self, path: str, procs_io_bound: Optional[int] = None) -> None:
    """
    Delete all objects in the received S3 path.

    The prefix is listed with ``list_objects_v2`` (``MaxKeys`` of 1000 per
    request). A page that comes back with a continuation token is handed to
    ``self._delete_objects_batch`` in a separate non-daemon process; a page
    without one is passed to ``self._delete_objects_batch`` directly in the
    calling process. Every started process is joined before returning.

    :param path: S3 path (e.g. "s3://bucket/path")
    :param procs_io_bound: Number of processes to be used for I/O bound
        operations. When None, the session's ``procs_io_bound`` is used, and
        1 if that is None as well.
    :return: None
    """
    # Effective limit: explicit argument, then session setting, then 1.
    if procs_io_bound is None:
        if self._session.procs_io_bound is not None:
            procs_io_bound = self._session.procs_io_bound
        else:
            procs_io_bound = 1
    bucket, path = self.parse_path(path=path)
    workers: List[mp.Process] = []
    args: Dict[str, Any] = {"Bucket": bucket, "MaxKeys": 1000, "Prefix": path}
    logger.debug(f"Arguments: \n{args}")
    while True:
        page: Dict = self._client_s3.list_objects_v2(**args)
        contents = page.get("Contents")
        if contents is None:
            # The response carries no "Contents" entry: nothing to delete.
            break
        keys: List[Dict[str, str]] = []
        for entry in contents:
            if "Key" in entry:
                keys.append({"Key": entry.get("Key")})
        logger.debug(f"Number of listed keys: {len(keys)}")
        token: Optional[str] = page.get("NextContinuationToken")
        if not token:
            logger.debug(f"Starting last delete call...")
            self._delete_objects_batch(self._session.primitives, bucket, keys)
            if token is None:
                break
            # A falsy token that is not None keeps the loop going.
            continue
        # More pages follow: remember where to resume and delete this page
        # in a child process.
        args["ContinuationToken"] = token
        worker: mp.Process = mp.Process(
            target=self._delete_objects_batch,
            args=(self._session.primitives, bucket, keys),
        )
        worker.daemon = False
        worker.start()
        workers.append(worker)
        if len(workers) == procs_io_bound:
            # NOTE(review): wait_process_release is defined elsewhere;
            # presumably it blocks until a tracked process finishes and
            # drops it from the list -- confirm.
            wait_process_release(workers)
    logger.debug(f"Waiting final processes...")
    for worker in workers:
        worker.join()