def apply(self, fn: Any, remote_args: dict,
          blocks: List[Block[T]]) -> List[ObjectRef[Block]]:
    """Apply ``fn`` to every block using a pool of Ray actors that grows on demand.

    The pool starts with a single actor. Whenever no task finishes within
    the short ``ray.wait`` timeout while more than 80% of the actors are
    already known to be ready, one more actor is started.

    Args:
        fn: Callable invoked as ``fn(block)`` inside an actor.
        remote_args: Keyword arguments forwarded to ``ray.remote``.
            ``num_cpus`` defaults to 1 when absent. The dict passed in is
            not modified.
        blocks: Input blocks (must support ``len`` and ``copy``).

    Returns:
        Object refs of the transformed blocks, in the order the tasks
        completed (not necessarily the input order).
    """
    # Work on a private copy so the caller's dict is never mutated.
    remote_args = dict(remote_args)
    remote_args.setdefault("num_cpus", 1)

    num_blocks = len(blocks)
    map_bar = ProgressBar("Map Progress", total=num_blocks)

    class Worker:
        def ready(self):
            # Cheap call used only to learn that the actor has started.
            return "ok"

        def process_block(self, block: Block[T]) -> Block[U]:
            return fn(block)

    try:
        Worker = ray.remote(**remote_args)(Worker)

        workers = [Worker.remote()]
        # Maps every in-flight object ref (a readiness probe or a block
        # result) to the actor that will produce it.
        tasks = {w.ready.remote(): w for w in workers}
        ready_workers = set()
        blocks_in = blocks.copy()
        blocks_out = []

        while len(blocks_out) < num_blocks:
            ready, _ = ray.wait(
                list(tasks), timeout=0.01, num_returns=1, fetch_local=False)
            if not ready:
                # Nothing finished in time. Scale up only while there is
                # still unscheduled input: a new actor could never be handed
                # a block once the input queue is drained.
                if blocks_in and len(ready_workers) / len(workers) > 0.8:
                    w = Worker.remote()
                    workers.append(w)
                    tasks[w.ready.remote()] = w
                    map_bar.set_description(
                        "Map Progress ({} actors {} pending)".format(
                            len(ready_workers),
                            len(workers) - len(ready_workers)))
                continue

            [obj_id] = ready
            worker = tasks.pop(obj_id)

            # Process task result.
            if worker in ready_workers:
                # The actor was already ready, so this ref is a block result.
                blocks_out.append(obj_id)
                map_bar.update(1)
            else:
                # First ref from this actor is its readiness probe.
                ready_workers.add(worker)

            # Schedule a new task.
            if blocks_in:
                tasks[worker.process_block.remote(blocks_in.pop())] = worker

        return blocks_out
    finally:
        # Close the bar even when actor creation or a wait call raises.
        map_bar.close()
def apply(self, fn: Any, remote_args: dict,
          blocks: Iterable[Block]) -> Iterable[ObjectRef[Block]]:
    """Apply ``fn`` to every block using a pool of Ray actors that grows on demand.

    The pool starts with a single actor. Whenever no task finishes within
    the short ``ray.wait`` timeout while more than 80% of the actors are
    already known to be ready, one more actor is started. The actor
    handles are stored on ``self.workers``.

    Args:
        fn: Callable invoked as ``fn(block)`` inside an actor.
        remote_args: Keyword arguments forwarded to ``ray.remote``. When
            empty, ``num_cpus=1`` is used. The dict passed in is not
            modified.
        blocks: Input blocks; must support ``len``, iteration and
            ``get_metadata()``.

    Returns:
        A ``BlockList`` built from the output block refs (in task
        completion order, not necessarily input order) and their freshly
        computed metadata.
    """
    # Work on a private copy so the caller's dict is never mutated.
    remote_args = dict(remote_args) if remote_args else {"num_cpus": 1}

    num_blocks = len(blocks)
    map_bar = ProgressBar("Map Progress", total=num_blocks)

    class BlockWorker:
        def ready(self):
            # Cheap call used only to learn that the actor has started.
            return "ok"

        # Two return values so the block and its metadata come back as
        # separate object refs.
        @ray.method(num_returns=2)
        def process_block(self, block: Block,
                          meta: BlockMetadata) -> (Block, BlockMetadata):
            new_block = fn(block)
            accessor = BlockAccessor.for_block(new_block)
            new_metadata = BlockMetadata(
                num_rows=accessor.num_rows(),
                size_bytes=accessor.size_bytes(),
                schema=accessor.schema(),
                # Input files are carried over from the source block.
                input_files=meta.input_files)
            return new_block, new_metadata

    try:
        BlockWorker = ray.remote(**remote_args)(BlockWorker)

        self.workers = [BlockWorker.remote()]
        # Output block ref -> ref of the metadata produced alongside it.
        metadata_mapping = {}
        # Maps every in-flight object ref (a readiness probe or a block
        # result) to the actor that will produce it.
        tasks = {w.ready.remote(): w for w in self.workers}
        ready_workers = set()
        blocks_in = list(zip(blocks, blocks.get_metadata()))
        blocks_out = []

        while len(blocks_out) < num_blocks:
            ready, _ = ray.wait(
                list(tasks), timeout=0.01, num_returns=1, fetch_local=False)
            if not ready:
                # Nothing finished in time. Scale up only while there is
                # still unscheduled input: a new actor could never be handed
                # a block once the input queue is drained.
                if (blocks_in and
                        len(ready_workers) / len(self.workers) > 0.8):
                    w = BlockWorker.remote()
                    self.workers.append(w)
                    tasks[w.ready.remote()] = w
                    map_bar.set_description(
                        "Map Progress ({} actors {} pending)".format(
                            len(ready_workers),
                            len(self.workers) - len(ready_workers)))
                continue

            [obj_id] = ready
            worker = tasks.pop(obj_id)

            # Process task result.
            if worker in ready_workers:
                # The actor was already ready, so this ref is a block result.
                blocks_out.append(obj_id)
                map_bar.update(1)
            else:
                # First ref from this actor is its readiness probe.
                ready_workers.add(worker)

            # Schedule a new task.
            if blocks_in:
                block_ref, meta_ref = worker.process_block.remote(
                    *blocks_in.pop())
                metadata_mapping[block_ref] = meta_ref
                tasks[block_ref] = worker

        # Fetch the metadata in the same order as the output block refs.
        new_metadata = ray.get([metadata_mapping[b] for b in blocks_out])
        return BlockList(blocks_out, new_metadata)
    finally:
        # Close the bar even when actor creation, a wait call or the
        # metadata fetch raises.
        map_bar.close()