def __init__(self,
             label: str = 'HighThroughputExecutor',
             provider: ExecutionProvider = LocalProvider(),
             launch_cmd: Optional[str] = None,
             address: Optional[str] = None,
             worker_ports: Optional[Tuple[int, int]] = None,
             worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
             interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
             storage_access: Optional[List[Staging]] = None,
             working_dir: Optional[str] = None,
             worker_debug: bool = False,
             cores_per_worker: float = 1.0,
             mem_per_worker: Optional[float] = None,
             max_workers: Union[int, float] = float('inf'),
             cpu_affinity: str = 'none',
             available_accelerators: Union[int, Sequence[str]] = (),
             prefetch_capacity: int = 0,
             heartbeat_threshold: int = 120,
             heartbeat_period: int = 30,
             poll_period: int = 10,
             address_probe_timeout: Optional[int] = None,
             managed: bool = True,
             worker_logdir_root: Optional[str] = None,
             block_error_handler: bool = True):
    """Record the executor configuration and derive the per-node worker count.

    The base class is initialised with ``provider`` and
    ``block_error_handler``; every other argument is stored on the instance
    under its own name.  In addition this method:

    * sets ``all_addresses`` to ``address`` when one is given, otherwise to
      a comma-joined string of whatever ``get_all_addresses()`` returns;
    * normalises ``available_accelerators`` to a list (an integer ``n``
      becomes the names ``'0'`` .. ``str(n - 1)``);
    * sets ``_workers_per_node`` to the smallest of ``max_workers``, the
      memory-derived slot count, the core-derived slot count and (when any
      accelerators are listed) the accelerator count, falling back to 1
      when none of those imposes a finite limit;
    * installs the default ``process_worker_pool.py`` command template when
      ``launch_cmd`` is empty or ``None``.
    """
    logger.debug("Initializing HighThroughputExecutor")

    BlockProviderExecutor.__init__(self, provider=provider, block_error_handler=block_error_handler)

    self.label = label
    self.launch_cmd = launch_cmd
    self.worker_debug = worker_debug
    self.storage_access = storage_access
    self.working_dir = working_dir
    self.managed = managed
    self.cores_per_worker = cores_per_worker
    self.mem_per_worker = mem_per_worker
    self.max_workers = max_workers
    self.prefetch_capacity = prefetch_capacity
    self.address = address
    self.address_probe_timeout = address_probe_timeout

    # An explicit address wins; otherwise advertise every address we can find.
    self.all_addresses = address if self.address else ','.join(get_all_addresses())

    # Slots permitted by memory: only computable when the provider reports
    # memory per node and a positive per-worker requirement was given.
    node_mem = getattr(self.provider, 'mem_per_node', None)
    if node_mem is not None and mem_per_worker is not None and mem_per_worker > 0:
        mem_slots = math.floor(node_mem / mem_per_worker)
    else:
        mem_slots = max_workers

    # Slots permitted by cores: only computable when the provider reports
    # cores per node.
    node_cores = getattr(self.provider, 'cores_per_node', None)
    if node_cores is None:
        cpu_slots = max_workers
    else:
        cpu_slots = math.floor(node_cores / cores_per_worker)

    # Set the list of available accelerators
    if isinstance(available_accelerators, int):
        # If the user provides an integer, create some names for them
        available_accelerators = [str(index) for index in range(available_accelerators)]
    self.available_accelerators = list(available_accelerators)

    # Determine the number of workers per node
    per_node = min(max_workers, mem_slots, cpu_slots)
    if self.available_accelerators:
        per_node = min(per_node, len(available_accelerators))
    if per_node == float('inf'):
        per_node = 1  # our best guess-- we do not have any provider hints
    self._workers_per_node = per_node

    self._task_counter = 0
    self.run_id = None  # set to the correct run_id in dfk
    self.hub_address = None  # set to the correct hub address in dfk
    self.hub_port = None  # set to the correct hub port in dfk
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range
    self.interchange_port_range = interchange_port_range
    self.heartbeat_threshold = heartbeat_threshold
    self.heartbeat_period = heartbeat_period
    self.poll_period = poll_period
    self.run_dir = '.'
    self.worker_logdir_root = worker_logdir_root
    self.cpu_affinity = cpu_affinity

    if not launch_cmd:
        # Default worker-pool command template.  The doubled braces around
        # block_id are deliberate: that field survives one round of
        # str.format() as a literal "{block_id}" placeholder.
        self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                           "-a {addresses} "
                           "-p {prefetch_capacity} "
                           "-c {cores_per_worker} "
                           "-m {mem_per_worker} "
                           "--poll {poll_period} "
                           "--task_port={task_port} "
                           "--result_port={result_port} "
                           "--logdir={logdir} "
                           "--block_id={{block_id}} "
                           "--hb_period={heartbeat_period} "
                           "{address_probe_timeout_string} "
                           "--hb_threshold={heartbeat_threshold} "
                           "--cpu-affinity {cpu_affinity} "
                           "--available-accelerators {accelerators}")
def __init__(self,
             label: str = 'HighThroughputExecutor',
             provider: ExecutionProvider = LocalProvider(),
             launch_cmd: Optional[str] = None,
             address: Optional[str] = None,
             worker_ports: Optional[Tuple[int, int]] = None,
             worker_port_range: Optional[Tuple[int, int]] = (54000, 55000),
             interchange_port_range: Optional[Tuple[int, int]] = (55000, 56000),
             storage_access: Optional[List[Staging]] = None,
             working_dir: Optional[str] = None,
             worker_debug: bool = False,
             cores_per_worker: float = 1.0,
             mem_per_worker: Optional[float] = None,
             max_workers: Union[int, float] = float('inf'),
             cpu_affinity: str = 'none',
             prefetch_capacity: int = 0,
             heartbeat_threshold: int = 120,
             heartbeat_period: int = 30,
             poll_period: int = 10,
             address_probe_timeout: Optional[int] = None,
             managed: bool = True,
             worker_logdir_root: Optional[str] = None):
    """Record the executor configuration and derive ``workers_per_node``.

    The base class is initialised with ``provider``; every other argument
    is stored on the instance under its own name.  In addition this method:

    * starts ``blocks`` as an empty dict;
    * sets ``all_addresses`` to ``address`` when one is given, otherwise to
      a comma-joined string of whatever ``get_all_addresses()`` returns;
    * sets ``workers_per_node`` to the smallest of ``max_workers``, the
      memory-derived slot count and the core-derived slot count, falling
      back to 1 when none of those imposes a finite limit;
    * installs the default ``process_worker_pool.py`` command template when
      ``launch_cmd`` is empty or ``None``.
    """
    logger.debug("Initializing HighThroughputExecutor")

    StatusHandlingExecutor.__init__(self, provider)

    self.label = label
    self.launch_cmd = launch_cmd
    self.worker_debug = worker_debug
    self.storage_access = storage_access
    self.working_dir = working_dir
    self.managed = managed
    self.blocks = {}  # type: Dict[str, str]
    self.cores_per_worker = cores_per_worker
    self.mem_per_worker = mem_per_worker
    self.max_workers = max_workers
    self.prefetch_capacity = prefetch_capacity
    self.address = address
    self.address_probe_timeout = address_probe_timeout

    # An explicit address wins; otherwise advertise every address we can find.
    self.all_addresses = address if self.address else ','.join(get_all_addresses())

    # Slots permitted by memory: only computable when the provider reports
    # memory per node and a positive per-worker requirement was given.
    node_mem = getattr(self.provider, 'mem_per_node', None)
    if node_mem is not None and mem_per_worker is not None and mem_per_worker > 0:
        mem_slots = math.floor(node_mem / mem_per_worker)
    else:
        mem_slots = max_workers

    # Slots permitted by cores: only computable when the provider reports
    # cores per node.
    node_cores = getattr(self.provider, 'cores_per_node', None)
    if node_cores is None:
        cpu_slots = max_workers
    else:
        cpu_slots = math.floor(node_cores / cores_per_worker)

    per_node = min(max_workers, mem_slots, cpu_slots)
    if per_node == float('inf'):
        per_node = 1  # our best guess-- we do not have any provider hints
    self.workers_per_node = per_node

    self._task_counter = 0
    self.hub_address = None  # set to the correct hub address in dfk
    self.hub_port = None  # set to the correct hub port in dfk
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range
    self.interchange_port_range = interchange_port_range
    self.heartbeat_threshold = heartbeat_threshold
    self.heartbeat_period = heartbeat_period
    self.poll_period = poll_period
    self.run_dir = '.'
    self.worker_logdir_root = worker_logdir_root
    self.cpu_affinity = cpu_affinity

    if not launch_cmd:
        # Default worker-pool command template.  The doubled braces around
        # block_id are deliberate: that field survives one round of
        # str.format() as a literal "{block_id}" placeholder.
        self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                           "-a {addresses} "
                           "-p {prefetch_capacity} "
                           "-c {cores_per_worker} "
                           "-m {mem_per_worker} "
                           "--poll {poll_period} "
                           "--task_port={task_port} "
                           "--result_port={result_port} "
                           "--logdir={logdir} "
                           "--block_id={{block_id}} "
                           "--hb_period={heartbeat_period} "
                           "{address_probe_timeout_string} "
                           "--hb_threshold={heartbeat_threshold} "
                           "--cpu-affinity {cpu_affinity} ")
# Linger is set to 0, so that the manager can exit even when there might be # messages in the pipe self.task_incoming.setsockopt(zmq.LINGER, 0) address = probe_addresses(addresses, port) print("Viable address :", address) self.task_incoming.connect("tcp://{}:{}".format(address, port)) print("Here") def heartbeat(self): """ Send heartbeat to the incoming task queue """ HEARTBEAT_CODE = (2**32) - 1 heartbeat = (HEARTBEAT_CODE).to_bytes(4, "little") r = self.task_incoming.send(heartbeat) print("Return from heartbeat: {}".format(r)) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-p", "--port", required=True, help="Port to connect to") args = parser.parse_args() addresses = get_all_addresses() worker = TestWorker(addresses, args.port) worker.heartbeat()