def query_host_stats(docker_client, workdir):
    """Summarize CPU, disk and memory figures for this host.

    Container usage comes from ``query_containers_resources(docker_client)``;
    totals come from the module-level ``ZIMFARM_*`` settings.

    Args:
        docker_client: client handed over to ``query_containers_resources``.
        workdir: path whose filesystem is inspected with ``os.statvfs``.

    Returns:
        A dict with ``"cpu"``, ``"disk"`` and ``"memory"`` entries, each
        holding ``"total"``, ``"used"`` and ``"available"``. The disk entry
        also holds ``"remaining"`` (configured total minus used), which is
        not clamped and can therefore be negative.
    """
    # resources currently accounted to our containers
    usage = query_containers_resources(docker_client)

    # disk: "available" is what the filesystem itself reports for workdir
    # (blocks available to unprivileged users times the fragment size)
    fs_stats = os.statvfs(workdir)
    disk_used = usage["disk"]
    disk_free = fs_stats.f_bavail * fs_stats.f_frsize

    # CPU: docker cpu shares converted to a whole number of cores
    cpu_used = usage["cpu_shares"] // DEFAULT_CPU_SHARE
    cpu = {
        "total": ZIMFARM_CPUS,
        "used": cpu_used,
        "available": as_pos_int(ZIMFARM_CPUS - cpu_used),
    }

    # RAM
    mem_used = usage["memory"]
    memory = {
        "total": ZIMFARM_MEMORY,
        "used": mem_used,
        "available": as_pos_int(ZIMFARM_MEMORY - mem_used),
    }

    disk = {
        "total": ZIMFARM_DISK_SPACE,
        "used": disk_used,
        "available": disk_free,
        "remaining": ZIMFARM_DISK_SPACE - disk_used,
    }

    return {"cpu": cpu, "disk": disk, "memory": memory}
def poll(self, task_id=None):
    """Ask the API for requested tasks and start one if any is offered.

    Steps:
      1. Run ``self.check_cancellation()`` and record ``self.last_poll``.
      2. Read host stats; if the filesystem reports less free space than
         the configured total minus what is used, set ``self.should_stop``,
         log a critical message and return.
      3. ``GET /requested-tasks/worker`` with the available cpu, memory
         and disk; on failure, log a warning and return.
      4. When ``self.selfish`` is set, keep only items whose ``"worker"``
         equals ``self.worker_name``.
      5. If items remain, start the last one and push ``self.last_poll``
         90 seconds into the future.

    Args:
        task_id: accepted but not used by this method.

    Returns:
        None.
    """
    self.check_cancellation()  # update our tasks register

    logger.debug("polling…")
    self.last_poll = datetime.datetime.now()

    host_stats = query_host_stats(self.docker, self.workdir)
    disk = host_stats["disk"]
    expected_disk_avail = as_pos_int(disk["total"] - disk["used"])

    # the filesystem has less room than our own accounting says it should
    if disk["available"] < expected_disk_avail:
        self.should_stop = True
        actual_size = format_size(disk["available"])
        expected_size = format_size(expected_disk_avail)
        logger.critical(
            f"Available disk space ({actual_size}) is lower than "
            f"expected ({expected_size}). Exiting."
        )
        return

    params = {
        "worker": self.worker_name,
        "avail_cpu": host_stats["cpu"]["available"],
        "avail_memory": host_stats["memory"]["available"],
        "avail_disk": disk["available"],
    }
    success, status_code, response = self.query_api(
        "GET", "/requested-tasks/worker", params=params
    )
    if not success:
        logger.warning(f"poll failed with HTTP {status_code}: {response}")
        return

    if self.selfish:
        # only consider tasks explicitly assigned to this worker
        response["items"] = [
            task
            for task in response["items"]
            if task["worker"] == self.worker_name
        ]

    offered = response["items"]
    if not offered:
        return

    offered_ids = [task["_id"] for task in offered]
    logger.info(f"API is offering {len(offered)} task(s): {offered_ids}")
    self.start_task(offered.pop())

    # we need to allow the task to start, its container to start and
    # eventually its scraper to start so docker can report to us
    # the assigned resources (on the scraper) _before_ polling again
    grace_period = datetime.timedelta(seconds=90)
    self.last_poll = datetime.datetime.now() + grace_period
OPENSSL_BIN = os.getenv("OPENSSL_BIN", "/usr/bin/openssl") # task-related CANCELED = "canceled" CANCEL_REQUESTED = "cancel_requested" CANCELING = "canceling" # docker resources DEFAULT_CPU_SHARE = 1024 DOCKER_CLIENT_TIMEOUT = 180 # 3mn for read timeout on docker API socket # configuration ZIMFARM_CPUS, ZIMFARM_MEMORY, ZIMFARM_DISK_SPACE = None, None, None try: ZIMFARM_DISK_SPACE = as_pos_int( humanfriendly.parse_size(os.getenv("ZIMFARM_DISK"))) except Exception as exc: ZIMFARM_DISK_SPACE = 2**34 # 16GiB logger.error(f"Incorrect or missing `ZIMFARM_DISK` env. " f"defaulting to {format_size(ZIMFARM_DISK_SPACE)} ({exc})") try: ZIMFARM_CPUS = as_pos_int(int(os.getenv("ZIMFARM_CPUS"))) except Exception: physical_cpu = multiprocessing.cpu_count() if ZIMFARM_CPUS: ZIMFARM_CPUS = min([ZIMFARM_CPUS, physical_cpu]) else: ZIMFARM_CPUS = physical_cpu try: