Пример #1
0
def query_host_stats(docker_client, workdir):
    """Summarize CPU, disk and memory figures for the host of this worker.

    Usage figures come from `query_containers_resources(docker_client)`; free
    disk space is read from the filesystem holding `workdir`. Totals are the
    configured `ZIMFARM_*` values.

    Returns a dict with `cpu`, `disk` and `memory` entries, each exposing
    `total`, `used` and `available`. `disk` additionally carries `remaining`:
    configured total minus usage, not passed through `as_pos_int`.
    """
    # cpu and ram usage in our containers
    usage = query_containers_resources(docker_client)

    # disk: what our containers use vs. what the filesystem can still hand
    # out to an unprivileged user (free blocks * fragment size)
    fs_stats = os.statvfs(workdir)
    disk_used = usage["disk"]
    disk_free = fs_stats.f_frsize * fs_stats.f_bavail

    # CPU: shares held by our containers, expressed as whole cores
    cores_used = usage["cpu_shares"] // DEFAULT_CPU_SHARE
    cores_free = as_pos_int(ZIMFARM_CPUS - cores_used)

    # RAM
    ram_used = usage["memory"]
    ram_free = as_pos_int(ZIMFARM_MEMORY - ram_used)

    cpu_report = {
        "total": ZIMFARM_CPUS,
        "used": cores_used,
        "available": cores_free,
    }
    disk_report = {
        "total": ZIMFARM_DISK_SPACE,
        "used": disk_used,
        "available": disk_free,
        "remaining": ZIMFARM_DISK_SPACE - disk_used,
    }
    memory_report = {
        "total": ZIMFARM_MEMORY,
        "used": ram_used,
        "available": ram_free,
    }
    return {"cpu": cpu_report, "disk": disk_report, "memory": memory_report}
Пример #2
0
    def poll(self, task_id=None):
        """Ask the API for a task fitting this worker's free resources.

        Refreshes the tasks register, measures host resources and requests
        tasks from `/requested-tasks/worker`, advertising the available CPU,
        memory and disk. When at least one task is offered, the last one of
        the list is started and the next poll is pushed 90s into the future.

        Returns early (after setting `should_stop`) when the available disk
        space is lower than total minus used, and returns early when the API
        query fails. `task_id` is accepted but not used by this method.
        """
        self.check_cancellation()  # keeps our tasks register up to date

        logger.debug("polling…")
        self.last_poll = datetime.datetime.now()

        resources = query_host_stats(self.docker, self.workdir)
        disk = resources["disk"]
        disk_expected = as_pos_int(disk["total"] - disk["used"])
        if disk["available"] < disk_expected:
            # the filesystem has less room than our own accounting expects:
            # flag the worker for shutdown instead of taking new work
            self.should_stop = True
            available_hr = format_size(disk["available"])
            expected_hr = format_size(disk_expected)
            logger.critical(
                f"Available disk space ({available_hr}) is lower than "
                f"expected ({expected_hr}). Exiting."
            )
            return

        query = {
            "worker": self.worker_name,
            "avail_cpu": resources["cpu"]["available"],
            "avail_memory": resources["memory"]["available"],
            "avail_disk": disk["available"],
        }
        success, status_code, response = self.query_api(
            "GET", "/requested-tasks/worker", params=query
        )
        if not success:
            logger.warning(f"poll failed with HTTP {status_code}: {response}")
            return

        if self.selfish:
            # only keep the tasks whose `worker` is this very worker
            own_tasks = []
            for task in response["items"]:
                if task["worker"] == self.worker_name:
                    own_tasks.append(task)
            response["items"] = own_tasks

        offered = response["items"]
        if not offered:
            return

        task_ids = [task["_id"] for task in offered]
        logger.info(f"API is offering {len(offered)} task(s): {task_ids}")
        self.start_task(offered.pop())
        # give the task, its container and eventually its scraper time to
        # start, so docker can report the resources assigned (on the scraper)
        # _before_ we poll again
        self.last_poll = datetime.datetime.now() + datetime.timedelta(seconds=90)
Пример #3
0
# path to the openssl executable; overridable through the environment
OPENSSL_BIN = os.environ.get("OPENSSL_BIN", "/usr/bin/openssl")

# task-related status strings
CANCELED = "canceled"
CANCEL_REQUESTED = "cancel_requested"
CANCELING = "canceling"

# docker resources
DEFAULT_CPU_SHARE = 1024
# read timeout on docker API socket, in seconds (3mn)
DOCKER_CLIENT_TIMEOUT = 3 * 60

# configuration: placeholders, resolved from the environment further down
ZIMFARM_CPUS = ZIMFARM_MEMORY = ZIMFARM_DISK_SPACE = None

# Disk space figure read from the `ZIMFARM_DISK` env, given as a
# human-friendly size. Any failure to read or parse it is logged and replaced
# by a 16GiB default.
try:
    ZIMFARM_DISK_SPACE = as_pos_int(
        humanfriendly.parse_size(os.environ.get("ZIMFARM_DISK"))
    )
except Exception as exc:
    ZIMFARM_DISK_SPACE = 1 << 34  # 16GiB
    logger.error(
        "Incorrect or missing `ZIMFARM_DISK` env. "
        f"defaulting to {format_size(ZIMFARM_DISK_SPACE)} ({exc})"
    )

# Number of CPU cores taken from the `ZIMFARM_CPUS` env, capped at the count
# reported by the host. Falls back to the host count when the env is missing,
# is not an integer, or resolves to a falsy value (0).
physical_cpu = multiprocessing.cpu_count()
try:
    ZIMFARM_CPUS = as_pos_int(int(os.getenv("ZIMFARM_CPUS")))
except Exception:
    # int(None) / int("junk") end up here: leave the value unset so the
    # fallback below applies
    ZIMFARM_CPUS = None

# The cap must live outside the handler: when the handler runs, the name was
# never rebound by the failed assignment, so a `min()` placed there can never
# execute and a valid env value would go uncapped.
if ZIMFARM_CPUS:
    ZIMFARM_CPUS = min([ZIMFARM_CPUS, physical_cpu])
else:
    ZIMFARM_CPUS = physical_cpu
try: