Example #1
def process_scaleset(scaleset: Scaleset) -> None:
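    # a halted scaleset is terminal: process the halt and stop handling it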
    if scaleset.state == ScalesetState.halt:
        scaleset.halt()
        return

    # if the scaleset is touched during cleanup, don't continue to process it
    if scaleset.cleanup_nodes():
        return

    if scaleset.state in ScalesetState.needs_work():
        logging.info(
            "executing scaleset state: %s - %s",
            scaleset.scaleset_id,
            scaleset.state.name,
        )
        getattr(scaleset, scaleset.state.name)()
        return
Example #2
def autoscale_pool(pool: Pool) -> None:
    logging.info("autoscale: %s", pool.autoscale)
    if not pool.autoscale:
        return

    # get all the tasks (those not stopped) for the pool
    tasks = Task.get_tasks_by_pool_name(pool.name)
    logging.info("Pool: %s, #Tasks %d", pool.name, len(tasks))

    num_of_tasks = get_vm_count(tasks)
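    # clamp the VM count required by the pool's tasks to the autoscale min/max bounds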
    nodes_needed = max(num_of_tasks, pool.autoscale.min_size)
    if pool.autoscale.max_size:
        nodes_needed = min(nodes_needed, pool.autoscale.max_size)

    # reconcile existing scaleset capacity with what the pool needs
    # get all the scalesets for the pool
    scalesets = Scaleset.search_by_pool(pool.name)
    pool_resize = False
    for scaleset in scalesets:
        if scaleset.state in ScalesetState.modifying():
            pool_resize = True
            break
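        # capacity already provided by existing scalesets reduces what we still need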
        nodes_needed = nodes_needed - scaleset.size

    if pool_resize:
        return

    logging.info("Pool: %s, #Nodes Needed: %d", pool.name, nodes_needed)
    if nodes_needed > 0:
        # resizing scaleset or creating new scaleset.
        scale_up(pool, scalesets, nodes_needed)
    elif nodes_needed < 0:
        for scaleset in scalesets:
            nodes = Node.search_states(scaleset_id=scaleset.scaleset_id)
            for node in nodes:
                if node.delete_requested:
                    nodes_needed += 1
    if nodes_needed < 0:
        scale_down(scalesets, abs(nodes_needed))
Example #3
def process_scaleset(scaleset: Scaleset) -> None:
    logging.debug("checking scaleset for updates: %s", scaleset.scaleset_id)

    if scaleset.state == ScalesetState.resize:
        scaleset.resize()
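    # resize is handled inline above; the generic dispatch below skips it explicitly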

    # if the scaleset is touched during cleanup, don't continue to process it
    if scaleset.cleanup_nodes():
        logging.debug("scaleset needed cleanup: %s", scaleset.scaleset_id)
        return

    if (scaleset.state in ScalesetState.needs_work()
            and scaleset.state != ScalesetState.resize):
        logging.info(
            "exec scaleset state: %s - %s",
            scaleset.scaleset_id,
            scaleset.state,
        )

        if hasattr(scaleset, scaleset.state.name):
            getattr(scaleset, scaleset.state.name)()
        return
Example #4
def main(mytimer: func.TimerRequest,
         dashboard: func.Out[str]) -> None:  # noqa: F841
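    # requeue work for every entity type whose state reports needs_work()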
    scalesets = Scaleset.search()
    scalesets_needs_work = ScalesetState.needs_work()
    for scaleset in scalesets:
        logging.info("queueing scaleset updates: %s", scaleset.scaleset_id)
        scaleset.queue(method=scaleset.update_configs)
        if scaleset.state in scalesets_needs_work:
            scaleset.queue()

    proxies = Proxy.search_states(states=VmState.needs_work())
    for proxy in proxies:
        logging.info("requeueing update proxy vm: %s", proxy.region)
        proxy.queue()

    vms = Repro.search_states(states=VmState.needs_work())
    for vm in vms:
        logging.info("requeueing update vm: %s", vm.vm_id)
        vm.queue()

    tasks = Task.search_states(states=TaskState.needs_work())
    for task in tasks:
        logging.info("requeueing update task: %s", task.task_id)
        task.queue()

    jobs = Job.search_states(states=JobState.needs_work())
    for job in jobs:
        logging.info("requeueing update job: %s", job.job_id)
        job.queue()

    pools = Pool.search_states(states=PoolState.needs_work())
    for pool in pools:
        logging.info("queuing update pool: %s (%s)", pool.pool_id, pool.name)
        pool.queue()

    nodes = Node.search_states(states=NodeState.needs_work())
    for node in nodes:
        logging.info("queuing update node: %s", node.machine_id)
        node.queue()

    expired_tasks = Task.search_expired()
    for task in expired_tasks:
        logging.info("queuing stop for task: %s", task.job_id)
        task.queue_stop()

    expired_jobs = Job.search_expired()
    for job in expired_jobs:
        logging.info("queuing stop for job: %s", job.job_id)
        job.queue_stop()

    # Reminder, proxies are created on-demand.  If something is "wrong" with
    # a proxy, the plan is: delete and recreate it.
    for proxy in Proxy.search():
        if not proxy.is_alive():
            logging.error("proxy alive check failed, stopping: %s",
                          proxy.region)
            proxy.state = VmState.stopping
            proxy.save()
        else:
            proxy.save_proxy_config()

    event = get_event()
    if event:
        dashboard.set(event)
Example #5
    def can_process_new_work(self) -> bool:
        from .pools import Pool
        from .scalesets import Scaleset

        if (self.is_outdated()
                and os.environ.get("ONEFUZZ_ALLOW_OUTDATED_AGENT") != "true"):
            logging.info(
                "can_process_new_work agent and service versions differ, "
                "stopping node. "
                "machine_id:%s agent_version:%s service_version: %s",
                self.machine_id,
                self.version,
                __version__,
            )
            self.stop(done=True)
            return False

        if self.is_too_old():
            logging.info(
                "can_process_new_work node is too old.  machine_id:%s",
                self.machine_id)
            self.stop(done=True)
            return False

        if self.state not in NodeState.can_process_new_work():
            logging.info(
                "can_process_new_work node not in appropriate state for new work"
                "machine_id:%s state:%s",
                self.machine_id,
                self.state.name,
            )
            return False

        if self.state in NodeState.ready_for_reset():
            logging.info(
                "can_process_new_work node is set for reset.  machine_id:%s",
                self.machine_id,
            )
            return False

        if self.delete_requested:
            logging.info(
                "can_process_new_work is set to be deleted.  machine_id:%s",
                self.machine_id,
            )
            self.stop(done=True)
            return False

        if self.reimage_requested:
            logging.info(
                "can_process_new_work is set to be reimaged.  machine_id:%s",
                self.machine_id,
            )
            self.stop(done=True)
            return False

        if self.could_shrink_scaleset():
            logging.info(
                "can_process_new_work node scheduled to shrink.  machine_id:%s",
                self.machine_id,
            )
            self.set_halt()
            return False

        if self.scaleset_id:
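            # a node tied to a scaleset can only take work if that scaleset exists and is available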
            scaleset = Scaleset.get_by_id(self.scaleset_id)
            if isinstance(scaleset, Error):
                logging.info(
                    "can_process_new_work invalid scaleset.  "
                    "scaleset_id:%s machine_id:%s",
                    self.scaleset_id,
                    self.machine_id,
                )
                return False

            if scaleset.state not in ScalesetState.available():
                logging.info(
                    "can_process_new_work scaleset not available for work. "
                    "scaleset_id:%s machine_id:%s",
                    self.scaleset_id,
                    self.machine_id,
                )
                return False

        pool = Pool.get_by_name(self.pool_name)
        if isinstance(pool, Error):
            logging.info(
                "can_schedule - invalid pool. "
                "pool_name:%s machine_id:%s",
                self.pool_name,
                self.machine_id,
            )
            return False
        if pool.state not in PoolState.available():
            logging.info(
                "can_schedule - pool is not available for work. "
                "pool_name:%s machine_id:%s",
                self.pool_name,
                self.machine_id,
            )
            return False

        return True
Example #6
    def can_process_new_work(self) -> bool:
        from .pools import Pool
        from .scalesets import Scaleset

        if self.is_outdated():
            logging.info(
                "can_schedule agent and service versions differ, stopping node. "
                "machine_id:%s agent_version:%s service_version: %s",
                self.machine_id,
                self.version,
                __version__,
            )
            self.stop()
            return False

        if self.state in NodeState.ready_for_reset():
            logging.info("can_schedule node is set for reset.  machine_id:%s",
                         self.machine_id)
            return False

        if self.delete_requested:
            logging.info(
                "can_schedule is set to be deleted.  machine_id:%s",
                self.machine_id,
            )
            self.stop()
            return False

        if self.reimage_requested:
            logging.info(
                "can_schedule is set to be reimaged.  machine_id:%s",
                self.machine_id,
            )
            self.stop()
            return False

        if self.could_shrink_scaleset():
            self.set_halt()
            logging.info("node scheduled to shrink.  machine_id:%s",
                         self.machine_id)
            return False

        if self.scaleset_id:
            scaleset = Scaleset.get_by_id(self.scaleset_id)
            if isinstance(scaleset, Error):
                logging.info(
                    "can_schedule - invalid scaleset.  scaleset_id:%s machine_id:%s",
                    self.scaleset_id,
                    self.machine_id,
                )
                return False

            if scaleset.state not in ScalesetState.available():
                logging.info(
                    "can_schedule - scaleset not available for work. "
                    "scaleset_id:%s machine_id:%s",
                    self.scaleset_id,
                    self.machine_id,
                )
                return False

        pool = Pool.get_by_name(self.pool_name)
        if isinstance(pool, Error):
            logging.info(
                "can_schedule - invalid pool. "
                "pool_name:%s machine_id:%s",
                self.pool_name,
                self.machine_id,
            )
            return False
        if pool.state not in PoolState.available():
            logging.info(
                "can_schedule - pool is not available for work. "
                "pool_name:%s machine_id:%s",
                self.pool_name,
                self.machine_id,
            )
            return False

        return True