Example #1
    def registered(self, driver, frameworkId, masterInfo):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        """

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info(
            'Scale scheduler registered as framework %s with Mesos master at %s:%i',
            self._framework_id, self._master_hostname, self._master_port)

        initialize_system()
        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)

        # Initial database sync
        self._job_type_manager.sync_with_database()
        self._scheduler_manager.sync_with_database()
        self._workspace_manager.sync_with_database()

        # Start up background threads
        self._db_sync_thread = DatabaseSyncThread(self._driver,
                                                  self._job_exe_manager,
                                                  self._job_type_manager,
                                                  self._node_manager,
                                                  self._scheduler_manager,
                                                  self._workspace_manager)
        db_sync_thread = threading.Thread(target=self._db_sync_thread.run)
        db_sync_thread.daemon = True
        db_sync_thread.start()

        self._recon_thread = ReconciliationThread(self._driver)
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduling_thread = SchedulingThread(
            self._driver, self._job_exe_manager, self._job_type_manager,
            self._node_manager, self._offer_manager, self._scheduler_manager,
            self._workspace_manager)
        scheduling_thread = threading.Thread(
            target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._reconcile_running_jobs()
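
The registration callback above starts every background worker the same way: wrap the worker's run method in a Thread, mark the thread as a daemon so it cannot keep the process alive, and start it. A minimal sketch of that pattern as a helper function (the name start_daemon_thread is illustrative, not part of Scale):

import threading

def start_daemon_thread(worker):
    """Run worker.run() on a daemon thread and return the Thread object."""
    thread = threading.Thread(target=worker.run)
    thread.daemon = True  # daemon threads do not block scheduler process exit
    thread.start()
    return thread

# Usage sketch: start_daemon_thread(self._db_sync_thread)
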
Example #2
    def registered(self, driver, frameworkId, masterInfo):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        """

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info('Scale scheduler registered as framework %s with Mesos master at %s:%i',
                    self._framework_id, self._master_hostname, self._master_port)

        initialize_system()
        Scheduler.objects.update_master(self._master_hostname, self._master_port)

        # Initial database sync
        self._job_type_manager.sync_with_database()
        self._scheduler_manager.sync_with_database()
        self._workspace_manager.sync_with_database()

        # Start up background threads
        self._db_sync_thread = DatabaseSyncThread(self._driver, self._job_exe_manager, self._job_type_manager,
                                                  self._node_manager, self._scheduler_manager, self._workspace_manager)
        db_sync_thread = threading.Thread(target=self._db_sync_thread.run)
        db_sync_thread.daemon = True
        db_sync_thread.start()

        self._recon_thread = ReconciliationThread(self._driver)
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduling_thread = SchedulingThread(self._driver, self._framework_id, self._job_exe_manager,
                                                   self._job_type_manager, self._node_manager, self._offer_manager,
                                                   self._scheduler_manager, self._workspace_manager)
        scheduling_thread = threading.Thread(target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._status_thread = StatusUpdateThread(self._status_manager)
        status_thread = threading.Thread(target=self._status_thread.run)
        status_thread.daemon = True
        status_thread.start()

        self._reconcile_running_jobs()
Example #3
class ScaleScheduler(MesosScheduler):
    """Mesos scheduler for the Scale framework"""

    # Warning threshold for normal callbacks (those with no external calls, e.g. database queries)
    NORMAL_WARN_THRESHOLD = datetime.timedelta(milliseconds=5)

    # Warning threshold for callbacks that include database queries
    DATABASE_WARN_THRESHOLD = datetime.timedelta(milliseconds=100)

    def __init__(self):
        """Constructor
        """

        self._driver = None
        self._framework_id = None
        self._master_hostname = None
        self._master_port = None

        self._job_exe_manager = RunningJobExecutionManager()
        self._job_type_manager = JobTypeManager()
        self._node_manager = NodeManager()
        self._offer_manager = OfferManager()
        self._scheduler_manager = SchedulerManager()
        self._workspace_manager = WorkspaceManager()

        self._db_sync_thread = None
        self._recon_thread = None
        self._scheduling_thread = None

    def registered(self, driver, frameworkId, masterInfo):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        """

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info('Scale scheduler registered as framework %s with Mesos master at %s:%i',
                    self._framework_id, self._master_hostname, self._master_port)

        initialize_system()
        Scheduler.objects.update_master(self._master_hostname, self._master_port)

        # Initial database sync
        self._job_type_manager.sync_with_database()
        self._scheduler_manager.sync_with_database()
        self._workspace_manager.sync_with_database()

        # Start up background threads
        self._db_sync_thread = DatabaseSyncThread(self._driver, self._job_exe_manager, self._job_type_manager,
                                                  self._node_manager, self._scheduler_manager, self._workspace_manager)
        db_sync_thread = threading.Thread(target=self._db_sync_thread.run)
        db_sync_thread.daemon = True
        db_sync_thread.start()

        self._recon_thread = ReconciliationThread(self._driver)
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduling_thread = SchedulingThread(self._driver, self._framework_id, self._job_exe_manager,
                                                   self._job_type_manager, self._node_manager, self._offer_manager,
                                                   self._scheduler_manager, self._workspace_manager)
        scheduling_thread = threading.Thread(target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._reconcile_running_jobs()

    def reregistered(self, driver, masterInfo):
        """
        Invoked when the scheduler re-registers with a newly elected Mesos
        master.  This is only called when the scheduler has previously been
        registered.  masterInfo contains information about the newly elected
        master.

        See documentation for :meth:`mesos_api.mesos.Scheduler.reregistered`.
        """

        self._driver = driver
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info('Scale scheduler re-registered with Mesos master at %s:%i',
                    self._master_hostname, self._master_port)

        Scheduler.objects.update_master(self._master_hostname, self._master_port)

        # Update driver for background threads
        self._db_sync_thread.driver = self._driver
        self._recon_thread.driver = self._driver
        self._scheduling_thread.driver = self._driver

        self._reconcile_running_jobs()

    def disconnected(self, driver):
        """
        Invoked when the scheduler becomes disconnected from the master, e.g.
        the master fails and another is taking over.

        See documentation for :meth:`mesos_api.mesos.Scheduler.disconnected`.
        """

        if self._master_hostname:
            logger.error('Scale scheduler disconnected from the Mesos master at %s:%i',
                         self._master_hostname, self._master_port)
        else:
            logger.error('Scale scheduler disconnected from the Mesos master')

    def resourceOffers(self, driver, offers):
        """
        Invoked when resources have been offered to this framework. A single
        offer will only contain resources from a single slave.  Resources
        associated with an offer will not be re-offered to _this_ framework
        until either (a) this framework has rejected those resources (see
        SchedulerDriver.launchTasks) or (b) those resources have been
        rescinded (see Scheduler.offerRescinded).  Note that resources may be
        concurrently offered to more than one framework at a time (depending
        on the allocator being used).  In that case, the first framework to
        launch tasks using those resources will be able to use them while the
        other frameworks will have those resources rescinded (or if a
        framework has already launched tasks with those resources then those
        tasks will fail with a TASK_LOST status and a message saying as much).

        See documentation for :meth:`mesos_api.mesos.Scheduler.resourceOffers`.
        """

        started = now()

        agent_ids = []
        resource_offers = []
        for offer in offers:
            offer_id = offer.id.value
            agent_id = offer.slave_id.value
            disk = 0
            mem = 0
            cpus = 0
            for resource in offer.resources:
                if resource.name == 'disk':
                    disk = resource.scalar.value
                elif resource.name == 'mem':
                    mem = resource.scalar.value
                elif resource.name == 'cpus':
                    cpus = resource.scalar.value
            resources = NodeResources(cpus=cpus, mem=mem, disk=disk)
            agent_ids.append(agent_id)
            resource_offers.append(ResourceOffer(offer_id, agent_id, resources))

        self._node_manager.add_agent_ids(agent_ids)
        self._offer_manager.add_new_offers(resource_offers)

        duration = now() - started
        msg = 'Scheduler resourceOffers() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def offerRescinded(self, driver, offerId):
        """
        Invoked when an offer is no longer valid (e.g., the slave was lost or
        another framework used resources in the offer.) If for whatever reason
        an offer is never rescinded (e.g., dropped message, failing over
        framework, etc.), a framework that attempts to launch tasks using an
        invalid offer will receive TASK_LOST status updates for those tasks.

        See documentation for :meth:`mesos_api.mesos.Scheduler.offerRescinded`.
        """

        started = now()

        offer_id = offerId.value
        self._offer_manager.remove_offers([offer_id])

        duration = now() - started
        msg = 'Scheduler offerRescinded() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def statusUpdate(self, driver, status):
        """
        Invoked when the status of a task has changed (e.g., a slave is lost
        and so the task is lost, a task finishes and an executor sends a
        status update saying so, etc.) Note that returning from this callback
        acknowledges receipt of this status update.  If for whatever reason
        the scheduler aborts during this callback (or the process exits)
        another status update will be delivered.  Note, however, that this is
        currently not true if the slave sending the status update is lost or
        fails during that time.

        See documentation for :meth:`mesos_api.mesos.Scheduler.statusUpdate`.
        """

        started = now()

        task_id = status.task_id.value
        job_exe_id = RunningJobExecution.get_job_exe_id(task_id)
        logger.info('Status update for task %s: %s', task_id, utils.status_to_string(status.state))

        # Since we have a status update for this task, remove it from reconciliation set
        self._recon_thread.remove_task_id(task_id)

        try:
            running_job_exe = self._job_exe_manager.get_job_exe(job_exe_id)

            if running_job_exe:
                results = TaskResults(task_id)
                results.exit_code = utils.parse_exit_code(status)
                results.when = utils.get_status_timestamp(status)
                if status.state in [mesos_pb2.TASK_FINISHED, mesos_pb2.TASK_ERROR, mesos_pb2.TASK_FAILED,
                                    mesos_pb2.TASK_KILLED]:
                    try:
                        log_start_time = now()
                        hostname = running_job_exe._node_hostname
                        port = running_job_exe._node_port
                        task_dir = get_slave_task_directory(hostname, port, task_id)
                        results.stdout = get_slave_task_file(hostname, port, task_dir, 'stdout')
                        results.stderr = get_slave_task_file(hostname, port, task_dir, 'stderr')
                        log_end_time = now()
                        logger.debug('Time to pull logs for task: %s', str(log_end_time - log_start_time))
                    except Exception:
                        logger.exception('Error pulling logs for task %s', task_id)

                # Apply status update to running job execution
                if status.state == mesos_pb2.TASK_RUNNING:
                    hostname = running_job_exe._node_hostname
                    port = running_job_exe._node_port
                    task_dir = get_slave_task_directory(hostname, port, task_id)
                    stdout_url = get_slave_task_url(hostname, port, task_dir, 'stdout')
                    stderr_url = get_slave_task_url(hostname, port, task_dir, 'stderr')
                    running_job_exe.task_running(task_id, results.when, stdout_url, stderr_url)
                elif status.state == mesos_pb2.TASK_FINISHED:
                    running_job_exe.task_complete(results)
                elif status.state == mesos_pb2.TASK_LOST:
                    running_job_exe.task_fail(results, Error.objects.get_builtin_error('mesos-lost'))
                elif status.state in [mesos_pb2.TASK_ERROR, mesos_pb2.TASK_FAILED, mesos_pb2.TASK_KILLED]:
                    running_job_exe.task_fail(results)

                # Remove finished job execution
                if running_job_exe.is_finished():
                    self._job_exe_manager.remove_job_exe(job_exe_id)
            else:
                # Scheduler doesn't have any knowledge of this job execution
                Queue.objects.handle_job_failure(job_exe_id, now(), Error.objects.get_builtin_error('scheduler-lost'))
        except Exception:
            logger.exception('Error handling status update for job execution: %s', job_exe_id)
            # Error handling status update, add task so it can be reconciled
            self._recon_thread.add_task_ids([task_id])

        duration = now() - started
        msg = 'Scheduler statusUpdate() took %.3f seconds'
        if duration > ScaleScheduler.DATABASE_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def frameworkMessage(self, driver, executorId, slaveId, message):
        """
        Invoked when an executor sends a message. These messages are best
        effort; do not expect a framework message to be retransmitted in any
        reliable fashion.

        See documentation for :meth:`mesos_api.mesos.Scheduler.frameworkMessage`.
        """

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.info('Message from %s on host %s: %s', executorId.value, node.hostname, message)
        else:
            logger.info('Message from %s on agent %s: %s', executorId.value, agent_id, message)

        duration = now() - started
        msg = 'Scheduler frameworkMessage() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def slaveLost(self, driver, slaveId):
        """
        Invoked when a slave has been determined unreachable (e.g., machine
        failure, network partition.) Most frameworks will need to reschedule
        any tasks launched on this slave on a new slave.

        See documentation for :meth:`mesos_api.mesos.Scheduler.slaveLost`.
        """

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.error('Node lost on host %s', node.hostname)
        else:
            logger.error('Node lost on agent %s', agent_id)

        self._node_manager.lost_node(agent_id)
        self._offer_manager.lost_node(agent_id)

        # Fail job executions that were running on the lost node
        if node:
            for running_job_exe in self._job_exe_manager.get_job_exes_on_node(node.id):
                try:
                    running_job_exe.execution_lost(started)
                except DatabaseError:
                    logger.exception('Error failing lost job execution: %s', running_job_exe.id)
                    # Error failing execution, add task so it can be reconciled
                    task = running_job_exe.current_task
                    if task:
                        self._recon_thread.add_task_ids([task.id])
                if running_job_exe.is_finished():
                    self._job_exe_manager.remove_job_exe(running_job_exe.id)

        duration = now() - started
        msg = 'Scheduler slaveLost() took %.3f seconds'
        if duration > ScaleScheduler.DATABASE_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def executorLost(self, driver, executorId, slaveId, status):
        """
        Invoked when an executor has exited/terminated. Note that any tasks
        running will have TASK_LOST status updates automatically generated.

        See documentation for :meth:`mesos_api.mesos.Scheduler.executorLost`.
        """

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.error('Executor %s lost on host: %s', executorId.value, node.hostname)
        else:
            logger.error('Executor %s lost on agent: %s', executorId.value, agent_id)

        duration = now() - started
        msg = 'Scheduler executorLost() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def error(self, driver, message):
        """
        Invoked when there is an unrecoverable error in the scheduler or
        scheduler driver.  The driver will be aborted BEFORE invoking this
        callback.

        See documentation for :meth:`mesos_api.mesos.Scheduler.error`.
        """

        logger.error('Unrecoverable error: %s', message)

    def shutdown(self):
        """Performs any clean up required by this scheduler implementation.

        Currently this method just notifies any background threads to break out of their work loops.
        """

        logger.info('Scheduler shutdown invoked, stopping background threads')
        self._db_sync_thread.shutdown()
        self._recon_thread.shutdown()
        self._scheduling_thread.shutdown()

    def _reconcile_running_jobs(self):
        """Looks up all currently running jobs in the database and sets them up to be reconciled with Mesos"""

        # List of task IDs to reconcile
        task_ids = []

        # Query for job executions that are running
        job_exes = JobExecution.objects.get_running_job_exes()

        # Find current task IDs for running executions
        for job_exe in job_exes:
            running_job_exe = self._job_exe_manager.get_job_exe(job_exe.id)
            if running_job_exe:
                task = running_job_exe.current_task
                if task:
                    task_ids.append(task.id)
            else:
                # Fail any executions that the scheduler has lost
                Queue.objects.handle_job_failure(job_exe.id, now(), Error.objects.get_builtin_error('scheduler-lost'))

        # Send task IDs to reconciliation thread
        self._recon_thread.add_task_ids(task_ids)
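
Every callback in Example #3 brackets its work with the same timing check, logging a warning when the elapsed time exceeds NORMAL_WARN_THRESHOLD or DATABASE_WARN_THRESHOLD and a debug message otherwise. A self-contained sketch of that pattern as a context manager (callback_timer is a hypothetical helper, and datetime.utcnow() stands in for the now() used above):

import datetime
import logging
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def callback_timer(name, threshold=datetime.timedelta(milliseconds=5)):
    """Log how long a scheduler callback took, warning if it exceeds the threshold."""
    started = datetime.datetime.utcnow()
    try:
        yield
    finally:
        duration = datetime.datetime.utcnow() - started
        msg = 'Scheduler %s took %.3f seconds'
        if duration > threshold:
            logger.warning(msg, name, duration.total_seconds())
        else:
            logger.debug(msg, name, duration.total_seconds())

# Usage sketch:
#     with callback_timer('resourceOffers()', ScaleScheduler.NORMAL_WARN_THRESHOLD):
#         ...
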
Example #4
    def initialize(self):
        """Initializes the scheduler and gets it ready to connect to Mesos. This method should only ever be called once.
        """

        initialize_system()

        # Initial database sync
        logger.info('Performing initial sync with Scale database')
        logger.info('Retrieving errors...')
        reset_error_cache()
        logger.info('Retrieving job execution metrics...')
        job_exe_mgr.init_with_database()
        logger.info('Retrieving job types...')
        job_type_mgr.sync_with_database()
        logger.info('Retrieving workspaces...')
        workspace_mgr.sync_with_database()
        logger.info('Retrieving scheduler settings...')
        scheduler_mgr.sync_with_database()

        # Start up background threads
        self._threads = []

        logger.info('Starting up background threads')
        self._messaging_thread = MessagingThread()
        restart_msg = RestartScheduler()
        restart_msg.when = now()
        self._messaging_thread.add_initial_messages([restart_msg])
        messaging_thread = threading.Thread(target=self._messaging_thread.run)
        messaging_thread.daemon = True
        messaging_thread.start()
        self._threads.append(messaging_thread)

        self._recon_thread = ReconciliationThread()
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()
        self._threads.append(recon_thread)

        self._scheduler_status_thread = SchedulerStatusThread()
        scheduler_status_thread = threading.Thread(target=self._scheduler_status_thread.run)
        scheduler_status_thread.daemon = True
        scheduler_status_thread.start()
        self._threads.append(scheduler_status_thread)

        self._scheduling_thread = SchedulingThread(self._client)
        scheduling_thread = threading.Thread(target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()
        self._threads.append(scheduling_thread)

        self._sync_thread = SyncThread(self._driver)
        sync_thread = threading.Thread(target=self._sync_thread.run)
        sync_thread.daemon = True
        sync_thread.start()
        self._threads.append(sync_thread)

        self._task_handling_thread = TaskHandlingThread(self._driver)
        task_handling_thread = threading.Thread(target=self._task_handling_thread.run)
        task_handling_thread.daemon = True
        task_handling_thread.start()
        self._threads.append(task_handling_thread)

        self._task_update_thread = TaskUpdateThread()
        task_update_thread = threading.Thread(target=self._task_update_thread.run)
        task_update_thread.daemon = True
        task_update_thread.start()
        self._threads.append(task_update_thread)
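
Unlike the earlier examples, initialize() keeps each started Thread object in self._threads; Example #5 below shows run() joining that list in one-second slices until the Mesos client stops. A sketch of that wait loop with a generic stop_requested callable standing in for the client's stop flag:

def wait_for_threads(threads, stop_requested):
    """Join background threads in short slices until shutdown is requested.

    stop_requested is assumed to be a zero-argument callable that returns
    True once the scheduler should exit.
    """
    while not stop_requested():
        for thread in threads:
            if thread.is_alive():
                thread.join(1)  # bounded join keeps the loop responsive
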
Example #5
class ScaleScheduler(object):
    """Mesos scheduler for the Scale framework"""

    # Warning threshold for normal callbacks (those with no external calls, e.g. database queries)
    NORMAL_WARN_THRESHOLD = datetime.timedelta(milliseconds=5)

    def __init__(self):
        """Constructor
        """

        self._driver = None
        self._client = None
        self._framework_id = None
        self._master_host_address = None

        self._messaging_thread = None
        self._recon_thread = None
        self._scheduler_status_thread = None
        self._scheduling_thread = None
        self._sync_thread = None
        self._task_handling_thread = None
        self._task_update_thread = None

    def initialize(self):
        """Initializes the scheduler and gets it ready to connect to Mesos. This method should only ever be called once.
        """

        initialize_system()

        # Initial database sync
        logger.info('Performing initial sync with Scale database')
        logger.info('Retrieving errors...')
        reset_error_cache()
        logger.info('Retrieving job execution metrics...')
        job_exe_mgr.init_with_database()
        logger.info('Retrieving job types...')
        job_type_mgr.sync_with_database()
        logger.info('Retrieving workspaces...')
        workspace_mgr.sync_with_database()
        logger.info('Retrieving scheduler settings...')
        scheduler_mgr.sync_with_database()

        # Start up background threads
        self._threads = []

        logger.info('Starting up background threads')
        self._messaging_thread = MessagingThread()
        restart_msg = RestartScheduler()
        restart_msg.when = now()
        self._messaging_thread.add_initial_messages([restart_msg])
        messaging_thread = threading.Thread(target=self._messaging_thread.run)
        messaging_thread.daemon = True
        messaging_thread.start()
        self._threads.append(messaging_thread)

        self._recon_thread = ReconciliationThread()
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()
        self._threads.append(recon_thread)

        self._scheduler_status_thread = SchedulerStatusThread()
        scheduler_status_thread = threading.Thread(target=self._scheduler_status_thread.run)
        scheduler_status_thread.daemon = True
        scheduler_status_thread.start()
        self._threads.append(scheduler_status_thread)

        self._scheduling_thread = SchedulingThread(self._client)
        scheduling_thread = threading.Thread(target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()
        self._threads.append(scheduling_thread)

        self._sync_thread = SyncThread(self._driver)
        sync_thread = threading.Thread(target=self._sync_thread.run)
        sync_thread.daemon = True
        sync_thread.start()
        self._threads.append(sync_thread)

        self._task_handling_thread = TaskHandlingThread(self._driver)
        task_handling_thread = threading.Thread(target=self._task_handling_thread.run)
        task_handling_thread.daemon = True
        task_handling_thread.start()
        self._threads.append(task_handling_thread)

        self._task_update_thread = TaskUpdateThread()
        task_update_thread = threading.Thread(target=self._task_update_thread.run)
        task_update_thread.daemon = True
        task_update_thread.start()
        self._threads.append(task_update_thread)

    def run(self, client):
        """Launch scheduler with callbacks for Mesos events.

        :param client: The MesosClient object to register event callbacks on
        :type client: :class:`mesoshttp.client.MesosClient`
        """

        self._client = client
        self._client.on(MesosClient.SUBSCRIBED, self.subscribed)
        self._client.on(MesosClient.OFFERS, self.offers)
        self._client.on(MesosClient.ERROR, self.error)
        self._client.on(MesosClient.UPDATE, self.update)
        #self._client.on(MesosClient.FAILURE, self.failure)
        self._client.on(MesosClient.RESCIND, self.rescind)
        self._client.on(MesosClient.DISCONNECTED, self.disconnected)
        self._client.on(MesosClient.RECONNECTED, self.reconnected)
        self._client.max_reconnect = settings.SCHEDULER_MAX_RECONNECT
        self._client.connection_timeout = 20

        self._client.register()

        self.shutdown()  # client has deregistered

        while not self._client.stop:
            for thread in self._threads:
                if thread.is_alive():
                    thread.join(1)

    def subscribed(self, driver):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with a SchedulerDriver which has attributes that include
        the frameworkId, and the mesos_url, which indicates the master currently leading.
        """

        self._driver = driver
        self._framework_id = driver.frameworkId
        self._master_host_address = host_address_from_mesos_url(driver.mesos_url)

        logger.info('Scale scheduler registered as framework %s with Mesos master at %s:%i',
                    self._framework_id, self._master_host_address.hostname, self._master_host_address.port)

        scheduler_mgr.update_from_mesos(self._framework_id, self._master_host_address)

        # Update driver for background threads
        recon_mgr.driver = self._driver
        self._scheduling_thread.client = self._client
        self._sync_thread.driver = self._driver
        self._task_handling_thread.driver = self._driver

        self._reconcile_running_jobs()

    def reconnected(self, message):
        """
        Invoked when the scheduler re-registers with a newly elected Mesos
        master.  This is only called when the scheduler has previously been
        registered.
        """

        self._framework_id = self._driver.frameworkId
        self._master_host_address = host_address_from_mesos_url(self._driver.mesos_url)

        logger.info('Scale scheduler re-registered with Mesos master at %s:%i',
                    self._master_host_address.hostname, self._master_host_address.port)

        scheduler_mgr.update_from_mesos(mesos_address=self._master_host_address)

        self._reconcile_running_jobs()

    def disconnected(self, message):
        """
        Invoked when the scheduler becomes disconnected from the master, e.g.
        the master fails and another is taking over.
        """

        if self._master_host_address:
            logger.error('Scale scheduler disconnected from the Mesos master at %s:%i: %s',
                         self._master_host_address.hostname, self._master_host_address.port, message)
        else:
            logger.error('Scale scheduler disconnected from the Mesos master: %s', message)

    def offers(self, offers):
        """
        Invoked when resources have been offered to this framework. A single
        offer will only contain resources from a single agent.  Resources
        associated with an offer will not be re-offered to _this_ framework
        until either (a) this framework has rejected those resources
        or (b) those resources have been rescinded.  Note that resources may be
        concurrently offered to more than one framework at a time (depending
        on the allocator being used).  In that case, the first framework to
        launch tasks using those resources will be able to use them while the
        other frameworks will have those resources rescinded (or if a
        framework has already launched tasks with those resources then those
        tasks will fail with a TASK_LOST status and a message saying as much).
        """

        started = now()

        agents = {}
        offered_nodes = []
        resource_offers = []
        total_resources = NodeResources()
        skipped_roles = set()
        for offer in offers:
            scale_offer = from_mesos_offer(offer)
            offer_id = scale_offer.id.value
            agent_id = scale_offer.agent_id.value
            framework_id = scale_offer.framework_id.value
            hostname = scale_offer.hostname
            offered_nodes.append(hostname)
            # ignore offers while we're paused
            if scheduler_mgr.config.is_paused:
                offer.decline()
                continue
            resource_list = []
            for resource in scale_offer.resources:
                # Only accept resource that are of SCALAR type and have a role matching our accept list
                if resource.type == RESOURCE_TYPE_SCALAR:
                    if resource.role in settings.ACCEPTED_RESOURCE_ROLE:
                        logger.debug("Received scalar resource %s with value %i associated with role %s" %
                                     (resource.name, resource.scalar.value, resource.role))
                        resource_list.append(ScalarResource(resource.name, resource.scalar.value))
                    else:
                        skipped_roles.add(resource.role)
                        offer.decline()

            logger.debug("Number of resources: %i" % len(resource_list))

            # Only register agent, if offers are being received
            if len(resource_list) > 0:
                resources = NodeResources(resource_list)
                total_resources.add(resources)
                agents[agent_id] = Agent(agent_id, hostname)
                resource_offers.append(ResourceOffer(offer_id, agent_id, framework_id, resources, started, offer))

        logger.debug("Offer analysis complete with %i resource offers." % len(resource_offers))

        node_mgr.register_agents(agents.values())
        logger.debug("Agents registered.")
        resource_mgr.add_new_offers(resource_offers)
        logger.debug("Resource offers added.")
        Node.objects.update_node_offers(offered_nodes, now())
        logger.debug("Node offer times updated.")

        num_offers = len(resource_offers)
        logger.info('Received %d offer(s) with %s from %d node(s)', num_offers, total_resources, len(agents))
        if len(skipped_roles):
            logger.warning('Skipped offers from roles that are not marked as accepted: %s', ','.join(skipped_roles))
        scheduler_mgr.add_new_offer_count(num_offers)

        duration = now() - started
        msg = 'Scheduler resourceOffers() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def rescind(self, offer):
        """
        Invoked when an offer is no longer valid (e.g., the slave was lost or
        another framework used resources in the offer.) If for whatever reason
        an offer is never rescinded (e.g., dropped message, failing over
        framework, etc.), a framework that attempts to launch tasks using an
        invalid offer will receive TASK_LOST status updates for those tasks.
        """

        started = now()
        offer_id = offer['offer_id']['value']
        resource_mgr.rescind_offers([offer_id])

        duration = now() - started
        msg = 'Scheduler offerRescinded() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def update(self, status):
        """
        Invoked when the status of a task has changed (e.g., a slave is lost
        and so the task is lost, a task finishes and an executor sends a
        status update saying so, etc.) Note that returning from this callback
        acknowledges receipt of this status update.  If for whatever reason
        the scheduler aborts during this callback (or the process exits)
        another status update will be delivered.  Note, however, that this is
        currently not true if the slave sending the status update is lost or
        fails during that time.
        """

        started = now()

        model = utils.create_task_update_model(status)
        mesos_status = model.status
        task_update = TaskStatusUpdate(model, utils.get_status_agent_id(status), utils.get_status_data(status))
        task_id = task_update.task_id
        was_task_finished = task_update.status in TaskStatusUpdate.TERMINAL_STATUSES
        was_job_finished = False

        if mesos_status == 'TASK_ERROR':
            logger.error('Status update for task %s: %s', task_id, mesos_status)
        elif mesos_status == 'TASK_LOST':
            logger.warning('Status update for task %s: %s', task_id, mesos_status)
        else:
            logger.info('Status update for task %s: %s', task_id, mesos_status)

        # Since we have a status update for this task, remove it from reconciliation set
        recon_mgr.remove_task_id(task_id)

        # Hand off task update to be saved in the database
        if task_id.startswith(JOB_TASK_ID_PREFIX):
            # Grab job execution ID from manager
            cluster_id = JobExecution.parse_cluster_id(task_id)
            job_exe = job_exe_mgr.get_running_job_exe(cluster_id)
            if job_exe:
                model.job_exe_id = job_exe.id
        task_update_mgr.add_task_update(model)

        # Update task with latest status
        # This should happen before the job execution or node manager are updated, since they will assume that the task
        # has already been updated
        task_mgr.handle_task_update(task_update)

        if task_id.startswith(JOB_TASK_ID_PREFIX):
            # Job task, so update the job execution
            try:
                job_exe = job_exe_mgr.handle_task_update(task_update)
                if job_exe and job_exe.is_finished():
                    logger.info("job_exe with job id %s and node id %s is finished", job_exe.job_id, job_exe.node_id)
                    was_job_finished = True
                    cleanup_mgr.add_job_execution(job_exe)
                    GPUManager.release_gpus(job_exe.node_id, job_exe.job_id)

            except Exception:
                cluster_id = JobExecution.parse_cluster_id(task_id)
                logger.exception('Error handling status update for job execution: %s', cluster_id)
                # Error handling status update, add task so it can be reconciled
                task = task_mgr.get_task(task_id)
                if task:
                    recon_mgr.add_tasks([task])
        else:
            # Not a job task, so must be either a node or system task
            node_mgr.handle_task_update(task_update)
            system_task_mgr.handle_task_update(task_update)

        scheduler_mgr.add_task_update_counts(was_task_finished, was_job_finished)

        duration = now() - started
        msg = 'Scheduler statusUpdate() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def error(self, message):
        """
        Invoked when there is an unrecoverable error in the scheduler or
        scheduler driver.  The driver will be aborted BEFORE invoking this
        callback.
        """

        logger.error('Unrecoverable error: %s', message)

    def shutdown(self):
        """Performs any clean up required by this scheduler implementation.

        Currently this method just notifies any background threads to break out of their work loops.
        """

        logger.info('Scheduler shutdown invoked, stopping background threads')
        self._messaging_thread.shutdown()
        self._recon_thread.shutdown()
        self._scheduler_status_thread.shutdown()
        self._scheduling_thread.shutdown()
        self._sync_thread.shutdown()
        self._task_handling_thread.shutdown()
        self._task_update_thread.shutdown()

        # Shutdown Mesos client
        self._client.stop = True

        # Ensure driver is cleaned up
        if self._driver:
            self._driver.tearDown()

    def _reconcile_running_jobs(self):
        """Reconciles all currently running job executions with Mesos"""

        # List of tasks to reconcile
        tasks_to_reconcile = []

        # Find current tasks for running executions
        for running_job_exe in job_exe_mgr.get_running_job_exes():
            task = running_job_exe.current_task
            if task:
                tasks_to_reconcile.append(task)

        # Send tasks to reconciliation thread
        recon_mgr.add_tasks(tasks_to_reconcile)
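
The offers() callback in Example #5 keeps only SCALAR resources whose role is on the accept list, declining the rest and remembering skipped roles for a single warning. A stand-alone sketch of that filtering step; the resource objects are assumed to expose the same type, role, name and scalar.value attributes as the Mesos offer messages, and the string 'SCALAR' stands in for the RESOURCE_TYPE_SCALAR constant used above:

def accept_scalar_resources(resources, accepted_roles):
    """Return accepted (name, value) pairs and the set of skipped role names."""
    accepted = []
    skipped_roles = set()
    for resource in resources:
        if resource.type != 'SCALAR':  # ignore RANGES/SET resources
            continue
        if resource.role in accepted_roles:
            accepted.append((resource.name, resource.scalar.value))
        else:
            skipped_roles.add(resource.role)
    return accepted, skipped_roles
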
Example #6
class ScaleScheduler(MesosScheduler):
    """Mesos scheduler for the Scale framework"""

    # Warning threshold for normal callbacks (those with no external calls, e.g. database queries)
    NORMAL_WARN_THRESHOLD = datetime.timedelta(milliseconds=5)

    # Warning threshold for callbacks that include database queries
    DATABASE_WARN_THRESHOLD = datetime.timedelta(milliseconds=100)

    def __init__(self, executor):
        """Constructor

        :param executor: The executor to use for launching tasks
        :type executor: :class:`mesos_pb2.ExecutorInfo`
        """

        self._driver = None
        self._executor = executor
        self._framework_id = None
        self._master_hostname = None
        self._master_port = None

        self._job_exe_manager = RunningJobExecutionManager()
        self._job_type_manager = JobTypeManager()
        self._node_manager = NodeManager()
        self._offer_manager = OfferManager()
        self._scheduler_manager = SchedulerManager()

        self._db_sync_thread = None
        self._recon_thread = None
        self._scheduling_thread = None

    def registered(self, driver, frameworkId, masterInfo):
        '''
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        '''

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)
        logger.info(
            'Scale scheduler registered as framework %s with Mesos master at %s:%i',
            self._framework_id, self._master_hostname, self._master_port)

        initialize_system()

        # Initial database sync
        self._job_type_manager.sync_with_database()
        self._scheduler_manager.sync_with_database()

        # Start up background threads
        self._db_sync_thread = DatabaseSyncThread(self._driver,
                                                  self._job_exe_manager,
                                                  self._job_type_manager,
                                                  self._node_manager,
                                                  self._scheduler_manager)
        db_sync_thread = threading.Thread(target=self._db_sync_thread.run)
        db_sync_thread.daemon = True
        db_sync_thread.start()

        self._recon_thread = ReconciliationThread(self._driver)
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduling_thread = SchedulingThread(
            self._driver, self._job_exe_manager, self._job_type_manager,
            self._node_manager, self._offer_manager, self._scheduler_manager)
        scheduling_thread = threading.Thread(
            target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._reconcile_running_jobs()

    def reregistered(self, driver, masterInfo):
        '''
        Invoked when the scheduler re-registers with a newly elected Mesos
        master.  This is only called when the scheduler has previously been
        registered.  masterInfo contains information about the newly elected
        master.

        See documentation for :meth:`mesos_api.mesos.Scheduler.reregistered`.
        '''

        self._driver = driver
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)
        logger.info('Scale scheduler re-registered with Mesos master at %s:%i',
                    self._master_hostname, self._master_port)

        # Update driver for background threads
        self._db_sync_thread.driver = self._driver
        self._recon_thread.driver = self._driver
        self._scheduling_thread.driver = self._driver

        self._reconcile_running_jobs()

    def disconnected(self, driver):
        '''
        Invoked when the scheduler becomes disconnected from the master, e.g.
        the master fails and another is taking over.

        See documentation for :meth:`mesos_api.mesos.Scheduler.disconnected`.
        '''

        if self._master_hostname:
            logger.error(
                'Scale scheduler disconnected from the Mesos master at %s:%i',
                self._master_hostname, self._master_port)
        else:
            logger.error('Scale scheduler disconnected from the Mesos master')

    def resourceOffers(self, driver, offers):
        '''
        Invoked when resources have been offered to this framework. A single
        offer will only contain resources from a single slave.  Resources
        associated with an offer will not be re-offered to _this_ framework
        until either (a) this framework has rejected those resources (see
        SchedulerDriver.launchTasks) or (b) those resources have been
        rescinded (see Scheduler.offerRescinded).  Note that resources may be
        concurrently offered to more than one framework at a time (depending
        on the allocator being used).  In that case, the first framework to
        launch tasks using those resources will be able to use them while the
        other frameworks will have those resources rescinded (or if a
        framework has already launched tasks with those resources then those
        tasks will fail with a TASK_LOST status and a message saying as much).

        See documentation for :meth:`mesos_api.mesos.Scheduler.resourceOffers`.
        '''

        started = now()

        agent_ids = []
        resource_offers = []
        for offer in offers:
            offer_id = offer.id.value
            agent_id = offer.slave_id.value
            disk = 0
            mem = 0
            cpus = 0
            for resource in offer.resources:
                if resource.name == 'disk':
                    disk = resource.scalar.value
                elif resource.name == 'mem':
                    mem = resource.scalar.value
                elif resource.name == 'cpus':
                    cpus = resource.scalar.value
            resources = NodeResources(cpus=cpus, mem=mem, disk=disk)
            agent_ids.append(agent_id)
            resource_offers.append(ResourceOffer(offer_id, agent_id,
                                                 resources))

        self._node_manager.add_agent_ids(agent_ids)
        self._offer_manager.add_new_offers(resource_offers)

        duration = now() - started
        msg = 'Scheduler resourceOffers() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def offerRescinded(self, driver, offerId):
        '''
        Invoked when an offer is no longer valid (e.g., the slave was lost or
        another framework used resources in the offer.) If for whatever reason
        an offer is never rescinded (e.g., dropped message, failing over
        framework, etc.), a framework that attempts to launch tasks using an
        invalid offer will receive TASK_LOST status updates for those tasks.

        See documentation for :meth:`mesos_api.mesos.Scheduler.offerRescinded`.
        '''

        started = now()

        offer_id = offerId.value
        self._offer_manager.remove_offers([offer_id])

        duration = now() - started
        msg = 'Scheduler offerRescinded() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def statusUpdate(self, driver, status):
        '''
        Invoked when the status of a task has changed (e.g., a slave is lost
        and so the task is lost, a task finishes and an executor sends a
        status update saying so, etc.) Note that returning from this callback
        acknowledges receipt of this status update.  If for whatever reason
        the scheduler aborts during this callback (or the process exits)
        another status update will be delivered.  Note, however, that this is
        currently not true if the slave sending the status update is lost or
        fails during that time.

        See documentation for :meth:`mesos_api.mesos.Scheduler.statusUpdate`.
        '''

        started = now()

        task_id = status.task_id.value
        job_exe_id = RunningJobExecution.get_job_exe_id(task_id)
        logger.info('Status update for task %s: %s', task_id,
                    utils.status_to_string(status.state))

        # Since we have a status update for this task, remove it from reconciliation set
        self._recon_thread.remove_task_id(task_id)

        try:
            running_job_exe = self._job_exe_manager.get_job_exe(job_exe_id)

            if running_job_exe:
                results = TaskResults(task_id)
                results.exit_code = utils.parse_exit_code(status)
                results.when = utils.get_status_timestamp(status)
                if status.state in [
                        mesos_pb2.TASK_FINISHED, mesos_pb2.TASK_ERROR,
                        mesos_pb2.TASK_FAILED, mesos_pb2.TASK_KILLED
                ]:
                    try:
                        log_start_time = now()
                        hostname = running_job_exe._node_hostname
                        port = running_job_exe._node_port
                        task_dir = get_slave_task_directory(
                            hostname, port, task_id)
                        results.stdout = get_slave_task_file(
                            hostname, port, task_dir, 'stdout')
                        results.stderr = get_slave_task_file(
                            hostname, port, task_dir, 'stderr')
                        log_end_time = now()
                        logger.debug('Time to pull logs for task: %s',
                                     str(log_end_time - log_start_time))
                    except Exception:
                        logger.exception('Error pulling logs for task %s',
                                         task_id)

                # Apply status update to running job execution
                if status.state == mesos_pb2.TASK_RUNNING:
                    hostname = running_job_exe._node_hostname
                    port = running_job_exe._node_port
                    task_dir = get_slave_task_directory(
                        hostname, port, task_id)
                    stdout_url = get_slave_task_url(hostname, port, task_dir,
                                                    'stdout')
                    stderr_url = get_slave_task_url(hostname, port, task_dir,
                                                    'stderr')
                    running_job_exe.task_running(task_id, results.when,
                                                 stdout_url, stderr_url)
                elif status.state == mesos_pb2.TASK_FINISHED:
                    running_job_exe.task_complete(results)
                elif status.state == mesos_pb2.TASK_LOST:
                    running_job_exe.task_fail(
                        results, Error.objects.get_builtin_error('mesos-lost'))
                elif status.state in [
                        mesos_pb2.TASK_ERROR, mesos_pb2.TASK_FAILED,
                        mesos_pb2.TASK_KILLED
                ]:
                    running_job_exe.task_fail(results)

                # Remove finished job execution
                if running_job_exe.is_finished():
                    self._job_exe_manager.remove_job_exe(job_exe_id)
            else:
                # Scheduler doesn't have any knowledge of this job execution
                Queue.objects.handle_job_failure(
                    job_exe_id, now(),
                    Error.objects.get_builtin_error('scheduler-lost'))
        except Exception:
            logger.exception(
                'Error handling status update for job execution: %s',
                job_exe_id)
            # Error handling status update, add task so it can be reconciled
            self._recon_thread.add_task_ids([task_id])

        duration = now() - started
        msg = 'Scheduler statusUpdate() took %.3f seconds'
        if duration > ScaleScheduler.DATABASE_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def frameworkMessage(self, driver, executorId, slaveId, message):
        '''
        Invoked when an executor sends a message. These messages are best
        effort; do not expect a framework message to be retransmitted in any
        reliable fashion.

        See documentation for :meth:`mesos_api.mesos.Scheduler.frameworkMessage`.
        '''

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.info('Message from %s on host %s: %s', executorId.value,
                        node.hostname, message)
        else:
            logger.info('Message from %s on agent %s: %s', executorId.value,
                        agent_id, message)

        duration = now() - started
        msg = 'Scheduler frameworkMessage() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def slaveLost(self, driver, slaveId):
        '''
        Invoked when a slave has been determined unreachable (e.g., machine
        failure, network partition.) Most frameworks will need to reschedule
        any tasks launched on this slave on a new slave.

        See documentation for :meth:`mesos_api.mesos.Scheduler.slaveLost`.
        '''

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.error('Node lost on host %s', node.hostname)
        else:
            logger.error('Node lost on agent %s', agent_id)

        self._node_manager.lost_node(agent_id)
        self._offer_manager.lost_node(agent_id)

        # Fail job executions that were running on the lost node
        if node:
            for running_job_exe in self._job_exe_manager.get_job_exes_on_node(
                    node.id):
                try:
                    running_job_exe.execution_lost(started)
                except DatabaseError:
                    logger.exception('Error failing lost job execution: %s',
                                     running_job_exe.id)
                    # Error failing execution, add task so it can be reconciled
                    task = running_job_exe.current_task
                    if task:
                        self._recon_thread.add_task_ids([task.id])
                if running_job_exe.is_finished():
                    self._job_exe_manager.remove_job_exe(running_job_exe.id)

        duration = now() - started
        msg = 'Scheduler slaveLost() took %.3f seconds'
        if duration > ScaleScheduler.DATABASE_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def executorLost(self, driver, executorId, slaveId, status):
        '''
        Invoked when an executor has exited/terminated. Note that any tasks
        running will have TASK_LOST status updates automatically generated.

        See documentation for :meth:`mesos_api.mesos.Scheduler.executorLost`.
        '''

        started = now()

        agent_id = slaveId.value
        node = self._node_manager.get_node(agent_id)

        if node:
            logger.error('Executor %s lost on host: %s', executorId.value,
                         node.hostname)
        else:
            logger.error('Executor %s lost on agent: %s', executorId.value,
                         agent_id)

        duration = now() - started
        msg = 'Scheduler executorLost() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def error(self, driver, message):
        '''
        Invoked when there is an unrecoverable error in the scheduler or
        scheduler driver.  The driver will be aborted BEFORE invoking this
        callback.

        See documentation for :meth:`mesos_api.mesos.Scheduler.error`.
        '''

        logger.error('Unrecoverable error: %s', message)

    def shutdown(self):
        '''Performs any clean up required by this scheduler implementation.

        Currently this method just notifies any background threads to break out of their work loops.
        '''

        logger.info('Scheduler shutdown invoked, stopping background threads')
        self._db_sync_thread.shutdown()
        self._recon_thread.shutdown()
        self._scheduling_thread.shutdown()

    def _reconcile_running_jobs(self):
        """Looks up all currently running jobs in the database and sets them up to be reconciled with Mesos"""

        # List of task IDs to reconcile
        task_ids = []

        # Query for job executions that are running
        job_exes = JobExecution.objects.get_running_job_exes()

        # Find current task IDs for running executions
        for job_exe in job_exes:
            running_job_exe = self._job_exe_manager.get_job_exe(job_exe.id)
            if running_job_exe:
                task = running_job_exe.current_task
                if task:
                    task_ids.append(task.id)
            else:
                # Fail any executions that the scheduler has lost
                Queue.objects.handle_job_failure(
                    job_exe.id, now(),
                    Error.objects.get_builtin_error('scheduler-lost'))

        # Send task IDs to reconciliation thread
        self._recon_thread.add_task_ids(task_ids)
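
The helper above only collects task IDs and hands them to the reconciliation thread; the ReconciliationThread itself is not shown in these examples. As a rough sketch of how an explicit reconciliation pass might turn those IDs into a request against the Mesos driver (an assumption for illustration, using the same mesos_pb2 bindings the examples already rely on):

import mesos_pb2


def reconcile_task_ids(driver, task_ids):
    """Ask the Mesos master to re-send status updates for the given task IDs."""
    statuses = []
    for task_id in task_ids:
        status = mesos_pb2.TaskStatus()
        status.task_id.value = task_id
        # TaskStatus.state is a required protobuf field; the master ignores
        # its value for reconciliation requests.
        status.state = mesos_pb2.TASK_LOST
        statuses.append(status)
    # Passing an empty list instead would request implicit reconciliation of
    # every task the master knows about for this framework.
    driver.reconcileTasks(statuses)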
Example #7
    def registered(self, driver, frameworkId, masterInfo):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        """

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info(
            'Scale scheduler registered as framework %s with Mesos master at %s:%i',
            self._framework_id, self._master_hostname, self._master_port)

        initialize_system()
        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)
        scheduler_mgr.update_from_mesos(
            self._framework_id,
            HostAddress(self._master_hostname, self._master_port))
        recon_mgr.driver = self._driver

        # Initial database sync
        logger.info('Performing initial sync with Scale database')
        reset_error_cache()
        job_exe_mgr.init_with_database()
        job_type_mgr.sync_with_database()
        scheduler_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        # Start up background threads
        self._messaging_thread = MessagingThread()
        restart_msg = RestartScheduler()
        restart_msg.when = now()
        self._messaging_thread.add_initial_messages([restart_msg])
        messaging_thread = threading.Thread(target=self._messaging_thread.run)
        messaging_thread.daemon = True
        messaging_thread.start()

        self._recon_thread = ReconciliationThread()
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduler_status_thread = SchedulerStatusThread()
        scheduler_status_thread = threading.Thread(
            target=self._scheduler_status_thread.run)
        scheduler_status_thread.daemon = True
        scheduler_status_thread.start()

        self._scheduling_thread = SchedulingThread(self._driver)
        scheduling_thread = threading.Thread(
            target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._sync_thread = SyncThread(self._driver)
        sync_thread = threading.Thread(target=self._sync_thread.run)
        sync_thread.daemon = True
        sync_thread.start()

        self._task_handling_thread = TaskHandlingThread(self._driver)
        task_handling_thread = threading.Thread(
            target=self._task_handling_thread.run)
        task_handling_thread.daemon = True
        task_handling_thread.start()

        self._task_update_thread = TaskUpdateThread()
        task_update_thread = threading.Thread(
            target=self._task_update_thread.run)
        task_update_thread.daemon = True
        task_update_thread.start()

        self._reconcile_running_jobs()
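
Each worker in registered() above is started with the same three-line create/daemonize/start sequence. A small helper like the following (hypothetical, not part of Scale) captures that pattern:

import threading


def start_daemon_thread(worker, name=None):
    """Run worker.run() on a daemon thread so it cannot block interpreter exit."""
    thread = threading.Thread(target=worker.run, name=name)
    thread.daemon = True
    thread.start()
    return thread

# Usage, assuming a worker object exposing run() and shutdown():
#     self._recon_thread = ReconciliationThread()
#     start_daemon_thread(self._recon_thread, name='reconciliation')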
Example #8
class ScaleScheduler(MesosScheduler):
    """Mesos scheduler for the Scale framework"""

    # Warning threshold for normal callbacks (those with no external calls, e.g. database queries)
    NORMAL_WARN_THRESHOLD = datetime.timedelta(milliseconds=5)

    # Warning threshold for callbacks that include database queries
    DATABASE_WARN_THRESHOLD = datetime.timedelta(milliseconds=100)

    def __init__(self):
        """Constructor
        """

        self._driver = None
        self._framework_id = None
        self._master_hostname = None
        self._master_port = None

        self._messaging_thread = None
        self._recon_thread = None
        self._scheduler_status_thread = None
        self._scheduling_thread = None
        self._sync_thread = None
        self._task_handling_thread = None
        self._task_update_thread = None

    def registered(self, driver, frameworkId, masterInfo):
        """
        Invoked when the scheduler successfully registers with a Mesos master.
        It is called with the frameworkId, a unique ID generated by the
        master, and the masterInfo which is information about the master
        itself.

        See documentation for :meth:`mesos_api.mesos.Scheduler.registered`.
        """

        self._driver = driver
        self._framework_id = frameworkId.value
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info(
            'Scale scheduler registered as framework %s with Mesos master at %s:%i',
            self._framework_id, self._master_hostname, self._master_port)

        initialize_system()
        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)
        scheduler_mgr.update_from_mesos(
            self._framework_id,
            HostAddress(self._master_hostname, self._master_port))
        recon_mgr.driver = self._driver

        # Initial database sync
        logger.info('Performing initial sync with Scale database')
        reset_error_cache()
        job_exe_mgr.init_with_database()
        job_type_mgr.sync_with_database()
        scheduler_mgr.sync_with_database()
        workspace_mgr.sync_with_database()

        # Start up background threads
        self._messaging_thread = MessagingThread()
        restart_msg = RestartScheduler()
        restart_msg.when = now()
        self._messaging_thread.add_initial_messages([restart_msg])
        messaging_thread = threading.Thread(target=self._messaging_thread.run)
        messaging_thread.daemon = True
        messaging_thread.start()

        self._recon_thread = ReconciliationThread()
        recon_thread = threading.Thread(target=self._recon_thread.run)
        recon_thread.daemon = True
        recon_thread.start()

        self._scheduler_status_thread = SchedulerStatusThread()
        scheduler_status_thread = threading.Thread(
            target=self._scheduler_status_thread.run)
        scheduler_status_thread.daemon = True
        scheduler_status_thread.start()

        self._scheduling_thread = SchedulingThread(self._driver)
        scheduling_thread = threading.Thread(
            target=self._scheduling_thread.run)
        scheduling_thread.daemon = True
        scheduling_thread.start()

        self._sync_thread = SyncThread(self._driver)
        sync_thread = threading.Thread(target=self._sync_thread.run)
        sync_thread.daemon = True
        sync_thread.start()

        self._task_handling_thread = TaskHandlingThread(self._driver)
        task_handling_thread = threading.Thread(
            target=self._task_handling_thread.run)
        task_handling_thread.daemon = True
        task_handling_thread.start()

        self._task_update_thread = TaskUpdateThread()
        task_update_thread = threading.Thread(
            target=self._task_update_thread.run)
        task_update_thread.daemon = True
        task_update_thread.start()

        self._reconcile_running_jobs()

    def reregistered(self, driver, masterInfo):
        """
        Invoked when the scheduler re-registers with a newly elected Mesos
        master.  This is only called when the scheduler has previously been
        registered.  masterInfo contains information about the newly elected
        master.

        See documentation for :meth:`mesos_api.mesos.Scheduler.reregistered`.
        """

        self._driver = driver
        self._master_hostname = masterInfo.hostname
        self._master_port = masterInfo.port
        logger.info('Scale scheduler re-registered with Mesos master at %s:%i',
                    self._master_hostname, self._master_port)

        Scheduler.objects.update_master(self._master_hostname,
                                        self._master_port)
        scheduler_mgr.update_from_mesos(mesos_address=HostAddress(
            self._master_hostname, self._master_port))

        # Update driver for background threads
        recon_mgr.driver = self._driver
        self._scheduling_thread.driver = self._driver
        self._sync_thread.driver = self._driver
        self._task_handling_thread.driver = self._driver

        self._reconcile_running_jobs()

    def disconnected(self, driver):
        """
        Invoked when the scheduler becomes disconnected from the master, e.g.
        the master fails and another is taking over.

        See documentation for :meth:`mesos_api.mesos.Scheduler.disconnected`.
        """

        if self._master_hostname:
            logger.error(
                'Scale scheduler disconnected from the Mesos master at %s:%i',
                self._master_hostname, self._master_port)
        else:
            logger.error('Scale scheduler disconnected from the Mesos master')

    def resourceOffers(self, driver, offers):
        """
        Invoked when resources have been offered to this framework. A single
        offer will only contain resources from a single slave.  Resources
        associated with an offer will not be re-offered to _this_ framework
        until either (a) this framework has rejected those resources (see
        SchedulerDriver.launchTasks) or (b) those resources have been
        rescinded (see Scheduler.offerRescinded).  Note that resources may be
        concurrently offered to more than one framework at a time (depending
        on the allocator being used).  In that case, the first framework to
        launch tasks using those resources will be able to use them while the
        other frameworks will have those resources rescinded (or if a
        framework has already launched tasks with those resources then those
        tasks will fail with a TASK_LOST status and a message saying as much).

        See documentation for :meth:`mesos_api.mesos.Scheduler.resourceOffers`.
        """

        started = now()

        agents = {}
        resource_offers = []
        total_resources = NodeResources()
        for offer in offers:
            offer_id = offer.id.value
            agent_id = offer.slave_id.value
            framework_id = offer.framework_id.value
            hostname = offer.hostname
            resource_list = []
            for resource in offer.resources:
                if resource.type == 0:  # This is the SCALAR type
                    resource_list.append(
                        ScalarResource(resource.name, resource.scalar.value))
            resources = NodeResources(resource_list)
            total_resources.add(resources)
            agents[agent_id] = Agent(agent_id, hostname)
            resource_offers.append(
                ResourceOffer(offer_id, agent_id, framework_id, resources,
                              started))

        node_mgr.register_agents(agents.values())
        resource_mgr.add_new_offers(resource_offers)

        num_offers = len(resource_offers)
        logger.info('Received %d offer(s) with %s from %d node(s)', num_offers,
                    total_resources, len(agents))
        scheduler_mgr.add_new_offer_count(num_offers)

        duration = now() - started
        msg = 'Scheduler resourceOffers() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def offerRescinded(self, driver, offerId):
        """
        Invoked when an offer is no longer valid (e.g., the slave was lost or
        another framework used resources in the offer.) If for whatever reason
        an offer is never rescinded (e.g., dropped message, failing over
        framework, etc.), a framework that attempts to launch tasks using an
        invalid offer will receive TASK_LOST status updates for those tasks.

        See documentation for :meth:`mesos_api.mesos.Scheduler.offerRescinded`.
        """

        started = now()

        offer_id = offerId.value
        resource_mgr.rescind_offers([offer_id])

        duration = now() - started
        msg = 'Scheduler offerRescinded() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def statusUpdate(self, driver, status):
        """
        Invoked when the status of a task has changed (e.g., a slave is lost
        and so the task is lost, a task finishes and an executor sends a
        status update saying so, etc.) Note that returning from this callback
        acknowledges receipt of this status update.  If for whatever reason
        the scheduler aborts during this callback (or the process exits)
        another status update will be delivered.  Note, however, that this is
        currently not true if the slave sending the status update is lost or
        fails during that time.

        See documentation for :meth:`mesos_api.mesos.Scheduler.statusUpdate`.
        """

        started = now()

        model = utils.create_task_update_model(status)
        mesos_status = model.status
        task_update = TaskStatusUpdate(model,
                                       utils.get_status_agent_id(status),
                                       utils.get_status_data(status))
        task_id = task_update.task_id
        was_task_finished = task_update.status in TaskStatusUpdate.TERMINAL_STATUSES
        was_job_finished = False

        if mesos_status == 'TASK_ERROR':
            logger.error('Status update for task %s: %s', task_id,
                         mesos_status)
        elif mesos_status == 'TASK_LOST':
            logger.warning('Status update for task %s: %s', task_id,
                           mesos_status)
        else:
            logger.info('Status update for task %s: %s', task_id, mesos_status)

        # Since we have a status update for this task, remove it from reconciliation set
        recon_mgr.remove_task_id(task_id)

        # Hand off task update to be saved in the database
        if task_id.startswith(JOB_TASK_ID_PREFIX):
            # Grab job execution ID from manager
            cluster_id = JobExecution.parse_cluster_id(task_id)
            job_exe = job_exe_mgr.get_running_job_exe(cluster_id)
            if job_exe:
                model.job_exe_id = job_exe.id
        task_update_mgr.add_task_update(model)

        # Update task with latest status
        # This should happen before the job execution and node managers are updated, since they assume that the
        # task has already been updated
        task_mgr.handle_task_update(task_update)

        if task_id.startswith(JOB_TASK_ID_PREFIX):
            # Job task, so update the job execution
            try:
                job_exe = job_exe_mgr.handle_task_update(task_update)
                if job_exe and job_exe.is_finished():
                    was_job_finished = True
                    cleanup_mgr.add_job_execution(job_exe)
            except Exception:
                cluster_id = JobExecution.parse_cluster_id(task_id)
                logger.exception(
                    'Error handling status update for job execution: %s',
                    cluster_id)
                # Error handling status update, add task so it can be reconciled
                task = task_mgr.get_task(task_id)
                if task:
                    recon_mgr.add_tasks([task])
        else:
            # Not a job task, so must be either a node or system task
            node_mgr.handle_task_update(task_update)
            system_task_mgr.handle_task_update(task_update)

        scheduler_mgr.add_task_update_counts(was_task_finished,
                                             was_job_finished)

        duration = now() - started
        msg = 'Scheduler statusUpdate() took %.3f seconds'
        if duration > ScaleScheduler.DATABASE_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def frameworkMessage(self, driver, executorId, slaveId, message):
        """
        Invoked when an executor sends a message. These messages are best
        effort; do not expect a framework message to be retransmitted in any
        reliable fashion.

        See documentation for :meth:`mesos_api.mesos.Scheduler.frameworkMessage`.
        """

        started = now()

        agent_id = slaveId.value
        node = node_mgr.get_node(agent_id)

        if node:
            logger.info('Message from %s on host %s: %s', executorId.value,
                        node.hostname, message)
        else:
            logger.info('Message from %s on agent %s: %s', executorId.value,
                        agent_id, message)

        duration = now() - started
        msg = 'Scheduler frameworkMessage() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def slaveLost(self, driver, slaveId):
        """
        Invoked when a slave has been determined unreachable (e.g., machine
        failure, network partition.) Most frameworks will need to reschedule
        any tasks launched on this slave on a new slave.

        See documentation for :meth:`mesos_api.mesos.Scheduler.slaveLost`.
        """

        started = now()

        agent_id = slaveId.value
        node = node_mgr.get_node(agent_id)

        if node:
            logger.warning('Node lost on host %s', node.hostname)
        else:
            logger.warning('Node lost on agent %s', agent_id)

        node_mgr.lost_node(agent_id)
        resource_mgr.lost_agent(agent_id)

        # Fail job executions that were running on the lost node
        if node:
            for finished_job_exe in job_exe_mgr.lost_node(node.id, started):
                cleanup_mgr.add_job_execution(finished_job_exe)

        duration = now() - started
        msg = 'Scheduler slaveLost() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def executorLost(self, driver, executorId, slaveId, status):
        """
        Invoked when an executor has exited/terminated. Note that any tasks
        running will have TASK_LOST status updates automatically generated.

        See documentation for :meth:`mesos_api.mesos.Scheduler.executorLost`.
        """

        started = now()

        agent_id = slaveId.value
        node = node_mgr.get_node(agent_id)

        if node:
            logger.warning('Executor %s lost on host: %s', executorId.value,
                           node.hostname)
        else:
            logger.warning('Executor %s lost on agent: %s', executorId.value,
                           agent_id)

        duration = now() - started
        msg = 'Scheduler executorLost() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

    def error(self, driver, message):
        """
        Invoked when there is an unrecoverable error in the scheduler or
        scheduler driver.  The driver will be aborted BEFORE invoking this
        callback.

        See documentation for :meth:`mesos_api.mesos.Scheduler.error`.
        """

        logger.error('Unrecoverable error: %s', message)

    def shutdown(self):
        """Performs any clean up required by this scheduler implementation.

        Currently this method just notifies any background threads to break out of their work loops.
        """

        logger.info('Scheduler shutdown invoked, stopping background threads')
        self._messaging_thread.shutdown()
        self._recon_thread.shutdown()
        self._scheduler_status_thread.shutdown()
        self._scheduling_thread.shutdown()
        self._sync_thread.shutdown()
        self._task_handling_thread.shutdown()
        self._task_update_thread.shutdown()

    def _reconcile_running_jobs(self):
        """Reconciles all currently running job executions with Mesos"""

        # List of tasks to reconcile
        tasks_to_reconcile = []

        # Find current tasks for running executions
        for running_job_exe in job_exe_mgr.get_running_job_exes():
            task = running_job_exe.current_task
            if task:
                tasks_to_reconcile.append(task)

        # Send tasks to reconciliation thread
        recon_mgr.add_tasks(tasks_to_reconcile)
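
None of the examples show how the scheduler itself is driven. A minimal sketch of the usual wiring, assuming the classic Mesos Python bindings (the import paths and framework settings here are assumptions for illustration, not Scale's actual entry point):

from mesos.interface import mesos_pb2
from mesos.native import MesosSchedulerDriver


def run_scheduler(scheduler, master='zk://localhost:2181/mesos'):
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''  # empty string lets Mesos fill in the current user
    framework.name = 'Scale'

    driver = MesosSchedulerDriver(scheduler, framework, master)
    try:
        # run() blocks until the driver stops or aborts; registered() and the
        # other callbacks above are invoked from the driver's threads.
        status = driver.run()
    finally:
        scheduler.shutdown()
        driver.stop()
    return status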