Exemplo n.º 1
0
    def registered(self, driver, frameworkId, masterInfo):
        """
        Forward the scheduler registration callback to ``self._registered``.

        The handler is wrapped by ``catch`` together with
        ``self.exception_sender`` and then invoked with ``driver``,
        ``frameworkId`` and ``masterInfo`` unchanged.  Nothing is returned.

        NOTE(review): ``catch`` is defined outside this block; presumably it
        reports exceptions raised by the handler through the sender -- confirm.
        NOTE(review): the (driver, frameworkId, masterInfo) signature looks
        like the Mesos initial-registration callback rather than the
        re-registration one -- confirm against the Mesos Scheduler API.
        """
        guarded_handler = catch(self._registered, self.exception_sender)
        guarded_handler(driver, frameworkId, masterInfo)
Exemplo n.º 2
0
    def registered(self, driver, frameworkId, masterInfo):
        """
        Delegate the registration callback to ``self._registered``.

        ``catch`` wraps the private handler with ``self.exception_sender``;
        the wrapped callable then receives ``driver``, ``frameworkId`` and
        ``masterInfo`` exactly as they were passed in.  Returns ``None``.

        NOTE(review): ``catch`` lives outside this block; it presumably sends
        any exception raised by the handler through the sender -- confirm.
        NOTE(review): this signature appears to match the Mesos callback for a
        first registration, not a re-registration -- confirm against the
        Mesos Scheduler API.
        """
        safe_registered = catch(self._registered, self.exception_sender)
        safe_registered(driver, frameworkId, masterInfo)
Exemplo n.º 3
0
 def statusUpdate(self, driver, update):
     """
     Forward a status update callback to ``self._statusUpdate``.

     The handler is wrapped by ``catch`` with ``self.exception_sender`` and
     called with ``driver`` and ``update`` unchanged.  Returns ``None``.

     NOTE(review): ``catch`` is defined outside this block; presumably it
     reports handler exceptions through the sender -- confirm.
     """
     guarded_handler = catch(self._statusUpdate, self.exception_sender)
     guarded_handler(driver, update)
Exemplo n.º 4
0
 def resourceOffers(self, driver, offers):
     """
     Forward a resource offers callback to ``self._resourceOffers``.

     The handler is wrapped by ``catch`` with ``self.exception_sender`` and
     called with ``driver`` and ``offers`` unchanged.  Returns ``None``.

     NOTE(review): ``catch`` is defined outside this block; presumably it
     reports handler exceptions through the sender -- confirm.
     """
     guarded_handler = catch(self._resourceOffers, self.exception_sender)
     guarded_handler(driver, offers)
Exemplo n.º 5
0
def main(ns):
    """
    Run Relay as a Mesos framework.

    Relay's event loop and the Mesos scheduler each run in separate processes.
    They share a two-element multiprocessing.Array (``MV``) and a
    multiprocessing.Condition (``mesos_ready``), and report exceptions back to
    this supervising process through a one-way multiprocessing.Pipe.

    These two processes bounce control back and forth between mesos
    resourceOffers and Relay's warmer/cooler functions.  Relay warmer/cooler
    functions request that mesos tasks get spun up, but those requests are only
    filled if the mesos scheduler receives enough relevant offers.  Relay's
    requests don't build up: only the largest request since the last fulfilled
    request is fulfilled at the moment enough mesos resources are available.

    `ns` is a namespace-like object.  This function reads `mesos_master`,
    `mesos_framework_name`, `mesos_task_resources`, `warmer`, `cooler` and
    `delay` from it, and copies it via `ns.__class__(**ns.__dict__)`.

    This function never returns normally: it exits the interpreter with
    status 1, either right away when `--mesos_master` is missing or after the
    supervision loop detects a failed child process.
    """
    if ns.mesos_master is None:
        log.error(
            "Oops!  You didn't define --mesos_master",
            extra=dict(mesos_framework_name=ns.mesos_framework_name))
        build_arg_parser().print_usage()
        sys.exit(1)
    if not ns.mesos_task_resources:
        # `warning` is the supported spelling; `warn` is a deprecated alias.
        log.warning(
            "You didn't define '--mesos_task_resources'."
            "  Tasks may not start on slaves",
            extra=dict(mesos_framework_name=ns.mesos_framework_name))
    log.info(
        "Starting Relay Mesos!",
        extra={k: str(v) for k, v in ns.__dict__.items()})

    # a distributed value storing the num and type of tasks mesos scheduler
    # should create at any given moment in time.
    # Sign of MV determines task type: warmer or cooler
    # ie. A positive value of n means n warmer tasks
    MV = mp.Array('d', [0, 0])  # typecode 'd': two C doubles

    # one-way pipe that child processes use to report raised exceptions
    exception_receiver, exception_sender = mp.Pipe(False)
    # notify relay when mesos framework is ready
    mesos_ready = mp.Condition()

    # copy and then override warmer and cooler
    ns_relay = ns.__class__(**vars(ns))
    if ns.warmer:
        ns_relay.warmer = warmer_cooler_wrapper(MV, ns)
    if ns.cooler:
        ns_relay.cooler = warmer_cooler_wrapper(MV, ns)

    mesos_name = "Relay.Mesos Scheduler"
    mesos = mp.Process(
        target=catch(init_mesos_scheduler, exception_sender),
        kwargs=dict(ns=ns, MV=MV, exception_sender=exception_sender,
                    mesos_ready=mesos_ready),
        name=mesos_name)
    relay_name = "Relay.Runner Event Loop"
    relay = mp.Process(
        target=catch(init_relay, exception_sender),
        args=(ns_relay, mesos_ready, ns.mesos_framework_name),
        name=relay_name)
    mesos.start()  # start mesos framework
    relay.start()  # start relay's loop
    set_signals(mesos, relay, ns)

    # Supervise both children; leave the loop on the first sign of failure.
    while True:
        if exception_receiver.poll():
            # Keep whatever the child sent so the log entry says what failed
            # instead of silently dropping it.
            child_error = exception_receiver.recv()
            log.error(
                'Terminating child processes because one of them raised'
                ' an exception', extra=dict(
                    is_relay_alive=relay.is_alive(),
                    is_mesos_alive=mesos.is_alive(),
                    child_exception=repr(child_error),
                    mesos_framework_name=ns.mesos_framework_name))
            break
        if not relay.is_alive():
            log.error(
                "Relay died.  Check logs to see why.",
                extra=dict(mesos_framework_name=ns.mesos_framework_name))
            break
        if not mesos.is_alive():
            log.error(
                "Mesos Scheduler died and didn't notify me of its exception."
                "  This may be a code bug.  Check logs.",
                extra=dict(mesos_framework_name=ns.mesos_framework_name))
            break
        # save cpu cycles by checking for subprocess failures less often:
        # sleep for ns.delay, capped at 5 seconds
        time.sleep(min(ns.delay, 5))

    relay.terminate()
    mesos.terminate()
    sys.exit(1)
Exemplo n.º 6
0
 def statusUpdate(self, driver, update):
     """
     Delegate the status update callback to ``self._statusUpdate``.

     ``catch`` wraps the private handler with ``self.exception_sender``; the
     wrapped callable is then invoked with ``driver`` and ``update`` exactly
     as received.  Returns ``None``.

     NOTE(review): ``catch`` lives outside this block; it presumably sends
     any exception raised by the handler through the sender -- confirm.
     """
     safe_status_update = catch(self._statusUpdate, self.exception_sender)
     safe_status_update(driver, update)
Exemplo n.º 7
0
 def resourceOffers(self, driver, offers):
     """
     Delegate the resource offers callback to ``self._resourceOffers``.

     ``catch`` wraps the private handler with ``self.exception_sender``; the
     wrapped callable is then invoked with ``driver`` and ``offers`` exactly
     as received.  Returns ``None``.

     NOTE(review): ``catch`` lives outside this block; it presumably sends
     any exception raised by the handler through the sender -- confirm.
     """
     safe_resource_offers = catch(self._resourceOffers, self.exception_sender)
     safe_resource_offers(driver, offers)