Пример #1
0
    def cleanup(self):
        """Destroy zombie worker instances and note it in their worker logs.

        A worker listed in the workers log is treated as a zombie when it
        has an instance id but no instance time.  All zombie instance ids
        are handed to ``hub.destroy`` through a ``Retrier`` built from
        ``DESTROY_ERROR_TIMEOUT``, ``DESTROY_ERROR_SLEEP`` and ``logfh``.
        For every instance reported back as destroyed, a timestamped
        ``[watchdog]`` line is appended to that worker's log file under
        ``self.path_workers``.

        Returns None; returns early when there are no zombie workers.
        """
        workers_log = logalyzer.WorkersLog(self.path_workers,
                                           self.taskconf.command)
        zombie_workers = [
            worker for worker in workers_log.workers
            if worker.instanceid and not worker.instancetime
        ]
        if not zombie_workers:
            return

        zombie_instances = [worker.instanceid for worker in zombie_workers]
        self.log("destroying zombie instances: " +
                 " ".join(sorted(zombie_instances)))

        hub = Hub(self.taskconf.hub_apikey)
        retrier = Retrier(self.DESTROY_ERROR_TIMEOUT, self.DESTROY_ERROR_SLEEP,
                          self.logfh)
        # the retrier call yields (ipaddress, instanceid) pairs; only the
        # instance id is needed here
        destroyed_instances = [
            instanceid
            for _ipaddress, instanceid in retrier(hub.destroy,
                                                  *zombie_instances)
        ]
        self.log("destroyed zombie instances: " +
                 " ".join(sorted(destroyed_instances)))

        # log destruction to the respective worker logs
        destroyed_lookup = set(destroyed_instances)
        for zombie_worker in zombie_workers:
            if zombie_worker.instanceid not in destroyed_lookup:
                continue

            log_path = "%s/%d" % (self.path_workers, zombie_worker.worker_id)
            timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
            # the context manager closes the log even if the write fails
            with open(log_path, "a") as worker_log:
                worker_log.write("\n# %s [watchdog] destroyed worker %s\n" %
                                 (timestamp, zombie_worker.instanceid))
Пример #2
0
                def thread():
                    """Launch ``new_workers`` instances and feed ``launchq``.

                    Each instance yielded by ``hub.launch`` is put on
                    ``launchq``.  If the launch raises, one ``None`` is
                    queued for every worker that was not yielded, and the
                    traceback is written to the manager log unless the
                    exception is a ``hub.Stopped``.
                    """

                    def keep_launching():
                        # True for as long as the stop event is not set
                        return not self.event_stop.is_set()

                    hub = Hub(taskconf.hub_apikey)
                    launched = 0
                    try:
                        instances = hub.launch(
                            new_workers, VerboseLog(session_logs.manager),
                            keep_launching, **taskconf.ec2_opts)
                        for instance in instances:
                            # count the instance as soon as it is yielded,
                            # before it is queued
                            launched += 1
                            launchq.put(instance)
                    except Exception as e:
                        # pad the queue with a None per missing worker
                        missing = new_workers - launched
                        for _ in range(missing):
                            launchq.put(None)

                        if isinstance(e, hub.Stopped):
                            return
                        traceback.print_exc(file=session_logs.manager)
Пример #3
0
    def __init__(self,
                 session_logs,
                 taskconf,
                 sshkey,
                 ipaddress=None,
                 destroy=None,
                 event_stop=None,
                 launchq=None):
        """Set up a worker: use an existing host or launch one, then open SSH.

        Arguments:
            session_logs: stored as ``self.logs``; its ``manager`` attribute
                is wrapped in ``VerboseLog`` for ``hub.launch`` and its
                ``worker`` attribute receives the traceback on SSH failure.
            taskconf: configuration object; this code reads ``strikes``,
                ``timeout``, ``post``, ``user``, ``hub_apikey`` and
                ``ec2_opts`` from it.
            sshkey: object whose ``path`` is passed to ``SSH`` as
                ``identity_file``.
            ipaddress: address of an existing worker.  When falsy, a new
                instance is obtained through the Hub.
            destroy: stored as ``self.destroy``.  When None it defaults to
                False if ``ipaddress`` was given and True otherwise.
            event_stop: optional event; when truthy, SIGINT is set to
                ``SIG_IGN`` in this process, and ``is_set()`` is checked
                after an instance has been obtained.
            launchq: optional queue; when truthy the instance is taken from
                ``launchq.get()`` instead of calling ``hub.launch`` here.

        Raises:
            self.Error: no ``ipaddress`` and no ``taskconf.hub_apikey``, or
                ``SSH`` raised ``SSH.Error``.
            self.Terminated: no instance was obtained, or ``event_stop`` is
                set once the instance has been obtained.
        """

        self.pid = os.getpid()

        # with a stop event supplied, SIGINT is ignored in this process
        if event_stop:
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.event_stop = event_stop

        self.logs = session_logs
        self.sshkey = sshkey

        self.strikes = taskconf.strikes
        self.strike = 0

        self.timeout = taskconf.timeout
        self.cleanup_command = taskconf.post
        self.user = taskconf.user

        self.ipaddress = ipaddress
        self.instanceid = None

        self.hub = None
        self.ssh = None

        # default: destroy only workers that were launched here
        if destroy is None:
            if ipaddress:
                destroy = False
            else:
                destroy = True
        self.destroy = destroy

        if not ipaddress:
            # no existing worker given: an instance has to come from the Hub
            if not taskconf.hub_apikey:
                raise self.Error(
                    "can't auto launch a worker without a Hub API KEY")
            self.hub = Hub(taskconf.hub_apikey)

            if launchq:
                # take an instance from the queue, inside the
                # sighandle.sigignore context for SIGINT and SIGTERM
                with sighandle.sigignore(signal.SIGINT, signal.SIGTERM):
                    instance = launchq.get()
            else:

                # mutable flag object so the nested handler can record the
                # signal (a plain local could not be rebound from handler)
                class Bool:
                    value = False

                stopped = Bool()

                def handler(s, f):
                    stopped.value = True

                # launch a single instance with handler registered for
                # SIGINT and SIGTERM via sighandle.sighandle
                with sighandle.sighandle(handler, signal.SIGINT,
                                         signal.SIGTERM):

                    # callback handed to hub.launch: False once a signal
                    # has been recorded by handler
                    def callback():
                        return not stopped.value

                    instance = list(
                        self.hub.launch(1, VerboseLog(session_logs.manager),
                                        callback, **taskconf.ec2_opts))[0]

            # a falsy instance or an already-set stop event aborts here
            if not instance or (event_stop and event_stop.is_set()):
                raise self.Terminated

            # instance unpacks into an (ipaddress, instanceid) pair
            self.ipaddress, self.instanceid = instance

            self.status("launched worker %s" % self.instanceid)

        else:
            self.status("using existing worker")

        self.handle_stop = self._stop_handler(event_stop)

        try:
            self.ssh = SSH(self.ipaddress,
                           identity_file=self.sshkey.path,
                           login_name=taskconf.user,
                           callback=self.handle_stop)
        except SSH.Error, e:
            # report the failure, log the traceback to the worker log and
            # re-raise as this class's own Error
            self.status("unreachable via ssh: " + str(e))
            traceback.print_exc(file=self.logs.worker)

            raise self.Error(e)