class CommandRunner(object):
    """Run queued commands with bounded concurrency and retry-on-failure.

    Commands are popped from a QueueDir of json-serialized command items;
    each is wrapped in a Job, monitored until completion, and requeued
    (up to `max_retries`) when it fails to start or exits non-zero.
    """

    def __init__(self, options):
        self.queuedir = options.queuedir
        self.q = QueueDir('commands', self.queuedir)
        self.concurrency = options.concurrency
        self.retry_time = options.retry_time
        self.max_retries = options.max_retries
        self.max_time = options.max_time
        # Currently-running Job instances
        self.active = []
        # List of (signal_time, level, proc)
        self.to_kill = []

    def run(self, job):
        """
        Runs the given job
        """
        log.info("Running %s", job.cmd)
        try:
            job.start()
            self.active.append(job)
        except OSError:
            job.log.write("\nFailed with OSError; requeuing in %i seconds\n" %
                          self.retry_time)
            # Wait to requeue it
            # If we die, then it's still in cur, and will be moved back into
            # 'new' eventually
            self.q.requeue(job.item_id, self.retry_time, self.max_retries)

    def monitor(self):
        """
        Monitor running jobs
        """
        # Iterate over a copy since finished jobs are removed mid-loop
        for job in self.active[:]:
            self.q.touch(job.item_id)
            result = job.check()

            if result is not None:
                self.active.remove(job)
                if result == 0:
                    self.q.remove(job.item_id)
                else:
                    # log.warn is a deprecated alias; use log.warning
                    log.warning("%s failed; requeuing", job.item_id)
                    # Requeue it!
                    self.q.requeue(
                        job.item_id, self.retry_time, self.max_retries)

    def loop(self):
        """
        Main processing loop. Read new items from the queue and run them!
        """
        while True:
            self.monitor()
            if len(self.active) >= self.concurrency:
                # Wait!
                time.sleep(1)
                continue

            while len(self.active) < self.concurrency:
                item = self.q.pop()
                if not item:
                    # Don't wait for very long, since we have to check up on
                    # our children
                    if self.active:
                        self.q.wait(1)
                    else:
                        self.q.wait()
                    break

                item_id, fp = item
                try:
                    command = json.load(fp)
                    job = Job(command, item_id, self.q.getlog(item_id))
                    job.max_time = self.max_time
                    self.run(job)
                except ValueError:
                    # Couldn't parse it as json
                    # There's no hope!
                    self.q.log(item_id, "Couldn't load json; murdering")
                    self.q.murder(item_id)
                finally:
                    fp.close()
class PulsePusher(object):
    """
    Publish buildbot events via pulse.

    `queuedir`         - a directory to look for incoming events being written
                         by a buildbot master

    `publisher`        - an instance of mozillapulse.GenericPublisher
                         indicating where these messages should be sent

    `max_idle_time`    - number of seconds since last activity after which
                         we'll disconnect. Set to None/0 to disable

    `max_connect_time` - number of seconds since we last connected after which
                         we'll disconnect. Set to None/0 to disable

    `retry_time`       - time in seconds to wait between retries

    `max_retries`      - how many times to retry
    """
    def __init__(self, queuedir, publisher, max_idle_time=300,
                 max_connect_time=600, retry_time=60, max_retries=5):
        self.queuedir = QueueDir('pulse', queuedir)
        self.publisher = publisher
        self.max_idle_time = max_idle_time
        self.max_connect_time = max_connect_time
        self.retry_time = retry_time
        self.max_retries = max_retries

        # When should we next disconnect
        self._disconnect_timer = None
        # When did we last have activity
        self._last_activity = None
        # When did we last connect
        self._last_connection = None

    def send(self, events):
        """
        Send events to pulse

        `events` - a list of buildbot event dicts
        """
        # Start the connect clock on first send after a (re)connect
        if not self._last_connection and self.max_connect_time:
            self._last_connection = time.time()
        log.debug("Sending %i messages", len(events))
        start = time.time()
        skipped = 0
        sent = 0
        for e in events:
            routing_key = e['event']
            if any(exp.search(routing_key) for exp in skip_exps):
                skipped += 1
                log.debug("Skipping event %s", routing_key)
                continue
            else:
                log.debug("Sending event %s", routing_key)
                msg = BuildMessage(transform_times(e))
                self.publisher.publish(msg)
                sent += 1
        end = time.time()
        log.info("Sent %i messages in %.2fs (skipped %i)", sent,
                 end - start, skipped)
        self._last_activity = time.time()

        # Update our timers: next disconnect is the sooner of the
        # idle deadline and the connection-age deadline
        t = 0
        if self.max_connect_time:
            t = self._last_connection + self.max_connect_time
        if self.max_idle_time:
            if t:
                t = min(t, self._last_activity + self.max_idle_time)
            else:
                t = self._last_activity + self.max_idle_time
        if t:
            self._disconnect_timer = t

    def maybe_disconnect(self):
        "Disconnect from pulse if our timer has expired"
        now = time.time()
        if self._disconnect_timer and now > self._disconnect_timer:
            log.info("Disconnecting")
            self.publisher.disconnect()
            self._disconnect_timer = None
            self._last_connection = None
            self._last_activity = None

    def loop(self):
        """
        Main processing loop. Read new items from the queue, push them to
        pulse, remove processed items, and then wait for more.
        """
        while True:
            self.maybe_disconnect()

            # Grab any new events
            item_ids = []
            events = []
            come_back_soon = False
            try:
                while True:
                    item = self.queuedir.pop()
                    if not item:
                        break
                    if len(events) > 50:
                        come_back_soon = True
                        break

                    # Unpack BEFORE entering the try block so `fp` is always
                    # bound when the finally clause runs (previously a failed
                    # unpack would raise NameError from `fp.close()`)
                    item_id, fp = item
                    try:
                        item_ids.append(item_id)
                        log.debug("Loading %s", item)
                        events.extend(json.load(fp))
                    except Exception:
                        # Was a bare `except:`; narrowing to Exception lets
                        # KeyboardInterrupt/SystemExit propagate untouched
                        log.exception("Error loading %s", item_id)
                        raise
                    finally:
                        fp.close()
                log.info("Loaded %i events", len(events))
                self.send(events)
                for item_id in item_ids:
                    log.info("Removing %s", item_id)
                    try:
                        self.queuedir.remove(item_id)
                    except OSError:
                        # Somebody (re-)moved it already, that's ok!
                        pass
            except Exception:
                log.exception("Error processing messages")
                # Don't try again soon, something has gone horribly wrong!
                come_back_soon = False
                for item_id in item_ids:
                    self.queuedir.requeue(
                        item_id, self.retry_time, self.max_retries)

            if come_back_soon:
                # Let's do more right now!
                log.info("Doing more!")
                continue

            # Wait for more
            # don't wait more than our max_idle/max_connect_time
            now = time.time()
            to_wait = None
            if self._disconnect_timer:
                to_wait = self._disconnect_timer - now
                if to_wait < 0:
                    to_wait = None
            log.info("Waiting for %s", to_wait)
            self.queuedir.wait(to_wait)
class PulsePusher(object):
    """
    Publish buildbot events via pulse.

    `queuedir` - a directory to look for incoming events being written by
                 a buildbot master

    `publisher` - an instance of mozillapulse.GenericPublisher indicating where
                  these messages should be sent

    `max_idle_time` - number of seconds since last activity after which we'll
                      disconnect. Set to None/0 to disable

    `max_connect_time` - number of seconds since we last connected after which
                         we'll disconnect. Set to None/0 to disable

    `retry_time` - time in seconds to wait between retries

    `max_retries` - how many times to retry
    """
    def __init__(self, queuedir, publisher, max_idle_time=300,
                 max_connect_time=600, retry_time=60, max_retries=5):
        self.queuedir = QueueDir('pulse', queuedir)
        self.publisher = publisher
        self.max_idle_time = max_idle_time
        self.max_connect_time = max_connect_time
        self.retry_time = retry_time
        self.max_retries = max_retries

        # When should we next disconnect
        self._disconnect_timer = None
        # When did we last have activity
        self._last_activity = None
        # When did we last connect
        self._last_connection = None

    def send(self, events):
        """
        Send events to pulse

        `events` - a list of buildbot event dicts
        """
        # Start the connection-age clock on the first send after a connect
        if not self._last_connection and self.max_connect_time:
            self._last_connection = time.time()
        log.debug("Sending %i messages", len(events))
        start = time.time()
        skipped = 0
        sent = 0
        for e in events:
            routing_key = e['event']
            # skip_exps is a module-level list of compiled regexes of
            # routing keys to drop (presumably; defined elsewhere in file)
            if any(exp.search(routing_key) for exp in skip_exps):
                skipped += 1
                log.debug("Skipping event %s", routing_key)
                continue
            else:
                log.debug("Sending event %s", routing_key)
                msg = BuildMessage(transform_times(e))
                self.publisher.publish(msg)
                sent += 1
        end = time.time()
        log.info("Sent %i messages in %.2fs (skipped %i)", sent,
                 end - start, skipped)
        self._last_activity = time.time()

        # Update our timers: next disconnect time is the earlier of the
        # idle deadline and the connection-age deadline (if enabled)
        t = 0
        if self.max_connect_time:
            t = self._last_connection + self.max_connect_time
        if self.max_idle_time:
            if t:
                t = min(t, self._last_activity + self.max_idle_time)
            else:
                t = self._last_activity + self.max_idle_time
        if t:
            self._disconnect_timer = t

    def maybe_disconnect(self):
        "Disconnect from pulse if our timer has expired"
        now = time.time()
        if self._disconnect_timer and now > self._disconnect_timer:
            log.info("Disconnecting")
            self.publisher.disconnect()
            self._disconnect_timer = None
            self._last_connection = None
            self._last_activity = None

    def loop(self):
        """
        Main processing loop. Read new items from the queue, push them to
        pulse, remove processed items, and then wait for more.
        """
        while True:
            self.maybe_disconnect()

            # Grab any new events
            item_ids = []
            events = []
            come_back_soon = False
            try:
                while True:
                    item = self.queuedir.pop()
                    if not item:
                        break
                    # Cap the batch at >50 events; finish this batch and
                    # come straight back for the rest
                    if len(events) > 50:
                        come_back_soon = True
                        break
                    try:
                        # NOTE(review): if this unpack ever fails, `fp` is
                        # unbound in the finally below — confirm item shape
                        item_id, fp = item
                        item_ids.append(item_id)
                        log.debug("Loading %s", item)
                        events.extend(json.load(fp))
                    except:
                        # NOTE(review): bare except also catches
                        # KeyboardInterrupt/SystemExit before re-raising
                        log.exception("Error loading %s", item_id)
                        raise
                    finally:
                        fp.close()
                log.info("Loaded %i events", len(events))
                self.send(events)
                # Only remove items after a fully successful send
                for item_id in item_ids:
                    log.info("Removing %s", item_id)
                    try:
                        self.queuedir.remove(item_id)
                    except OSError:
                        # Somebody (re-)moved it already, that's ok!
                        pass
            except:
                log.exception("Error processing messages")
                # Don't try again soon, something has gone horribly wrong!
                come_back_soon = False
                # Put everything from this batch back for a later retry
                for item_id in item_ids:
                    self.queuedir.requeue(
                        item_id, self.retry_time, self.max_retries)

            if come_back_soon:
                # Let's do more right now!
                log.info("Doing more!")
                continue

            # Wait for more
            # don't wait more than our max_idle/max_connect_time
            now = time.time()
            to_wait = None
            if self._disconnect_timer:
                to_wait = self._disconnect_timer - now
                if to_wait < 0:
                    to_wait = None
            log.info("Waiting for %s", to_wait)
            self.queuedir.wait(to_wait)
class CommandRunner(object):
    # Runs json-serialized commands popped from a QueueDir, with bounded
    # concurrency; failed or unstartable jobs are requeued with a delay.
    def __init__(self, options):
        self.queuedir = options.queuedir
        self.q = QueueDir('commands', self.queuedir)
        self.concurrency = options.concurrency
        self.retry_time = options.retry_time
        self.max_retries = options.max_retries
        self.max_time = options.max_time
        # Currently-running Job instances
        self.active = []
        # List of (signal_time, level, proc)
        self.to_kill = []

    def run(self, job):
        """
        Runs the given job
        """
        log.info("Running %s", job.cmd)
        try:
            job.start()
            self.active.append(job)
        except OSError:
            job.log.write("\nFailed with OSError; requeuing in %i seconds\n" %
                          self.retry_time)
            # Wait to requeue it
            # If we die, then it's still in cur, and will be moved back into
            # 'new' eventually
            self.q.requeue(job.item_id, self.retry_time, self.max_retries)

    def monitor(self):
        """
        Monitor running jobs
        """
        # Iterate over a copy since finished jobs are removed mid-loop
        for job in self.active[:]:
            self.q.touch(job.item_id)
            # check() presumably returns None while running, else exit
            # status — confirm against Job definition elsewhere in file
            result = job.check()

            if result is not None:
                self.active.remove(job)
                if result == 0:
                    self.q.remove(job.item_id)
                else:
                    log.warn("%s failed; requeuing", job.item_id)
                    # Requeue it!
                    self.q.requeue(job.item_id, self.retry_time,
                                   self.max_retries)

    def loop(self):
        """
        Main processing loop. Read new items from the queue and run them!
        """
        while True:
            self.monitor()
            if len(self.active) >= self.concurrency:
                # Wait!
                time.sleep(1)
                continue

            while len(self.active) < self.concurrency:
                item = self.q.pop()
                if not item:
                    # Don't wait for very long, since we have to check up on
                    # our children
                    if self.active:
                        self.q.wait(1)
                    else:
                        self.q.wait()
                    break

                item_id, fp = item
                try:
                    command = json.load(fp)
                    job = Job(command, item_id, self.q.getlog(item_id))
                    job.max_time = self.max_time
                    self.run(job)
                except ValueError:
                    # Couldn't parse it as json
                    # There's no hope!
                    self.q.log(item_id, "Couldn't load json; murdering")
                    self.q.murder(item_id)
                finally:
                    fp.close()