def mark_dead_workers(cn, wids, message, traceback=None):
    """Flag workers as dead and close the tasks still attached to them.

    Nothing happens when `wids` is empty.  Otherwise two updates are
    issued through `cn`:

    * the `rework.worker` rows whose id is in `wids` get
      `running=False`, a `finished` stamp and `message` as `deathinfo`;
    * the `rework.task` rows of those workers whose status is not
      already 'done' get a `finished` stamp, `status='done'`,
      `abort=True` and the given `traceback`.
    """
    if not wids:
        return

    # workers first: record the time and cause of death
    worker_query = update('rework.worker').where(
        'id in %(ids)s',
        ids=tuple(wids)
    )
    worker_query.values(
        running=False,
        finished=utcnow(),
        deathinfo=message
    ).do(cn)

    # then every not-yet-done task owned by one of these workers
    task_query = update('rework.task as task').table(
        'rework.worker as worker'
    ).where(
        "task.status != 'done'",
        'worker.id = task.worker',
        'worker.id in %(ids)s',
        ids=tuple(wids),
    )
    task_query.values(
        finished=utcnow(),
        status='done',
        abort=True,
        traceback=traceback
    ).do(cn)
def run(self):
    """Execute the operation attached to this task.

    Steps, in order:

    1. stamp the task row (`rework.task`, id `self.tid`) with `started`;
    2. look up the operation's `name` and `path`, load the source file
       at `path` and call the attribute `name` with this task;
    3. on any failure in step 2, store the formatted traceback on the
       task row;
    4. always call `self.finish()`.
    """
    with self.engine.begin() as cn:
        start_query = update('rework.task').where(id=self.tid)
        start_query.values(started=utcnow()).do(cn)

    try:
        found = self.engine.execute(
            """ select name, path from rework.operation where rework.operation.id = %(operation)s """,
            {'operation': self.operation}
        ).fetchone()
        name, path = found
        module = imp.load_source('module', path)
        operation = getattr(module, name)
        operation(self)
    except:
        # catch-all (BaseException included): whatever went wrong, the
        # traceback is written to the task row and not re-raised
        with self.engine.begin() as cn:
            failure_query = update('rework.task').where(id=self.tid)
            failure_query.values(traceback=tb.format_exc()).do(cn)
    finally:
        self.finish()
def finish(self):
    """Stamp the task row with a `finished` time and set its status to 'done'."""
    with self.engine.begin() as cn:
        query = update('rework.task').where(id=self.tid)
        query.values(finished=utcnow(), status='done').do(cn)
def track_timeouts(self):
    """Abort the tasks of our workers that outlived their operation timeout.

    Does nothing when there are no workers.  Otherwise every task bound
    to one of `self.wids` whose operation declares a timeout is
    examined: when the time elapsed since `task.started` exceeds the
    parsed timeout, the task is aborted through `Task.byid(...).abort()`.

    Rows whose `started` is NULL are skipped: there is no start time to
    measure against, and calling `.astimezone` on None would raise and
    interrupt the whole sweep.
    """
    if not self.workers:
        return

    # wids are stringified and joined straight into the `in (...)` list
    sql = ('select task.id, task.started, timeout '
           'from rework.operation as op, '
           ' rework.task as task '
           'where '
           ' task.operation = op.id and '
           ' timeout is not null and '
           ' task.worker in ({})').format(
               ','.join(str(wid) for wid in self.wids)
           )

    with self.engine.begin() as cn:
        for tid, start_time, timeout in cn.execute(sql).fetchall():
            if start_time is None:
                # presumably a task already assigned to a worker but not
                # yet started -- nothing to time out
                continue
            start_time = start_time.astimezone(pytz.utc)
            delta = parse_delta(timeout)
            now = utcnow()
            if (now - start_time) > delta:
                Task.byid(self.engine, tid).abort()
def preemptive_kill(self):
    """Kill the workers flagged with `kill = true` in the database.

    Among `self.wids`, the workers marked both `kill` and `running` are
    selected; each one has its process tree killed and is then recorded
    as dead with a 'preemptive kill at <time>' message.

    Returns the list of worker ids that were actually killed.  The list
    is empty when there is no worker at all, so callers always get a
    list back.
    """
    if not self.wids:
        return []

    q = select('id').table('rework.worker').where(
        'kill = true', 'running = true'
    ).where(
        'id in %(ids)s', ids=tuple(self.wids)
    )

    killed = []
    with self.engine.begin() as cn:
        for row in q.do(cn).fetchall():
            wid = row.id
            # NOTE(review): the process is removed from self.workers
            # before the kill attempt, so a worker that could not be
            # killed is no longer tracked here -- confirm this is intended
            proc = self.workers.pop(wid)
            if not kill_process_tree(proc.pid):
                print('could not kill {}'.format(proc.pid))
                continue
            mark_dead_workers(
                cn, [wid],
                'preemptive kill at {}'.format(utcnow().astimezone(TZ))
            )
            killed.append(wid)
    return killed
def vacuum(dburi, workers=False, tasks=False, finished=None):
    """Delete non-running workers or finished tasks.

    Exactly one of `workers` / `tasks` must be set; otherwise a hint is
    printed and nothing is deleted.  `finished` is handed to the cleanup
    routine and defaults to the current UTC time.  The number of deleted
    rows is printed.
    """
    if not workers and not tasks:
        print('to cleanup old workers or tasks please use --workers or --tasks')
        return

    if workers and tasks:
        print('vacuum deletes workers or tasks, not both at the same time')
        return

    engine = create_engine(find_dburi(dburi))
    if finished is None:
        finished = utcnow()

    # only one of the two flags can still be set at this point
    if workers:
        deleted = cleanup_workers(engine, finished)
        print('deleted {} workers'.format(deleted))
    else:
        deleted = cleanup_tasks(engine, finished)
        print('deleted {} tasks'.format(deleted))
def list_monitors(dburi):
    """Print one line per row of `rework.monitor`.

    Each line shows the monitor id, its `lastseen` stamp (converted to
    `TZ`), its domain and its options.  The stamp is coloured by age:
    red when older than 60 seconds, magenta when older than 10 seconds,
    green otherwise.
    """
    init()
    engine = create_engine(find_dburi(dburi))
    sql = ('select id, domain, options, lastseen from rework.monitor')
    now = utcnow().astimezone(TZ)

    for mid, domain, options, lastseen in engine.execute(sql):
        age = (now - lastseen).total_seconds()
        if age > 60:
            color = Fore.RED
        elif age > 10:
            color = Fore.MAGENTA
        else:
            color = Fore.GREEN

        stamp = lastseen.astimezone(TZ).strftime('%Y-%m-%d %H:%M:%S%z')
        print(mid, color + stamp, end=' ')
        print(Style.RESET_ALL, end=' ')

        rendered = ', '.join(
            '{}={}'.format(key, val)
            for key, val in options.items()
        )
        print(domain, 'options({})'.format(rendered))
def dead_man_switch(self):
    """Refresh the `lastseen` stamp of this monitor's row (id `self.monid`)."""
    with self.engine.begin() as cn:
        heartbeat = update('rework.monitor').where(id=self.monid)
        heartbeat.values(lastseen=utcnow().astimezone(TZ)).do(cn)
def death_sql(self, cause):
    """Build the update recording this worker's death.

    The returned query targets the `rework.worker` row with id
    `self.wid` and sets `deathinfo` to `cause`, `running` to False and
    `finished` to the current UTC time.  It is returned, not executed.
    """
    query = update('rework.worker').where(id=self.wid)
    return query.values(
        deathinfo=cause,
        running=False,
        finished=utcnow()
    )
def running_sql(self, running):
    """Build the update that sets this worker's `running` state.

    `debugport` is always written (None when `self.debugport` is falsy).
    When `running` is truthy the current process id and a `started`
    stamp are written as well.  The query is returned, not executed.
    """
    fields = {
        'running': running,
        'debugport': self.debugport or None
    }
    if running:
        fields.update(pid=os.getpid(), started=utcnow())
    query = update('rework.worker').where(id=self.wid)
    return query.values(**fields)
def list_workers():
    """Render the monitors and the running workers as two HTML tables.

    Aborts with a 403 when the caller lacks the 'read' permission.  The
    `domain` ui argument restricts both queries to one domain unless it
    is 'all'.

    The first table lists the monitors (id, domain, last seen, options);
    the "seen last" cell is coloured by age: DarkRed above 60 seconds,
    DarkMagenta above 10 seconds, DarkGreen otherwise.  The second table
    lists the running workers with a shutdown and a kill button each; a
    button whose action was already requested shows '... asked' instead.

    Returns the markup as a string.
    """
    if not has_permission('read'):
        abort(403, 'Nothing to see there.')

    table_klass = 'table table-sm table-bordered table-striped table-hover'
    stamp_format = '%Y-%m-%d %H:%M:%S%z'

    # workers
    q = select('id', 'host', 'domain', 'pid', 'mem', 'cpu',
               'shutdown', 'kill', 'debugport', 'started').table(
                   'rework.worker'
               ).where('running = true').order('id')
    domain = uiargsdict(request.args).domain
    if domain != 'all':
        q.where(domain=domain)
    workers = q.do(engine).fetchall()

    # monitors
    q = select('id', 'domain', 'lastseen', 'options').table('rework.monitor')
    if domain != 'all':
        q.where(domain=domain)
    monitors = {row.domain: row for row in q.do(engine).fetchall()}

    now = utcnow().astimezone(TZ)

    h = HTML()
    h.br()

    # monitors table
    with h.table(klass=table_klass) as t:
        with t.thead(klass='thead-inverse') as th:
            with th.tr() as r:
                r.th('#')
                r.th('domain')
                r.th('seen last')
                r.th('options')
        for _mdomain, row in sorted(monitors.items()):
            with t.tr() as r:
                r.td(str(row.id))
                r.td(row.domain)
                delta = (now - row.lastseen).total_seconds()
                color = 'DarkGreen'
                if delta > 60:
                    color = 'DarkRed'
                elif delta > 10:
                    color = 'DarkMagenta'
                r.td(row.lastseen.astimezone(TZ).strftime(stamp_format),
                     style='color: {}'.format(color))
                r.td(', '.join('{}={}'.format(k, v)
                               for k, v in sorted(row.options.items())))

    # workers table
    with h.table(klass=table_klass) as t:
        with t.thead(klass='thead-inverse') as th:
            with th.tr() as r:
                r.th('#')
                r.th('pid@host')
                r.th('domain')
                r.th('memory (Mb)')
                r.th('cpu')
                r.th('debug port')
                r.th('started')
                r.th('action')
        for (wid, host, wdomain, pid, mem, cpu,
             shutdown, kill, debugport, started) in workers:
            with t.tr() as r:
                r.th(str(wid), scope='row')
                r.td('{}@{}'.format(pid, host))
                r.td(wdomain)
                r.td(str(mem))
                r.td(str(cpu / 100.))
                r.td(str(debugport) if debugport else '')
                if started:
                    started = started.astimezone(TZ).strftime(stamp_format)
                r.td(started or '')
                with r.td() as col:
                    with col.button() as b:
                        if shutdown:
                            # same icon classes as the 'kill asked' button
                            # (the class name used to be misspelled)
                            b('shutdown asked',
                              klass='btn glyphicon glyphicon-ban-circle')
                        else:
                            b('shutdown', type='button',
                              klass='btn btn-warning btn-sm',
                              onclick='shutdown_worker({})'.format(wid))
                    col.span(' ')
                    with col.button() as b:
                        if kill:
                            b('kill asked',
                              klass='btn glyphicon glyphicon-ban-circle')
                        else:
                            b('kill', type='button',
                              klass='btn btn-danger btn-sm',
                              onclick='kill_worker({})'.format(wid))

    return str(h)