Пример #1
0
def mark_dead_workers(cn, wids, message, traceback=None):
    """Flag workers as dead and close the tasks still attached to them.

    cn: connection on which both update statements are executed.
    wids: ids of the workers to flag; nothing is executed when it is empty.
    message: text written to the workers' ``deathinfo`` column.
    traceback: optional text written to the ``traceback`` column of the
        tasks that get closed.
    """
    if wids:
        # first statement: the workers themselves are no longer running
        dead_workers = update('rework.worker').where(
            'id in %(ids)s', ids=tuple(wids)
        )
        dead_workers.values(
            running=False,
            finished=utcnow(),
            deathinfo=message
        ).do(cn)

        # second statement: every task of those workers which is not
        # already 'done' is closed and flagged as aborted
        orphan_tasks = update('rework.task as task').table(
            'rework.worker as worker'
        )
        orphan_tasks = orphan_tasks.where(
            "task.status != 'done'",
            'worker.id = task.worker',
            'worker.id in %(ids)s',
            ids=tuple(wids),
        )
        orphan_tasks.values(
            finished=utcnow(),
            status='done',
            abort=True,
            traceback=traceback
        ).do(cn)
Пример #2
0
 def run(self):
     """Run the operation referenced by ``self.operation``.

     The task row ``self.tid`` gets its ``started`` stamp first.  Then the
     operation's ``name`` and ``path`` are read from ``rework.operation``,
     the file at ``path`` is loaded as a module and its attribute ``name``
     is called with this object as sole argument.  Any exception raised on
     the way is stored as formatted text in the task's ``traceback``
     column rather than propagated.  ``self.finish()`` is always called
     at the end.
     """
     with self.engine.begin() as cn:
         stamp_start = update('rework.task').where(id=self.tid)
         stamp_start.values(started=utcnow()).do(cn)
     try:
         row = self.engine.execute("""
             select name, path
             from rework.operation
             where rework.operation.id = %(operation)s
         """, {'operation': self.operation}
         ).fetchone()
         funcname, modpath = row
         operation = getattr(imp.load_source('module', modpath), funcname)
         operation(self)
     except BaseException:
         # NOTE(review): catches everything, SystemExit and
         # KeyboardInterrupt included, exactly like a bare `except:`;
         # confirm that swallowing those is intended.
         with self.engine.begin() as cn:
             record_failure = update('rework.task').where(id=self.tid)
             record_failure.values(traceback=tb.format_exc()).do(cn)
     finally:
         self.finish()
Пример #3
0
 def finish(self):
     """Stamp the task row ``self.tid`` as finished, with status 'done'."""
     with self.engine.begin() as cn:
         closing = update('rework.task').where(id=self.tid)
         closing.values(finished=utcnow(), status='done').do(cn)
Пример #4
0
 def track_timeouts(self):
     """Abort the tasks of our workers that ran past their operation timeout.

     For every task attached to one of ``self.wids`` whose operation has a
     non-null ``timeout``, the elapsed time since ``task.started`` is
     compared with the parsed timeout; when it is exceeded the task is
     fetched with ``Task.byid`` and its ``abort()`` is called.

     Returns None.
     """
     if not self.workers:
         return
     wids = tuple(self.wids)
     if not wids:
         # the id list is what ends up in the query: an empty one would
         # render as `in ()`, which is not valid SQL
         return
     # NOTE(review): the query does not filter on task.status, so tasks
     # already finished but still attached to a live worker are examined
     # again on every call -- confirm abort() tolerates that.
     sql = ('select task.id, task.started, timeout '
            'from rework.operation as op, '
            '     rework.task as task '
            'where '
            ' task.operation = op.id and '
            ' timeout is not null and '
            ' task.worker in %(wids)s')
     with self.engine.begin() as cn:
         # the ids are bound as a tuple parameter (same `in %(ids)s`
         # convention as the other worker queries) instead of being
         # formatted into the statement text
         rows = cn.execute(sql, {'wids': wids}).fetchall()
         for tid, start_time, timeout in rows:
             if start_time is None:
                 # no start stamp yet: nothing to measure a timeout against
                 continue
             start_time = start_time.astimezone(pytz.utc)
             delta = parse_delta(timeout)
             now = utcnow()
             if (now - start_time) > delta:
                 Task.byid(self.engine, tid).abort()
Пример #5
0
    def preemptive_kill(self):
        """Kill the tracked workers whose ``kill`` flag is set in the database.

        Selects, among ``self.wids``, the workers having ``kill = true`` and
        ``running = true``; each one is removed from ``self.workers`` and its
        process tree is killed.  Workers successfully killed are marked dead
        through ``mark_dead_workers``.

        Returns the list of worker ids that were killed and marked dead;
        the list is empty when ``self.wids`` is empty.
        """
        if not self.wids:
            # same type as the normal path, so callers can always iterate
            return []
        q = select('id').table('rework.worker').where(
            'kill = true', 'running = true').where('id in %(ids)s',
                                                   ids=tuple(self.wids))
        killed = []
        with self.engine.begin() as cn:
            for row in q.do(cn).fetchall():
                wid = row.id
                # NOTE(review): the worker is dropped from self.workers
                # before the kill is attempted, so it stays untracked even
                # when the kill fails -- confirm this is intended.
                proc = self.workers.pop(wid)
                if not kill_process_tree(proc.pid):
                    print('could not kill {}'.format(proc.pid))
                    continue

                mark_dead_workers(
                    cn, [wid],
                    'preemptive kill at {}'.format(utcnow().astimezone(TZ)))
                killed.append(wid)
        return killed
Пример #6
0
def vacuum(dburi, workers=False, tasks=False, finished=None):
    """Delete non-running workers or finished tasks.

    dburi: passed through ``find_dburi`` to build the engine.
    workers / tasks: exactly one of them must be set; otherwise a usage
        message is printed and nothing is deleted.
    finished: cutoff handed to the cleanup helper; defaults to ``utcnow()``.

    Prints the number of deleted rows and returns None.
    """
    if workers and tasks:
        print('vacuum deletes workers or tasks, not both at the same time')
        return
    if not workers and not tasks:
        print('to cleanup old workers or tasks please use --workers or --tasks')
        return

    engine = create_engine(find_dburi(dburi))
    cutoff = utcnow() if finished is None else finished
    # past the two guards exactly one of the flags is set
    if workers:
        print('deleted {} workers'.format(cleanup_workers(engine, cutoff)))
    else:
        print('deleted {} tasks'.format(cleanup_tasks(engine, cutoff)))
Пример #7
0
def list_monitors(dburi):
    """Print one line per row of ``rework.monitor``.

    Each line shows the monitor id, its ``lastseen`` stamp rendered in
    ``TZ``, its domain and its options as ``key=value`` pairs.  The stamp
    is colored by age: red past 60 seconds, magenta past 10 seconds,
    green otherwise.

    dburi: passed through ``find_dburi`` to build the engine.
    """
    init()
    engine = create_engine(find_dburi(dburi))
    query = 'select id, domain, options, lastseen from rework.monitor'
    now = utcnow().astimezone(TZ)
    for mid, domain, options, lastseen in engine.execute(query):
        age = (now - lastseen).total_seconds()
        if age > 60:
            color = Fore.RED
        elif age > 10:
            color = Fore.MAGENTA
        else:
            color = Fore.GREEN

        stamp = lastseen.astimezone(TZ).strftime('%Y-%m-%d %H:%M:%S%z')
        print(mid, color + stamp, end=' ')
        print(Style.RESET_ALL, end=' ')

        pairs = ('{}={}'.format(key, val) for key, val in options.items())
        print(domain, 'options({})'.format(', '.join(pairs)))
Пример #8
0
 def dead_man_switch(self):
     """Refresh the ``lastseen`` column of monitor ``self.monid``."""
     with self.engine.begin() as cn:
         heartbeat = update('rework.monitor').where(id=self.monid)
         heartbeat.values(lastseen=utcnow().astimezone(TZ)).do(cn)
Пример #9
0
 def death_sql(self, cause):
     """Build, without executing it, the update recording this worker's death.

     cause: text stored in the ``deathinfo`` column.
     """
     stmt = update('rework.worker').where(id=self.wid)
     return stmt.values(deathinfo=cause, running=False, finished=utcnow())
Пример #10
0
 def running_sql(self, running):
     """Build, without executing it, the update of this worker's running state.

     running: value stored in the ``running`` column; when truthy the
         current process id and a ``started`` stamp are stored as well.

     A falsy ``self.debugport`` is stored as None.
     """
     fields = {'running': running, 'debugport': self.debugport or None}
     if running:
         fields.update(pid=os.getpid(), started=utcnow())
     return update('rework.worker').where(id=self.wid).values(**fields)
Пример #11
0
    def list_workers():
        """Render the monitors and the running workers as two HTML tables.

        Requires the 'read' permission (``abort(403, ...)`` is called
        otherwise).  Both tables are restricted to the domain found in the
        request arguments unless that domain is 'all'.

        Returns the generated markup as a string.
        """
        if not has_permission('read'):
            abort(403, 'Nothing to see there.')

        stamp_format = '%Y-%m-%d %H:%M:%S%z'
        table_klass = 'table table-sm table-bordered table-striped table-hover'

        # workers
        q = select('id', 'host', 'domain', 'pid', 'mem', 'cpu', 'shutdown',
                   'kill', 'debugport', 'started').table(
                       'rework.worker').where('running = true').order('id')

        domain = uiargsdict(request.args).domain
        if domain != 'all':
            q.where(domain=domain)

        workers = q.do(engine).fetchall()

        # monitors
        q = select('id', 'domain', 'lastseen',
                   'options').table('rework.monitor')
        if domain != 'all':
            q.where(domain=domain)

        monitors = {row.domain: row for row in q.do(engine).fetchall()}
        now = utcnow().astimezone(TZ)

        h = HTML()
        h.br()
        with h.table(klass=table_klass) as t:
            with t.thead(klass='thead-inverse') as th:
                with th.tr() as r:
                    r.th('#')
                    r.th('domain')
                    r.th('seen last')
                    r.th('options')
            # loop names are distinct from `domain` so the request filter
            # above is not shadowed
            for _mdomain, row in sorted(monitors.items()):
                with t.tr() as r:
                    r.td(str(row.id))
                    r.td(row.domain)

                    # color the stamp by age: red past 60s, magenta past 10s
                    delta = (now - row.lastseen).total_seconds()
                    if delta > 60:
                        color = 'DarkRed'
                    elif delta > 10:
                        color = 'DarkMagenta'
                    else:
                        color = 'DarkGreen'

                    r.td(row.lastseen.astimezone(TZ).strftime(stamp_format),
                         style='color: {}'.format(color))
                    r.td(', '.join('{}={}'.format(k, v)
                                   for k, v in sorted(row.options.items())))

        with h.table(klass=table_klass) as t:
            with t.thead(klass='thead-inverse') as th:
                with th.tr() as r:
                    r.th('#')
                    r.th('pid@host')
                    r.th('domain')
                    r.th('memory (Mb)')
                    r.th('cpu')
                    r.th('debug port')
                    r.th('started')
                    r.th('action')
            for (wid, host, wdomain, pid, mem, cpu,
                 shutdown, kill, debugport, started) in workers:
                with t.tr() as r:
                    r.th(str(wid), scope='row')
                    r.td('{}@{}'.format(pid, host))
                    r.td(wdomain)
                    r.td(str(mem))
                    r.td(str(cpu / 100.))
                    r.td(str(debugport) if debugport else '')
                    started_text = ''
                    if started:
                        started_text = started.astimezone(TZ).strftime(
                            stamp_format)
                    r.td(started_text)
                    with r.td() as col:
                        with col.button() as b:
                            if shutdown:
                                # class name fixed (was 'gltyphicon'), now
                                # matching the 'kill asked' button below
                                b('shutdown asked',
                                  klass='btn glyphicon glyphicon-ban-circle')
                            else:
                                b('shutdown',
                                  type='button',
                                  klass='btn btn-warning btn-sm',
                                  onclick='shutdown_worker({})'.format(wid))
                        col.span(' ')
                        with col.button() as b:
                            if kill:
                                b('kill asked',
                                  klass='btn glyphicon glyphicon-ban-circle')
                            else:
                                b('kill',
                                  type='button',
                                  klass='btn btn-danger btn-sm',
                                  onclick='kill_worker({})'.format(wid))

        return str(h)