Exemplo n.º 1
0
 def __init__(self, scheduled_jobs):
     """Initialise runner state, the SIGUSR1 handler and the job queue.

     scheduled_jobs is passed unchanged to JobQueue, together with the
     module-level db and logger objects.
     """
     # PID of the process that created this runner.
     self.my_pid = os.getpid()
     self.timer_wait = None
     # Route SIGUSR1 to the class-level general handler.
     signal.signal(signal.SIGUSR1, JobRunner.sig_general_handler)
     self.job_queue = JobQueue(scheduled_jobs, db, logger)
     # Neither a quit nor a kill has been requested yet.
     self._should_quit = self._should_kill = False
     self.sleep_to = None
     # Pause/kill/warning markers all start out at zero.
     self.queue_paused_at = self.queue_killed_at = self._last_pause_warn = 0
Exemplo n.º 2
0
 def __init__(self, scheduled_jobs):
     """Initialise runner state, the SIGUSR1 handler and the job queue.

     scheduled_jobs is passed unchanged to JobQueue, together with the
     module-level db and logger objects.
     """
     # PID of the process that created this runner.
     self.my_pid = os.getpid()
     self.timer_wait = None
     # Route SIGUSR1 to the class-level general handler.
     signal.signal(signal.SIGUSR1, JobRunner.sig_general_handler)
     self.job_queue = JobQueue(scheduled_jobs, db, logger)
     # Neither a quit nor a kill has been requested yet.
     self._should_quit = self._should_kill = False
     self.sleep_to = None
     # Pause/kill/warning markers all start out at zero.
     self.queue_paused_at = self.queue_killed_at = self._last_pause_warn = 0
Exemplo n.º 3
0
def main():
    """Command-line entry point: control a running server or start one.

    Recognised long options:

      --reload, --quit, --status, --pause, --resume, --kill
          Send the corresponding command to the server over the socket,
          print the response and exit.
      --run=JOB [--with-deps]
          Send "RUNJOB JOB <0|1>" to the server and exit.
      --show-job=JOB
          Send "SHOWJOB JOB" to the server and exit.
      --config=FILE
          Import FILE as the job configuration module instead of the
          default ``scheduled_jobs`` module.
      --dump=LEVEL
          Call JobQueue.dump_jobs() on the configuration and exit.  The
          dump is performed after all options have been read, so it uses
          the --config module regardless of option order, and falls back
          to the default module when no --config is given.
      --quiet
          Do not print "Server already running".

    With none of the exiting options, the function checks that no other
    server answers on the socket, takes the master lock, starts the socket
    listener thread and runs the job loop until it returns.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], '', [
            'reload', 'quit', 'status', 'config=', 'dump=', 'run=', 'pause',
            'quiet', 'resume', 'show-job=', 'with-deps', 'kill'])
    except getopt.GetoptError:
        # NOTE(review): usage() is presumably expected to exit; otherwise
        # 'opts' is unbound below -- confirm.
        usage(1)

    # Modifier flags are collected in a first pass so that they apply no
    # matter where they appear relative to the option they modify.
    with_deps = quiet = False
    for opt, val in opts:
        if opt == '--with-deps':
            with_deps = True
        elif opt == '--quiet':
            quiet = True

    # Options that map directly onto a fixed server command.
    simple_commands = {
        '--reload': 'RELOAD',
        '--quit': 'QUIT',
        '--status': 'STATUS',
        '--pause': 'PAUSE',
        '--resume': 'RESUME',
        '--kill': 'KILL',
    }
    scheduled_jobs = None   # configuration module; None until loaded
    dump_level = None       # set when --dump is requested

    for opt, val in opts:
        if opt in simple_commands or opt in ('--run', '--show-job'):
            if opt == '--run':
                cmd = 'RUNJOB %s %i' % (val, with_deps)
            elif opt == '--show-job':
                cmd = 'SHOWJOB %s' % val
            else:
                cmd = simple_commands[opt]
            sock = SocketHandling(logger)
            try:
                print("Response: %s" % sock.send_cmd(cmd))
            except SocketHandling.Timeout:
                print("Timout contacting server, is it running?")
            sys.exit(0)
        elif opt == '--config':
            conf_dir, conf_file = os.path.split(val)
            # The directory is deliberately left on sys.path: the original
            # author noted that removing it again makes a later
            # reload(module) load another file.
            sys.path.insert(0, conf_dir or '.')
            # Strip the extension (if any) to get the module name.
            name = os.path.splitext(conf_file)[0]
            # Import by name instead of exec'ing a constructed statement:
            # no code is built from the command-line value.
            # sys.modules[name] is the module itself, also for dotted names.
            __import__(name)
            scheduled_jobs = sys.modules[name]
        elif opt == '--dump':
            # Deferred until the configuration module is known; dumping
            # here would fail when --config comes later or is absent.
            dump_level = int(val)

    if scheduled_jobs is None:
        import scheduled_jobs

    if dump_level is not None:
        JobQueue.dump_jobs(scheduled_jobs, dump_level)
        sys.exit(0)

    sock = SocketHandling(logger)
    ca = CallableAction()
    ca.set_id("master_jr_lock")
    try:
        if sock.ping_server():
            if not quiet:
                print("Server already running")
            sys.exit(1)
        try:
            ca.check_lockfile()
        except LockExists:
            logger.error(
                ("%s: Master lock exists, but jr-socket didn't respond to " +
                 "ping. This should be a very rare error!") %
                ca.lockfile_name)
            sys.exit(1)
        ca.make_lockfile()
    except SocketHandling.Timeout:
        # Assuming that previous run aborted without removing socket
        logger.warn("Socket timeout, assuming server is dead")
        try:
            os.unlink(cereconf.JOB_RUNNER_SOCKET)
        except OSError:
            # Best effort: the stale socket may already be gone.
            pass
        # NOTE(review): on this path neither check_lockfile() nor
        # make_lockfile() has run, yet free_lock() is still called at the
        # end -- confirm that this is intended.

    jr = JobRunner(scheduled_jobs)
    # The listener runs as a daemon thread so it never blocks shutdown.
    socket_thread = threading.Thread(target=sock.start_listener, args=(jr,))
    socket_thread.setDaemon(True)
    socket_thread.setName("socket_thread")
    socket_thread.start()

    jr.run_job_loop()
    logger.debug("bye")
    sock.cleanup()
    ca.free_lock()
Exemplo n.º 4
0
class JobRunner(object):
    def __init__(self, scheduled_jobs):
        """Initialise runner state, the SIGUSR1 handler and the job queue.

        scheduled_jobs is passed unchanged to JobQueue, together with the
        module-level db and logger objects.
        """
        # PID of the process that created this runner.
        self.my_pid = os.getpid()
        # No signal-sleep timer is pending (see signal_sleep).
        self.timer_wait = None
        # Route SIGUSR1 to the class-level general handler.
        signal.signal(signal.SIGUSR1, JobRunner.sig_general_handler)
        self.job_queue = JobQueue(scheduled_jobs, db, logger)
        # Neither a quit nor a kill has been requested yet.
        self._should_quit = self._should_kill = False
        self.sleep_to = None
        # Pause/kill/warning markers all start out at zero.
        self.queue_paused_at = self.queue_killed_at = self._last_pause_warn = 0

    @staticmethod
    def sig_general_handler(signum, frame):
        """Signal handler that only logs the signal number.

        Meant for the places where signal.pause() is used; the handler
        itself does nothing beyond the debug2 log line.
        """
        message = "siggeneral_handler(%s)" % str(signum)
        logger.debug2(message)

    def signal_sleep(self, seconds):
        """Schedule a wake-up of the runner in `seconds` seconds.

        Starts a daemon threading.Timer that calls
        self.wake_runner_signal when it fires, and stores the expected
        wake-up time (time.time() + seconds) in self.sleep_to.  Only one
        timer is kept at a time: if self.timer_wait is already set, the
        call just logs that fact and changes nothing.

        The check and update are done while holding the module-level
        runner_cw, which is released even if starting the timer fails.
        """
        # SIGALRM is already used by the SocketThread, so we arrange
        # for a SIGUSR1 to be delivered instead
        runner_cw.acquire()
        try:
            if not self.timer_wait:  # Only have one signal-sleep thread
                logger.debug("Signalling sleep: %s seconds" % str(seconds))
                self.timer_wait = threading.Timer(seconds,
                                                  self.wake_runner_signal)
                self.timer_wait.setDaemon(True)
                self.timer_wait.start()
                self.sleep_to = time.time() + seconds
            else:
                logger.debug("already doing a signal sleep")
        finally:
            # Never leave runner_cw held: an exception above would
            # otherwise block every other user of it forever.
            runner_cw.release()

    def handle_completed_jobs(self):
        """Handle any completed jobs (only jobs that has
        call != None).  Will block if any of the jobs has wait=1"""
        did_wait = False

        logger.debug("handle_completed_jobs: ")
        for job in self.job_queue.get_running_jobs():
            try:
                ret = job['call'].cond_wait(job['pid'])
            except OSError, msg:
                if not str(msg).startswith("[Errno 4]"):
                    # 4 = "Interrupted system call", which we may get
                    # as we catch SIGCHLD
                    # TODO: We need to filter out false positives from being
                    # logged:
                    logger.error("error (%s): %s" % (job['name'], msg))
                time.sleep(1)
                continue
            logger.debug2("cond_wait(%s) = %s" % (job['name'], ret))
            if ret is None:          # Job not completed
                job_def = self.job_queue.get_known_job(job['name'])
                if job_def.max_duration is not None:
                    run_for = time.time() - job['started']
                    if run_for > job_def.max_duration:
                        # We sleep a little so that we don't risk entering
                        # a tight loop with lots of logging
                        time.sleep(1)
                        logger.error("%s (pid %d) has run for %d seconds, "
                                     "sending SIGTERM" %
                                     (job['name'], job['pid'], run_for))
                        try:
                            os.kill(job['pid'], signal.SIGTERM)
                            # By setting did_wait to True, the main loop
                            # will immediately call this function again to
                            # reap the job we just killed.  (If we don't,
                            # the SIGCHLD may be delivered before we reach
                            # sigpause)
                            did_wait = True
                        except OSError, msg:
                            # Don't die if we're not allowed to kill
                            # the job. The reason is probably that the
                            # process is run by root (sudo)
                            logger.error("Couldn't kill job %s (pid %d): %s" %
                                         (job['name'], job['pid'], msg))
            else:
Exemplo n.º 5
0
def main():
    """Command-line entry point: control a running server or start one.

    Recognised long options:

      --reload, --quit, --status, --pause, --resume, --kill
          Send the corresponding command to the server over the socket,
          print the response and exit.
      --run=JOB [--with-deps]
          Send "RUNJOB JOB <0|1>" to the server and exit.
      --show-job=JOB
          Send "SHOWJOB JOB" to the server and exit.
      --config=FILE
          Import FILE as the job configuration module instead of the
          default ``scheduled_jobs`` module.
      --dump=LEVEL
          Call JobQueue.dump_jobs() on the configuration and exit.  The
          dump is performed after all options have been read, so it uses
          the --config module regardless of option order, and falls back
          to the default module when no --config is given.
      --quiet
          Do not print "Server already running".

    With none of the exiting options, the function checks that no other
    server answers on the socket, takes the master lock, starts the socket
    listener thread and runs the job loop until it returns.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], '', [
            'reload', 'quit', 'status', 'config=', 'dump=', 'run=', 'pause',
            'quiet', 'resume', 'show-job=', 'with-deps', 'kill'
        ])
    except getopt.GetoptError:
        # NOTE(review): usage() is presumably expected to exit; otherwise
        # 'opts' is unbound below -- confirm.
        usage(1)

    # Modifier flags are collected in a first pass so that they apply no
    # matter where they appear relative to the option they modify.
    with_deps = quiet = False
    for opt, val in opts:
        if opt == '--with-deps':
            with_deps = True
        elif opt == '--quiet':
            quiet = True

    # Options that map directly onto a fixed server command.
    simple_commands = {
        '--reload': 'RELOAD',
        '--quit': 'QUIT',
        '--status': 'STATUS',
        '--pause': 'PAUSE',
        '--resume': 'RESUME',
        '--kill': 'KILL',
    }
    scheduled_jobs = None   # configuration module; None until loaded
    dump_level = None       # set when --dump is requested

    for opt, val in opts:
        if opt in simple_commands or opt in ('--run', '--show-job'):
            if opt == '--run':
                cmd = 'RUNJOB %s %i' % (val, with_deps)
            elif opt == '--show-job':
                cmd = 'SHOWJOB %s' % val
            else:
                cmd = simple_commands[opt]
            sock = SocketHandling(logger)
            try:
                print("Response: %s" % sock.send_cmd(cmd))
            except SocketHandling.Timeout:
                print("Timout contacting server, is it running?")
            sys.exit(0)
        elif opt == '--config':
            conf_dir, conf_file = os.path.split(val)
            # The directory is deliberately left on sys.path: the original
            # author noted that removing it again makes a later
            # reload(module) load another file.
            sys.path.insert(0, conf_dir or '.')
            # Strip the extension (if any) to get the module name.
            name = os.path.splitext(conf_file)[0]
            # Import by name instead of exec'ing a constructed statement:
            # no code is built from the command-line value.
            # sys.modules[name] is the module itself, also for dotted names.
            __import__(name)
            scheduled_jobs = sys.modules[name]
        elif opt == '--dump':
            # Deferred until the configuration module is known; dumping
            # here would fail when --config comes later or is absent.
            dump_level = int(val)

    if scheduled_jobs is None:
        import scheduled_jobs

    if dump_level is not None:
        JobQueue.dump_jobs(scheduled_jobs, dump_level)
        sys.exit(0)

    sock = SocketHandling(logger)
    ca = CallableAction()
    ca.set_id("master_jr_lock")
    try:
        if sock.ping_server():
            if not quiet:
                print("Server already running")
            sys.exit(1)
        try:
            ca.check_lockfile()
        except LockExists:
            logger.error(
                ("%s: Master lock exists, but jr-socket didn't respond to " +
                 "ping. This should be a very rare error!") % ca.lockfile_name)
            sys.exit(1)
        ca.make_lockfile()
    except SocketHandling.Timeout:
        # Assuming that previous run aborted without removing socket
        logger.warn("Socket timeout, assuming server is dead")
        try:
            os.unlink(cereconf.JOB_RUNNER_SOCKET)
        except OSError:
            # Best effort: the stale socket may already be gone.
            pass
        # NOTE(review): on this path neither check_lockfile() nor
        # make_lockfile() has run, yet free_lock() is still called at the
        # end -- confirm that this is intended.

    jr = JobRunner(scheduled_jobs)
    # The listener runs as a daemon thread so it never blocks shutdown.
    socket_thread = threading.Thread(target=sock.start_listener,
                                     args=(jr, ))
    socket_thread.setDaemon(True)
    socket_thread.setName("socket_thread")
    socket_thread.start()

    jr.run_job_loop()
    logger.debug("bye")
    sock.cleanup()
    ca.free_lock()
Exemplo n.º 6
0
class JobRunner(object):
    def __init__(self, scheduled_jobs):
        """Initialise runner state, the SIGUSR1 handler and the job queue.

        scheduled_jobs is passed unchanged to JobQueue, together with the
        module-level db and logger objects.
        """
        # PID of the process that created this runner.
        self.my_pid = os.getpid()
        # No signal-sleep timer is pending (see signal_sleep).
        self.timer_wait = None
        # Route SIGUSR1 to the class-level general handler.
        signal.signal(signal.SIGUSR1, JobRunner.sig_general_handler)
        self.job_queue = JobQueue(scheduled_jobs, db, logger)
        # Neither a quit nor a kill has been requested yet.
        self._should_quit = self._should_kill = False
        self.sleep_to = None
        # Pause/kill/warning markers all start out at zero.
        self.queue_paused_at = self.queue_killed_at = self._last_pause_warn = 0

    @staticmethod
    def sig_general_handler(signum, frame):
        """Signal handler that only logs the signal number.

        Meant for the places where signal.pause() is used; the handler
        itself does nothing beyond the debug2 log line.
        """
        message = "siggeneral_handler(%s)" % str(signum)
        logger.debug2(message)

    def signal_sleep(self, seconds):
        """Schedule a wake-up of the runner in `seconds` seconds.

        Starts a daemon threading.Timer that calls
        self.wake_runner_signal when it fires, and stores the expected
        wake-up time (time.time() + seconds) in self.sleep_to.  Only one
        timer is kept at a time: if self.timer_wait is already set, the
        call just logs that fact and changes nothing.

        The check and update are done while holding the module-level
        runner_cw, which is released even if starting the timer fails.
        """
        # SIGALRM is already used by the SocketThread, so we arrange
        # for a SIGUSR1 to be delivered instead
        runner_cw.acquire()
        try:
            if not self.timer_wait:  # Only have one signal-sleep thread
                logger.debug("Signalling sleep: %s seconds" % str(seconds))
                self.timer_wait = threading.Timer(seconds,
                                                  self.wake_runner_signal)
                self.timer_wait.setDaemon(True)
                self.timer_wait.start()
                self.sleep_to = time.time() + seconds
            else:
                logger.debug("already doing a signal sleep")
        finally:
            # Never leave runner_cw held: an exception above would
            # otherwise block every other user of it forever.
            runner_cw.release()

    def handle_completed_jobs(self):
        """Handle any completed jobs (only jobs that has
        call != None).  Will block if any of the jobs has wait=1"""
        did_wait = False

        logger.debug("handle_completed_jobs: ")
        for job in self.job_queue.get_running_jobs():
            try:
                ret = job['call'].cond_wait(job['pid'])
            except OSError, msg:
                if not str(msg).startswith("[Errno 4]"):
                    # 4 = "Interrupted system call", which we may get
                    # as we catch SIGCHLD
                    # TODO: We need to filter out false positives from being
                    # logged:
                    logger.error("error (%s): %s" % (job['name'], msg))
                time.sleep(1)
                continue
            logger.debug2("cond_wait(%s) = %s" % (job['name'], ret))
            if ret is None:  # Job not completed
                job_def = self.job_queue.get_known_job(job['name'])
                if job_def.max_duration is not None:
                    run_for = time.time() - job['started']
                    if run_for > job_def.max_duration:
                        # We sleep a little so that we don't risk entering
                        # a tight loop with lots of logging
                        time.sleep(1)
                        logger.error("%s (pid %d) has run for %d seconds, "
                                     "sending SIGTERM" %
                                     (job['name'], job['pid'], run_for))
                        try:
                            os.kill(job['pid'], signal.SIGTERM)
                            # By setting did_wait to True, the main loop
                            # will immediately call this function again to
                            # reap the job we just killed.  (If we don't,
                            # the SIGCHLD may be delivered before we reach
                            # sigpause)
                            did_wait = True
                        except OSError, msg:
                            # Don't die if we're not allowed to kill
                            # the job. The reason is probably that the
                            # process is run by root (sudo)
                            logger.error("Couldn't kill job %s (pid %d): %s" %
                                         (job['name'], job['pid'], msg))
            else: