Example #1
 def run(self):
     """Main run loop of the Scheduler."""
     self.timer.start()
     
     while not Status.is_final(self.status):
         if self.request:
             self.handle_request()
         
         if self.status == Status.RUNNING:
             # Clean up orphaned schedules and undead schedulers.
             # Schedule.objects.orphaned().update(scheduler=None)
             # CronSchedule.objects.orphaned().update(scheduler=None)
             
             cron = CronSchedule.objects.unclaimed()[:SCHEDULER_LIMIT]
             simple = Schedule.objects.unclaimed()[:SCHEDULER_LIMIT]
             for schedule in itertools.chain(cron, simple):
                 self.log.info('Claiming %s.' % schedule)
                 schedule.scheduler = self
                 schedule.save()
                 self.add(schedule)
         
         if not Status.is_final(self.status):
             self.wait()
             self.request = Scheduler.objects.get(pk=self.pk).request
     
     cron = self.cronschedules.all()
     simple = self.schedules.all()
     claimed_count = cron.count() + simple.count()
     if claimed_count > 0:
         self.log.info('Cleaning up %s schedules.' % claimed_count)
         cron.update(scheduler=None)
         simple.update(scheduler=None)
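
Every example on this page gates its loop or cleanup path on Status.is_final. The norc Status class itself is not part of this listing, so the following is only a minimal sketch of how such a predicate could be structured, assuming integer status constants and a project-specific set of terminal states; the names and values below are illustrative, not norc's actual ones.

class Status(object):
    # Illustrative constants only; the real norc Status class defines
    # its own values, groupings, and helpers such as Status.name().
    CREATED, RUNNING, STOPPING, SUSPENDED = 1, 2, 3, 4
    ENDED, ERROR, KILLED, HANDLED = 10, 11, 12, 13

    FINAL_STATES = frozenset([ENDED, ERROR, KILLED, HANDLED])

    @classmethod
    def is_final(cls, status):
        """True once the object can no longer change state on its own."""
        return status in cls.FINAL_STATES
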
Example #2
    def run(self):
        """Core executor function."""
        if settings.BACKUP_SYSTEM:
            self.pool = ThreadPool(self.concurrent + 1)
        self.log.info("%s is now running on host %s." % (self, self.host))

        if self.log.debug_on:
            self.resource_reporter = Thread(target=self.report_resources)
            self.resource_reporter.daemon = True
            self.resource_reporter.start()

        # Main loop.
        while not Status.is_final(self.status):
            if self.request:
                self.handle_request()

            if self.status == Status.RUNNING:
                while len(self.processes) < self.concurrent:
                    # self.log.debug("Popping instance...")
                    instance = self.queue.pop()
                    if instance:
                        # self.log.debug("Popped %s" % instance)
                        self.start_instance(instance)
                    else:
                        # self.log.debug("No instance in queue.")
                        break

            elif self.status == Status.STOPPING and len(self.processes) == 0:
                self.set_status(Status.ENDED)
                self.save(safe=True)

            # Clean up completed tasks before iterating.
            for pid, p in self.processes.items()[:]:
                p.poll()
                self.log.debug("Checking pid %s: return code %s." %
                               (pid, p.returncode))
                if p.returncode is not None:
                    i = type(p.instance).objects.get(pk=p.instance.pk)
                    if i.status == Status.CREATED:
                        self.log.info(
                            ("%s fail to initialize properly; " +
                             "entering suspension to avoid more errors.") % i)
                        self.set_status(Status.SUSPENDED)
                        self.save()
                    if not Status.is_final(i.status):
                        self.log.info(("%s ended with invalid " +
                                       "status %s, changing to ERROR.") %
                                      (i, Status.name(i.status)))
                        i.status = Status.ERROR
                        i.save()
                    self.log.info("%s ended with status %s." %
                                  (i, Status.name(i.status)))
                    del self.processes[pid]
                    if settings.BACKUP_SYSTEM:
                        self.pool.queueTask(self.backup_instance_log, [i])

            if not Status.is_final(self.status):
                self.wait(EXECUTOR_PERIOD)
                self.request = Executor.objects.get(pk=self.pk).request
Example #3
 def run(self):
     """Core executor function."""
     if settings.BACKUP_SYSTEM:
         self.pool = ThreadPool(self.concurrent + 1)
     self.log.info("%s is now running on host %s." % (self, self.host))
     
     if self.log.debug_on:
         self.resource_reporter = Thread(target=self.report_resources)
         self.resource_reporter.daemon = True
         self.resource_reporter.start()
     
     # Main loop.
     while not Status.is_final(self.status):
         if self.request:
             self.handle_request()
         
         if self.status == Status.RUNNING:
             while len(self.processes) < self.concurrent:
                 # self.log.debug("Popping instance...")
                 instance = self.queue.pop()
                 if instance:
                     # self.log.debug("Popped %s" % instance)
                     self.start_instance(instance)
                 else:
                     # self.log.debug("No instance in queue.")
                     break
         
         elif self.status == Status.STOPPING and len(self.processes) == 0:
             self.set_status(Status.ENDED)
             self.save(safe=True)
         
         # Clean up completed tasks before iterating.
         for pid, p in self.processes.items()[:]:
             p.poll()
             self.log.debug(
                 "Checking pid %s: return code %s." % (pid, p.returncode))
             if p.returncode is not None:
                 i = type(p.instance).objects.get(pk=p.instance.pk)
                 if i.status == Status.CREATED:
                     self.log.info(("%s fail to initialize properly; " +
                         "entering suspension to avoid more errors.") % i)
                     self.set_status(Status.SUSPENDED)
                     self.save()
                 if not Status.is_final(i.status):
                     self.log.info(("%s ended with invalid " +
                         "status %s, changing to ERROR.") %
                         (i, Status.name(i.status)))
                     i.status = Status.ERROR
                     i.save()
                 self.log.info("%s ended with status %s." %
                     (i, Status.name(i.status)))
                 del self.processes[pid]
                 if settings.BACKUP_SYSTEM:
                     self.pool.queueTask(self.backup_instance_log, [i])
         
         if not Status.is_final(self.status):
             self.wait(EXECUTOR_PERIOD)
             self.request = Executor.objects.get(pk=self.pk).request
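
The cleanup loop above leans on standard subprocess semantics: Popen.poll() checks the child without blocking and leaves returncode set to None while it is still running. A self-contained sketch of the same reaping pattern, using plain subprocess.Popen objects instead of norc's process wrappers:

import subprocess

processes = {}  # pid -> subprocess.Popen

def spawn(args):
    p = subprocess.Popen(args)
    processes[p.pid] = p
    return p

def reap_finished():
    """Drop every child that has exited and report its exit code."""
    # Copy the items so entries can be deleted while iterating,
    # mirroring the items()[:] idiom in the Python 2 code above.
    for pid, p in list(processes.items()):
        if p.poll() is not None:  # poll() returns None while running
            print("pid %s exited with code %s" % (pid, p.returncode))
            del processes[pid]
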
Example #4
 def report_resources(self):
     while not Status.is_final(self.status):
         time.sleep(10)
         rself = resource.getrusage(resource.RUSAGE_SELF)
         self.log.debug(rself)
         rchildren = resource.getrusage(resource.RUSAGE_CHILDREN)
         self.log.debug(rchildren)
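
report_resources simply logs the raw resource.getrusage structures every ten seconds. For reference, individual fields can be pulled out of that structure directly; the field selection below is ours, not norc's (the resource module is Unix-only):

import resource

def usage_summary():
    """Return (user CPU s, system CPU s, max RSS) for the current process."""
    r = resource.getrusage(resource.RUSAGE_SELF)
    return r.ru_utime, r.ru_stime, r.ru_maxrss
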
Example #5
 def test_kill(self):
     self.thread.start()
     wait_until(lambda: self.executor.status == Status.RUNNING, 3)
     self.assertEqual(self.executor.status, Status.RUNNING)
     self.executor.make_request(Request.KILL)
     wait_until(lambda: Status.is_final(self.executor.status), 5)
     self.assertEqual(self.executor.status, Status.KILLED)
Example #6
 def tearDown(self):
     if not Status.is_final(self._executor.status):
         print self._executor.make_request(Request.KILL)
     self.thread.join(7)
     self._executor.heart.join(7)
     assert not self.thread.isAlive()
     assert not self._executor.heart.isAlive()
Example #7
 def tearDown(self):
     if not Status.is_final(self._executor.status):
         self._executor.make_request(Request.KILL)
     self.thread.join(7)
     self._executor.heart.join(7)
     assert not self.thread.isAlive()
     assert not self._executor.heart.isAlive()
Example #8
 def test_start_stop(self):    
     self.assertEqual(self.executor.status, Status.CREATED)
     self.thread.start()
     wait_until(lambda: self.executor.status == Status.RUNNING, 3)
     self.assertEqual(self.executor.status, Status.RUNNING)
     self.executor.make_request(Request.STOP)
     wait_until(lambda: Status.is_final(self.executor.status), 5)
     self.assertEqual(self.executor.status, Status.ENDED)
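
The tests above rely on a wait_until(condition, timeout) helper that is not included in this listing. Assuming it simply polls the condition until it becomes true or the timeout expires, it could look roughly like this (the poll interval is an arbitrary choice):

import time

def wait_until(condition, timeout, interval=0.1):
    """Poll condition() until it is truthy or timeout seconds have passed."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if condition():
            return True
        time.sleep(interval)
    return bool(condition())
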
Example #9
 def is_alive(self):
     """Whether the Daemon is still alive.
     
     A Daemon is defined as alive if its status is not final and its
     last heartbeat was within the last HEARTBEAT_FAILED seconds.
     
     """
     return not Status.is_final(self.status) \
         and self.heartbeat and self.heartbeat > \
         datetime.utcnow() - timedelta(seconds=HEARTBEAT_FAILED)
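
is_alive combines two checks: the status must not be final, and the last heartbeat must fall within a HEARTBEAT_FAILED-second window. A standalone illustration of just the timestamp comparison; the threshold here is arbitrary, the real value comes from norc's settings:

from datetime import datetime, timedelta

HEARTBEAT_FAILED = 15  # seconds; illustrative value only

def heartbeat_is_fresh(heartbeat):
    """True if the given UTC timestamp is recent enough to count as alive."""
    if heartbeat is None:
        return False
    return heartbeat > datetime.utcnow() - timedelta(seconds=HEARTBEAT_FAILED)
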
Example #10
 def make_request(self, request):
     """This method is how the request field should always be set."""
     if request not in self.VALID_REQUESTS:
         return False
     if not Status.is_final(self.status):
         self.request = request
         self.save()
         self.flag.set()
         return True
     else:
         return False
Example #11
File: daemon.py Project: tml/norc
 def make_request(self, request):
     """This method is how the request field should always be set."""
     assert request in self.VALID_REQUESTS, "Invalid request: " + \
         "\"%s\" (%s)" % (Request.name(request), request)
     if not Status.is_final(self.status):
         self.request = request
         self.save()
         self.flag.set()
         return True
     else:
         return False
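
The two make_request variants differ only in how an invalid request is treated (return False versus raise an AssertionError); the rest of the pattern is the same: refuse requests once the daemon is final, otherwise record the request and wake the main loop by setting a flag. A minimal in-memory sketch of that pattern, with threading.Event standing in for the daemon's flag; the class name and fields are illustrative, not norc's:

import threading

class Requestable(object):
    VALID_REQUESTS = ("STOP", "KILL", "PAUSE", "RESUME")

    def __init__(self):
        self.request = None
        self.finished = False         # stands in for Status.is_final(...)
        self.flag = threading.Event()

    def make_request(self, request):
        if request not in self.VALID_REQUESTS:
            return False
        if self.finished:
            return False
        self.request = request
        self.flag.set()               # wake whatever is waiting on the flag
        return True
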
Example #12
    def run(self):
        """Main run loop of the Scheduler."""
        self.timer.start()

        while not Status.is_final(self.status):
            if self.request:
                self.handle_request()

            if self.status == Status.RUNNING:
                # Clean up orphaned schedules and undead schedulers.
                # Schedule.objects.orphaned().update(scheduler=None)
                # CronSchedule.objects.orphaned().update(scheduler=None)

                cron = CronSchedule.objects.unclaimed()[:SCHEDULER_LIMIT]
                simple = Schedule.objects.unclaimed()[:SCHEDULER_LIMIT]
                for schedule in itertools.chain(cron, simple):
                    self.log.info('Claiming %s.' % schedule)
                    schedule.scheduler = self
                    schedule.save()
                    self.add(schedule)
            if not Status.is_final(self.status):
                self.wait()
                self.request = Scheduler.objects.get(pk=self.pk).request
Example #13
 def heart_run(self):
     """Method to be run by the heart thread."""
     while not Status.is_final(self.status):
         start = time.time()
         
         self.heartbeat = datetime.utcnow()
         self.save(safe=True)
         
         # In case the database is slow and saving takes longer
         # than HEARTBEAT_PERIOD to complete.
         wait = HEARTBEAT_PERIOD - (time.time() - start)
         if wait > 0:
             self.heart.flag.wait(wait)
             self.heart.flag.clear()
Example #14
    def heart_run(self):
        """Method to be run by the heart thread."""
        while not Status.is_final(self.status):
            start = time.time()

            self.heartbeat = datetime.utcnow()
            self.save(safe=True)

            # In case the database is slow and saving takes longer
            # than HEARTBEAT_PERIOD to complete.
            wait = HEARTBEAT_PERIOD - (time.time() - start)
            if wait > 0:
                self.heart.flag.wait(wait)
                self.heart.flag.clear()
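
heart_run subtracts the time spent saving from the sleep so heartbeats stay roughly HEARTBEAT_PERIOD seconds apart even when the database write is slow. The same drift compensation in a self-contained form, using threading.Event.wait so the loop can also be woken early; the period here is an arbitrary stand-in for norc's setting:

import time
import threading

HEARTBEAT_PERIOD = 3  # seconds; illustrative value only

def beat_until_stopped(stop_flag, beat):
    """Call beat() every HEARTBEAT_PERIOD seconds until stop_flag is set."""
    while not stop_flag.is_set():
        start = time.time()
        beat()
        # Sleep only for whatever is left of the period.
        remaining = HEARTBEAT_PERIOD - (time.time() - start)
        if remaining > 0:
            stop_flag.wait(remaining)

Calling stop_flag.set() from another thread ends the loop after at most one more period, which is the same role the heart's flag plays above.
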
Example #15
 def run(self, instance):
     """Enqueue instances for all nodes that don't have dependencies."""
     for node in self.nodes.all():
         node_instance = JobNodeInstance.objects.create(
             node=node, job_instance=instance)
         if node_instance.can_run():
             instance.schedule.queue.push(node_instance)
     while True:
         complete = True
         for ni in instance.nodis.all():
             if not Status.is_final(ni.status):
                 complete = False
             elif Status.is_failure(ni.status):
                 return False
         if complete and instance.nodis.count() == self.nodes.count():
             return True
         time.sleep(1)
Example #16
File: job.py Project: tml/norc
 def run(self, instance):
     """Enqueue instances for all nodes that don't have dependencies."""
     for node in self.nodes.all():
         node_instance = JobNodeInstance.objects.create(
             node=node,
             job_instance=instance)
         if node_instance.can_run():
             instance.schedule.queue.push(node_instance)
     while True:
         complete = True
         for ni in instance.nodis.all():
             if not Status.is_final(ni.status):
                 complete = False
             elif Status.is_failure(ni.status):
                 return False
         if complete and instance.nodis.count() == self.nodes.count():
             return True
         time.sleep(1)
Example #17
 def start(self):
     """Starts the daemon.  Does initialization then calls run()."""
     
     if self.status != Status.CREATED:
         print "Can't start a %s that's already been run." \
             % type(self).__name__
         return
     
     if not hasattr(self, 'id'):
         self.save()
     if not hasattr(self, 'log'):
         self.log = make_log(self.log_path)
     
     if settings.DEBUG:
         self.log.info("WARNING, DEBUG is True, which means Django " +
             "will gobble memory as it stores all database queries.")
     
     # This try block is needed because the unit tests run daemons
     # in threads, which breaks signals.
     try:
         for signum in (signal.SIGINT, signal.SIGTERM):
             signal.signal(signum, self.signal_handler)
     except ValueError:
         pass
     
     self.log.start_redirect()
     self.log.info("%s initialized; starting..." % self)
     
     self.status = Status.RUNNING
     self.heartbeat = self.started = datetime.utcnow()
     self.save()
     self.heart.start()
     
     try:
         self.run()
     except Exception:
         self.set_status(Status.ERROR)
         self.log.error("An internal error occured!", trace=True)
     else:
         if not Status.is_final(self.status):
             self.set_status(Status.ENDED)
     finally:    
         self.log.info("Shutting down...")
         try:
             self.clean_up()
         except:
             self.log.error("Clean up function failed.", trace=True)
         if not Status.is_final(self.status):
             self.set_status(Status.ERROR)
         self.heart.flag.set()
         self.heart.join()
         self.ended = datetime.utcnow()
         self.save()
         if settings.BACKUP_SYSTEM:
             self.log.info('Backing up log file...')
             try:
                 if backup_log(self.log_path):
                     self.log.info('Completed log backup.')
                 else:
                     self.log.error('Failed to backup log.')
             except:
                 self.log.error('Failed to backup log.', trace=True)
         self.log.info('%s has been shut down successfully.' % self)
         self.log.stop_redirect()
         self.log.close()
Example #18
 def tearDown(self):
     if not Status.is_final(self._scheduler.status):
         self._scheduler.make_request(Request.KILL)
     self.thread.join(15)
     assert not self.thread.isAlive()
     assert not self._scheduler.timer.isAlive()
Example #19
def main():
    usage = "norc_control [executor | scheduler | host] <id | host> " + \
        "--[stop | kill | pause | resume | reload | handle] [--wait]"

    def bad_args(message):
        print message
        print usage
        sys.exit(2)

    parser = OptionParser(usage)
    parser.add_option("-s",
                      "--stop",
                      action="store_true",
                      default=False,
                      help="Send a stop request.")
    parser.add_option("-k",
                      "--kill",
                      action="store_true",
                      default=False,
                      help="Send a kill request.")
    parser.add_option("-p",
                      "--pause",
                      action="store_true",
                      default=False,
                      help="Send a pause request.")
    parser.add_option("-u",
                      "--resume",
                      action="store_true",
                      default=False,
                      help="Send an resume request.")
    parser.add_option("-r",
                      "--reload",
                      action="store_true",
                      default=False,
                      help="Send an reload request to a Scheduler.")
    parser.add_option("--handle",
                      action="store_true",
                      default=False,
                      help="Change the object's status to HANDLED.")
    parser.add_option("-f",
                      "--force",
                      action="store_true",
                      default=False,
                      help="Force the request to be made..")
    parser.add_option("-w",
                      "--wait",
                      action="store_true",
                      default=False,
                      help="Wait until the request has been responded to.")

    options, args = parser.parse_args()

    if len(args) != 2:
        bad_args("Invalid number of arguments.")

    requests = filter(lambda a: getattr(options, a.lower()),
                      Request.NAMES.values())
    if len(requests) + (1 if options.handle else 0) != 1:
        bad_args("Must request exactly one action.")
    if not options.handle:
        request = requests[0]
        req = getattr(Request, request)

    cls = None
    if args[0] in EXECUTOR_KEYWORDS:
        cls = Executor
    elif args[0] in SCHEDULER_KEYWORDS:
        cls = Scheduler
    elif args[0] in HOST_KEYWORDS:
        if options.handle:
            bad_args("Can't perform handle operation on multiple daemons.")
        daemons = MultiQuerySet(Executor, Scheduler).objects.all()
        daemons = daemons.filter(host=args[1]).status_in("active")
        if not options.force:
            daemons = daemons.filter(request=None)
        for d in daemons:
            if req in d.VALID_REQUESTS:
                d.make_request(req)
                print "%s was sent a %s request." % (d, request)
        if options.wait:
            _wait(daemons, req)
    else:
        bad_args("Invalid keyword '%s'." % args[0])

    if cls:
        name = cls.__name__
        try:
            obj_id = int(args[1])
        except ValueError:
            bad_args("Invalid id '%s'; must be an integer." % args[1])
        try:
            d = cls.objects.get(id=obj_id)
        except cls.DoesNotExist:
            print "Could not find a(n) %s with id=%s" % (name, obj_id)
        else:
            if options.handle:
                if controls.handle(d):
                    print "The error state of %s was marked as handled." % d
                else:
                    print "%s isn't in an error state." % d
            elif Status.is_final(d.status) and not options.force:
                print "%s is already in a final state." % d
            elif d.request is None or options.force:
                d.make_request(req)
                print "%s was sent a %s request." % (d, request)
                if options.wait:
                    _wait([d], req)
            else:
                print "%s already has request %s." % \
                    (d, Request.name(d.request))
Example #20
    def start(self):
        """Starts the daemon.  Does initialization then calls run()."""

        if self.status != Status.CREATED:
            print "Can't start a %s that's already been run." \
                % type(self).__name__
            return

        if not hasattr(self, 'id'):
            self.save()
        if not hasattr(self, 'log'):
            self.log = make_log(self.log_path)

        if settings.DEBUG:
            self.log.info(
                "WARNING, DEBUG is True, which means Django " +
                "will gobble memory as it stores all database queries.")

        # This try block is needed because the unit tests run daemons
        # in threads, which breaks signals.
        try:
            for signum in (signal.SIGINT, signal.SIGTERM):
                signal.signal(signum, self.signal_handler)
        except ValueError:
            pass

        self.log.start_redirect()
        self.log.info("%s initialized; starting..." % self)

        self.status = Status.RUNNING
        self.heartbeat = self.started = datetime.utcnow()
        self.save()
        self.heart.start()

        try:
            self.run()
        except Exception:
            self.set_status(Status.ERROR)
            self.log.error("An internal error occured!", trace=True)
        else:
            if not Status.is_final(self.status):
                self.set_status(Status.ENDED)
        finally:
            self.log.info("Shutting down...")
            try:
                self.clean_up()
            except:
                self.log.error("Clean up function failed.", trace=True)
            if not Status.is_final(self.status):
                self.set_status(Status.ERROR)
            self.heart.flag.set()
            self.heart.join()
            self.ended = datetime.utcnow()
            self.save()
            if settings.BACKUP_SYSTEM:
                self.log.info('Backing up log file...')
                try:
                    if backup_log(self.log_path):
                        self.log.info('Completed log backup.')
                    else:
                        self.log.error('Failed to backup log.')
                except:
                    self.log.error('Failed to backup log.', trace=True)
            self.log.info('%s has been shut down successfully.' % self)
            self.log.stop_redirect()
            self.log.close()
Example #21
def main():
    usage = "norc_control [executor | scheduler | host] <id | host> " + \
        "--[stop | kill | pause | resume | reload | handle] [--wait]"
    
    def bad_args(message):
        print message
        print usage
        sys.exit(2)
    
    parser = OptionParser(usage)
    parser.add_option("-s", "--stop", action="store_true", default=False,
        help="Send a stop request.")
    parser.add_option("-k", "--kill", action="store_true", default=False,
        help="Send a kill request.")
    parser.add_option("-p", "--pause", action="store_true", default=False,
        help="Send a pause request.")
    parser.add_option("-u", "--resume", action="store_true", default=False,
        help="Send an resume request.")
    parser.add_option("-r", "--reload", action="store_true", default=False,
        help="Send an reload request to a Scheduler.")
    parser.add_option("--handle", action="store_true", default=False,
        help="Change the object's status to HANDLED.")
    parser.add_option("-f", "--force", action="store_true", default=False,
        help="Force the request to be made..")
    parser.add_option("-w", "--wait", action="store_true", default=False,
        help="Wait until the request has been responded to.")
    
    options, args = parser.parse_args()
    
    if len(args) != 2:
        bad_args("Invalid number of arguments.")
    
    
    requests = filter(lambda a: getattr(options, a.lower()),
        Request.NAMES.values())
    if  len(requests) + (1 if options.handle else 0) != 1:
        bad_args("Must request exactly one action.")
    if not options.handle:
        request = requests[0]
        req = getattr(Request, request)
    
    cls = None
    if args[0] in EXECUTOR_KEYWORDS:
        cls = Executor
    elif args[0] in SCHEDULER_KEYWORDS:
        cls = Scheduler
    elif args[0] in HOST_KEYWORDS:
        if options.handle:
            bad_args("Can't perform handle operation on multiple daemons.")
        daemons = MultiQuerySet(Executor, Scheduler).objects.all()
        daemons = daemons.filter(host=args[1]).status_in("active")
        if not options.force:
            daemons = daemons.filter(request=None)
        for d in daemons:
            if req in d.VALID_REQUESTS:
                d.make_request(req)
                print "%s was sent a %s request." % (d, request)
        if options.wait:
            _wait(daemons, req)
    else:
        bad_args("Invalid keyword '%s'." % args[0])
    
    if cls:
        name = cls.__name__
        try:
            obj_id = int(args[1])
        except ValueError:
            bad_args("Invalid id '%s'; must be an integer." % args[1])
        try:
            d = cls.objects.get(id=obj_id)
        except cls.DoesNotExist:
            print "Could not find a(n) %s with id=%s" % (name, obj_id)
        else:
            if options.handle:
                if controls.handle(d):
                    print "The error state of %s was marked as handled." % d
                else:
                    print "%s isn't in an error state." % d
            elif Status.is_final(d.status) and not options.force:
                print "%s is already in a final state." % d
            elif d.request is None or options.force:
                d.make_request(req)
                print "%s was sent a %s request." % (d, request)
                if options.wait:
                    _wait([d], req)
            else:
                print "%s already has request %s." % \
                    (d, Request.name(d.request))