Exemplo n.º 1
0
Arquivo: tmsd.py Projeto: bdotdub/norc
 def run_batch(self):
     tasks_to_run = tms_manage.get_tasks_allowed_to_run(end_completed_iterations=True, max_to_return=10)
     num_running_tasks = self.get_num_running_tasks()
     log.debug("tmsd running %s task(s), at least %s task(s) due to run" % (num_running_tasks, len(tasks_to_run)))
     need_resource_types = []
     for (task, iteration) in tasks_to_run:
         if self.__break_tasks_to_run_loop__:
             # some other thread (request_stop) doesn't want me to continue.  Stop here.
             break
         # check that there are currently sufficient resources to prevent
         # erroneously thinking this task can be run when it cannot.
         # There will be occasional cases where race conditions mean a task is not run when
         # it could be, but there are many more cases when this will save threads.
         if type(task) in need_resource_types:
             # A Task of this type already returned unavailable resources; don't check again.
             # This should be an efficiency gain for the running of Tasks to prevent
             # excessive polling of the resources table when there are likely no new resources.
             # log.info("Assuming no resources avail for Task type '%s'" % (type(task)))
             pass
         elif task.resources_available_to_run(self.get_daemon_status().get_region()):
             try:
                 self.start_task(task, iteration)
             except Exception, e:
                 log.error("Could not run Task '%s'" % (task), e)
         else:
             need_resource_types.append(type(task))
Exemplo n.º 2
0
def __handle_timeout__(*args):
    """Handle a Task timeout by delegating to __handle_signal__.

    Any positional arguments are accepted and ignored.
    NOTE(review): presumably this lets the function be installed directly
    as a signal/timer callback -- confirm against the registration site.

    Raises:
        Exception: if a Task has been started but its has_timeout()
            is false, which indicates a bug in how the timeout was armed.
    """
    global task
    sig_name = 'TIMEOUT'
    exit_code = 130
    # Identity test: 'task == None' would dispatch to the Task's own __eq__.
    if task is None:
        log.error("Received %s but Task not started yet. Stopping with exit code %s." % (sig_name, exit_code))
    elif not task.has_timeout():
        raise Exception("Task %s doesn't handle timeouts. How did you get here? BUG!" % (task))
    # Third argument marks this as a timeout for __handle_signal__.
    __handle_signal__(sig_name, exit_code, True)
Exemplo n.º 3
0
Arquivo: tmsd.py Projeto: bdotdub/norc
 def run(self):
     try:
         self.__logger__.start_redirect()
         ended_gracefully = NorcDaemon.run(self)
         self.__logger__.stop_redirect()
         return ended_gracefully
     except Exception, e:
         log.error("Error running daemon!", e)
         return False
Exemplo n.º 4
0
Arquivo: tmsd.py Projeto: bdotdub/norc
 def run(self):
     try:
         try:
             self.get_task().do_run(self.get_iteration(), self.get_daemon_status())
         except Exception, e:
             log.error("Exception propegated from task.do_run(). BAD! Bug?", e)
         except:
             log.error("Poorly thrown exception propegated from task.do_run(). BAD! Bug?")
             traceback.print_exc()
Exemplo n.º 5
0
Arquivo: tmsd.py Projeto: bdotdub/norc
 def run(self):
     """Start this daemon"""
     try:
         ended_gracefully = self.__do_run__()
         return ended_gracefully
     except Exception, e:
         self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_ERROR)
         log.error("tmsd suffered an internal error. BAD!", e)
         return False
Exemplo n.º 6
0
def rpt_queues(c):
    """Print a summary line for all SQS queues and collect one table row per queue.

    c is used only through get_all_queues().
    NOTE(review): presumably c is an SQS connection object -- confirm.

    Each row holds the queue's name, count() and get_timeout().  A queue
    whose lookup raises SQSError is logged and left out of the table.

    NOTE(review): table_data is built but not consumed within the lines
    visible here; presumably it is rendered further down -- confirm.
    """
    all_queues = c.get_all_queues()
    print "%s AWS SQS Queue(s) as of %s" % (len(all_queues), datetime.datetime.now())
    sys.stdout.write("\n")

    # The first two rows are the column titles and a separator row.
    table_data = []
    header1 = ["Name", "~ #", "Timeout"]
    header2 = ["-", "-", "-"]
    table_data.append(header1)
    table_data.append(header2)
    for q in all_queues:
        try:
            row = [get_name(q), q.count(), q.get_timeout()]
            table_data.append(row)
        except SQSError, sqse:
            # Best effort: one failing queue must not abort the whole report.
            log.error("Internal SQS error (it generates ignorable errors sometimes)" + str(sqse))
Exemplo n.º 7
0
Arquivo: tmsd.py Projeto: bdotdub/norc
 def __do_run__(self):
     """Main daemon loop.

     Marks the daemon as running, then loops reacting to the daemon status:

     * stop requested / being stopped: status becomes STOPINPROGRESS; once
       no tasks are running, status becomes ENDEDGRACEFULLY and True is
       returned.
     * kill requested / being killed: with no running tasks, ends
       gracefully and returns True; otherwise interrupt() is called on
       every running task, status becomes KILLED, and False is returned.
     * pause requested: status becomes PAUSED.

     NOTE(review): the visible lines end inside the loop body; whatever
     follows the pause branch (e.g. running tasks, sleeping) is not shown
     here -- confirm against the full source.
     """
     log.info("%s %s..." % (self.get_name(), str(self.get_daemon_status())))
     if settings.DEBUG:
         log.info("WARNING: settings.DEBUG is True: daemon will gobble up memory b/c django stores SQL queries.")
     self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_RUNNING)
     last_status = self.get_daemon_status().get_status()
     while True:
         # Log status transitions so state changes are visible in the log.
         if not last_status == self.get_daemon_status().get_status():
             log.info("tmsd state changed: %s -> %s" % (last_status, self.get_daemon_status().get_status()))
             last_status = self.get_daemon_status().get_status()
         # Replace the held status object with the result of thwart_cache() before testing request flags.
         self.__set_daemon_status__(self.get_daemon_status().thwart_cache())  # see note in this method definition
         if self.get_daemon_status().is_stop_requested() or self.get_daemon_status().is_being_stopped():
             # don't kick off more tasks, but wait for those running to finish on their own
             self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_STOPINPROGRESS)
             num_running_tasks = self.get_num_running_tasks()
             if num_running_tasks == 0:
                 log.info("tmsd stop requested and no more tasks. Ending gracefully.")
                 self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_ENDEDGRACEFULLY)
                 return True
             else:
                 log.info("tmsd stop requested; waiting for %s task(s) to finish." % (num_running_tasks))
         elif self.get_daemon_status().is_kill_requested() or self.get_daemon_status().is_being_killed():
             running_tasks = self.get_running_tasks()
             if len(running_tasks) == 0:
                 log.info("tmsd kill requested but no tasks running. Ending gracefully.")
                 self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_ENDEDGRACEFULLY)
                 return True
             else:
                 log.info(
                     "tmsd kill requested; interrupting %s task(s) and stopping immediately." % (len(running_tasks))
                 )
                 self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_KILLINPROGRESS)
                 for running_task in running_tasks:
                     # There's no way to actually interrupt python threads
                     # mark the task as ended in error, and leave it up to
                     # main() to call SIGKILL on this process.
                     log.info("interrupting task '%s'." % (running_task), indent_chars=4)
                     try:
                         running_task.interrupt()
                     except Exception, e:
                         # Keep going: one failed interrupt must not block the others.
                         log.error("Could not interrupt Task '%s'" % (running_task), e)
                 self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_KILLED)
                 return False
         elif self.get_daemon_status().is_pause_requested():
             log.info("tmsd pause requested.  Will just sit here.")
             self.get_daemon_status().set_status(tms_models.NorcDaemonStatus.STATUS_PAUSED)
Exemplo n.º 8
0
 def do_run(self, tmsd_status):
     """What's actually called by the daemon to run the Message. Don't override!"""
     try:
         try:
             self.__set_run_status__(SQSTaskRunStatus.STATUS_RUNNING, tmsd_status=tmsd_status)
             log.info("Running SQS Task '%s'" % (self))
             success = self.run()
             if success:
                 self.__set_run_status__(SQSTaskRunStatus.STATUS_SUCCESS)
                 log.info("SQS Task '%s' succeeded.\n\n" % (self))
             else:
                 raise Exception("SQS Task returned failure status. See log for details.")
         except SystemExit, se:
             # in python 2.4, SystemExit extends Exception, this is changed in 2.5 to 
             # extend BaseException, specifically so this check isn't necessary. But
             # we're using 2.4; upon upgrade, this check will be unecessary but ignorable.
             raise se
         except Exception, e:
             log.error("SQS Task failed!", e)
             log.error("\n\n", noalteration=True)
             self.__set_run_status__(SQSTaskRunStatus.STATUS_ERROR)
Exemplo n.º 9
0
def get_tasks_allowed_to_run(asof=None, end_completed_iterations=False, max_to_return=None):
    """
    Get all tasks that are allowed to run, regardless of resources available. Includes all interfaces.

    Parameters:
      asof -- time at which to evaluate eligibility; defaults to the current UTC time of each call.
      end_completed_iterations -- when true, an ephemeral iteration found to be done is marked
          done via iteration.set_done().
      max_to_return -- stop collecting once this many [Task, Iteration] pairs have been gathered;
          None means no limit.

    TODO Currently this is EXTREMELY expensive to run.  Use max_to_return or beware the slowness!
    *Slowness is due to having to independently query for each Task's latest status and parent's status.
     One approach is to query for statuses, then tasks with no statuses, then merge the two lists.
     But this only satisfies some of the criteria that this slow way uses.
     Another approach: the daemon should ask for one task at a time, like a proper queue.

    NOTE(review): no return statement is visible in this excerpt; presumably to_run is
    returned after the loop -- confirm against the full source.
    """
    if asof == None:  # need to do this here and not in arg so it updates w/ each call
        asof = datetime.datetime.utcnow()
    to_run = []  # [[Task, Iteration]...]
    for iteration in core.Iteration.get_running_iterations():
        tasks = iteration.get_job().get_tasks()
        # Assume the iteration is finished until some task shows otherwise.
        iteration_is_done = True
        for a_task in tasks:
            try:
                if not max_to_return == None and len(to_run) >= max_to_return:
                    break
                elif a_task.is_allowed_to_run(iteration, asof=asof):
                    to_run.append([a_task, iteration])
                    iteration_is_done = False
                elif iteration_is_done and end_completed_iterations and not __status_is_finished__(a_task, iteration):
                    # A task that is neither runnable nor finished keeps the iteration open.
                    iteration_is_done = False
            except Exception, e:
                # One bad task must not stop the scan; log it and move on to the next.
                log.error(
                    "Could not check if task type '%s' is due to run. Skipping.  \
                        BAD! Maybe DB is in an inconsistent state or software bug?"
                    % (a_task.__class__.__name__),
                    e,
                )

        # TODO there's a bug here! iterations end when tasks are sitting in failed state
        if iteration_is_done and end_completed_iterations and iteration.is_ephemeral():
            # this iteration has completed and should be set as such
            iteration.set_done()
        # The inner break only leaves the task loop; stop scanning iterations as well.
        if not max_to_return == None and len(to_run) >= max_to_return:
            break
Exemplo n.º 10
0
                 self.__set_run_status__(SQSTaskRunStatus.STATUS_SUCCESS)
                 log.info("SQS Task '%s' succeeded.\n\n" % (self))
             else:
                 raise Exception("SQS Task returned failure status. See log for details.")
         except SystemExit, se:
             # in python 2.4, SystemExit extends Exception, this is changed in 2.5 to 
             # extend BaseException, specifically so this check isn't necessary. But
             # we're using 2.4; upon upgrade, this check will be unecessary but ignorable.
             raise se
         except Exception, e:
             log.error("SQS Task failed!", e)
             log.error("\n\n", noalteration=True)
             self.__set_run_status__(SQSTaskRunStatus.STATUS_ERROR)
         except:
             # if the error thrown doesn't use Exception(...), ie just throws a string
             log.error("Task failed with poorly thrown exception!")
             traceback.print_exc()
             log.error("\n\n", noalteration=True)
             self.__set_run_status__(SQSTaskRunStatus.STATUS_ERROR)
     finally:
         pass
 
 def get_log_file(self):
     """Return the log file path: settings.TMS_LOG_DIR/<queue name>/<id>."""
     log_root = settings.TMS_LOG_DIR
     queue_name = self.get_queue_name()
     file_name = str(self.get_id())
     return os.path.join(log_root, queue_name, file_name)
 def get_date_enqueued(self):
     """Return this object's date_enqueued attribute."""
     return self.date_enqueued
 def get_id(self):
     """Not implemented here; always raises NotImplementedError.

     NOTE(review): presumably meant to be overridden by subclasses -- confirm.
     """
     raise NotImplementedError
 def get_queue_name(self):
Exemplo n.º 11
0
def __handle_signal__(sig_name, exit_code, timeout):
    """Record how the Task ended (if it was started) and exit the process.

    Arguments:
        sig_name: name of the signal; used only in the log messages.
        exit_code: exit status passed to sys.exit().
        timeout: when true the Task is marked via set_ended_on_timeout(),
            otherwise via set_ended_on_error().

    Raises:
        SystemExit: always, carrying exit_code.
    """
    global task, iteration, region

    # Identity tests: '== None' would dispatch to each object's own __eq__.
    if task is None or iteration is None or region is None:
        log.error("\n", noalteration=True)
        log.error("Received %s but Task not started yet. Stopping with exit code %s." % (sig_name, exit_code))
        log.error("\n", noalteration=True)
    else:
        log.error("\n", noalteration=True)
        log.error("Received %s! TMS Stopping Task with exit code %s." % (sig_name, exit_code))
        log.error("\n", noalteration=True)
        if timeout:
            task.set_ended_on_timeout(iteration, region)
        else:
            task.set_ended_on_error(iteration, region)

    # We call the normal sys.exit(), even though it trusts that whatever try: ... except block
    # is currently executing will propagate the SystemExit exception instead of handling it.
    # In Python 2.5 SystemExit does not extend Exception so only when catching all (try: ... except:)
    # would this be a problem.  But we're using Python 2.4, so *all* catchers of Exception need to
    # distinguish between Exception & SystemExit
    sys.exit(exit_code)
Exemplo n.º 12
0
def __run_task__(task, iteration, daemon_status):
    # sanity check that this Task is allowed to run
    if not task.is_active():
        raise Exception("Cannot run task '%s' b/c it does not need to be run!" % (task))
    # run the Task!
    try:
        __start_timeout_timer__()
        task.do_run(iteration, daemon_status)
        __stop_timeout_timer__()
    except SystemExit, se:
        # in python 2.4, SystemExit extends Exception, this is changed in 2.5 to 
        # extend BaseException, specifically so this check isn't necessary. But
        # we're using 2.4; upon upgrade, this check will be unecessary but ignorable.
        raise se
    except Exception, e:
        log.error("Exception propegated from task.do_run(). BAD! Bug?", e)
        raise e
    except:
        log.error("Poorly thrown exception propegated from task.do_run(). BAD! Bug?")
        traceback.print_exc()
        raise Exception("Poorly handled exception propegated from task.do_run(). BAD! Bug?")
    #

def main():
    """Command-line entry point: builds the option parser for running one Task.

    NOTE(review): only the first two option definitions are visible in
    this excerpt; the remaining options named in the usage string and the
    rest of the function are presumably defined further down -- confirm.
    """
    global task, iteration, region
    # The usage string is split with a backslash continuation inside the literal.
    parser = OptionParser("%prog --daemon_status_id <id> --iteration_id <id> \
--task_library <lib> --task_id <id> [--nice 5] [--stdout <file_name>] [--stderr <file_name>|STDOUT>] [--debug]")
    parser.add_option("--daemon_status_id", action="store", type="int"
        , help="The id of the daemon status that launched this Task")
    parser.add_option("--iteration_id", action="store", type="int"
        , help="The id of the iteration in which this Task runs")
Exemplo n.º 13
0
Arquivo: tmsd.py Projeto: bdotdub/norc
        log.info('"%s:%s" starting in new process' % (task.get_job().get_name(), task.get_name()))
        tp = TaskInProcess(task, iteration, self.get_daemon_status(), self.__log_dir__)
        tp.run()
        self.__add_running_task__(tp)

    def run(self):
        try:
            self.__logger__.start_redirect()
            ended_gracefully = NorcDaemon.run(self)
            self.__logger__.stop_redirect()
            return ended_gracefully
        except Exception, e:
            log.error("Error running daemon!", e)
            return False
        except:
            log.error("Error running daemon & it was poorly thrown!", e)
            return False


#
#
#


class TaskInThread(RunnableTask, threading.Thread):

    __logger__ = None

    def __init__(self, task, iteration, daemon_status, logger):
        self.__logger__ = logger
        RunnableTask.__init__(self, task, iteration, daemon_status)