示例#1
0
  def __init__(self):
    """Initialize the queue and spawn the main loop thread

    Upon initialization, every active task stored in the database is
    scheduled to run immediately.

    _task_queue is a priority queue ordered using Python's heapq functionality.
    Elements in _task_queue are tuples of the form (datetime, task) where
    datetime is the scheduled run time and task is a dictionary with the keys
    'id', 'interval' and 'code'.

    For concurrency safety reasons, never write to _task_queue outside the
    _loop() thread.
    """
    self._task_queue = []  # Never write to this outside the _loop thread
    self._pending_cancels = set()
    self._executor = GIPCExecutor()

    # Load previously scheduled tasks from database; they all become due now.
    now = datetime.datetime.now()

    with get_app().app_context():
      # Iterate while the application context is still active. The query is
      # presumably a lazy (Flask-)SQLAlchemy query that only hits the database
      # when iterated, so consuming it after the context has been popped would
      # run it without an application context.
      for task in Task.query.filter_by(active=True):
        new_task = {
          'id': task.id,
          'interval': task.interval,
          'code': task.code
        }
        # Writing directly to the _task_queue is safe since we haven't started
        # the _loop yet
        self._task_queue.append((now, new_task))

    # Make _task_queue a priority queue
    # NOTE(review): every entry shares the same timestamp, so tuple comparison
    # falls through to the task dicts; dicts are not orderable on Python 3.
    # Confirm the target interpreter or add a tie-breaker wherever entries are
    # pushed and popped.
    heapify(self._task_queue)

    # Spawn main loop and save writer for future communication
    (read, write) = gipc.pipe()
    self._main_thread = gevent.spawn(self._loop, read)
    self._schedule_pipe = write
    # Stop the loop greenlet when the interpreter exits
    atexit.register(self._interrupt)
示例#2
0
class Scheduler(object):
  """Interval based code execution scheduler

  Tasks are dictionaries with the following attributes:

  task = {
    'id': 'a93de0f3',
    'code': ...,  # string of Python code
    'interval': 600,  # in seconds
  }

  An interval of 0 indicates the task should only be run once.
  """

  def __init__(self):
    """Initialize the queue and spawn the main loop thread

    Upon initialization, tasks stored in the database are immediately
    scheduled.

    _task_queue is a priority queue ordered using Python's heapq functionality.
    Elements in _task_queue are tuples of the form (datetime, sequence, task)
    where datetime is the scheduled run time, sequence is a monotonically
    increasing integer that breaks ties between equal run times (so the task
    dictionaries themselves are never compared) and task is a dictionary as
    described in the Scheduler class docstring.

    For concurrency safety reasons, never write to _task_queue outside the
    _loop() thread.
    """
    self._task_queue = []  # Never write to this outside the _loop thread
    self._sequence = 0  # Next tie-breaker value for _task_queue entries
    self._pending_cancels = set()
    self._executor = GIPCExecutor()

    # Load previously scheduled tasks from database; they all become due now.
    now = datetime.datetime.now()
    for task in Task.query.filter_by(active=True):
      new_task = {
        'id': task.id,
        'interval': task.interval,
        'code': task.code
      }
      # Writing directly to the _task_queue is safe since we haven't started
      # the _loop yet
      self._task_queue.append((now, self._sequence, new_task))
      self._sequence += 1

    # Make _task_queue a priority queue
    heapify(self._task_queue)

    # Spawn main loop and save writer for future communication
    (read, write) = gipc.pipe()
    gevent.spawn(self._loop, read)
    self._schedule_pipe = write

  def schedule(self, task):
    """Pass schedule request to the main loop

    Tasks should be dictionaries with the following attributes:

    task = {
      'id': 'a93de0f3',
      'code': ...,  # string of Python code
      'interval': 600,  # in seconds
    }

    An interval of 0 indicates the task should only be run once.
    """
    self._schedule_pipe.put(('schedule', task))

  def cancel(self, task_id):
    """Pass cancel request to the main loop"""
    self._schedule_pipe.put(('cancel', task_id))

  def _schedule(self, task, next_run=None):
    """Push task onto the queue, due at next_run (defaults to now)

    Only call this from the _loop thread.
    """
    if next_run is None:
      next_run = datetime.datetime.now()
    # The sequence number keeps heap comparisons away from the task dict when
    # two entries share the same run time.
    heappush(self._task_queue, (next_run, self._sequence, task))
    self._sequence += 1

  def _cancel(self, task_id):
    """Mark task_id so its next queued run is dropped instead of executed"""
    self._pending_cancels.add(task_id)

  def _loop(self, reader):
    """Main execution loop of the scheduler.

    Due tasks are popped and handed to the executor one per iteration. When
    nothing is due, the loop waits up to half a second for a schedule or
    cancel request coming from Flask over the gipc pipe (reader) and modifies
    the queue accordingly, then waits up to another half second for a running
    task to finish.

    When a task completes with a truthy value, that value is treated as the
    finished task and it is rescheduled according to its interval. Otherwise
    the failure is written to stderr and mailed to the configured recipients.
    """
    # Maps each outstanding executor result to the task submitted for it, so
    # that a failure can be attributed to the right task.
    running = {}

    while True:
      now = datetime.datetime.now()
      if self._task_queue and self._task_queue[0][0] <= now:
        task = heappop(self._task_queue)[-1]
        if task['id'] in self._pending_cancels:
          # Consume the cancel request; the task is simply not run again
          self._pending_cancels.remove(task['id'])
        else:
          running[self._executor.submit(_execute, task)] = task
        continue

      # Check for new tasks coming from HTTP. Timeout(0.5, False) leaves the
      # block silently when no message arrives in time.
      with gevent.Timeout(0.5, False) as t:
        message = reader.get(timeout=t)
        if message[0] == 'schedule':
          self._schedule(message[1], next_run=now)
        elif message[0] == 'cancel':
          self._cancel(message[1])

      # Reschedule completed tasks
      if not running:
        gevent.sleep(0.5)
        continue
      ready = self._executor.wait(set(running), num=1, timeout=0.5)
      for result in ready:
        submitted = running.pop(result)
        if result.value:
          finished = result.value
          interval = int(finished['interval'])
          if interval:
            run_at = now + datetime.timedelta(seconds=interval)
            self._schedule(finished, next_run=run_at)
        else:
          err_msg = result.exception
          sys.stderr.write("ERROR: %s" % err_msg)
          # Report the task that was submitted for this result, not whichever
          # task the loop happened to touch last.
          email_msg = 'Task %s failed at %s\n\n%s' % (
            submitted['id'],
            datetime.datetime.now(),
            err_msg
          )
          send_mail(app.config['SCHEDULER_FAILURE_EMAILS'],
                    'Scheduler Failure',
                    email_msg)
示例#3
0
class Scheduler(object):
  """Interval based code execution scheduler

  Tasks are dictionaries with the following attributes:

  task = {
    'id': 'a93de0f3',
    'code': ...,  # string of Python code
    'interval': 600,  # in seconds
  }

  An interval of 0 indicates the task should only be run once.
  """

  def __init__(self):
    """Initialize the queue and spawn the main loop thread

    Upon initialization, tasks stored in the database are immediately
    scheduled.

    _task_queue is a priority queue ordered using Python's heapq functionality.
    Elements in _task_queue are tuples of the form (datetime, sequence, task)
    where datetime is the scheduled run time, sequence is a monotonically
    increasing integer that breaks ties between equal run times (so the task
    dictionaries themselves are never compared) and task is a dictionary as
    described in the Scheduler class docstring.

    For concurrency safety reasons, never write to _task_queue outside the
    _loop() thread.
    """
    self._task_queue = []  # Never write to this outside the _loop thread
    self._sequence = 0  # Next tie-breaker value for _task_queue entries
    self._pending_cancels = set()
    self._executor = GIPCExecutor()

    # Load previously scheduled tasks from database; they all become due now.
    now = datetime.datetime.now()

    with get_app().app_context():
      # Iterate while the application context is still active. The query is
      # presumably a lazy (Flask-)SQLAlchemy query that only hits the database
      # when iterated, so consuming it after the context has been popped would
      # run it without an application context.
      for task in Task.query.filter_by(active=True):
        new_task = {
          'id': task.id,
          'interval': task.interval,
          'code': task.code
        }
        # Writing directly to the _task_queue is safe since we haven't started
        # the _loop yet
        self._task_queue.append((now, self._sequence, new_task))
        self._sequence += 1

    # Make _task_queue a priority queue
    heapify(self._task_queue)

    # Spawn main loop and save writer for future communication
    (read, write) = gipc.pipe()
    self._main_thread = gevent.spawn(self._loop, read)
    self._schedule_pipe = write
    # Stop the loop greenlet when the interpreter exits
    atexit.register(self._interrupt)

  def schedule(self, task):
    """Pass schedule request to the main loop

    Tasks should be dictionaries with the following attributes:

    task = {
      'id': 'a93de0f3',
      'code': ...,  # string of Python code
      'interval': 600,  # in seconds
    }

    An interval of 0 indicates the task should only be run once.
    """
    self._schedule_pipe.put(('schedule', task))

  def cancel(self, task_id):
    """Pass cancel request to the main loop"""
    self._schedule_pipe.put(('cancel', task_id))

  def _schedule(self, task, next_run=None):
    """Push task onto the queue, due at next_run (defaults to now)

    Only call this from the _loop thread.
    """
    if next_run is None:
      next_run = datetime.datetime.now()
    # The sequence number keeps heap comparisons away from the task dict when
    # two entries share the same run time.
    heappush(self._task_queue, (next_run, self._sequence, task))
    self._sequence += 1

  def _cancel(self, task_id):
    """Mark task_id so its next queued run is dropped instead of executed"""
    self._pending_cancels.add(task_id)

  def _interrupt(self):
    """Kill the main loop greenlet; registered with atexit in __init__"""
    self._main_thread.kill()
    #TODO(derek): kill child threads

  def _loop(self, reader):
    """Main execution loop of the scheduler.

    Due tasks are popped and handed to the executor one per iteration. When
    nothing is due, the loop waits up to half a second for a schedule or
    cancel request coming from Flask over the gipc pipe (reader) and modifies
    the queue accordingly, then waits up to another half second for a running
    task to finish.

    When a task completes with a truthy value, that value is treated as the
    finished task and it is rescheduled according to its interval. Otherwise
    the failure is written to stderr and mailed to the configured recipients.
    """
    # Maps each outstanding executor result to the task submitted for it, so
    # that a failure can be attributed to the right task.
    running = {}

    while True:
      now = datetime.datetime.now()
      if self._task_queue and self._task_queue[0][0] <= now:
        task = heappop(self._task_queue)[-1]
        if task['id'] in self._pending_cancels:
          # Consume the cancel request; the task is simply not run again
          self._pending_cancels.remove(task['id'])
        else:
          running[self._executor.submit(_execute, task)] = task
        continue

      # Check for new tasks coming from HTTP. Timeout(0.5, False) leaves the
      # block silently when no message arrives in time.
      with gevent.Timeout(0.5, False) as t:
        message = reader.get(timeout=t)
        if message[0] == 'schedule':
          self._schedule(message[1], next_run=now)
        elif message[0] == 'cancel':
          self._cancel(message[1])

      # Reschedule completed tasks
      if not running:
        gevent.sleep(0.5)
        continue
      ready = self._executor.wait(set(running), num=1, timeout=0.5)
      for result in ready:
        submitted = running.pop(result)
        if result.value:
          finished = result.value
          interval = int(finished['interval'])
          if interval:
            run_at = now + datetime.timedelta(seconds=interval)
            self._schedule(finished, next_run=run_at)
        else:
          err_msg = result.exception
          sys.stderr.write("ERROR: %s" % err_msg)
          # Report the task that was submitted for this result, not whichever
          # task the loop happened to touch last.
          email_msg = 'Task %s failed at %s\n\n%s' % (
            submitted['id'],
            datetime.datetime.now(),
            err_msg
          )
          send_mail(get_app().config['SCHEDULER_FAILURE_EMAILS'],
                    'Scheduler Failure',
                    email_msg)