예제 #1
0
    def run(self):
        """Pick the finished tasks that should be garbage collected.

        Every task id reported by ``self.collector.get_finished_tasks()`` is
        measured (age, metadata size, log size, data size) and logged.  Three
        filters are then applied in order, each adding to the same result set:

        1. age:   tasks whose age is greater than ``self._max_age``;
        2. size:  walking the remaining tasks from largest age to smallest,
                  a running total of their sizes is kept and every task that
                  pushes the total above ``self._max_space`` is selected;
        3. count: walking from largest age to smallest, tasks are selected
                  while more than ``self._max_tasks`` remain unselected.

        Returns the set of ``TaskTuple`` records selected for collection.
        """
        collector = self.collector
        started = time.time()

        TaskTuple = namedtuple(
            'TaskTuple', 'task_id age metadata_size log_size data_size')

        def measure(entries):
            # Entries are pairs whose second element is a size in bytes.
            return Amount(sum(size for _, size in entries), Data.BYTES)

        candidates = []
        for task_id in collector.get_finished_tasks():
            elapsed = int(started - collector.get_age(task_id))
            age = Amount(elapsed, Time.SECONDS)
            self.log('Analyzing task %s (age: %s)... ' % (task_id, age))

            metadata_size = measure(collector.get_metadata(task_id))
            self.log('  metadata %.1fKB ' % metadata_size.as_(Data.KB))

            log_size = measure(collector.get_logs(task_id))
            self.log('  logs %.1fKB ' % log_size.as_(Data.KB))

            data_size = measure(collector.get_data(task_id))
            self.log('  data %.1fMB ' % data_size.as_(Data.MB))

            candidates.append(
                TaskTuple(task_id, age, metadata_size, log_size, data_size))

        # Filter 1: anything older than the maximum age.
        doomed = {entry for entry in candidates if entry.age > self._max_age}
        self.log('After age filter: %s tasks' % len(doomed))

        def footprint(entry):
            # Data always counts; metadata and logs only when configured.
            parts = [entry.data_size]
            if self._include_metadata:
                parts.append(entry.metadata_size)
            else:
                parts.append(Amount(0, Data.BYTES))
            if self._include_logs:
                parts.append(entry.log_size)
            else:
                parts.append(Amount(0, Data.BYTES))
            return sum(parts, Amount(0, Data.BYTES))

        # The candidate list no longer changes, so one sort serves both of
        # the remaining filters.
        oldest_first = sorted(
            candidates, key=lambda entry: entry.age, reverse=True)

        # Filter 2: space budget.
        # NOTE(review): tasks are visited from largest age to smallest, so
        # once the budget is exceeded it is the *newer* tasks that get
        # selected -- confirm this ordering is intended.
        consumed = Amount(0, Data.BYTES)
        for entry in oldest_first:
            if entry in doomed:
                continue
            consumed += footprint(entry)
            if consumed > self._max_space:
                doomed.add(entry)
        self.log('After size filter: %s tasks' % len(doomed))

        # Filter 3: cap on the number of retained tasks.
        for entry in oldest_first:
            if entry in doomed:
                continue
            if len(candidates) - len(doomed) > self._max_tasks:
                doomed.add(entry)
        self.log('After total task filter: %s tasks' % len(doomed))

        self.log('Deciding to garbage collect the following tasks:')
        if not doomed:
            self.log('   None.')
        for entry in doomed:
            self.log('   %s' % repr(entry))

        return doomed
예제 #2
0
  def run(self):
    """Decide which finished tasks to garbage collect.

    Each task id from ``self.collector.get_finished_tasks()`` is measured
    (age, metadata size, log size, data size) and the measurements are logged.
    Tasks are then selected by three successive filters that all feed one set:
    age greater than ``self._max_age``; cumulative size (walked from largest
    age to smallest) above ``self._max_space``; and, walked in the same order,
    as many further tasks as needed so that no more than ``self._max_tasks``
    stay unselected.

    Returns the set of selected ``TaskTuple`` records.
    """
    collector = self.collector
    reference_time = time.time()

    TaskTuple = namedtuple('TaskTuple', 'task_id age metadata_size log_size data_size')

    def byte_total(pairs):
      # Sum the second element of every pair and wrap it as a byte Amount.
      return Amount(sum(nbytes for _, nbytes in pairs), Data.BYTES)

    records = []
    for task_id in collector.get_finished_tasks():
      age = Amount(int(reference_time - collector.get_age(task_id)), Time.SECONDS)
      self.log('Analyzing task %s (age: %s)... ' % (task_id, age))

      metadata_size = byte_total(collector.get_metadata(task_id))
      self.log('  metadata %.1fKB ' % metadata_size.as_(Data.KB))

      log_size = byte_total(collector.get_logs(task_id))
      self.log('  logs %.1fKB ' % log_size.as_(Data.KB))

      data_size = byte_total(collector.get_data(task_id))
      self.log('  data %.1fMB ' % data_size.as_(Data.MB))

      records.append(TaskTuple(task_id, age, metadata_size, log_size, data_size))

    # Age filter.
    selected = {record for record in records if record.age > self._max_age}
    self.log('After age filter: %s tasks' % len(selected))

    def reclaimable(record):
      # Sandbox data is always counted; metadata and logs are opt-in.
      metadata_part = record.metadata_size if self._include_metadata else Amount(0, Data.BYTES)
      log_part = record.log_size if self._include_logs else Amount(0, Data.BYTES)
      return sum([record.data_size, metadata_part, log_part], Amount(0, Data.BYTES))

    # Sorted once: the record list is not modified by the filters below.
    by_age_desc = sorted(records, key=lambda record: record.age, reverse=True)

    # Size filter.
    # NOTE(review): iteration runs from largest age to smallest, so the tasks
    # selected after the budget is exceeded are the newer ones -- confirm.
    running_total = Amount(0, Data.BYTES)
    for record in by_age_desc:
      if record in selected:
        continue
      running_total += reclaimable(record)
      if running_total > self._max_space:
        selected.add(record)
    self.log('After size filter: %s tasks' % len(selected))

    # Task-count filter.
    for record in by_age_desc:
      if record in selected:
        continue
      if len(records) - len(selected) > self._max_tasks:
        selected.add(record)
    self.log('After total task filter: %s tasks' % len(selected))

    self.log('Deciding to garbage collect the following tasks:')
    if not selected:
      self.log('   None.')
    for record in selected:
      self.log('   %s' % repr(record))

    return selected
예제 #3
0
  def test_initialize(self):
    """initialize() builds exactly one TaskObserver and passes it the polling interval.

    The options object only exposes ``root``, ``mesos_root`` and
    ``polling_interval_secs``; ``TaskObserver`` is patched so that the keyword
    arguments of its single construction can be inspected.
    """
    interval = Amount(15, Time.SECONDS)

    options = Mock(spec_set=['root', 'mesos_root', 'polling_interval_secs'])
    options.root = ''
    options.mesos_root = os.path.abspath('.')
    options.polling_interval_secs = int(interval.as_(Time.SECONDS))

    observer_instance = create_autospec(spec=TaskObserver)
    patch_target = 'apache.aurora.tools.thermos_observer.TaskObserver'

    with patch(patch_target, return_value=observer_instance) as observer_cls:
      initialize(options)

      recorded_calls = observer_cls.mock_calls
      assert len(recorded_calls) == 1
      # Index 2 of a recorded call holds its keyword arguments.
      call_kwargs = recorded_calls[0][2]
      assert interval == call_kwargs['interval']
 def run_to_completion(self, runner, max_wait=Amount(10, Time.SECONDS)):
     """Block until ``runner.status`` is set or ``max_wait`` has been used up.

     The runner is polled every 100 milliseconds.  Elapsed time is tracked
     by adding the poll interval per iteration, not by reading a clock.
     Nothing is returned; callers inspect ``runner.status`` afterwards.
     """
     step = Amount(100, Time.MILLISECONDS)
     waited = Amount(0, Time.SECONDS)
     while True:
         if runner.status is not None:
             break
         if not waited < max_wait:
             break
         waited += step
         time.sleep(step.as_(Time.SECONDS))
 def run_to_completion(self, runner, max_wait=Amount(10, Time.SECONDS)):
     """Poll ``runner`` until it reports a status or the wait budget runs out.

     Polling happens in 100 millisecond steps; the budget is accounted by
     accumulating those steps rather than by measuring wall-clock time.
     The method returns nothing.
     """
     tick = Amount(100, Time.MILLISECONDS)
     elapsed = Amount(0, Time.SECONDS)
     while True:
         if runner.status is not None:
             break
         if not elapsed < max_wait:
             break
         elapsed += tick
         time.sleep(tick.as_(Time.SECONDS))
예제 #6
0
파일: garbage.py 프로젝트: songaal/aurora
    def run(self):
        """Choose which finished tasks should be garbage collected.

        For every ``(checkpoint_root, task_id)`` pair yielded by
        ``self.get_finished_tasks()`` a ``TaskGarbageCollector`` is built and
        used to record:

        * age: the time (in seconds) since the last task transition to/from
          ACTIVE/FINISHED
        * metadata_size: the size of the thermos checkpoint records for this
          task
        * log_size: the size of the stdout/stderr logs for this task's
          processes
        * data_size: the size of the sandbox of this task

        Three filters then add tasks to one result set, in this order: age
        greater than ``self._max_age``; cumulative size (walked from largest
        age to smallest) above ``self._max_space``; and, in the same order,
        further tasks while more than ``self._max_tasks`` remain unselected.

        Returns the set of selected ``TaskTuple`` records.
        """
        started = time.time()

        TaskTuple = namedtuple(
            'TaskTuple',
            'checkpoint_root task_id age metadata_size log_size data_size')

        def measure(entries):
            # Entries are pairs whose second element is a size in bytes.
            return Amount(sum(size for _, size in entries), Data.BYTES)

        candidates = []
        for checkpoint_root, task_id in self.get_finished_tasks():
            collector = TaskGarbageCollector(checkpoint_root, task_id)

            age = Amount(int(started - collector.get_age()), Time.SECONDS)
            self.log('Analyzing task %s (age: %s)... ' % (task_id, age))

            metadata_size = measure(collector.get_metadata())
            self.log('  metadata %.1fKB ' % metadata_size.as_(Data.KB))

            log_size = measure(collector.get_logs())
            self.log('  logs %.1fKB ' % log_size.as_(Data.KB))

            data_size = measure(collector.get_data())
            self.log('  data %.1fMB ' % data_size.as_(Data.MB))

            candidates.append(
                TaskTuple(checkpoint_root, task_id, age, metadata_size,
                          log_size, data_size))

        # Filter 1: anything older than the maximum age.
        doomed = {entry for entry in candidates if entry.age > self._max_age}
        self.log('After age filter: %s tasks' % len(doomed))

        def footprint(entry):
            # Data always counts; metadata and logs only when configured.
            parts = [entry.data_size]
            if self._include_metadata:
                parts.append(entry.metadata_size)
            else:
                parts.append(Amount(0, Data.BYTES))
            if self._include_logs:
                parts.append(entry.log_size)
            else:
                parts.append(Amount(0, Data.BYTES))
            return sum(parts, Amount(0, Data.BYTES))

        # The candidate list no longer changes, so one sort serves both of
        # the remaining filters.
        oldest_first = sorted(
            candidates, key=lambda entry: entry.age, reverse=True)

        # Filter 2: space budget.
        # NOTE(review): tasks are visited from largest age to smallest, so
        # once the budget is exceeded it is the *newer* tasks that get
        # selected -- confirm this ordering is intended.
        consumed = Amount(0, Data.BYTES)
        for entry in oldest_first:
            if entry in doomed:
                continue
            consumed += footprint(entry)
            if consumed > self._max_space:
                doomed.add(entry)
        self.log('After size filter: %s tasks' % len(doomed))

        # Filter 3: cap on the number of retained tasks.
        for entry in oldest_first:
            if entry in doomed:
                continue
            if len(candidates) - len(doomed) > self._max_tasks:
                doomed.add(entry)
        self.log('After total task filter: %s tasks' % len(doomed))

        self.log('Deciding to garbage collect the following tasks:')
        if not doomed:
            self.log('   None.')
        for entry in doomed:
            self.log('   %s' % repr(entry))

        return doomed