def run(self):
  """Scan all finished tasks and pick the set to garbage collect.

  Three filters are applied in sequence -- maximum age, maximum retained
  space, and maximum retained task count -- and the union of everything
  they flag is returned.
  """
  TaskTuple = namedtuple('TaskTuple', 'task_id age metadata_size log_size data_size')
  timestamp = time.time()
  candidates = []
  for task_id in self.collector.get_finished_tasks():
    task_age = Amount(int(timestamp - self.collector.get_age(task_id)), Time.SECONDS)
    self.log('Analyzing task %s (age: %s)... ' % (task_id, task_age))
    meta_bytes = Amount(sum(sz for _, sz in self.collector.get_metadata(task_id)), Data.BYTES)
    self.log(' metadata %.1fKB ' % meta_bytes.as_(Data.KB))
    log_bytes = Amount(sum(sz for _, sz in self.collector.get_logs(task_id)), Data.BYTES)
    self.log(' logs %.1fKB ' % log_bytes.as_(Data.KB))
    data_bytes = Amount(sum(sz for _, sz in self.collector.get_data(task_id)), Data.BYTES)
    self.log(' data %.1fMB ' % data_bytes.as_(Data.MB))
    candidates.append(TaskTuple(task_id, task_age, meta_bytes, log_bytes, data_bytes))

  # Filter 1: anything older than the configured maximum age.
  gc_tasks = set(t for t in candidates if t.age > self._max_age)
  self.log('After age filter: %s tasks' % len(gc_tasks))

  zero_bytes = Amount(0, Data.BYTES)

  def total_gc_size(t):
    # Metadata and log sizes count toward the space budget only when this
    # collector is configured to reclaim them.
    size = t.data_size
    size += t.metadata_size if self._include_metadata else zero_bytes
    size += t.log_size if self._include_logs else zero_bytes
    return size

  # Sorting by descending age; sorted once and reused for both passes.
  oldest_first = sorted(candidates, key=lambda t: t.age, reverse=True)

  # Filter 2: once the cumulative retained size exceeds the budget, every
  # subsequent surviving task is flagged for collection.
  total_used = Amount(0, Data.BYTES)
  for t in oldest_first:
    if t in gc_tasks:
      continue
    total_used += total_gc_size(t)
    if total_used > self._max_space:
      gc_tasks.add(t)
  self.log('After size filter: %s tasks' % len(gc_tasks))

  # Filter 3: cap the number of retained tasks.
  for t in oldest_first:
    if t not in gc_tasks and len(candidates) - len(gc_tasks) > self._max_tasks:
      gc_tasks.add(t)
  self.log('After total task filter: %s tasks' % len(gc_tasks))

  self.log('Deciding to garbage collect the following tasks:')
  if gc_tasks:
    for t in gc_tasks:
      self.log(' %s' % repr(t))
  else:
    self.log(' None.')
  return gc_tasks
def run(self):
  """Determine which finished tasks should be garbage collected.

  Tasks accumulate into the collection set through an age filter, then a
  retained-space filter, then a retained-count filter; the resulting set
  is returned.
  """
  TaskTuple = namedtuple('TaskTuple', 'task_id age metadata_size log_size data_size')
  now = time.time()

  def measure(task_id):
    # Gather the age and on-disk footprint of a single finished task.
    age = Amount(int(now - self.collector.get_age(task_id)), Time.SECONDS)
    self.log('Analyzing task %s (age: %s)... ' % (task_id, age))
    metadata_size = Amount(sum(sz for _, sz in self.collector.get_metadata(task_id)), Data.BYTES)
    self.log(' metadata %.1fKB ' % metadata_size.as_(Data.KB))
    log_size = Amount(sum(sz for _, sz in self.collector.get_logs(task_id)), Data.BYTES)
    self.log(' logs %.1fKB ' % log_size.as_(Data.KB))
    data_size = Amount(sum(sz for _, sz in self.collector.get_data(task_id)), Data.BYTES)
    self.log(' data %.1fMB ' % data_size.as_(Data.MB))
    return TaskTuple(task_id, age, metadata_size, log_size, data_size)

  tasks = [measure(task_id) for task_id in self.collector.get_finished_tasks()]

  # Pass 1: age-based selection.
  gc_tasks = set()
  for task in tasks:
    if task.age > self._max_age:
      gc_tasks.add(task)
  self.log('After age filter: %s tasks' % len(gc_tasks))

  def total_gc_size(task):
    # Only the size categories this collector actually reclaims count
    # toward the space budget.
    pieces = [task.data_size]
    if self._include_metadata:
      pieces.append(task.metadata_size)
    if self._include_logs:
      pieces.append(task.log_size)
    return sum(pieces, Amount(0, Data.BYTES))

  # Pass 2: walk tasks oldest-first, flagging everything past the point
  # where cumulative retained size exceeds the configured maximum.
  total_used = Amount(0, Data.BYTES)
  for task in sorted(tasks, key=lambda tsk: tsk.age, reverse=True):
    if task in gc_tasks:
      continue
    total_used += total_gc_size(task)
    if total_used > self._max_space:
      gc_tasks.add(task)
  self.log('After size filter: %s tasks' % len(gc_tasks))

  # Pass 3: enforce the maximum number of retained tasks.
  for task in sorted(tasks, key=lambda tsk: tsk.age, reverse=True):
    if task not in gc_tasks and len(tasks) - len(gc_tasks) > self._max_tasks:
      gc_tasks.add(task)
  self.log('After total task filter: %s tasks' % len(gc_tasks))

  self.log('Deciding to garbage collect the following tasks:')
  if not gc_tasks:
    self.log(' None.')
  else:
    for task in gc_tasks:
      self.log(' %s' % repr(task))
  return gc_tasks
def test_initialize(self):
  """Verify initialize() constructs a TaskObserver with the configured polling interval."""
  interval = Amount(15, Time.SECONDS)
  options = Mock(spec_set=['root', 'mesos_root', 'polling_interval_secs'])
  options.root = ''
  options.mesos_root = os.path.abspath('.')
  options.polling_interval_secs = int(interval.as_(Time.SECONDS))
  observer_instance = create_autospec(spec=TaskObserver)
  patcher = patch('apache.aurora.tools.thermos_observer.TaskObserver', return_value=observer_instance)
  with patcher as observer_cls:
    initialize(options)
    # Exactly one call is expected: the TaskObserver construction itself.
    assert len(observer_cls.mock_calls) == 1
    _, _, kwargs = observer_cls.mock_calls[0]
    assert kwargs['interval'] == interval
def run_to_completion(self, runner, max_wait=Amount(10, Time.SECONDS)):
  """Block until the runner reports a status, polling every 100ms, up to max_wait."""
  step = Amount(100, Time.MILLISECONDS)
  step_secs = step.as_(Time.SECONDS)
  waited = Amount(0, Time.SECONDS)
  while runner.status is None:
    if not (waited < max_wait):
      break
    waited += step
    time.sleep(step_secs)
def run_to_completion(self, runner, max_wait=Amount(10, Time.SECONDS)):
  """Wait (at most max_wait) for the runner to publish a status, sleeping 100ms between checks."""
  interval = Amount(100, Time.MILLISECONDS)
  total = Amount(0, Time.SECONDS)
  while True:
    if runner.status is not None or not (total < max_wait):
      break
    total += interval
    time.sleep(interval.as_(Time.SECONDS))
def run(self):
  """Select finished tasks for garbage collection across all checkpoint roots.

  Each candidate is measured by a per-task TaskGarbageCollector, then run
  through three successive filters: maximum age, cumulative retained
  size, and total retained task count. Returns the set of tasks chosen
  for collection.
  """
  # Per-task bookkeeping:
  #   age: seconds since the last task transition to/from ACTIVE/FINISHED
  #   metadata_size: size of the thermos checkpoint records for the task
  #   log_size: size of the stdout/stderr logs for the task's processes
  #   data_size: size of the task's sandbox
  TaskTuple = namedtuple(
      'TaskTuple', 'checkpoint_root task_id age metadata_size log_size data_size')
  now = time.time()
  tasks = []
  for checkpoint_root, task_id in self.get_finished_tasks():
    collector = TaskGarbageCollector(checkpoint_root, task_id)
    task_age = Amount(int(now - collector.get_age()), Time.SECONDS)
    self.log('Analyzing task %s (age: %s)... ' % (task_id, task_age))
    metadata_size = Amount(sum(sz for _, sz in collector.get_metadata()), Data.BYTES)
    self.log(' metadata %.1fKB ' % metadata_size.as_(Data.KB))
    log_size = Amount(sum(sz for _, sz in collector.get_logs()), Data.BYTES)
    self.log(' logs %.1fKB ' % log_size.as_(Data.KB))
    data_size = Amount(sum(sz for _, sz in collector.get_data()), Data.BYTES)
    self.log(' data %.1fMB ' % data_size.as_(Data.MB))
    tasks.append(TaskTuple(checkpoint_root, task_id, task_age, metadata_size, log_size, data_size))

  # Filter 1: age.
  gc_tasks = set(t for t in tasks if t.age > self._max_age)
  self.log('After age filter: %s tasks' % len(gc_tasks))

  def total_gc_size(t):
    # Metadata and log sizes only count against the space budget when
    # this collector is configured to reclaim them.
    components = [t.data_size]
    if self._include_metadata:
      components.append(t.metadata_size)
    if self._include_logs:
      components.append(t.log_size)
    return sum(components, Amount(0, Data.BYTES))

  by_age_desc = sorted(tasks, key=lambda t: t.age, reverse=True)

  # Filter 2: once cumulative retained size exceeds the budget, flag every
  # subsequent surviving task.
  total_used = Amount(0, Data.BYTES)
  for t in by_age_desc:
    if t in gc_tasks:
      continue
    total_used += total_gc_size(t)
    if total_used > self._max_space:
      gc_tasks.add(t)
  self.log('After size filter: %s tasks' % len(gc_tasks))

  # Filter 3: cap the number of retained tasks.
  for t in by_age_desc:
    if t not in gc_tasks and len(tasks) - len(gc_tasks) > self._max_tasks:
      gc_tasks.add(t)
  self.log('After total task filter: %s tasks' % len(gc_tasks))

  self.log('Deciding to garbage collect the following tasks:')
  if gc_tasks:
    for t in gc_tasks:
      self.log(' %s' % repr(t))
  else:
    self.log(' None.')
  return gc_tasks