def gc(args, options):
    """Garbage collect task(s) and task metadata.

    Usage: thermos gc [options] [task_id1 task_id2 ...]

    If tasks specified, restrict garbage collection to only those tasks,
    otherwise all tasks are considered.  The optional constraints are still
    honored.

    args: task id expressions; when empty the default collection policy
        chooses the tasks.
    options: parsed command-line options (root, max_age, max_space,
        max_tasks, keep_data, keep_logs, keep_metadata, dryrun, force).
    """
    print('Analyzing root at %s' % options.root)

    # Convert whichever limits were supplied into policy keyword arguments,
    # in the fixed order max_age, max_space, max_tasks.
    policy_kwargs = {}
    limit_parsers = (
        ('max_age', parse_time),
        ('max_space', parse_data),
        ('max_tasks', int),
    )
    for limit_name, parse in limit_parsers:
        raw_limit = getattr(options, limit_name)
        if raw_limit is not None:
            policy_kwargs[limit_name] = parse(raw_limit)
    policy_kwargs.update(
        include_metadata=not options.keep_metadata,
        include_logs=not options.keep_logs,
        verbose=True,
        logger=print)

    # NOTE(review): policy_kwargs is only handed to the default policy below;
    # the explicit-task-id branch does not use it -- confirm that matches the
    # "constraints are still honored" wording in the help text.
    if args:
        gc_tasks = list(tasks_from_re(args, state='finished'))
    else:
        print('No task ids specified, using default collector.')
        policy = GarbageCollectionPolicy(get_path_detector(), **policy_kwargs)
        gc_tasks = [(task.checkpoint_root, task.task_id) for task in policy.run()]

    if not gc_tasks:
        print('No tasks to garbage collect. Exiting')
        return

    def run_or_report(action, *action_args):
        # In dry-run mode only describe the call instead of performing it.
        if options.dryrun:
            print(' would run %s%r' % (action.__name__, action_args))
        else:
            action(*action_args)

    def disposition(keep):
        return 'keeping' if keep else 'deleting'

    # --force skips the interactive confirmation; an empty reply means "no".
    if options.force:
        answer = 'y'
    else:
        answer = raw_input("Continue [y/N]? ") or 'N'

    if answer.lower() != 'y':
        print('Cancelling gc.')
        return

    print('Running gc...')
    for checkpoint_root, task_id in gc_tasks:
        collector = TaskGarbageCollector(checkpoint_root, task_id)
        # One summary line per task, assembled from four partial prints.
        print(' Task %s ' % task_id, end='')
        print('data (%s) ' % disposition(options.keep_data), end='')
        print('logs (%s) ' % disposition(options.keep_logs), end='')
        print('metadata (%s) ' % disposition(options.keep_metadata))
        if not options.keep_data:
            run_or_report(collector.erase_data)
        if not options.keep_logs:
            run_or_report(collector.erase_logs)
        if not options.keep_metadata:
            run_or_report(collector.erase_metadata)
        print('done.')
def tail(args, options):
    """Tail the logs of a task process.

    Usage: thermos tail task_name [process_name]

    The highest-numbered run of the selected process is tailed.  If that run
    is not active its log is printed once; otherwise the log is followed and
    the process is re-checked for liveness about every five seconds as new
    lines arrive.

    args: the task id, optionally followed by a process name.
    options: parsed command-line options; ``use_stderr`` selects the stderr
        log instead of stdout.

    Exits with status 1 (message on stderr) when the task cannot be found,
    when no process matches, or when more than one process matches.
    """
    if len(args) == 0:
        app.error('Expected a task to tail, got nothing!')
    if len(args) not in (1, 2):
        app.error('Expected at most two arguments (task and optional process), got %d' % len(args))

    task_id = args[0]

    # Probe each checkpoint root in turn; the for/else reaches the error
    # branch only when no root produced a usable checkpoint.
    for root in get_path_detector().get_paths():
        detector = TaskDetector(root=root)
        checkpoint = CheckpointDispatcher.from_file(detector.get_checkpoint(task_id))
        if checkpoint:
            break
    else:
        # Errors go to stderr, consistent with the other failure paths below.
        print('ERROR: Could not find task.', file=sys.stderr)
        sys.exit(1)

    log_dir = checkpoint.header.log_dir
    process_runs = [(name, run_number)
                    for name, run_number in detector.get_process_runs(task_id, log_dir)]
    if len(args) == 2:
        wanted_process = args[1]
        process_runs = [(name, run_number)
                        for name, run_number in process_runs if name == wanted_process]

    if not process_runs:
        print('ERROR: No processes found.', file=sys.stderr)
        sys.exit(1)

    processes = set(name for name, _ in process_runs)
    if len(processes) != 1:
        print('ERROR: More than one process matches query.', file=sys.stderr)
        sys.exit(1)

    process = processes.pop()
    # Only the latest run of the process is of interest.
    run = max(run_number for _, run_number in process_runs)

    logdir = TaskPath(root=root, task_id=task_id, process=process, run=run,
                      log_dir=log_dir).getpath('process_logdir')
    logfile = os.path.join(logdir, 'stderr' if options.use_stderr else 'stdout')

    monitor = TaskMonitor(root, task_id)

    def log_is_active():
        """Return True while the selected process run is reported as active."""
        for process_status, process_run in monitor.get_active_processes():
            if process_status.process == process and process_run == run:
                return True
        return False

    if not log_is_active():
        print('Tail of terminal log %s' % logfile)
        for line in tail_closed(logfile):
            print(line.rstrip())
        return

    liveness_check_interval = 5.0  # seconds between liveness re-checks
    next_check = time.time() + liveness_check_interval
    print('Tail of active log %s' % logfile)
    for line in tail_f(logfile, include_last=True, forever=False):
        print(line.rstrip())
        # Liveness is only re-evaluated when a line has been emitted and the
        # interval has elapsed; stop following once the run has finished.
        if time.time() > next_check:
            if not log_is_active():
                break
            next_check = time.time() + liveness_check_interval
# NOTE(review): `status` is defined a second time further down in this file;
# a plain later `def` rebinds the name, so confirm which copy is meant to stay.
def status(args, options):
    """Get the status of task(s).

    Usage: thermos status [options] [task_name(s) or task_regexp(s)]

    Each argument is compiled as a regular expression and matched against the
    beginning of every task id (``re.match``); with no arguments every task
    matches.  Matching tasks are listed under "Active tasks:" and/or
    "Finished tasks:" as selected by ``options.only`` (None, 'active' or
    'finished').  ``options.verbose`` controls the per-task detail: 0 prints
    only id and owner, >0 adds state and start time, >1 adds user, ports and
    sandbox, >2 adds the process table.

    Exits with status 1 when no task was listed.
    """
    path_detector = get_path_detector()

    def format_task(detector, task_id):
        """Print the summary for one task at the requested verbosity."""
        checkpoint_filename = detector.get_checkpoint(task_id)
        checkpoint_stat = os.stat(checkpoint_filename)
        try:
            checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
        except KeyError:
            # No passwd entry for this uid: fall back to the numeric id.
            checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
        print(' %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
        if options.verbose == 0:
            print()
        if options.verbose > 0:
            state = CheckpointDispatcher.from_file(checkpoint_filename)
            if state is None or state.header is None:
                print(' - checkpoint stream CORRUPT or outdated format')
                return
            print(' state: %8s' % TaskState._VALUES_TO_NAMES.get(
                state.statuses[-1].state, 'Unknown'), end='')
            print(' start: %25s' % time.asctime(
                time.localtime(state.header.launch_time_ms / 1000.0)))
        # The deeper levels reuse `state`, which is always bound here because
        # verbose > 1 implies the verbose > 0 branch above ran to completion.
        if options.verbose > 1:
            print(' user: %s' % state.header.user, end='')
            if state.header.ports:
                print(' ports: %s' % ' '.join(
                    '%s -> %s' % (key, val) for key, val in state.header.ports.items()))
            else:
                print(' ports: None')
            print(' sandbox: %s' % state.header.sandbox)
        if options.verbose > 2:
            print(' process table:')
            for process, process_history in state.processes.items():
                print(' - %s runs: %s' % (process, len(process_history)), end='')
                last_run = process_history[-1]
                print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
                    last_run.pid or 'None',
                    last_run.return_code if last_run.return_code is not None else '',
                    time.asctime(time.localtime(last_run.stop_time))
                    if last_run.stop_time else 'None',
                    ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')))
            print()

    # Build a real list of compiled patterns.  A bare map() is a one-shot
    # iterator on Python 3: the first any() below would consume it and every
    # later task id would silently fail to match.
    matchers = [re.compile(expression) for expression in (args or ['.*'])]

    def matching_tasks(detector, task_state):
        """Return (detector, task_id) pairs in task_state that match a pattern."""
        return [(detector, t_id)
                for _, t_id in detector.get_task_ids(state=task_state)
                if any(pattern.match(t_id) for pattern in matchers)]

    active = []
    finished = []
    for root in path_detector.get_paths():
        detector = TaskDetector(root)
        active.extend(matching_tasks(detector, 'active'))
        finished.extend(matching_tasks(detector, 'finished'))

    found = False
    sections = (
        ('active', 'Active tasks:', active),
        ('finished', 'Finished tasks:', finished),
    )
    for section_name, heading, tasks in sections:
        selected = options.only is None or options.only == section_name
        if not (selected and tasks):
            continue
        print(heading)
        found = True
        for detector, task_id in tasks:
            format_task(detector, task_id)
        print()

    if not found:
        print('No tasks found.')
        sys.exit(1)
# NOTE(review): an earlier definition of `status` appears above in this file;
# this later `def` rebinds the name, so confirm whether the duplicate is wanted.
def status(args, options):
    """Get the status of task(s).

    Usage: thermos status [options] [task_name(s) or task_regexp(s)]

    Each argument is compiled as a regular expression and matched against the
    beginning of every task id (``re.match``); with no arguments every task
    matches.  Matching tasks are listed under "Active tasks:" and/or
    "Finished tasks:" as selected by ``options.only`` (None, 'active' or
    'finished').  ``options.verbose`` controls the per-task detail: 0 prints
    only id and owner, >0 adds state and start time, >1 adds user, ports and
    sandbox, >2 adds the process table.

    Exits with status 1 when no task was listed.
    """
    path_detector = get_path_detector()

    def format_task(detector, task_id):
        """Print the summary for one task at the requested verbosity."""
        checkpoint_filename = detector.get_checkpoint(task_id)
        checkpoint_stat = os.stat(checkpoint_filename)
        try:
            checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
        except KeyError:
            # No passwd entry for this uid: fall back to the numeric id.
            checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
        print(' %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
        if options.verbose == 0:
            print()
        if options.verbose > 0:
            state = CheckpointDispatcher.from_file(checkpoint_filename)
            if state is None or state.header is None:
                print(' - checkpoint stream CORRUPT or outdated format')
                return
            print(' state: %8s' % TaskState._VALUES_TO_NAMES.get(
                state.statuses[-1].state, 'Unknown'), end='')
            print(' start: %25s' % time.asctime(
                time.localtime(state.header.launch_time_ms / 1000.0)))
        # The deeper levels reuse `state`, which is always bound here because
        # verbose > 1 implies the verbose > 0 branch above ran to completion.
        if options.verbose > 1:
            print(' user: %s' % state.header.user, end='')
            if state.header.ports:
                print(' ports: %s' % ' '.join(
                    '%s -> %s' % (key, val) for key, val in state.header.ports.items()))
            else:
                print(' ports: None')
            print(' sandbox: %s' % state.header.sandbox)
        if options.verbose > 2:
            print(' process table:')
            for process, process_history in state.processes.items():
                print(' - %s runs: %s' % (process, len(process_history)), end='')
                last_run = process_history[-1]
                print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
                    last_run.pid or 'None',
                    last_run.return_code if last_run.return_code is not None else '',
                    time.asctime(time.localtime(last_run.stop_time))
                    if last_run.stop_time else 'None',
                    ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')))
            print()

    # Build a real list of compiled patterns.  A bare map() is a one-shot
    # iterator on Python 3: the first any() below would consume it and every
    # later task id would silently fail to match.
    matchers = [re.compile(expression) for expression in (args or ['.*'])]

    def matching_tasks(detector, task_state):
        """Return (detector, task_id) pairs in task_state that match a pattern."""
        return [(detector, t_id)
                for _, t_id in detector.get_task_ids(state=task_state)
                if any(pattern.match(t_id) for pattern in matchers)]

    active = []
    finished = []
    for root in path_detector.get_paths():
        detector = TaskDetector(root)
        active.extend(matching_tasks(detector, 'active'))
        finished.extend(matching_tasks(detector, 'finished'))

    found = False
    sections = (
        ('active', 'Active tasks:', active),
        ('finished', 'Finished tasks:', finished),
    )
    for section_name, heading, tasks in sections:
        selected = options.only is None or options.only == section_name
        if not (selected and tasks):
            continue
        print(heading)
        found = True
        for detector, task_id in tasks:
            format_task(detector, task_id)
        print()

    if not found:
        print('No tasks found.')
        sys.exit(1)