def curses_main(stdscr, cmd_autostart_plotting, cmd_autostart_archiving, cfg):
    """Run the interactive curses UI loop.

    Polls running plot jobs, optionally starts new plots and archive
    transfers, and redraws a four-pane display (header / jobs / dirs / log)
    until the user presses 'q'.

    Args:
        stdscr: the curses standard screen, as provided by curses.wrapper.
        cmd_autostart_plotting: command-line override for whether plotting
            starts active; None means "defer to the config file".
        cmd_autostart_archiving: same, for archiving; None defers to config.
        cfg: validated plotman configuration object.
    """
    log = Log()

    # Command-line flags win over the config file for the initial
    # plotting/archiving toggle state.
    if cmd_autostart_plotting is not None:
        plotting_active = cmd_autostart_plotting
    else:
        plotting_active = cfg.commands.interactive.autostart_plotting

    archiving_configured = cfg.archiving is not None

    # Archiving can only be active if it is configured at all.
    if not archiving_configured:
        archiving_active = False
    elif cmd_autostart_archiving is not None:
        archiving_active = cmd_autostart_archiving
    else:
        archiving_active = cfg.commands.interactive.autostart_archiving

    plotting_status = '<startup>'    # todo rename these msg?
    archiving_status = '<startup>'

    stdscr.nodelay(True)  # make getch() non-blocking
    stdscr.timeout(2000)  # getch() waits at most 2s, which paces the redraw loop

    # Create windows.  We'll size them in the main loop when we have their
    # content.
    header_win = curses.newwin(1, 1, 1, 0)
    log_win = curses.newwin(1, 1, 1, 0)
    jobs_win = curses.newwin(1, 1, 1, 0)
    dirs_win = curses.newwin(1, 1, 1, 0)

    jobs = Job.get_running_jobs(cfg.logging.plots)
    last_refresh = None

    pressed_key = ''   # For debugging

    archdir_freebytes = None
    aging_reason = None  # most recent non-stagger reason a plot was delayed

    while True:
        # A full refresh scans for and reads info for running jobs from
        # scratch (i.e., reread their logfiles).  Otherwise we'll only
        # initialize new jobs, and mostly rely on cached info.
        do_full_refresh = False
        elapsed = 0    # Time since last refresh, or zero if no prev. refresh
        if last_refresh is None:
            do_full_refresh = True
        else:
            elapsed = (datetime.datetime.now() - last_refresh).total_seconds()
            do_full_refresh = elapsed >= cfg.scheduling.polling_time_s

        if not do_full_refresh:
            jobs = Job.get_running_jobs(cfg.logging.plots, cached_jobs=jobs)
        else:
            last_refresh = datetime.datetime.now()
            jobs = Job.get_running_jobs(cfg.logging.plots)

            if plotting_active:
                (started, msg) = manager.maybe_start_new_plot(
                    cfg.directories, cfg.scheduling, cfg.plotting, cfg.logging)
                if (started):
                    # Flush the deferred "why we were waiting" message before
                    # logging the start, so the log reads chronologically.
                    if aging_reason is not None:
                        log.log(aging_reason)
                        aging_reason = None
                    log.log(msg)
                    plotting_status = '<just started job>'
                    jobs = Job.get_running_jobs(cfg.logging.plots,
                                                cached_jobs=jobs)
                else:
                    # If a plot is delayed for any reason other than stagger,
                    # log it
                    if msg.find("stagger") < 0:
                        aging_reason = msg
                    plotting_status = msg

            if archiving_configured:
                if archiving_active:
                    archiving_status, log_messages = \
                        archive.spawn_archive_process(
                            cfg.directories, cfg.archiving, cfg.logging, jobs)
                    for log_message in log_messages:
                        log.log(log_message)

                # Free-space info is refreshed even when archiving is paused,
                # so the dirs pane stays current.
                archdir_freebytes, log_messages = \
                    archive.get_archdir_freebytes(cfg.archiving)
                for log_message in log_messages:
                    log.log(log_message)

        # Get terminal size.  Recommended method is stdscr.getmaxyx(), but this
        # does not seem to work on some systems.  It may be a bug in Python
        # curses, maybe having to do with registering sigwinch handlers in
        # multithreaded environments.  See e.g.
        # https://stackoverflow.com/questions/33906183#33906270
        # Alternative option is to call out to `stty size`.  For now, we
        # support both strategies, selected by a config option.
        # TODO: also try shutil.get_terminal_size()
        n_rows: int
        n_cols: int
        if cfg.user_interface.use_stty_size:
            completed_process = subprocess.run(['stty', 'size'], check=True,
                                               encoding='utf-8',
                                               stdout=subprocess.PIPE)
            elements = completed_process.stdout.split()
            (n_rows, n_cols) = [int(v) for v in elements]
        else:
            (n_rows, n_cols) = map(int, stdscr.getmaxyx())

        stdscr.clear()
        stdscr.resize(n_rows, n_cols)
        curses.resize_term(n_rows, n_cols)

        #
        # Obtain and measure content
        #

        # Directory prefixes, for abbreviation
        tmp_prefix = os.path.commonpath(cfg.directories.tmp)
        dst_dir = cfg.directories.get_dst_directories()
        dst_prefix = os.path.commonpath(dst_dir)
        if archiving_configured:
            # NOTE(review): archdir_freebytes is only None before the first
            # full refresh; the first loop iteration always does a full
            # refresh, so this is populated by the time we get here.
            archive_directories = archdir_freebytes.keys()
            if len(archive_directories) == 0:
                arch_prefix = ''
            else:
                arch_prefix = os.path.commonpath(archive_directories)

        n_tmpdirs = len(cfg.directories.tmp)

        # Directory reports.
        tmp_report = reporting.tmp_dir_report(
            jobs, cfg.directories, cfg.scheduling, n_cols, 0, n_tmpdirs,
            tmp_prefix)
        dst_report = reporting.dst_dir_report(
            jobs, dst_dir, n_cols, dst_prefix)
        if archiving_configured:
            arch_report = reporting.arch_dir_report(archdir_freebytes, n_cols,
                                                    arch_prefix)
            if not arch_report:
                arch_report = '<no archive dir info>'
        else:
            arch_report = '<archiving not configured>'

        #
        # Layout
        #

        tmp_h = len(tmp_report.splitlines())
        tmp_w = len(max(tmp_report.splitlines(), key=len)) + 1
        dst_h = len(dst_report.splitlines())
        dst_w = len(max(dst_report.splitlines(), key=len)) + 1
        arch_h = len(arch_report.splitlines()) + 1  # +1 for the pane title row
        arch_w = n_cols

        header_h = 3
        dirs_h = max(tmp_h, dst_h) + arch_h
        remainder = n_rows - (header_h + dirs_h)
        # Jobs pane gets ~60% of the leftover rows, never fewer than 5.
        jobs_h = max(5, math.floor(remainder * 0.6))
        logs_h = n_rows - (header_h + jobs_h + dirs_h)

        header_pos = 0
        jobs_pos = header_pos + header_h
        stdscr.resize(n_rows, n_cols)  # NOTE(review): duplicates the resize above
        dirs_pos = jobs_pos + jobs_h
        logscreen_pos = dirs_pos + dirs_h

        linecap = n_cols - 1  # addnstr cap, avoids writing the last column
        logs_h = n_rows - (header_h + jobs_h + dirs_h)  # NOTE(review): recomputed, same value

        try:
            header_win = curses.newwin(header_h, n_cols, header_pos, 0)
            log_win = curses.newwin(logs_h, n_cols, logscreen_pos, 0)
            jobs_win = curses.newwin(jobs_h, n_cols, jobs_pos, 0)
            dirs_win = curses.newwin(dirs_h, n_cols, dirs_pos, 0)
        except Exception:
            # newwin raises when the computed geometry does not fit the
            # terminal; surface a more actionable message.
            raise Exception(
                'Failed to initialize curses windows, try a larger '
                'terminal window.')

        #
        # Write
        #

        # Header
        header_win.addnstr(0, 0, 'Plotman', linecap, curses.A_BOLD)
        timestamp = datetime.datetime.now().strftime("%H:%M:%S")
        refresh_msg = "now" if do_full_refresh else f"{int(elapsed)}s/{cfg.scheduling.polling_time_s}"
        header_win.addnstr(f" {timestamp} (refresh {refresh_msg})", linecap)

        header_win.addnstr(' | <P>lotting: ', linecap, curses.A_BOLD)
        header_win.addnstr(
            plotting_status_msg(plotting_active, plotting_status), linecap)
        header_win.addnstr(' <A>rchival: ', linecap, curses.A_BOLD)
        header_win.addnstr(
            archiving_status_msg(archiving_configured,
                                 archiving_active, archiving_status), linecap)

        # Oneliner progress display
        header_win.addnstr(1, 0, 'Jobs (%d): ' % len(jobs), linecap)
        header_win.addnstr('[' + reporting.job_viz(jobs) + ']', linecap)

        # These are useful for debugging.
        # header_win.addnstr(' term size: (%d, %d)' % (n_rows, n_cols), linecap)  # Debuggin
        # if pressed_key:
        #     header_win.addnstr(' (keypress %s)' % str(pressed_key), linecap)

        header_win.addnstr(2, 0, 'Prefixes:', linecap, curses.A_BOLD)
        header_win.addnstr(' tmp=', linecap, curses.A_BOLD)
        header_win.addnstr(tmp_prefix, linecap)
        header_win.addnstr(' dst=', linecap, curses.A_BOLD)
        header_win.addnstr(dst_prefix, linecap)
        if archiving_configured:
            header_win.addnstr(' archive=', linecap, curses.A_BOLD)
            header_win.addnstr(arch_prefix, linecap)
            header_win.addnstr(' (remote)', linecap)

        # Jobs
        jobs_win.addstr(
            0, 0, reporting.status_report(jobs, n_cols, jobs_h,
                                          tmp_prefix, dst_prefix))
        jobs_win.chgat(0, 0, curses.A_REVERSE)  # highlight the column headers

        # Dirs.  The tmp and dst tables sit side by side, vertically centered
        # against each other; the archive table spans the full width below.
        tmpwin_dstwin_gutter = 6

        maxtd_h = max([tmp_h, dst_h])

        tmpwin = curses.newwin(tmp_h, tmp_w,
                               dirs_pos + int(maxtd_h - tmp_h), 0)
        tmpwin.addstr(tmp_report)
        tmpwin.chgat(0, 0, curses.A_REVERSE)

        dstwin = curses.newwin(dst_h, dst_w,
                               dirs_pos + int((maxtd_h - dst_h) / 2),
                               tmp_w + tmpwin_dstwin_gutter)
        dstwin.addstr(dst_report)
        dstwin.chgat(0, 0, curses.A_REVERSE)

        archwin = curses.newwin(arch_h, arch_w, dirs_pos + maxtd_h, 0)
        archwin.addstr(0, 0, 'Archive dirs free space', curses.A_REVERSE)
        archwin.addstr(1, 0, arch_report)

        # Log.  Could use a pad here instead of managing scrolling ourselves,
        # but this seems easier.
        log_win.addnstr(
            0, 0, ('Log: %d (<up>/<down>/<end> to scroll)\n' %
                   log.get_cur_pos()),
            linecap, curses.A_REVERSE)
        for i, logline in enumerate(log.cur_slice(logs_h - 1)):
            log_win.addnstr(i + 1, 0, logline, linecap)

        # Batch all window refreshes, then repaint once to avoid flicker.
        stdscr.noutrefresh()
        header_win.noutrefresh()
        jobs_win.noutrefresh()
        tmpwin.noutrefresh()
        dstwin.noutrefresh()
        archwin.noutrefresh()
        log_win.noutrefresh()
        curses.doupdate()

        # Key handling: scroll the log, toggle plotting/archiving, or quit.
        try:
            key = stdscr.getch()
        except KeyboardInterrupt:
            key = ord('q')  # treat Ctrl-C like an explicit quit

        if key == curses.KEY_UP:
            log.shift_slice(-1)
            pressed_key = 'up'
        elif key == curses.KEY_DOWN:
            log.shift_slice(1)
            pressed_key = 'dwn'
        elif key == curses.KEY_END:
            log.shift_slice_to_end()
            pressed_key = 'end'
        elif key == ord('p'):
            plotting_active = not plotting_active
            pressed_key = 'p'
        elif key == ord('a'):
            archiving_active = not archiving_active
            pressed_key = 'a'
        elif key == ord('q'):
            break
        else:
            pressed_key = key
def main() -> None:
    """Plotman CLI entry point.

    Parses the command line, handles the config-only subcommands
    ('version', 'config'), then loads and validates the configuration,
    sets up rotating-file loggers, and dispatches to the requested
    command (plot loop, analyze, export, status, archive, job control,
    ...).  Runs until the command completes; 'plot' and 'archive' loop
    until interrupted.
    """
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            # Fixed: no placeholders, so a plain string literal suffices.
            print(
                "To generate a default config file, run: 'plotman config generate'"
            )
            return
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                # Require an explicit y/n before clobbering an existing config.
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                if overwrite == 'n':
                    # Fixed typo: was "overrwriting".
                    print("\nExited without overwriting file")
                    return

            # Copy the default plotman.yaml (packaged in plotman/resources/)
            # to the user's config file path, creating the parent plotman
            # file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)
                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
            return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    # Load and validate configuration (including packaged archiving target
    # presets) before doing anything stateful.
    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    preset_target_definitions_text = importlib.resources.read_text(
        plotman_resources, "target_definitions.yaml",
    )

    cfg = configuration.get_validated_configs(config_text, config_path,
                                              preset_target_definitions_text)

    with cfg.setup():
        # Application-wide rotating log file.
        root_logger = logging.getLogger()
        root_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.application,
            maxBytes=10_000_000,
        )
        root_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        root_handler.setFormatter(root_formatter)
        root_logger.addHandler(root_handler)
        root_logger.setLevel(logging.INFO)
        root_logger.info('Start root logger')

        # Separate, non-propagating logger for disk-space tracking so it does
        # not flood the application log.
        disk_space_logger = logging.getLogger("disk_space")
        disk_space_logger.propagate = False
        disk_space_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.disk_spaces,
            maxBytes=10_000_000,
        )
        disk_space_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        disk_space_handler.setFormatter(disk_space_formatter)
        disk_space_logger.addHandler(disk_space_handler)
        disk_space_logger.setLevel(logging.INFO)
        disk_space_logger.info('Start disk space logger')

        #
        # Stay alive, spawning plot jobs
        #
        if args.cmd == 'plot':
            print('...starting plot loop')
            while True:
                (started, msg) = manager.maybe_start_new_plot(
                    cfg.directories, cfg.scheduling, cfg.plotting, cfg.logging)

                # TODO: report this via a channel that can be polled on
                # demand, so we don't spam the console
                if started:
                    print(msg)
                else:
                    print('...sleeping %d s: %s' %
                          (cfg.scheduling.polling_time_s, msg))
                root_logger.info('[plot] %s', msg)

                time.sleep(cfg.scheduling.polling_time_s)

        #
        # Analysis of completed jobs
        #
        elif args.cmd == 'analyze':
            analyzer.analyze(args.logfile, args.clipterminals,
                             args.bytmp, args.bybitfield)

        #
        # Exports log metadata to CSV
        #
        elif args.cmd == 'export':
            logfilenames = glob.glob(
                os.path.join(cfg.logging.plots, '*.plot.log'))
            if args.save_to is None:
                csv_exporter.generate(logfilenames=logfilenames,
                                      file=sys.stdout)
            else:
                with open(args.save_to, 'w', encoding='utf-8') as file:
                    csv_exporter.generate(logfilenames=logfilenames,
                                          file=file)

        else:
            # All remaining commands operate on the currently-running jobs.
            jobs = Job.get_running_jobs(cfg.logging.plots)

            # Status report
            if args.cmd == 'status':
                if args.json:
                    # convert jobs list into json
                    result = reporting.json_report(jobs)
                else:
                    result = "{0}\n\n{1}\n\nUpdated at: {2}".format(
                        reporting.status_report(jobs, get_term_width()),
                        reporting.summary(jobs),
                        datetime.datetime.today().strftime("%c"),
                    )
                print(result)

            # Prometheus report
            if args.cmd == 'prometheus':
                print(reporting.prometheus_report(jobs))

            # Directories report
            elif args.cmd == 'dirs':
                print(
                    reporting.dirs_report(jobs, cfg.directories,
                                          cfg.archiving, cfg.scheduling,
                                          get_term_width()))

            elif args.cmd == 'interactive':
                interactive.run_interactive(
                    cfg=cfg,
                    autostart_plotting=args.autostart_plotting,
                    autostart_archiving=args.autostart_archiving,
                )

            # Start running archival
            elif args.cmd == 'archive':
                if cfg.archiving is None:
                    start_msg = 'archiving not configured but is required for this command'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                else:
                    start_msg = '...starting archive loop'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                    firstit = True
                    while True:
                        if not firstit:
                            print('Sleeping %d s until next iteration...' %
                                  (cfg.scheduling.polling_time_s))
                            time.sleep(cfg.scheduling.polling_time_s)
                            jobs = Job.get_running_jobs(cfg.logging.plots)
                        firstit = False

                        archiving_status, log_messages = \
                            archive.spawn_archive_process(
                                cfg.directories, cfg.archiving, cfg.logging,
                                jobs)
                        if log_messages:
                            for log_message in log_messages:
                                print(log_message)
                                root_logger.info('[archive] %s', log_message)
                        else:
                            root_logger.info('[archive] %s', archiving_status)

            # Debugging: show the destination drive usage schedule
            elif args.cmd == 'dsched':
                for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                    print(' %s : %s' % (d, str(ph)))

            #
            # Job control commands
            #
            elif args.cmd in [
                'details', 'logs', 'files', 'kill', 'suspend', 'resume'
            ]:
                print(args)

                selected = []

                # TODO: clean up treatment of wildcard
                if args.idprefix[0] == 'all':
                    selected = jobs
                else:
                    # TODO: allow multiple idprefixes, not just take the first
                    selected = manager.select_jobs_by_partial_id(
                        jobs, args.idprefix[0])
                    if len(selected) == 0:
                        print('Error: %s matched no jobs.' % args.idprefix[0])
                    elif len(selected) > 1:
                        # Ambiguous prefix: list the matches and act on none.
                        print('Error: "%s" matched multiple jobs:' %
                              args.idprefix[0])
                        for j in selected:
                            print(' %s' % j.plot_id)
                        selected = []

                for job in selected:
                    if args.cmd == 'details':
                        print(job.status_str_long())

                    elif args.cmd == 'logs':
                        job.print_logs(args.follow)

                    elif args.cmd == 'files':
                        temp_files = job.get_temp_files()
                        for f in temp_files:
                            print(' %s' % f)

                    elif args.cmd == 'kill':
                        # First suspend so job doesn't create new files
                        print('Pausing PID %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        job.suspend()

                        temp_files = job.get_temp_files()
                        print('Will kill pid %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        print('Will delete %d temp files' % len(temp_files))
                        if args.force:
                            conf = 'y'
                        else:
                            conf = input('Are you sure? ("y" to confirm): ')
                        if conf != 'y':
                            print(
                                'Canceled. If you wish to resume the job, do so manually.'
                            )
                        else:
                            print('killing...')
                            job.cancel()
                            print('cleaning up temp files...')
                            for f in temp_files:
                                os.remove(f)

                    elif args.cmd == 'suspend':
                        print('Suspending ' + job.plot_id)
                        job.suspend()
                    elif args.cmd == 'resume':
                        print('Resuming ' + job.plot_id)
                        job.resume()
def main():
    """Plotman CLI entry point (older variant).

    NOTE(review): this file also contains an earlier, more featureful
    ``main() -> None`` definition; since both are module-level ``def main``,
    this later definition is the one that takes effect at import time.
    This looks like a merge/duplication artifact — confirm which version is
    intended and remove the other.

    Parses the command line, handles 'version'/'config' subcommands, loads
    the validated configuration, then dispatches to the requested command.
    """
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            print(
                f"To generate a default config file, run: 'plotman config generate'"
            )
            # NOTE(review): returns 1 here, unlike the sibling definition;
            # a non-None return from main() is presumably an exit status.
            return 1
        if args.config_subcommand == 'generate':
            if args.config_subcommand == 'generate' or True:  # (see below)
                pass
            if os.path.isfile(config_file_path):
                # Require explicit y/n confirmation before overwriting.
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                if overwrite == 'n':
                    print("\nExited without overrwriting file")
                    return

            # Copy the default plotman.yaml (packaged in plotman/resources/)
            # to the user's config file path, creating the parent plotman
            # file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)
                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
            return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    cfg = configuration.get_validated_configs(config_text, config_path)

    #
    # Stay alive, spawning plot jobs
    #
    if args.cmd == 'plot':
        print('...starting plot loop')
        while True:
            wait_reason = manager.maybe_start_new_plot(
                cfg.directories, cfg.scheduling, cfg.plotting)

            # TODO: report this via a channel that can be polled on demand,
            # so we don't spam the console
            if wait_reason:
                print('...sleeping %d s: %s' %
                      (cfg.scheduling.polling_time_s, wait_reason))

            time.sleep(cfg.scheduling.polling_time_s)

    #
    # Analysis of completed jobs
    #
    elif args.cmd == 'analyze':
        analyzer.analyze(args.logfile, args.clipterminals,
                         args.bytmp, args.bybitfield)

    else:
        # Remaining commands operate on the currently-running jobs.
        jobs = Job.get_running_jobs(cfg.directories.log)

        # Status report
        if args.cmd == 'status':
            print(reporting.status_report(jobs, get_term_width()))

        # Directories report
        elif args.cmd == 'dirs':
            print(
                reporting.dirs_report(jobs, cfg.directories, cfg.scheduling,
                                      get_term_width()))

        elif args.cmd == 'interactive':
            interactive.run_interactive()

        # Start running archival
        elif args.cmd == 'archive':
            print('...starting archive loop')
            firstit = True
            while True:
                if not firstit:
                    # Fixed 60s cadence here (the sibling definition uses the
                    # configured polling_time_s instead).
                    print('Sleeping 60s until next iteration...')
                    time.sleep(60)
                    jobs = Job.get_running_jobs(cfg.directories.log)
                firstit = False

                archiving_status, log_message = archive.spawn_archive_process(
                    cfg.directories, jobs)
                if log_message:
                    print(log_message)

        # Debugging: show the destination drive usage schedule
        elif args.cmd == 'dsched':
            for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                print(' %s : %s' % (d, str(ph)))

        #
        # Job control commands
        #
        elif args.cmd in ['details', 'files', 'kill', 'suspend', 'resume']:
            print(args)

            selected = []

            # TODO: clean up treatment of wildcard
            if args.idprefix[0] == 'all':
                selected = jobs
            else:
                # TODO: allow multiple idprefixes, not just take the first
                selected = manager.select_jobs_by_partial_id(
                    jobs, args.idprefix[0])
                if (len(selected) == 0):
                    print('Error: %s matched no jobs.' % args.idprefix[0])
                elif len(selected) > 1:
                    # Ambiguous prefix: list the matches and act on none.
                    print('Error: "%s" matched multiple jobs:' %
                          args.idprefix[0])
                    for j in selected:
                        print(' %s' % j.plot_id)
                    selected = []

            for job in selected:
                if args.cmd == 'details':
                    print(job.status_str_long())

                elif args.cmd == 'files':
                    temp_files = job.get_temp_files()
                    for f in temp_files:
                        print(' %s' % f)

                elif args.cmd == 'kill':
                    # First suspend so job doesn't create new files
                    print('Pausing PID %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    job.suspend()

                    temp_files = job.get_temp_files()
                    print('Will kill pid %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    print('Will delete %d temp files' % len(temp_files))
                    conf = input('Are you sure? ("y" to confirm): ')
                    if (conf != 'y'):
                        print(
                            'canceled. If you wish to resume the job, do so manually.'
                        )
                    else:
                        print('killing...')
                        job.cancel()
                        print('cleaing up temp files...')
                        for f in temp_files:
                            os.remove(f)

                elif args.cmd == 'suspend':
                    print('Suspending ' + job.plot_id)
                    job.suspend()
                elif args.cmd == 'resume':
                    print('Resuming ' + job.plot_id)
                    job.resume()