def main(argv, cfg): usage = "%(prog)s [-a | [workdir [workdir [...]]]" parser = ArgumentParser(usage=usage, prog=argv.pop(0)) parser.add_argument('-a', '--all', action='store_true', help="list all workdirs") parser.add_argument('workdirs', nargs='*', default=[]) args = parser.parse_args(argv) if args.all: args.workdirs.extend(sorted(cfg.workdirs)) if not args.workdirs: for wd in sorted(cfg.workdirs): print(wd) return for name in args.workdirs: if name not in cfg.workdirs: print("No such workdir:", name, file=sys.stderr) continue known = call(cfg.url + '/workdir/' + url_quote(name)) for jid in workdir_jids(cfg, name): show_job(known, jid) try: latest = os.readlink(os.path.join(cfg.workdirs[name], name + '-LATEST')) except OSError: latest = None if latest: show_job(known, jid, name + '-LATEST')
def main(argv, cfg):
    descr = 'show setup.json, dataset list, etc for jobs'
    parser = ArgumentParser(
        prog=argv.pop(0),
        description=descr,
        formatter_class=RawTextHelpFormatter,
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o', '--output', action='store_true', help='show job output')
    group.add_argument('-O', '--just-output', action='store_true', help='show only job output')
    group.add_argument('-P', '--just-path', action='store_true', help='show only job path')
    parser.add_argument(
        'jobid', nargs='+', metavar='jobid/jobspec',
        help='jobid is just a jobid.\n' +
             'you can also use path, method or :urdlist:[entry].\n' +
             'path is to a jobdir (with setup.json in it).\n' +
             'method is the latest (current) job with that method (i.e\n' +
             'the latest finished job with current source code).\n' +
             ':urdlist:[entry] looks up jobs in urd. details are in the\n' +
             'urd help, except here entry defaults to -1 and you can\'t\n' +
             'list things (no .../ or .../since/x).\n' +
             'you can use spec~ or spec~N to go back N current jobs\n' +
             'with that method or spec^ or spec^N to follow .previous')
    args = parser.parse_intermixed_args(argv)
    res = 0
    for path in args.jobid:
        try:
            job = name2job(cfg, path)
            if args.just_output:
                out = job.output()
                if out:
                    print(out, end='' if out.endswith('\n') else '\n')
            elif args.just_path:
                print(job.path)
            else:
                show(cfg.url, job, args.output)
        except JobNotFound as e:
            print(e)
            res = 1
        except Exception as e:
            if isinstance(e, OSError) and e.errno == errno.EPIPE:
                raise
            print_exc(file=sys.stderr)
            print("Failed to show %r" % (path,), file=sys.stderr)
            res = 1
    return res
def cmd_abort(argv):
    parser = ArgumentParser(prog=argv.pop(0))
    parser.add_argument('-q', '--quiet', action='store_true', help="no output")
    args = parser.parse_args(argv)
    from accelerator.build import Automata
    a = Automata(cfg.url)
    res = a.abort()
    if not args.quiet:
        print("Killed %d running job%s." % (res.killed, '' if res.killed == 1 else 's'))
def main(argv, cfg):
    descr = 'show setup.json, dataset list, etc for jobs'
    parser = ArgumentParser(prog=argv.pop(0), description=descr)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-o', '--output', action='store_true', help='show job output')
    group.add_argument('-O', '--just-output', action='store_true', help='show only job output')
    group.add_argument('-P', '--just-path', action='store_true', help='show only job path')
    parser.add_argument(
        'jobid', nargs='+', metavar='jobid/path/method',
        help='method shows the latest (current) job with that method\n' +
             '(i.e. the latest finished job with current source code)\n' +
             'you can use spec~ or spec~N to go back N current jobs\n' +
             'with that method or spec^ or spec^N to follow .previous')
    args = parser.parse_intermixed_args(argv)
    res = 0
    for path in args.jobid:
        try:
            job = name2job(cfg, path)
            if args.just_output:
                out = job.output()
                if out:
                    print(out, end='' if out.endswith('\n') else '\n')
            elif args.just_path:
                print(job.path)
            else:
                show(cfg.url, job, args.output)
        except JobNotFound as e:
            print(e)
            res = 1
        except Exception as e:
            if isinstance(e, IOError) and e.errno == errno.EPIPE:
                raise
            print_exc()
            print("Failed to show %r" % (path,))
            res = 1
    return res
def main(argv, cfg): descr = "lists and describes build scripts" parser = ArgumentParser( prog=argv.pop(0), description=descr, ) parser.add_argument('-s', '--short', action='store_true', help='short listing') parser.add_argument('-p', '--path', action='store_true', help='show package paths') parser.add_argument('match', nargs='*', default=[], help='substring used for matching') args = parser.parse_intermixed_args(argv) columns = terminal_size().columns if not args.match: # no args => list everything in short format args.match = [''] args.short = True packages = [] for package in cfg.method_directories: path = dirname(import_module(package).__file__) scripts = [] packages.append((package, path, scripts)) for item in sorted( glob(path + '/build.py') + glob(path + '/build_*.py')): name = basename(item[:-3]) modname = '.'.join((package, name)) if any(m in modname for m in args.match): try: module = import_module(modname) except Exception as e: print('%s%s: %s%s' % (colour.RED, item, e, colour.RESET), file=sys.stderr) continue scripts.append((name, getattr(module, 'description', ''))) for package, path, scripts in sorted(packages): if scripts: if args.path: print(path + '/') else: print(package) printdesc(sorted(scripts), columns, full=not args.short)
def main(argv, cfg):
    global authdict, allow_passwordless, db
    parser = ArgumentParser(prog=argv.pop(0))
    parser.add_argument('--path', type=str, default='urd.db',
        help='database directory (can be relative to project directory) (default: urd.db)',
    )
    parser.add_argument('--allow-passwordless', action='store_true', help='accept any pass for users not in passwd.')
    parser.add_argument('--quiet', action='store_true', help='less chatty.')
    args = parser.parse_args(argv)
    if not args.quiet:
        print('-' * 79)
        print(args)
        print()
    auth_fn = os.path.join(args.path, 'passwd')
    authdict = readauth(auth_fn)
    allow_passwordless = args.allow_passwordless
    if not authdict and not args.allow_passwordless:
        raise Exception('No users in %r and --allow-passwordless not specified.' % (auth_fn,))
    db = DB(args.path, not args.quiet)
    bottle.install(jsonify)
    kw = dict(debug=False, reloader=False, quiet=args.quiet, server=WaitressServer)
    listen = cfg.urd_listen
    if not listen:
        raise Exception('urd not configured in this project')
    if isinstance(listen, tuple):
        kw['host'], kw['port'] = listen
    else:
        from accelerator.server import check_socket
        check_socket(listen)
        kw['host'] = listen
        kw['port'] = 0
    bottle.run(**kw)
def main(argv, cfg): usage = "%(prog)s [options] ds [ds [...]]" parser = ArgumentParser(prog=argv.pop(0), usage=usage) parser.add_argument('-c', '--chain', action='store_true', help='list all datasets in a chain') parser.add_argument('-C', '--non-empty-chain', action='store_true', help='list all non-empty datasets in a chain') parser.add_argument('-l', '--list', action='store_true', help='list all datasets in a job with number of rows') parser.add_argument( '-L', '--chainedlist', action='store_true', help='list all datasets in a job with number of chained rows') parser.add_argument('-m', '--suppress-minmax', action='store_true', help='do not print min/max column values') parser.add_argument('-n', '--suppress-columns', action='store_true', help='do not print columns') parser.add_argument('-q', '--suppress-errors', action='store_true', help='silently ignores bad input datasets/jobids') parser.add_argument( '-s', '--slices', action='store_true', help='list relative number of lines per slice in sorted order') parser.add_argument('-S', '--chainedslices', action='store_true', help='same as -s but for full chain') parser.add_argument('-w', '--location', action='store_true', help='show where (ds/filename) each column is stored') parser.add_argument( "dataset", nargs='+', help= 'the job part of the dataset name can be specified in the same ways as for "ax job". you can use ds~ or ds~N to follow the chain N steps backwards, or ^ to follow .parent. this requires specifying the ds-name, so wd-1~ will not do this, but wd-1/default~ will.' ) args = parser.parse_intermixed_args(argv) args.chain = args.chain or args.non_empty_chain def finish(badinput): if badinput and not args.suppress_errors: print('Error, failed to resolve datasets:', file=sys.stderr) for n, e in badinput: print(' %r: %s' % ( n, e, ), file=sys.stderr) exit(1) exit() badinput = [] if args.list or args.chainedlist: for n in args.dataset: try: try: dsvec = name2ds(cfg, n).job.datasets except NoSuchWhateverError: dsvec = name2job(cfg, n).datasets except Exception as e: badinput.append((n, e)) dsvec = None if dsvec: print('%s' % (dsvec[0].job, )) v = [] for ds in dsvec: if args.chainedlist: lines = sum(sum(x.lines) for x in ds.chain()) else: lines = sum(ds.lines) v.append((ds.name, '{:n}'.format(lines))) len_n, len_l = colwidth(v) template = "{0:%d} ({1:>%d})" % (len_n, len_l) for name, numlines in sorted(v): print(' ' + template.format(name, numlines)) finish(badinput) for n in args.dataset: try: ds = name2ds(cfg, n) except NoSuchWhateverError as e: badinput.append((n, e)) continue print(ds.quoted) if ds.parent: if isinstance(ds.parent, tuple): print(" Parents:") max_n = max(len(x.quoted) for x in ds.parent) template = "{1:%d}" % (max_n, ) data = tuple( (None, x.quoted) for ix, x in enumerate(ds.parent)) data = sorted(data, key=lambda x: x[1]) printcolwise(data, template, lambda x: x, minrows=8, indent=8) else: print(" Parent:", ds.parent.quoted) print(" Method:", quote(ds.job.method)) if ds.filename: print(" Filename:", quote(ds.filename)) if ds.previous: print(" Previous:", ds.previous.quoted) if ds.hashlabel is not None: print(" Hashlabel:", quote(ds.hashlabel)) def prettyminmax(minval, maxval): if args.suppress_minmax: return '' s = '[%%%ds, %%%ds]' % (MINMAXWIDTH, MINMAXWIDTH) if minval is None: return '' elif isinstance(minval, float): def intdigits(x): if isinf(x) or isnan(x): return 3 return min(MINMAXWIDTH - 2, floor(log10(abs(x)) + 1)) if x else (MINMAXWIDTH - 2) // 2 ints = max(intdigits(minval), intdigits(maxval)) if ints > 0: 
format = "%% %d.%df" % (ints, MINMAXWIDTH - ints - 2) elif ints < -4: format = "%% .%de" % (MINMAXWIDTH - 7, ) else: format = "%% .%df" % (MINMAXWIDTH - 3, ) def format_or_int(v): try: i = int(v) if v == i: return i except (OverflowError, ValueError): pass return locale.format_string(format, v) return s % (format_or_int(minval), format_or_int(maxval)) elif isinstance(minval, int): return s % (minval, maxval) elif isinstance(minval, (date, time, datetime)): return s % (minval, maxval) else: return s % (minval, maxval) if not args.suppress_columns: print(" Columns:") name2typ = { n: c.type + '+None' if c.none_support else c.type for n, c in ds.columns.items() } len_n, len_t = colwidth( (quote(n), name2typ[n]) for n, c in ds.columns.items()) if args.location: len_l = max( len(quote(c.location)) for c in ds.columns.values()) len_c = max(len(c.compression) for c in ds.columns.values()) template = ' {2} {0:%d} {1:%d} {4:%d} {5:%d} {3}' % ( len_n, len_t, len_l, len_c, ) else: template = ' {2} {0:%d} {1:%d} {3}' % ( len_n, len_t, ) chain = False if args.chainedslices or args.chain: chain = ds.chain() for n, c in sorted(ds.columns.items()): if chain: minval, maxval = chain.min(n), chain.max(n) else: minval, maxval = c.min, c.max hashdot = colour("*", "ds/highlight") if n == ds.hashlabel else " " print( template.format(quote(n), name2typ[n], hashdot, prettyminmax(minval, maxval), quote(c.location), c.compression).rstrip()) print(" {0:n} columns".format(len(ds.columns))) print(" {0:n} lines".format(sum(ds.lines))) if ds.previous or args.chain: chain = ds.chain() if args.non_empty_chain: print(" Full chain length {0:n}, from {1} to {2}".format( len(chain), chain[0], chain[-1])) chain = [ds for ds in chain if sum(ds.lines)] print(" Filtered chain length {0:n}".format(len(chain))) if chain: if not args.non_empty_chain: print(" Chain length {0:n}, from {1} to {2}".format( len(chain), chain[0], chain[-1])) if args.chain: data = tuple((ix, "%s/%s" % (x.job, x.name), "{:n}".format(sum(x.lines))) for ix, x in enumerate(chain)) max_n, max_l = colwidth(x[1:] for x in data) template = "{0:3}: {1:%d} ({2:>%d})" % (max_n, max_l) printcolwise(data, template, lambda x: (x[0], x[1], x[2]), minrows=8, indent=8) if args.slices or args.chainedslices: if args.chainedslices and ds.previous: data = ( (ix, '{:n}'.format(sum(x)), sum(x)) for ix, x in enumerate(zip(*(x.lines for x in ds.chain())))) print(' Balance, lines per slice, full chain:') else: data = ((ix, '{:n}'.format(x), x) for ix, x in enumerate(ds.lines)) if ds.previous: print(' Balance, lines per slice, tip dataset:') else: print(' Balance, lines per slice:') data = sorted(data, key=lambda x: -x[2]) s = sum(x[2] for x in data) len_n = max(len(x[1]) for x in data) template = "{0:3}: {1!s}%% ({2:>%d})" % (len_n, ) printcolwise( data, template, lambda x: (x[0], locale.format_string("%6.2f", (100 * x[2] / (s or 1e20))), x[1]), minrows=8, indent=8) print(" Max to average ratio: " + locale.format_string("%2.3f", (max(x[2] for x in data) / ((s or 1e20) / len(data)), ))) if ds.previous: print(" {0:n} total lines in chain".format( sum(sum(ds.lines) for ds in chain))) finish(badinput)
def main(argv, cfg):
    parser = ArgumentParser(
        prog=argv.pop(0),
        usage="%(prog)s [options] [script]",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument('-f', '--flags', default='',
        help="comma separated list of flags",
    )
    parser.add_argument('-q', '--quick', action='store_true',
        help="skip method updates and checking workdirs for new jobs",
    )
    parser.add_argument('-c', '--concurrency', action='append', metavar='SPEC',
        help="set max concurrency for methods, either method=N\nor just N to set for all other methods",
    )
    parser.add_argument('-w', '--workdir', default=None,
        help="build in this workdir\nset_workdir() and workdir= override this.",
    )
    parser.add_argument('-W', '--just_wait', action='store_true',
        help="just wait for running job, don't run any build script",
    )
    parser.add_argument('-p', '--full-path', action='store_true', help="print full path to jobdirs")
    parser.add_argument('--verbose', default='status', help="verbosity style {no, status, dots, log}")
    parser.add_argument('--quiet', action='store_true', help="same as --verbose=no")
    parser.add_argument('--horizon', default=None,
        help="time horizon - dates after this are not visible in\nurd.latest")
    parser.add_argument('script', default='build',
        help="build script to run. default \"build\".\n"
             "searches under all method directories in alphabetical\n"
             "order if it does not contain a dot.\n"
             "prefixes build_ to last element unless specified.\n"
             "package name suffixes are ok.\n"
             "so for example \"test_methods.tests\" expands to\n"
             "\"accelerator.test_methods.build_tests\".",
        nargs='?')
    options = parser.parse_args(argv)
    if '.' in options.script:
        options.package, options.script = options.script.rsplit('.', 1)
    else:
        options.package = None
    options.verbose = {'no': False, 'status': True, 'dots': 'dots', 'log': 'log'}[options.verbose]
    if options.quiet:
        options.verbose = False
    concurrency_map = {}
    for v in options.concurrency or ():
        if v.isnumeric():
            concurrency_map['-default-'] = int(v)
        else:
            try:
                method, v = v.split('=', 1)
                concurrency_map[method] = int(v)
            except ValueError:
                raise Exception('Bad concurrency spec %r' % (v,))
    options.concurrency_map = concurrency_map
    try:
        run_automata(options, cfg)
        return 0
    except (JobError, ServerError):
        # If it's a JobError we don't care about the local traceback,
        # we want to see the job traceback, and maybe know what line
        # we built the job on.
        # If it's a ServerError we just want the line and message.
        print_minimal_traceback()
    except Exception:
        # For the rest we still don't want to see stuff from this
        # file and earlier.
        print_user_part_traceback()
    return 1
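# Illustrative sketch, not part of the original source: --concurrency above
# accepts either "N" (a cap for all other methods) or "method=N" (a per-method
# cap). This standalone helper (the name is a placeholder) mirrors the parsing
# in main() above, using '-default-' as the key for the global cap:

def _example_parse_concurrency(specs):
    concurrency_map = {}
    for spec in specs:
        if spec.isnumeric():
            concurrency_map['-default-'] = int(spec)
        else:
            try:
                method, n = spec.split('=', 1)
                concurrency_map[method] = int(n)
            except ValueError:
                raise ValueError('Bad concurrency spec %r' % (spec,))
    return concurrency_map

# e.g. _example_parse_concurrency(['csvimport=1', '4'])
#      == {'csvimport': 1, '-default-': 4}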
def main():
    # As of python 3.8 the default start_method is 'spawn' on macOS.
    # This doesn't work for us. 'fork' is fairly unsafe on macOS,
    # but it's better than not working at all. See
    # https://bugs.python.org/issue33725
    # for more information.
    import multiprocessing
    if hasattr(multiprocessing, 'set_start_method'):
        # If possible, make the forkserver (used by database updates) pre-import everything
        if hasattr(multiprocessing, 'set_forkserver_preload'):
            multiprocessing.set_forkserver_preload(['accelerator', 'accelerator.server'])
        multiprocessing.set_start_method('fork')
    from accelerator import g
    g.running = 'shell'
    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)
    aliases = {
        'cat': 'grep ""',
    }
    aliases.update(parse_user_config() or ())
    while argv and argv[0] in aliases:
        try:
            expanded = shlex.split(aliases[argv[0]])
        except ValueError as e:
            raise ValueError('Failed to expand alias %s (%r): %s' % (argv[0], aliases[argv[0]], e,))
        more_main_argv, argv = split_args(expanded + argv[1:])
        main_argv.extend(more_main_argv)
    epilog = ['commands:', '']
    cmdlen = max(len(cmd) for cmd in COMMANDS)
    template = ' %%%ds %%s' % (cmdlen,)
    for cmd, func in sorted(COMMANDS.items()):
        epilog.append(template % (cmd, func.help,))
    epilog.append('')
    epilog.append('aliases:')
    epilog.extend('%s = %s' % item for item in sorted(aliases.items()))
    epilog.append('')
    epilog.append('use %(prog)s <command> --help for <command> usage')
    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument('--config', metavar='CONFIG_FILE', help='configuration file')
    parser.add_argument('--version', action='store_true', help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))
    args.command = argv.pop(0) if argv else None
    if args.command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        print(file=sys.stderr)
        if args.command is not None:
            print('Unknown command "%s"' % (args.command,), file=sys.stderr)
        sys.exit(2)
    config_fn = args.config
    if args.command == 'init':
        config_fn = False
    cmd = COMMANDS[args.command]
    debug_cmd = getattr(cmd, 'is_debug', False)
    try:
        setup(config_fn, debug_cmd)
        argv.insert(0, '%s %s' % (basename(sys.argv[0]), args.command,))
        return cmd(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except IOError as e:
        if e.errno == errno.EPIPE and debug_cmd:
            return
        else:
            raise
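# Illustrative sketch, not part of the original source: command aliases such as
# 'cat': 'grep ""' are expanded with shlex.split() and spliced in front of the
# remaining arguments, repeating while the first word is still an alias. A
# minimal standalone version of that expansion with a simple loop guard (the
# function name and max_depth are inventions for this example):

def _example_expand_alias(argv, aliases, max_depth=10):
    import shlex
    seen = []
    while argv and argv[0] in aliases:
        if argv[0] in seen or len(seen) >= max_depth:
            raise ValueError('Alias loop: %r' % (seen + [argv[0]],))
        seen.append(argv[0])
        argv = shlex.split(aliases[argv[0]]) + argv[1:]
    return argv

# e.g. _example_expand_alias(['cat', 'wd-0/default'], {'cat': 'grep ""'})
#      == ['grep', '', 'wd-0/default']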
def main(argv, config): g.running = 'server' parser = ArgumentParser(prog=argv.pop(0)) parser.add_argument('--debug', action='store_true') parser.add_argument( '--debuggable', action='store_true', help= 'make breakpoint() work in methods. note that this makes a failing method kill the whole server.' ) options = parser.parse_args(argv) config.debuggable = options.debuggable # all forks belong to the same happy family try: os.setpgrp() except OSError: print( "Failed to create process group - there is probably already one (daemontools).", file=sys.stderr) # Set a low (but not too low) open file limit to make # dispatch.update_valid_fds faster. # The runners will set the highest limit they can # before actually running any methods. r1, r2 = resource.getrlimit(resource.RLIMIT_NOFILE) r1 = min(r1, r2, 1024) resource.setrlimit(resource.RLIMIT_NOFILE, (r1, r2)) # Start the board-server in a separate process so it can't interfere. # Even if it dies we don't care. try: if not isinstance(config.board_listen, tuple): # Don't bother if something is already listening. check_socket(config.board_listen) Process(target=board.run, args=(config, ), name='board-server').start() except Exception: pass iowrapper.main() # setup statmsg sink and tell address using ENV statmsg_rd, statmsg_wr = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM) os.environ['BD_STATUS_FD'] = str(statmsg_wr.fileno()) def buf_up(fh, opt): sock = socket.fromfd(fh.fileno(), socket.AF_UNIX, socket.SOCK_DGRAM) sock.setsockopt(socket.SOL_SOCKET, opt, 256 * 1024) # does not close fh, because fromfd dups the fd (but not the underlying socket) sock.close() buf_up(statmsg_wr, socket.SO_SNDBUF) buf_up(statmsg_rd, socket.SO_RCVBUF) t = DeadlyThread(target=statmsg_sink, args=(statmsg_rd, ), name="statmsg sink") t.daemon = True t.start() # do all main-stuff, i.e. run server sys.stdout = autoflush.AutoFlush(sys.stdout) sys.stderr = autoflush.AutoFlush(sys.stderr) atexit.register(exitfunction) signal.signal(signal.SIGTERM, exitfunction) signal.signal(signal.SIGINT, exitfunction) signal.signal(signal.SIGUSR1, siginfo) signal.siginterrupt(signal.SIGUSR1, False) if hasattr(signal, 'pthread_sigmask'): signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGUSR1}) if hasattr(signal, 'SIGINFO'): signal.signal(signal.SIGINFO, siginfo) signal.siginterrupt(signal.SIGINFO, False) if isinstance(config.listen, tuple): server = ThreadedHTTPServer(config.listen, XtdHandler) else: check_socket(config.listen) # We want the socket to be world writeable, protect it with dir permissions. u = os.umask(0) server = ThreadedUnixHTTPServer(config.listen, XtdHandler) os.umask(u) if config.get('urd_local'): from accelerator import urd t = DeadlyThread(target=urd.main, args=(['urd', '--quiet', '--allow-passwordless'], config), name='urd') t.daemon = True t.start() ctrl = control.Main(config, options, config.url) print() ctrl.print_workdirs() print() XtdHandler.ctrl = ctrl job_tracking[None].workdir = ctrl.target_workdir for n in ( "project_directory", "result_directory", "input_directory", ): v = config.get(n) n = n.replace("_", " ") print("%17s: %s" % ( n, v, )) for n in ( "board", "urd", ): v = config.get(n + '_listen') if v and not config.get(n + '_local', True): extra = ' (remote)' else: extra = '' print("%17s: %s%s" % ( n, v, extra, )) print() print("Serving on %s\n" % (config.listen, ), file=sys.stderr) server.serve_forever()
def main(argv, cfg): usage = "%(prog)s [options] pattern ds [ds [...]] [column [column [...]]" parser = ArgumentParser(usage=usage, prog=argv.pop(0)) parser.add_argument( '-c', '--chain', action='store_true', help="follow dataset chains", ) parser.add_argument( '-C', '--color', action='store_true', help="color matched text", ) parser.add_argument( '-i', '--ignore-case', action='store_true', help="case insensitive pattern", ) parser.add_argument( '-H', '--headers', action='store_true', help="print column names before output (and on each change)", ) parser.add_argument( '-o', '--ordered', action='store_true', help="output in order (one slice at a time)", ) parser.add_argument( '-g', '--grep', action='append', help="grep this column only, can be specified multiple times", metavar='COLUMN') parser.add_argument( '-s', '--slice', action='append', help="grep this slice only, can be specified multiple times", type=int) parser.add_argument('-t', '--separator', help="field separator (default tab)", default='\t') parser.add_argument( '-D', '--show-dataset', action='store_true', help="show dataset on matching lines", ) parser.add_argument( '-S', '--show-sliceno', action='store_true', help="show sliceno on matching lines", ) parser.add_argument( '-L', '--show-lineno', action='store_true', help="show lineno (per slice) on matching lines", ) parser.add_argument('pattern') parser.add_argument( 'dataset', help='can be specified in the same ways as for "ax ds"') parser.add_argument('columns', nargs='*', default=[]) args = parser.parse_intermixed_args(argv) pat_s = re.compile(args.pattern, re.IGNORECASE if args.ignore_case else 0) pat_b = re.compile(args.pattern.encode('utf-8'), re.IGNORECASE if args.ignore_case else 0) datasets = [name2ds(cfg, args.dataset)] columns = [] separator_s = args.separator separator_b = separator_s.encode('utf-8') for ds_or_col in args.columns: if columns: columns.append(ds_or_col) else: try: datasets.append(name2ds(cfg, ds_or_col)) except Exception: columns.append(ds_or_col) if not datasets: parser.print_help(file=sys.stderr) return 1 grep_columns = set(args.grep or ()) if grep_columns == set(columns): grep_columns = None if args.slice: want_slices = [] for s in args.slice: assert 0 <= s < g.slices, "Slice %d not available" % (s, ) if s not in want_slices: want_slices.append(s) else: want_slices = list(range(g.slices)) if args.chain: datasets = list(chain.from_iterable(ds.chain() for ds in datasets)) if columns: bad = False for ds in datasets: missing = set(columns) - set(ds.columns) if missing: print('ERROR: %s does not have columns %r' % ( ds, missing, ), file=sys.stderr) bad = True if bad: return 1 def grep(ds, sliceno): # Use bytes for everything if anything is bytes, str otherwise. (For speed.) 
if any(ds.columns[col].backing_type == 'bytes' for col in (grep_columns or columns or ds.columns)): def strbytes(v): return str(v).encode('utf-8', 'replace') def mk_iter(col): if ds.columns[col].backing_type in ( 'bytes', 'unicode', 'ascii', ): return ds._column_iterator(sliceno, col, _type='bytes') else: return imap(strbytes, ds._column_iterator(sliceno, col)) chk = pat_b.search else: def mk_iter(col): if ds.columns[col].backing_type in ( 'unicode', 'ascii', ): return ds._column_iterator(sliceno, col, _type='unicode') else: return imap(str, ds._column_iterator(sliceno, col)) chk = pat_s.search def fmt(v): if not isinstance(v, (unicode, bytes)): v = str(v) if isinstance(v, unicode): v = v.encode('utf-8', 'replace') return v def color(item): pos = 0 parts = [] for m in pat_b.finditer(item): a, b = m.span() parts.extend((item[pos:a], b'\x1b[31m', item[a:b], b'\x1b[m')) pos = b parts.append(item[pos:]) return b''.join(parts) prefix = [] if args.show_dataset: prefix.append(ds.encode('utf-8')) if args.show_sliceno: prefix.append(str(sliceno).encode('utf-8')) prefix = tuple(prefix) def show(prefix, items): items = map(fmt, items) if args.color: items = map(color, items) # This will be atomic if the line is not too long # (at least up to PIPE_BUF bytes, should be at least 512). write(1, separator_b.join(prefix + tuple(items)) + b'\n') if grep_columns and grep_columns != set(columns or ds.columns): grep_iter = izip(*(mk_iter(col) for col in grep_columns)) lines_iter = ds.iterate(sliceno, columns) else: grep_iter = repeat(None) lines_iter = izip(*(mk_iter(col) for col in (columns or sorted(ds.columns)))) lines = izip(grep_iter, lines_iter) if args.show_lineno: for lineno, (grep_items, items) in enumerate(lines): if any(imap(chk, grep_items or items)): show(prefix + (str(lineno).encode('utf-8'), ), items) else: for grep_items, items in lines: if any(imap(chk, grep_items or items)): show(prefix, items) def one_slice(sliceno, q, wait_for): try: if q: q.get() for ds in datasets: if ds in wait_for: q.task_done() q.get() grep(ds, sliceno) except KeyboardInterrupt: return except IOError as e: if e.errno == errno.EPIPE: return else: raise finally: # Make sure we are joinable try: q.task_done() except Exception: pass headers_prefix = [] if args.show_dataset: headers_prefix.append('[DATASET]') if args.show_sliceno: headers_prefix.append('[SLICE]') if args.show_lineno: headers_prefix.append('[LINE]') headers = {} if args.headers: if columns: current_headers = columns else: current_headers = None for ds in datasets: candidate_headers = sorted(ds.columns) if candidate_headers != current_headers: headers[ds] = current_headers = candidate_headers current_headers = headers.pop(datasets[0]) def show_headers(headers): print('\x1b[34m' + separator_s.join(headers_prefix + headers) + '\x1b[m') show_headers(current_headers) queues = [] children = [] if not args.ordered: q = None wait_for = set(headers) for sliceno in want_slices[1:]: if wait_for: q = JoinableQueue() q.put(None) queues.append(q) p = Process( target=one_slice, args=(sliceno, q, wait_for), name='slice-%d' % (sliceno, ), ) p.daemon = True p.start() children.append(p) want_slices = want_slices[:1] try: for ds in datasets: if ds in headers: for q in queues: q.join() show_headers(headers.pop(ds)) for q in queues: q.put(None) for sliceno in want_slices: grep(ds, sliceno) for c in children: c.join() except KeyboardInterrupt: print()
def main(): # Several commands use SIGUSR1 which (naturally...) defaults to killing the # process, so start by blocking that to minimise the race time. if hasattr(signal, 'pthread_sigmask'): signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR1}) else: # Or if we can't block it, just ignore it. signal.signal(signal.SIGUSR1, signal.SIG_IGN) # As of python 3.8 the default start_method is 'spawn' on macOS. # This doesn't work for us. 'fork' is fairly unsafe on macOS, # but it's better than not working at all. See # https://bugs.python.org/issue33725 # for more information. import multiprocessing if hasattr(multiprocessing, 'set_start_method'): # If possible, make the forkserver (used by database updates) pre-import everthing if hasattr(multiprocessing, 'set_forkserver_preload'): multiprocessing.set_forkserver_preload( ['accelerator', 'accelerator.server']) multiprocessing.set_start_method('fork') from accelerator import g g.running = 'shell' from accelerator.autoflush import AutoFlush main_argv, argv = split_args(sys.argv[1:]) sys.stdout = AutoFlush(sys.stdout) sys.stderr = AutoFlush(sys.stderr) # configuration defaults aliases = { 'cat': 'grep -e ""', } colour_d = { 'warning': ('RED', ), 'highlight': ('BOLD', ), 'grep/highlight': ('RED', ), 'info': ('BRIGHTBLUE', ), 'infohighlight': ( 'BOLD', 'BRIGHTBLUE', ), 'separator': ( 'CYAN', 'UNDERLINE', ), 'header': ( 'BRIGHTBLUE', 'BOLD', ), } parse_user_config(aliases, colour_d) colour._names.update(colour_d) used_aliases = [] while argv and argv[0] in aliases: alias = argv[0] if alias == 'noalias': # save the user from itself break try: expanded = shlex.split(aliases[alias]) except ValueError as e: raise ValueError('Failed to expand alias %s (%r): %s' % ( argv[0], aliases[argv[0]], e, )) more_main_argv, argv = split_args(expanded + argv[1:]) main_argv.extend(more_main_argv) if expanded and alias == expanded[0]: break used_aliases.append(alias) if alias in used_aliases[:-1]: raise ValueError('Alias loop: %r' % (used_aliases, )) while argv and argv[0] == 'noalias': argv.pop(0) epilog = ['commands:', ''] cmdlen = max(len(cmd) for cmd in COMMANDS) template = ' %%%ds %%s' % (cmdlen, ) for cmd, func in sorted(COMMANDS.items()): epilog.append(template % ( cmd, func.help, )) epilog.append('') epilog.append('aliases:') epilog.extend(' %s = %s' % item for item in sorted(aliases.items())) epilog.append('') epilog.append('use "' + colour('%(prog)s <command> --help', 'help/highlight') + '" for <command> usage') epilog.append('try "' + colour('%(prog)s intro', 'help/highlight') + '" for an introduction') parser = ArgumentParser( usage='%(prog)s [--config CONFIG_FILE] command [args]', epilog='\n'.join(epilog), formatter_class=RawDescriptionHelpFormatter, ) parser.add_argument('--config', metavar='CONFIG_FILE', help='configuration file') parser.add_argument('--version', action='store_true', help='alias for the version command') args = parser.parse_args(main_argv) if args.version: sys.exit(cmd_version(())) args.command = argv.pop(0) if argv else None if args.command not in COMMANDS: parser.print_help(file=sys.stderr) if args.command is not None: print(file=sys.stderr) print('Unknown command "%s"' % (args.command, ), file=sys.stderr) sys.exit(2) config_fn = args.config if args.command in ( 'init', 'intro', 'version', ): config_fn = False cmd = COMMANDS[args.command] debug_cmd = getattr(cmd, 'is_debug', False) try: setup(config_fn, debug_cmd) argv.insert(0, '%s %s' % ( basename(sys.argv[0]), args.command, )) return cmd(argv) except UserError as e: print(e, 
file=sys.stderr) return 1 except OSError as e: if e.errno == errno.EPIPE: return 1 else: raise except KeyboardInterrupt: # Exiting with KeyboardInterrupt causes python to print a traceback. # We don't want that, but we do want to exit from SIGINT (so the # calling process can know that happened). signal.signal(signal.SIGINT, signal.SIG_DFL) os.kill(os.getpid(), signal.SIGINT) # If that didn't work let's re-raise the KeyboardInterrupt. raise
def main(argv, cfg):
    parser = ArgumentParser(
        prog=argv.pop(0),
        usage="%(prog)s [options] [script]",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument('-f', '--flags', default='',
        help="comma separated list of flags",
    )
    parser.add_argument('-q', '--quick', action='store_true',
        help="skip method updates and checking workdirs for new jobs",
    )
    parser.add_argument('-w', '--workdir', default=None,
        help="build in this workdir\nset_workdir() and workdir= override this.",
    )
    parser.add_argument('-W', '--just_wait', action='store_true',
        help="just wait for running job, don't run any build script",
    )
    parser.add_argument('-F', '--fullpath', action='store_true', help="print full path to jobdirs")
    parser.add_argument('--verbose', default='status', help="verbosity style {no, status, dots, log}")
    parser.add_argument('--quiet', action='store_true', help="same as --verbose=no")
    parser.add_argument('--horizon', default=None,
        help="time horizon - dates after this are not visible in\nurd.latest")
    parser.add_argument('script', default='build',
        help="build script to run. default \"build\".\n"
             "searches under all method directories in alphabetical\n"
             "order if it does not contain a dot.\n"
             "prefixes build_ to last element unless specified.\n"
             "package name suffixes are ok.\n"
             "so for example \"test_methods.tests\" expands to\n"
             "\"accelerator.test_methods.build_tests\".",
        nargs='?')
    options = parser.parse_args(argv)
    if '.' in options.script:
        options.package, options.script = options.script.rsplit('.', 1)
    else:
        options.package = None
    options.verbose = {'no': False, 'status': True, 'dots': 'dots', 'log': 'log'}[options.verbose]
    if options.quiet:
        options.verbose = False
    try:
        run_automata(options, cfg)
        return 0
    except (JobError, ServerError):
        # If it's a JobError we don't care about the local traceback,
        # we want to see the job traceback, and maybe know what line
        # we built the job on.
        # If it's a ServerError we just want the line and message.
        print_minimal_traceback()
        return 1
def main(argv):
    from os import makedirs, listdir, chdir
    from os.path import exists, join, realpath
    from sys import version_info
    from argparse import RawDescriptionHelpFormatter
    from accelerator.compat import ArgumentParser
    from accelerator.error import UserError

    parser = ArgumentParser(
        prog=argv.pop(0),
        description=r'''
creates an accelerator project directory.
defaults to the current directory.
creates accelerator.conf, a method dir, a workdir and result dir.
both the method directory and workdir will be named <NAME>, "dev" by default.
'''.replace('\t', ''),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument('--slices', default=None, type=int, help='override slice count detection')
    parser.add_argument('--name', default='dev', help='name of method dir and workdir, default "dev"')
    parser.add_argument('--input', default='# /some/path where you want import methods to look.', help='input directory')
    parser.add_argument('--force', action='store_true', help='go ahead even though directory is not empty, or workdir exists with incompatible slice count')
    parser.add_argument('directory', default='.', help='project directory to create. default "."', metavar='DIR', nargs='?')
    options = parser.parse_args(argv)
    assert options.name
    assert '/' not in options.name
    if not options.input.startswith('#'):
        options.input = quote(realpath(options.input))
    prefix = realpath(options.directory)
    workdir = join(prefix, 'workdirs', options.name)
    slices_conf = join(workdir, '.slices')
    try:
        with open(slices_conf, 'r') as fh:
            workdir_slices = int(fh.read())
    except IOError:
        workdir_slices = None
    if workdir_slices and options.slices is None:
        options.slices = workdir_slices
    if options.slices is None:
        from multiprocessing import cpu_count
        options.slices = cpu_count()
    if workdir_slices and workdir_slices != options.slices and not options.force:
        raise UserError('Workdir %r has %d slices, refusing to continue with %d slices' % (workdir, workdir_slices, options.slices,))
    if not options.force and exists(options.directory) and listdir(options.directory):
        raise UserError('Directory %r is not empty.' % (options.directory,))
    if not exists(options.directory):
        makedirs(options.directory)
    chdir(options.directory)
    for dir_to_make in ('.socket.dir', 'urd.db',):
        if not exists(dir_to_make):
            makedirs(dir_to_make, 0o750)
    for dir_to_make in (workdir, 'results',):
        if not exists(dir_to_make):
            makedirs(dir_to_make)
    with open(slices_conf, 'w') as fh:
        fh.write('%d\n' % (options.slices,))
    method_dir = options.name
    if not exists(method_dir):
        makedirs(method_dir)
    with open(join(method_dir, '__init__.py'), 'w') as fh:
        pass
    with open(join(method_dir, 'methods.conf'), 'w') as fh:
        fh.write('example\n')
    with open(join(method_dir, 'a_example.py'), 'w') as fh:
        fh.write(a_example)
    with open(join(method_dir, 'build.py'), 'w') as fh:
        fh.write(build_script)
    with open('accelerator.conf', 'w') as fh:
        fh.write(config_template.format(
            name=quote(options.name),
            slices=options.slices,
            input=options.input,
            major=version_info.major,
            minor=version_info.minor,
            micro=version_info.micro,
        ))
def main(argv):
    from os import makedirs, listdir, chdir
    from os.path import exists, join, realpath
    from sys import version_info
    from argparse import RawTextHelpFormatter
    from accelerator.compat import ArgumentParser
    from accelerator.error import UserError
    from accelerator.extras import DotDict

    parser = ArgumentParser(
        prog=argv.pop(0),
        description=r'''
creates an accelerator project directory.
defaults to the current directory.
creates accelerator.conf, a method dir, a workdir and result dir.
both the method directory and workdir will be named <NAME>, "dev" by default.
'''.replace('\t', ''),
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument('--slices', default=None, type=int, help='override slice count detection')
    parser.add_argument('--name', default='dev', help='name of method dir and workdir, default "dev"')
    parser.add_argument('--input', default='# /some/path where you want import methods to look.', help='input directory')
    parser.add_argument('--force', action='store_true', help='go ahead even though directory is not empty, or workdir\nexists with incompatible slice count')
    parser.add_argument('--tcp', default=False, metavar='HOST/PORT', nargs='?', help='listen on TCP instead of unix sockets.\nspecify HOST (can be IP) to listen on that host\nspecify PORT to use range(PORT, PORT + 3)\nspecify both as HOST:PORT')
    parser.add_argument('--no-git', action='store_true', help='don\'t create git repository')
    parser.add_argument('directory', default='.', help='project directory to create. default "."', metavar='DIR', nargs='?')
    options = parser.parse_args(argv)
    assert options.name
    assert '/' not in options.name

    if options.tcp is False:
        listen = DotDict(
            board='.socket.dir/board',
            server='.socket.dir/server',
            urd='.socket.dir/urd',
        )
    else:
        hostport = options.tcp or ''
        if hostport.endswith(']'):  # ipv6
            host, port = hostport, None
        elif ':' in hostport:
            host, port = hostport.rsplit(':', 1)
        elif hostport.isdigit():
            host, port = '', hostport
        else:
            host, port = hostport, None
        if port:
            port = int(port)
        else:
            port = find_free_ports(0x3000, 0x8000)
        listen = DotDict(
            server='%s:%d' % (host, port,),
            board='%s:%d' % (host, port + 1,),
            urd='%s:%d' % (host, port + 2,),
        )

    if not options.input.startswith('#'):
        options.input = quote(realpath(options.input))
    prefix = realpath(options.directory)
    workdir = join(prefix, 'workdirs', options.name)
    slices_conf = join(workdir, '.slices')
    try:
        with open(slices_conf, 'r') as fh:
            workdir_slices = int(fh.read())
    except IOError:
        workdir_slices = None
    if workdir_slices and options.slices is None:
        options.slices = workdir_slices
    if options.slices is None:
        from multiprocessing import cpu_count
        options.slices = cpu_count()
    if workdir_slices and workdir_slices != options.slices and not options.force:
        raise UserError('Workdir %r has %d slices, refusing to continue with %d slices' % (workdir, workdir_slices, options.slices,))
    if not options.force and exists(options.directory) and listdir(options.directory):
        raise UserError('Directory %r is not empty.' % (options.directory,))
    if not exists(options.directory):
        makedirs(options.directory)
    chdir(options.directory)
    for dir_to_make in ('.socket.dir', 'urd.db',):
        if not exists(dir_to_make):
            makedirs(dir_to_make, 0o750)
    for dir_to_make in (workdir, 'results',):
        if not exists(dir_to_make):
            makedirs(dir_to_make)
    with open(slices_conf, 'w') as fh:
        fh.write('%d\n' % (options.slices,))
    method_dir = options.name
    if not exists(method_dir):
        makedirs(method_dir)
    with open(join(method_dir, '__init__.py'), 'w') as fh:
        pass
    with open(join(method_dir, 'methods.conf'), 'w') as fh:
        fh.write('example\n')
    with open(join(method_dir, 'a_example.py'), 'w') as fh:
        fh.write(a_example)
    with open(join(method_dir, 'build.py'), 'w') as fh:
        fh.write(build_script)
    with open('accelerator.conf', 'w') as fh:
        fh.write(config_template.format(
            name=quote(options.name),
            slices=options.slices,
            input=options.input,
            major=version_info.major,
            minor=version_info.minor,
            micro=version_info.micro,
            listen=DotDict({k: quote(v) for k, v in listen.items()}),
        ))
    if not options.no_git:
        git(method_dir)
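# Illustrative sketch, not part of the original source: --tcp above accepts
# "HOST", "PORT" or "HOST:PORT" (including a bracketed IPv6 host). This
# standalone helper (the name is a placeholder) mirrors that parse and returns
# (host, port or None); the caller then picks a free port range when port is None:

def _example_parse_hostport(hostport):
    if hostport.endswith(']'):
        # bare IPv6 address such as "[::1]", no port given
        return hostport, None
    elif ':' in hostport:
        # "host:port", also works for "[::1]:8000"
        host, port = hostport.rsplit(':', 1)
        return host, int(port)
    elif hostport.isdigit():
        # just a port, default host
        return '', int(hostport)
    else:
        # just a host name or IP
        return hostport, None

# e.g. _example_parse_hostport('[::1]:8000') == ('[::1]', 8000)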
def main(argv, cfg): parser = ArgumentParser( usage="%(prog)s [options] pattern ds [ds [...]] [column [column [...]]", prog=argv.pop(0), ) parser.add_argument( '-c', '--chain', action='store_true', help="follow dataset chains", ) parser.add_argument( '--colour', '--color', nargs='?', const='always', choices=['auto', 'never', 'always'], type=str.lower, help="colour matched text. can be auto, never or always", metavar='WHEN', ) parser.add_argument( '-i', '--ignore-case', action='store_true', help="case insensitive pattern", ) parser.add_argument( '-H', '--headers', action='store_true', help="print column names before output (and on each change)", ) parser.add_argument( '-O', '--ordered', action='store_true', help="output in order (one slice at a time)", ) parser.add_argument( '-g', '--grep', action='append', help="grep this column only, can be specified multiple times", metavar='COLUMN') parser.add_argument( '-s', '--slice', action='append', help="grep this slice only, can be specified multiple times", type=int) parser.add_argument( '-D', '--show-dataset', action='store_true', help="show dataset on matching lines", ) parser.add_argument( '-S', '--show-sliceno', action='store_true', help="show sliceno on matching lines", ) parser.add_argument( '-L', '--show-lineno', action='store_true', help="show lineno (per slice) on matching lines", ) supported_formats = ( 'csv', 'raw', 'json', ) parser.add_argument( '-f', '--format', default='csv', choices=supported_formats, help="output format, csv (default) / " + ' / '.join(supported_formats[1:]), metavar='FORMAT', ) parser.add_argument( '-t', '--separator', help="field separator, default tab / tab-like spaces", ) parser.add_argument('pattern') parser.add_argument( 'dataset', help='can be specified in the same ways as for "ax ds"') parser.add_argument('columns', nargs='*', default=[]) args = parser.parse_intermixed_args(argv) pat_s = re.compile(args.pattern, re.IGNORECASE if args.ignore_case else 0) datasets = [name2ds(cfg, args.dataset)] columns = [] for ds_or_col in args.columns: if columns: columns.append(ds_or_col) else: try: datasets.append(name2ds(cfg, ds_or_col)) except Exception: columns.append(ds_or_col) if not datasets: parser.print_help(file=sys.stderr) return 1 grep_columns = set(args.grep or ()) if grep_columns == set(columns): grep_columns = None if args.slice: want_slices = [] for s in args.slice: assert 0 <= s < g.slices, "Slice %d not available" % (s, ) if s not in want_slices: want_slices.append(s) else: want_slices = list(range(g.slices)) if args.chain: datasets = list(chain.from_iterable(ds.chain() for ds in datasets)) if columns or grep_columns: bad = False need_cols = set(columns) if grep_columns: need_cols.update(grep_columns) for ds in datasets: missing = need_cols - set(ds.columns) if missing: print('ERROR: %s does not have columns %r' % ( ds, missing, ), file=sys.stderr) bad = True if bad: return 1 # never and always override env settings, auto (default) sets from env/tty if args.colour == 'never': colour.disable() highlight_matches = False elif args.colour == 'always': colour.enable() highlight_matches = True else: highlight_matches = colour.enabled # Don't highlight everything when just trying to cat if args.pattern == '': highlight_matches = False separator = args.separator if separator is None and not sys.stdout.isatty(): separator = '\t' if separator is None: # special case where we try to be like a tab, but with spaces. # this is useful because terminals typically don't style tabs. 
def separate(items, lens): things = [] for item, item_len in zip(items, lens): things.append(item) spaces = 8 - (item_len % 8) things.append(colour(' ' * spaces, 'cyan', 'underline')) return ''.join(things[:-1]) separator = '\t' else: separator_coloured = colour(separator, 'cyan', 'underline') def separate(items, lens): return separator_coloured.join(items) def json_default(obj): if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): return str(obj) elif isinstance(obj, complex): return [obj.real, obj.imag] else: return repr(obj) if args.format == 'csv': def escape_item(item): if item and (separator in item or item[0] in '\'"' or item[-1] in '\'"'): return '"' + item.replace('\n', '\\n').replace('"', '""') + '"' else: return item.replace('\n', '\\n') errors = 'surrogatepass' else: escape_item = None errors = 'replace' if PY2 else 'surrogateescape' def grep(ds, sliceno): def no_conv(v): return v def mk_conv(col): if ds.columns[col].type in ( 'bytes', 'unicode', 'ascii', ): if not ds.columns[col].none_support: return no_conv return unicode chk = pat_s.search def mk_iter(col): if ds.columns[col].type == 'ascii': it = ds._column_iterator(sliceno, col, _type='unicode') else: it = ds._column_iterator(sliceno, col) if ds.columns[col].type == 'bytes': errors = 'replace' if PY2 else 'surrogateescape' if ds.columns[col].none_support: it = (None if v is None else v.decode('utf-8', errors) for v in it) else: it = (v.decode('utf-8', errors) for v in it) return it def colour_item(item): pos = 0 parts = [] for m in pat_s.finditer(item): a, b = m.span() parts.extend((item[pos:a], colour.red(item[a:b]))) pos = b parts.append(item[pos:]) return ''.join(parts) if args.format == 'json': prefix = {} dumps = json.JSONEncoder(ensure_ascii=False, default=json_default).encode if args.show_dataset: prefix['dataset'] = ds if args.show_sliceno: prefix['sliceno'] = sliceno def show(): d = dict(zip(used_columns, items)) if args.show_lineno: prefix['lineno'] = lineno if prefix: prefix['data'] = d d = prefix return dumps(d).encode('utf-8', 'surrogatepass') else: prefix = [] if args.show_dataset: prefix.append(ds) if args.show_sliceno: prefix.append(str(sliceno)) prefix = tuple(prefix) def show(): data = list(prefix) if args.show_lineno: data.append(unicode(lineno)) if PY2: show_items = (v if isinstance(v, unicode) else str(v).decode('utf-8', 'replace') for v in items) else: show_items = map(str, items) show_items = list(show_items) lens = (len(item) for item in data + show_items) if highlight_matches: show_items = list(map(colour_item, show_items)) if escape_item: lens_unesc = (len(item) for item in data + show_items) show_items = list(map(escape_item, show_items)) lens_esc = (len(item) for item in data + show_items) lens = ( l + esc - unesc for l, unesc, esc in zip(lens, lens_unesc, lens_esc)) data.extend(show_items) return separate(data, lens).encode('utf-8', errors) used_columns = columns or sorted(ds.columns) if grep_columns and grep_columns != set(used_columns): grep_iter = izip(*(mk_iter(col) for col in grep_columns)) conv_items = [mk_conv(col) for col in grep_columns] else: grep_iter = repeat(None) conv_items = [mk_conv(col) for col in used_columns] lines_iter = izip(*(mk_iter(col) for col in used_columns)) for lineno, (grep_items, items) in enumerate(izip(grep_iter, lines_iter)): if any( chk(conv(item)) for conv, item in izip(conv_items, grep_items or items)): # This will be atomic if the line is not too long # (at least up to PIPE_BUF bytes, should be at least 512). 
write(1, show() + b'\n') def one_slice(sliceno, q, wait_for): try: if q: q.get() for ds in datasets: if ds in wait_for: q.task_done() q.get() grep(ds, sliceno) except KeyboardInterrupt: return except IOError as e: if e.errno == errno.EPIPE: return else: raise finally: # Make sure we are joinable try: q.task_done() except Exception: pass headers_prefix = [] if args.show_dataset: headers_prefix.append('[DATASET]') if args.show_sliceno: headers_prefix.append('[SLICE]') if args.show_lineno: headers_prefix.append('[LINE]') headers = {} if args.headers: if columns: current_headers = columns else: current_headers = None for ds in datasets: candidate_headers = sorted(ds.columns) if candidate_headers != current_headers: headers[ds] = current_headers = candidate_headers current_headers = headers.pop(datasets[0]) def show_headers(headers): if args.format != 'json': show_items = headers_prefix + headers if escape_item: show_items = list(map(escape_item, show_items)) print( separate(map(colour.blue, show_items), map(len, show_items))) show_headers(current_headers) queues = [] children = [] if not args.ordered: q = None wait_for = set(headers) for sliceno in want_slices[1:]: if wait_for: q = JoinableQueue() q.put(None) queues.append(q) p = Process( target=one_slice, args=(sliceno, q, wait_for), name='slice-%d' % (sliceno, ), ) p.daemon = True p.start() children.append(p) want_slices = want_slices[:1] try: for ds in datasets: if ds in headers: for q in queues: q.join() show_headers(headers.pop(ds)) for q in queues: q.put(None) for sliceno in want_slices: grep(ds, sliceno) for c in children: c.join() except KeyboardInterrupt: print()
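# Illustrative sketch, not part of the original source: the grep workers above
# emit whole lines with a single os.write() on fd 1 instead of print(), because
# one write of at most PIPE_BUF bytes is atomic on a pipe (POSIX guarantees
# PIPE_BUF >= 512), so lines from parallel slice processes are not interleaved
# mid-line. A minimal standalone version of that output pattern (the function
# name is a placeholder):

def _example_write_line_atomically(text):
    import os
    data = text.encode('utf-8', 'replace') + b'\n'
    # one syscall per line; short lines stay whole even when several
    # processes share the same pipe
    os.write(1, data)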
def main(argv, cfg): # -C overrides -A and -B (which in turn override -C) class ContextAction(Action): def __call__(self, parser, namespace, values, option_string=None): namespace.before_context = namespace.after_context = values parser = ArgumentParser( usage= "%(prog)s [options] [-e] pattern [...] [-d] ds [...] [[-n] column [...]]", description="""positional arguments: pattern (-e, --regexp) dataset (-d, --dataset) can be specified as for "ax ds" columns (-n, --column)""", prog=argv.pop(0), formatter_class=RawTextHelpFormatter, ) parser.add_argument( '-c', '--chain', action='store_true', help="follow dataset chains", ) parser.add_argument( '--colour', '--color', nargs='?', const='always', choices=['auto', 'never', 'always'], type=str.lower, help="colour matched text. can be auto, never or always", metavar='WHEN', ) parser.add_argument( '-i', '--ignore-case', action='store_true', help="case insensitive pattern", ) parser.add_argument( '-v', '--invert-match', action='store_true', help="select non-matching lines", ) parser.add_argument( '-o', '--only-matching', action='store_true', help="only print matching part (or columns with -l)", ) parser.add_argument( '-l', '--list-matching', action='store_true', help= "only print matching datasets (or slices with -S)\nwhen used with -o, only print matching columns", ) parser.add_argument( '-H', '--headers', action='store_true', help="print column names before output (and on each change)", ) parser.add_argument( '-O', '--ordered', action='store_true', help="output in order (one slice at a time)", ) parser.add_argument( '-M', '--allow-missing-columns', action='store_true', help="datasets are allowed to not have (some) columns", ) parser.add_argument( '-g', '--grep', action='append', help="grep this column only, can be specified multiple times", metavar='COLUMN') parser.add_argument( '-s', '--slice', action='append', help="grep this slice only, can be specified multiple times", type=int) parser.add_argument( '-D', '--show-dataset', action='store_true', help="show dataset on matching lines", ) parser.add_argument( '-S', '--show-sliceno', action='store_true', help="show sliceno on matching lines", ) parser.add_argument( '-L', '--show-lineno', action='store_true', help="show lineno (per slice) on matching lines", ) supported_formats = ( 'csv', 'raw', 'json', ) parser.add_argument( '-f', '--format', default='csv', choices=supported_formats, help="output format, csv (default) / " + ' / '.join(supported_formats[1:]), metavar='FORMAT', ) parser.add_argument( '-t', '--separator', help="field separator, default tab / tab-like spaces", ) parser.add_argument( '-T', '--tab-length', type=int, metavar='LENGTH', help="field alignment, always uses spaces as separator", ) parser.add_argument( '-B', '--before-context', type=int, default=0, metavar='NUM', help="print NUM lines of leading context", ) parser.add_argument( '-A', '--after-context', type=int, default=0, metavar='NUM', help="print NUM lines of trailing context", ) parser.add_argument( '-C', '--context', type=int, default=0, metavar='NUM', action=ContextAction, help="print NUM lines of context\n" + "context is only taken from the same slice of the same\n" + "dataset, and may intermix with output from other\n" + "slices. 
Use -O to avoid that, or -S -L to see it.", ) parser.add_argument('-e', '--regexp', default=[], action='append', dest='patterns', help=SUPPRESS) parser.add_argument('-d', '--dataset', default=[], action='append', dest='datasets', help=SUPPRESS) parser.add_argument('-n', '--column', default=[], action='append', dest='columns', help=SUPPRESS) parser.add_argument('words', nargs='*', help=SUPPRESS) args = parser.parse_intermixed_args(argv) if args.before_context < 0 or args.after_context < 0: print('Context must be >= 0', file=sys.stderr) return 1 columns = args.columns try: args.datasets = [name2ds(cfg, ds) for ds in args.datasets] except NoSuchWhateverError as e: print(e, file=sys.stderr) return 1 for word in args.words: if not args.patterns: args.patterns.append(word) elif columns and args.datasets: columns.append(word) else: try: args.datasets.append(name2ds(cfg, word)) except NoSuchWhateverError as e: if not args.datasets: print(e, file=sys.stderr) return 1 columns.append(word) if not args.patterns or not args.datasets: parser.print_help(file=sys.stderr) return 1 datasets = args.datasets patterns = [] for pattern in args.patterns: try: patterns.append( re.compile(pattern, re.IGNORECASE if args.ignore_case else 0)) except re.error as e: print("Bad pattern %r:\n%s" % ( pattern, e, ), file=sys.stderr) return 1 grep_columns = set(args.grep or ()) if grep_columns == set(columns): grep_columns = set() if args.slice: want_slices = [] for s in args.slice: assert 0 <= s < g.slices, "Slice %d not available" % (s, ) if s not in want_slices: want_slices.append(s) else: want_slices = list(range(g.slices)) if len(want_slices) == 1: # it will be automatically ordered, so let's not work for it. args.ordered = False if args.only_matching: if args.list_matching: args.list_matching = False only_matching = 'columns' else: only_matching = 'part' else: only_matching = False if args.chain: datasets = list(chain.from_iterable(ds.chain() for ds in datasets)) def columns_for_ds(ds, columns=columns): if columns: return [n for n in columns if n in ds.columns] else: return sorted(ds.columns) if columns or grep_columns: if args.allow_missing_columns: keep_datasets = [] for ds in datasets: if not columns_for_ds(ds): continue if grep_columns and not columns_for_ds(ds, grep_columns): continue keep_datasets.append(ds) if not keep_datasets: return 0 datasets = keep_datasets else: bad = False need_cols = set(columns) if grep_columns: need_cols.update(grep_columns) for ds in datasets: missing = need_cols - set(ds.columns) if missing: print('ERROR: %s does not have columns %r' % ( ds, missing, ), file=sys.stderr) bad = True if bad: return 1 # For the status reporting, this gives how many lines have been processed # when reaching each ds ix, per slice. Ends with an extra fictional ds, # i.e. the total number of lines for that slice. And then the same again, # to simplify the code in the status shower. 
total_lines_per_slice_at_ds = [[0] * g.slices] for ds in datasets: total_lines_per_slice_at_ds.append( [a + b for a, b in zip(total_lines_per_slice_at_ds[-1], ds.lines)]) total_lines_per_slice_at_ds.append(total_lines_per_slice_at_ds[-1]) status_interval = { # twice per percent, but not too often or too seldom sliceno: min(max(total_lines_per_slice_at_ds[-1][sliceno] // 200, 10), 5000) for sliceno in want_slices } # never and always override env settings, auto (default) sets from env/tty if args.colour == 'never': colour.disable() highlight_matches = False elif args.colour == 'always': colour.enable() highlight_matches = True else: args.colour = 'auto' highlight_matches = colour.enabled # Don't highlight everything when just trying to cat if args.patterns == ['']: highlight_matches = False # Don't highlight anything with -l if args.list_matching: highlight_matches = False if args.format == 'json': # headers was just a mistake, ignore it args.headers = False separator = args.separator if args.tab_length: separator = None elif separator is None and not sys.stdout.isatty(): separator = '\t' if separator is None: # special case where we try to be like a tab, but with spaces. # this is useful because terminals typically don't style tabs. # and also so you can change the length of tabs. if (args.tab_length or 0) < 1: args.tab_length = 8 def separate(items, lens): things = [] for item, item_len in zip(items, lens): things.append(item) spaces = args.tab_length - (item_len % args.tab_length) things.append(colour(' ' * spaces, 'grep/separator')) return ''.join(things[:-1]) separator = '\t' else: separator_coloured = colour(separator, 'grep/separator') def separate(items, lens): return separator_coloured.join(items) def json_default(obj): if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): return str(obj) elif isinstance(obj, complex): return [obj.real, obj.imag] else: return repr(obj) if args.format == 'csv': def escape_item(item): if item and (separator in item or item[0] in '\'"' or item[-1] in '\'"'): return '"' + item.replace('\n', '\\n').replace('"', '""') + '"' else: return item.replace('\n', '\\n') errors = 'surrogatepass' else: escape_item = None errors = 'replace' if PY2 else 'surrogateescape' # This is for the ^T handling. Each slice sends an update when finishing # a dataset, and every status_interval[sliceno] lines while iterating. # To minimise the data sent the only information sent over the queue # is (sliceno, finished_dataset). # Status printing is triggered by ^T (or SIGINFO if that is available) # or by SIGUSR1. # Pressing it again within two seconds prints stats per slice too. q_status = mp.LockFreeQueue() def status_collector(): q_status.make_reader() status = {sliceno: [0, 0] for sliceno in want_slices} # [ds_ix, done_lines] total_lines = sum(total_lines_per_slice_at_ds[-1]) previous = [0] # base colour conf in if stderr is a tty, not stdout. 
if args.colour == 'auto': colour.configure_from_environ(stdout=sys.stderr) def show(sig, frame): t = monotonic() verbose = (previous[0] + 2 > t) # within 2 seconds of previous previous[0] = t ds_ixes = [] progress_lines = [] progress_fraction = [] for sliceno in want_slices: ds_ix, done_lines = status[sliceno] ds_ixes.append(ds_ix) max_possible = min( done_lines + status_interval[sliceno], total_lines_per_slice_at_ds[ds_ix + 1][sliceno]) done_lines = (done_lines + max_possible) / 2 # middle of the possibilities progress_lines.append(done_lines) total = total_lines_per_slice_at_ds[-1][sliceno] if total == 0: progress_fraction.append(1) else: progress_fraction.append(done_lines / total) progress_total = sum(progress_lines) / (total_lines or 1) bad_cutoff = progress_total - 0.1 if verbose: show_ds = (len(datasets) > 1 and min(ds_ixes) != max(ds_ixes)) for sliceno, ds_ix, p in zip(want_slices, ds_ixes, progress_fraction): if ds_ix == len(datasets): msg = 'DONE' else: msg = '{0:d}% of {1:n} lines'.format( round(p * 100), total_lines_per_slice_at_ds[-1][sliceno]) if show_ds: msg = '%s (in %s)' % ( msg, datasets[ds_ix].quoted, ) msg = '%9d: %s' % ( sliceno, msg, ) if p < bad_cutoff: msg = colour(msg, 'grep/infohighlight') else: msg = colour(msg, 'grep/info') write(2, msg.encode('utf-8') + b'\n') msg = '{0:d}% of {1:n} lines'.format(round(progress_total * 100), total_lines) if len(datasets) > 1: min_ds = min(ds_ixes) max_ds = max(ds_ixes) if min_ds < len(datasets): ds_name = datasets[min_ds].quoted extra = '' if min_ds == max_ds else ' ++' msg = '%s (in %s%s)' % ( msg, ds_name, extra, ) worst = min(progress_fraction) if worst < bad_cutoff: msg = '%s, worst %d%%' % ( msg, round(worst * 100), ) msg = colour(' SUMMARY: %s' % (msg, ), 'grep/info') write(2, msg.encode('utf-8') + b'\n') for signame in ('SIGINFO', 'SIGUSR1'): if hasattr(signal, signame): sig = getattr(signal, signame) signal.signal(sig, show) if hasattr(signal, 'pthread_sigmask'): signal.pthread_sigmask(signal.SIG_UNBLOCK, {sig}) tc_original = None using_stdin = False if not hasattr(signal, 'SIGINFO') and sys.stdin.isatty(): # ^T wont work automatically on this OS, so we need to handle it as terminal input import termios from accelerator.compat import selectors sel = selectors.DefaultSelector() sel.register(0, selectors.EVENT_READ) sel.register(q_status.r, selectors.EVENT_READ) try: tc_original = termios.tcgetattr(0) tc_changed = list(tc_original) tc_changed[3] &= ~(termios.ICANON | termios.IEXTEN) termios.tcsetattr(0, termios.TCSADRAIN, tc_changed) using_stdin = True except Exception: pass # we can't set stdin nonblocking, because it's probably the same # file description as stdout, so work around that with alarms. 
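# Illustrative sketch, not part of the original code: the alarm workaround
# the comment above describes, in isolation. A blocking os.read() is
# interrupted by SIGALRM after a timeout instead of making the fd
# non-blocking (which would also affect stdout when they share the same open
# file description). Unix-only, and must run in the main thread.
import os
import signal

def read_byte_with_timeout(fd=0, timeout=1):
    def got_alarm(sig, frame):
        raise IOError('timed out')
    old_handler = signal.signal(signal.SIGALRM, got_alarm)
    try:
        signal.alarm(timeout)
        try:
            return os.read(fd, 1)  # b'' on EOF
        finally:
            signal.alarm(0)        # cancel any pending alarm
    except IOError:
        return None                # nothing arrived within the timeout
    finally:
        signal.signal(signal.SIGALRM, old_handler)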
def got_alarm(sig, frame): raise IOError() signal.signal(signal.SIGALRM, got_alarm) try: while True: if using_stdin: do_q = False for key, _ in sel.select(): if key.fd == 0: try: signal.alarm( 1 ) # in case something else read it we block for max 1 second try: pressed = ord(os.read(0, 1)) finally: signal.alarm(0) if pressed == 20: write(2, b'\n') # "^T" shows in the terminal os.kill(os.getpid(), signal.SIGUSR1) except Exception: pass elif key.fd == q_status.r: do_q = True if not do_q: continue try: sliceno, finished_dataset = q_status.get() except QueueEmpty: return if finished_dataset: ds_ix = status[sliceno][0] + 1 status[sliceno] = [ ds_ix, total_lines_per_slice_at_ds[ds_ix][sliceno] ] else: status[sliceno][1] += status_interval[sliceno] finally: if tc_original is not None: try: termios.tcsetattr(0, termios.TCSADRAIN, tc_original) except Exception: pass status_process = mp.SimplifiedProcess(target=status_collector, name='ax grep status') # everything else will write, so make it a writer right away q_status.make_writer() # Output is only allowed while holding this lock, so that long lines # do not get intermixed. (Or when alone in producing output.) io_lock = Lock() # This contains some extra stuff to be a better base for the other # outputters. # When used directly it enforces no ordering, but merges smaller writes # to keep the number of syscalls down. class Outputter: def __init__(self, q_in, q_out): self.q_in = q_in self.q_out = q_out self.buffer = [] self.merge_buffer = b'' def put(self, data): self.merge_buffer += data if len(self.merge_buffer) >= 1024: self.move_merge() def move_merge(self): if self.merge_buffer: with io_lock: write(1, self.merge_buffer) self.merge_buffer = b'' def start(self, ds): pass def end(self, ds): self.move_merge() def finish(self): pass def full(self): return len(self.buffer) > 5000 def excite(self): self.move_merge() if self.buffer: self.pump(False) # Partially ordered output, each header change acts as a fence. # This is used in all slices except the first. # # The queue gets True when the previous slice is ready for the next # header change, and None when the header is printed (and it's ok # to resume output). class HeaderWaitOutputter(Outputter): def start(self, ds): if ds in headers: self.add_wait() else: self.excite() def add_wait(self): # Each sync point is separated by None in the buffer self.buffer.append(None) self.buffer.append(b'') # Avoid need for special case in .drain self.pump() def move_merge(self): data = self.merge_buffer self.merge_buffer = b'' if self.buffer: self.pump() if self.buffer: self.buffer.append(data) return with io_lock: write(1, data) def pump(self, wait=None): if wait is None: wait = self.full() try: got = self.q_in.get(wait) except QueueEmpty: if wait: # previous slice has exited without sending all messages raise return if got is True: # since pump is only called when we have outputted all # currently allowed output or when the next message is an # unblock for such output we can just unconditionally send # the True on to the next slice here. self.q_out.put(True) self.pump(wait) return else: self.q_out.put(None) self.drain() def drain(self): assert self.buffer[ 0] is None, 'The buffer must always stop at a sync point (or empty)' with io_lock: for pos, data in enumerate(self.buffer[1:], 1): if data is None: break elif data: write(1, data) else: # We did not reach the next fence, so last item is real data # and needs to be removed. 
                # (The buffer will then be empty and
                # output will continue directly until reaching the sync point.)
                pos += 1
            self.buffer[:pos] = ()

    def finish(self):
        while self.buffer:
            self.pump(True)

# Partially ordered output, each header change acts as a fence.
# This is used only in the first slice, and outputs the headers.
#
# When it is ready to output headers it sends True in the queue.
# When the True has travelled around the queue ring all slices are
# ready, the headers are printed, and None is sent to let the other
# slices resume output.
# (When the None returns it is ignored, because output is resumed
# as soon as the headers are printed.)
class HeaderOutputter(HeaderWaitOutputter):
    def add_wait(self):
        if not self.buffer:
            self.q_out.put(True)
        self.buffer.append(None)
        self.buffer.append(b'')  # Avoid need for special case in .drain/.put
        self.pump()

    def drain(self):
        assert self.buffer[0] is None, 'The buffer must always stop at a sync point (or empty)'
        with io_lock:
            for pos, data in enumerate(self.buffer[1:], 1):
                if data is None:
                    self.q_out.put(True)
                    break
                elif data:
                    write(1, data)
            else:
                pos += 1
        self.buffer[:pos] = ()

    def pump(self, wait=None):
        if wait is None:
            wait = self.full()
        try:
            got = self.q_in.get(wait)
        except QueueEmpty:
            if wait:
                # previous slice has exited without sending all messages
                raise
            return
        if got is True:
            # The True we put in when reaching the fence has travelled
            # all the way around the queue ring, it's time to print the
            # new headers
            write(1, next(headers_iter))
            # and then unblock the other slices
            self.q_out.put(None)
            self.drain()
        # No else, when the None comes back we just drop it.
        if not wait:
            self.pump(False)

# Fully ordered output, each slice waits for the previous slice.
# For each ds, waits for None (anything really) before starting,
# sends None when done.
class OrderedOutputter(Outputter):
    def start(self, ds):
        # Each ds is separated by None in the buffer
        self.buffer.append(None)
        self.buffer.append(b'')  # Avoid need for special case in .drain
        self.pump()

    def end(self, ds):
        self.move_merge()
        if not self.buffer:
            # We are done with this ds, so let next slice continue
            self.q_out.put(None)

    def pump(self, wait=None):
        if wait is None:
            wait = self.full()
        try:
            self.q_in.get(wait)
        except QueueEmpty:
            if wait:
                # previous slice has exited without sending all messages
                raise
            return
        self.drain()

    def move_merge(self):
        data = self.merge_buffer
        self.merge_buffer = b''
        if self.buffer:
            self.pump()
            if self.buffer:
                self.buffer.append(data)
                return
        # No need for a lock, the other slices aren't writing concurrently.
        write(1, data)

    def drain(self):
        assert self.buffer[0] is None
        for pos, data in enumerate(self.buffer[1:], 1):
            if data is None:
                # We are done with this ds, so let next slice continue
                self.q_out.put(None)
                break
            elif data:
                write(1, data)
        else:
            # We did not reach the next ds, so last item is real data and
            # needs to be removed. (The buffer will then be empty and
            # output will continue directly until reaching the next ds.)
            pos += 1
        self.buffer[:pos] = ()

    def finish(self):
        not_finished = bool(self.buffer)
        while self.buffer:
            self.pump(True)
        if not_finished:
            self.q_out.put(None)

# Same as above but for the first slice so it prints headers when needed.
class OrderedHeaderOutputter(OrderedOutputter):
    def start(self, ds):
        # Each ds is separated by None in the buffer
        self.buffer.append(None)
        if ds in headers:
            # Headers changed, start with those.
self.buffer.append(next(headers_iter)) else: self.buffer.append( b'') # Avoid need for special case in .drain self.pump() # Choose the right outputter for the kind of sync we need. def outputter(q_in, q_out, first_slice=False): if args.list_matching: cls = Outputter elif args.ordered: if first_slice: cls = OrderedHeaderOutputter else: cls = OrderedOutputter elif headers: if first_slice: cls = HeaderOutputter else: cls = HeaderWaitOutputter else: cls = Outputter return cls(q_in, q_out) # Make printer for the selected output options def make_show(prefix, used_columns): def matching_ranges(item): ranges = [] for p in patterns: ranges.extend(m.span() for m in p.finditer(item)) if not ranges: return # merge overlapping/adjacent ranges ranges.sort() ranges = iter(ranges) start, stop = next(ranges) for a, b in ranges: if a <= stop: stop = max(stop, b) else: yield start, stop start, stop = a, b yield start, stop def filter_item(item): return ''.join(item[a:b] for a, b in matching_ranges(item)) if args.format == 'json': dumps = json.JSONEncoder(ensure_ascii=False, default=json_default).encode def show(lineno, items): if only_matching == 'part': items = [filter_item(unicode(item)) for item in items] if only_matching == 'columns': d = { k: v for k, v in zip(used_columns, items) if filter_item(unicode(v)) } else: d = dict(zip(used_columns, items)) if args.show_lineno: prefix['lineno'] = lineno if prefix: prefix['data'] = d d = prefix return dumps(d).encode('utf-8', 'surrogatepass') + b'\n' else: def colour_item(item): pos = 0 parts = [] for a, b in matching_ranges(item): parts.extend( (item[pos:a], colour(item[a:b], 'grep/highlight'))) pos = b parts.append(item[pos:]) return ''.join(parts) def show(lineno, items): data = list(prefix) if args.show_lineno: data.append(unicode(lineno)) show_items = map(unicode, items) if only_matching: if only_matching == 'columns': show_items = (item if filter_item(item) else '' for item in show_items) else: show_items = map(filter_item, show_items) show_items = list(show_items) lens = (len(item) for item in data + show_items) if highlight_matches: show_items = list(map(colour_item, show_items)) if escape_item: lens_unesc = (len(item) for item in data + show_items) show_items = list(map(escape_item, show_items)) lens_esc = (len(item) for item in data + show_items) lens = ( l + esc - unesc for l, unesc, esc in zip(lens, lens_unesc, lens_esc)) data.extend(show_items) return separate(data, lens).encode('utf-8', errors) + b'\n' return show # This is called for each slice in each dataset. # Each slice has a separate process (the same for all datasets). # The first slice runs in the main process (unless -l), everything # else runs from one_slice. 
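# Illustrative sketch, not part of the original code: what matching_ranges()
# in make_show() above computes, as a standalone helper. The spans matched by
# every pattern are collected and overlapping/adjacent spans are merged, so
# highlighting and --only-matching never duplicate characters.
import re

def merged_match_ranges(patterns, text):
    spans = sorted(m.span() for p in patterns for m in p.finditer(text))
    merged = []
    for start, stop in spans:
        if merged and start <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], stop)
        else:
            merged.append([start, stop])
    return [tuple(s) for s in merged]

# merged_match_ranges([re.compile('ab'), re.compile('bc')], 'xabcx')
# -> [(1, 4)]   ('ab' spans 1..3, 'bc' spans 2..4, merged into one range)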
def grep(ds, sliceno, out): out.start(ds) if len(patterns) == 1: chk = patterns[0].search else: def chk(s): return any(p.search(s) for p in patterns) first = [True] def mk_iter(col): kw = {} if first[0]: first[0] = False lines = ds.lines[sliceno] if lines > status_interval[sliceno]: def cb(n): q_status.put((sliceno, False)) out.excite() kw['callback'] = cb kw['callback_interval'] = status_interval[sliceno] if ds.columns[col].type == 'ascii': kw['_type'] = 'unicode' it = ds._column_iterator(sliceno, col, **kw) if ds.columns[col].type == 'bytes': errors = 'replace' if PY2 else 'surrogateescape' if ds.columns[col].none_support: it = (None if v is None else v.decode('utf-8', errors) for v in it) else: it = (v.decode('utf-8', errors) for v in it) return it used_columns = columns_for_ds(ds) used_grep_columns = grep_columns and columns_for_ds(ds, grep_columns) if grep_columns and set(used_grep_columns) != set(used_columns): grep_iter = izip(*(mk_iter(col) for col in used_grep_columns)) else: grep_iter = repeat(None) lines_iter = izip(*(mk_iter(col) for col in used_columns)) if args.before_context: before = deque((), args.before_context) else: before = None if args.format == 'json': prefix = {} if args.show_dataset: prefix['dataset'] = ds if args.show_sliceno: prefix['sliceno'] = sliceno show = make_show(prefix, used_columns) else: prefix = [] if args.show_dataset: prefix.append(ds) if args.show_sliceno: prefix.append(str(sliceno)) prefix = tuple(prefix) show = make_show(prefix, used_columns) if args.invert_match: maybe_invert = operator.not_ else: maybe_invert = bool to_show = 0 for lineno, (grep_items, items) in enumerate(izip(grep_iter, lines_iter)): if maybe_invert( any(chk(unicode(item)) for item in grep_items or items)): if q_list: q_list.put((ds, sliceno)) return while before: out.put(show(*before.popleft())) to_show = 1 + args.after_context if to_show: out.put(show(lineno, items)) to_show -= 1 elif before is not None: before.append((lineno, items)) out.end(ds) # This runs in a separate process for each slice except the first # one (unless -l), which is handled specially in the main process. def one_slice(sliceno, q_in, q_out, q_to_close): if q_to_close: q_to_close.close() if q_in: q_in.make_reader() if q_out: q_out.make_writer() if q_list: q_list.make_writer() try: out = outputter(q_in, q_out) for ds in datasets: if seen_list is None or ds not in seen_list: grep(ds, sliceno, out) q_status.put((sliceno, True)) out.finish() except QueueEmpty: # some other process died, no need to print an error here sys.exit(1) headers_prefix = [] if args.show_dataset: headers_prefix.append('[DATASET]') if args.show_sliceno: headers_prefix.append('[SLICE]') if args.show_lineno: headers_prefix.append('[LINE]') # {ds: headers} for each ds where headers change (not including the first). # this is every ds where sync between slices has to happen when not --ordered. headers = OrderedDict() if args.headers: current_headers = None for ds in datasets: candidate_headers = columns_for_ds(ds) if candidate_headers != current_headers: headers[ds] = current_headers = candidate_headers def gen_headers(headers): show_items = headers_prefix + headers if escape_item: show_items = list(map(escape_item, show_items)) coloured = (colour(item, 'grep/header') for item in show_items) txt = separate(coloured, map(len, show_items)) return txt.encode('utf-8', 'surrogatepass') + b'\n' # remove the starting ds, so no header changes means no special handling. 
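# Illustrative sketch, not part of the original code: the -B/-A context
# handling used inside grep() above, reduced to plain strings. A bounded
# deque remembers the last `before` non-matching lines, and a countdown keeps
# showing `after` more lines once a match has been printed.
from collections import deque

def grep_with_context(lines, match, before=0, after=0):
    shown = []
    pending = deque((), before) if before else None
    to_show = 0
    for lineno, line in enumerate(lines):
        if match(line):
            if pending:
                shown.extend(pending)
                pending.clear()
            to_show = 1 + after
        if to_show:
            shown.append((lineno, line))
            to_show -= 1
        elif pending is not None:
            pending.append((lineno, line))
    return shown

# grep_with_context(['a', 'hit', 'b', 'c'], lambda s: s == 'hit', before=1, after=1)
# -> [(0, 'a'), (1, 'hit'), (2, 'b')]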
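# Illustrative sketch, not part of the original code: how the `headers` dict
# above gets its entries. Walking the dataset chain in order, a header row is
# recorded only where the visible column set actually changes, so headers are
# reprinted exactly when the layout changes. The (name, columns) pairs stand
# in for the real Dataset objects and columns_for_ds().
def header_changes(chain):
    changes = {}
    current = None
    for name, columns in chain:
        if columns != current:
            changes[name] = current = columns
    return changes

# header_changes([('ds0', ['a', 'b']), ('ds1', ['a', 'b']), ('ds2', ['a', 'c'])])
# -> {'ds0': ['a', 'b'], 'ds2': ['a', 'c']}
# The real code then pops the first entry, since the initial headers are
# printed unconditionally (that is what the comment above refers to).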
current_headers = headers.pop(datasets[0]) if not args.list_matching: write(1, gen_headers(current_headers)) headers_iter = iter(map(gen_headers, headers.values())) q_in = q_out = first_q_out = q_to_close = q_list = None children = [status_process] seen_list = None if args.list_matching: # in this case all slices get their own process # and the main process just prints the maching slices q_list = mp.LockFreeQueue() separate_process_slices = want_slices if not args.show_sliceno: seen_list = mp.MpSet() else: separate_process_slices = want_slices[1:] if args.ordered or headers: # needs to sync in some way q_in = first_q_out = mp.LockFreeQueue() for sliceno in separate_process_slices: if q_in: q_out = mp.LockFreeQueue() p = mp.SimplifiedProcess( target=one_slice, args=( sliceno, q_in, q_out, q_to_close, ), name='slice-%d' % (sliceno, ), ) children.append(p) if q_in and q_in is not first_q_out: q_in.close() q_to_close = first_q_out q_in = q_out if q_in: q_out = first_q_out q_in.make_reader() q_out.make_writer() if args.ordered: q_in.put_local(None) del q_to_close del first_q_out try: if args.list_matching: if args.headers: headers_prefix = ['[DATASET]'] if seen_list is None: headers_prefix.append('[SLICE]') write(1, gen_headers([])) ordered_res = defaultdict(set) q_list.make_reader() if seen_list is None: used_columns = ['dataset', 'sliceno'] else: used_columns = ['dataset'] inner_show = make_show({} if args.format == 'json' else [], used_columns) def show(ds, sliceno=None): if sliceno is None: items = [ds] else: items = [ds, sliceno] write(1, inner_show(None, items)) while True: try: ds, sliceno = q_list.get() except QueueEmpty: break if seen_list is None: if args.ordered: ordered_res[ds].add(sliceno) else: show(ds, sliceno) elif ds not in seen_list: seen_list.add(ds) if not args.ordered: show(ds) if args.ordered: for ds in datasets: if seen_list is None: for sliceno in sorted(ordered_res[ds]): show(ds, sliceno) else: if ds in seen_list: show(ds) else: out = outputter(q_in, q_out, first_slice=True) sliceno = want_slices[0] for ds in datasets: grep(ds, sliceno, out) q_status.put((sliceno, True)) out.finish() except QueueEmpty: # don't print an error, probably a subprocess died from EPIPE before # the main process. (or the subprocess already printed an error.) return 1 q_status.close() for c in children: c.join() if c.exitcode: return 1
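# Illustrative sketch, not part of the original code: the ring-of-queues idea
# behind --ordered, with multiprocessing.Queue standing in for accelerator's
# mp.LockFreeQueue. Each worker blocks on its inbound queue, writes its
# chunk, then passes the token to the next worker; the main process seeds the
# first token, so output appears in worker order even though the workers run
# concurrently.
import multiprocessing
import os

def ring_worker(q_in, q_out, chunks):
    for chunk in chunks:
        q_in.get()                   # wait for our turn
        os.write(1, chunk.encode())  # only one worker writes at a time
        q_out.put(None)              # hand the token to the next worker

def ordered_output_demo(per_worker_chunks):
    queues = [multiprocessing.Queue() for _ in per_worker_chunks]
    workers = []
    for ix, chunks in enumerate(per_worker_chunks):
        q_in = queues[ix]
        q_out = queues[(ix + 1) % len(queues)]
        p = multiprocessing.Process(target=ring_worker, args=(q_in, q_out, chunks))
        p.start()
        workers.append(p)
    queues[0].put(None)              # seed the token: worker 0 goes first
    for p in workers:
        p.join()

if __name__ == '__main__':           # needed with the spawn start method
    ordered_output_demo([['a0\n', 'a1\n'], ['b0\n', 'b1\n'], ['c0\n', 'c1\n']])
    # prints a0 b0 c0 a1 b1 c1, one round per "dataset"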
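# Illustrative sketch, not part of the original code: the -l/--list-matching
# bookkeeping above, simplified. Workers report (dataset, sliceno) pairs;
# without --ordered they are printed as they arrive (deduplicated per dataset
# when slice numbers are not shown), with --ordered they are collected and
# emitted in dataset order afterwards.
from collections import defaultdict

def collect_matches(reports, datasets, ordered=False, show_sliceno=True):
    lines = []
    seen = set()
    per_ds = defaultdict(set)
    for ds, sliceno in reports:
        if ordered:
            per_ds[ds].add(sliceno)
        elif show_sliceno:
            lines.append((ds, sliceno))
        elif ds not in seen:
            seen.add(ds)
            lines.append((ds, None))
    if ordered:
        for ds in datasets:
            if show_sliceno:
                lines.extend((ds, s) for s in sorted(per_ds[ds]))
            elif per_ds[ds]:
                lines.append((ds, None))
    return lines

# collect_matches([('ds1', 2), ('ds0', 1), ('ds0', 0)], ['ds0', 'ds1'], ordered=True)
# -> [('ds0', 0), ('ds0', 1), ('ds1', 2)]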