示例#1
0
文件: grep.py 项目: eBay/accelerator
def main(argv, cfg):
    """Grep one or more datasets for a regexp and write matching lines.

    argv[0] is consumed as the program name; the remainder is parsed as
    options, a pattern, at least one dataset and optional column names.
    cfg is only passed on to name2ds when resolving dataset names.

    Unless --ordered is given, every wanted slice except the first is
    handled in its own child process while the first one is handled here.

    Returns 1 after reporting a usage problem on stderr (bad pattern,
    unavailable slice, missing columns). Otherwise nothing is returned
    explicitly.
    """
    parser = ArgumentParser(
        usage="%(prog)s [options] pattern ds [ds [...]] [column [column [...]]",
        prog=argv.pop(0),
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument('pattern')
    parser.add_argument(
        'dataset', help='can be specified in the same ways as for "ax ds"')
    parser.add_argument('columns', nargs='*', default=[])
    args = parser.parse_intermixed_args(argv)

    # A malformed regexp is a usage error, not a reason for a traceback.
    try:
        pat_s = re.compile(args.pattern,
                           re.IGNORECASE if args.ignore_case else 0)
    except re.error as e:
        print("Bad pattern %r:\n%s" % (
            args.pattern,
            e,
        ), file=sys.stderr)
        return 1
    datasets = [name2ds(cfg, args.dataset)]
    columns = []

    # The extra positionals are datasets up to the first one name2ds fails
    # to resolve; that one and everything after it are column names.
    for ds_or_col in args.columns:
        if columns:
            columns.append(ds_or_col)
        else:
            try:
                datasets.append(name2ds(cfg, ds_or_col))
            except Exception:
                columns.append(ds_or_col)

    if not datasets:
        parser.print_help(file=sys.stderr)
        return 1

    # Grepping exactly the displayed columns needs no separate iterator.
    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = None

    if args.slice:
        # Keep the order given on the command line, without duplicates.
        want_slices = []
        for s in args.slice:
            # Explicit check rather than assert, which "python -O" removes.
            if not 0 <= s < g.slices:
                print("Slice %d not available" % (s, ), file=sys.stderr)
                return 1
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    # Report every dataset that lacks a requested column before giving up.
    if columns or grep_columns:
        bad = False
        need_cols = set(columns)
        if grep_columns:
            need_cols.update(grep_columns)
        for ds in datasets:
            missing = need_cols - set(ds.columns)
            if missing:
                print('ERROR: %s does not have columns %r' % (
                    ds,
                    missing,
                ),
                      file=sys.stderr)
                bad = True
        if bad:
            return 1

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.pattern == '':
        highlight_matches = False

    # Without an explicit separator, output that is not a tty gets real tabs.
    separator = args.separator
    if separator is None and not sys.stdout.isatty():
        separator = '\t'

    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        def separate(items, lens):
            """Join items, padding each one to the next multiple of 8."""
            things = []
            for item, item_len in zip(items, lens):
                things.append(item)
                spaces = 8 - (item_len % 8)
                things.append(colour(' ' * spaces, 'cyan', 'underline'))
            # Drop the padding after the last item.
            return ''.join(things[:-1])

        # escape_item (csv format) still needs a separator to look for.
        separator = '\t'
    else:
        separator_coloured = colour(separator, 'cyan', 'underline')

        def separate(items, lens):
            """Join items with the coloured separator (lens is unused)."""
            return separator_coloured.join(items)

    def json_default(obj):
        """Fallback for values the JSON encoder can't serialise itself."""
        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
            return str(obj)
        elif isinstance(obj, complex):
            return [obj.real, obj.imag]
        else:
            return repr(obj)

    if args.format == 'csv':

        def escape_item(item):
            """Quote item if it contains the separator or starts/ends with
            a quote character; newlines are always written as \\n."""
            if item and (separator in item or item[0] in '\'"'
                         or item[-1] in '\'"'):
                return '"' + item.replace('\n', '\\n').replace('"', '""') + '"'
            else:
                return item.replace('\n', '\\n')

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    def grep(ds, sliceno):
        """Write every matching line of one slice of ds to fd 1."""
        def no_conv(v):
            return v

        def mk_conv(col):
            """Return the function that makes a value of col searchable."""
            # String-like columns that can't hold None are searched as they
            # are, everything else goes through unicode() first.
            if ds.columns[col].type in (
                    'bytes',
                    'unicode',
                    'ascii',
            ):
                if not ds.columns[col].none_support:
                    return no_conv
            return unicode

        chk = pat_s.search

        def mk_iter(col):
            """Return an iterator over col in this slice; ascii and bytes
            columns are delivered as text."""
            if ds.columns[col].type == 'ascii':
                it = ds._column_iterator(sliceno, col, _type='unicode')
            else:
                it = ds._column_iterator(sliceno, col)
            if ds.columns[col].type == 'bytes':
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        def colour_item(item):
            """Return item with every pattern match coloured red."""
            pos = 0
            parts = []
            for m in pat_s.finditer(item):
                a, b = m.span()
                parts.extend((item[pos:a], colour.red(item[a:b])))
                pos = b
            parts.append(item[pos:])
            return ''.join(parts)

        # show() reads items and lineno from the matching loop below.
        if args.format == 'json':
            prefix = {}
            dumps = json.JSONEncoder(ensure_ascii=False,
                                     default=json_default).encode
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno

            def show():
                """Return the current line as an encoded JSON object."""
                d = dict(zip(used_columns, items))
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    # With any prefix field the columns move under 'data'.
                    prefix['data'] = d
                    d = prefix
                return dumps(d).encode('utf-8', 'surrogatepass')
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)

            def show():
                """Return the current line as encoded, separated text."""
                data = list(prefix)
                if args.show_lineno:
                    data.append(unicode(lineno))
                if PY2:
                    show_items = (v if isinstance(v, unicode) else
                                  str(v).decode('utf-8', 'replace')
                                  for v in items)
                else:
                    show_items = map(str, items)
                show_items = list(show_items)
                # Lengths are taken before colouring, so the escape codes
                # don't count when padding.
                lens = (len(item) for item in data + show_items)
                if highlight_matches:
                    show_items = list(map(colour_item, show_items))
                if escape_item:
                    # Characters added by escaping do count, so add the
                    # growth caused by escaping to each length.
                    lens_unesc = (len(item) for item in data + show_items)
                    show_items = list(map(escape_item, show_items))
                    lens_esc = (len(item) for item in data + show_items)
                    lens = (
                        l + esc - unesc
                        for l, unesc, esc in zip(lens, lens_unesc, lens_esc))
                data.extend(show_items)
                return separate(data, lens).encode('utf-8', errors)

        used_columns = columns or sorted(ds.columns)
        if grep_columns and grep_columns != set(used_columns):
            # Search other columns than the ones that are shown.
            grep_iter = izip(*(mk_iter(col) for col in grep_columns))
            conv_items = [mk_conv(col) for col in grep_columns]
        else:
            # Search the shown columns: grep_items is None on every line,
            # which makes the check below fall back to items.
            grep_iter = repeat(None)
            conv_items = [mk_conv(col) for col in used_columns]
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if any(
                    chk(conv(item))
                    for conv, item in izip(conv_items, grep_items or items)):
                # This will be atomic if the line is not too long
                # (at least up to PIPE_BUF bytes, should be at least 512).
                write(1, show() + b'\n')

    def one_slice(sliceno, q, wait_for):
        """Child process body: grep all datasets for a single slice.

        q is None or a JoinableQueue used to pause before each dataset
        in wait_for until the parent has printed the new headers.
        """
        try:
            if q:
                # Consume the token the parent queued before starting us.
                q.get()
            for ds in datasets:
                if ds in wait_for:
                    # Report that we are done with the previous headers,
                    # then block until the parent queues the next token.
                    q.task_done()
                    q.get()
                grep(ds, sliceno)
        except KeyboardInterrupt:
            return
        except IOError as e:
            # A closed output pipe just ends this slice quietly.
            if e.errno == errno.EPIPE:
                return
            else:
                raise
        finally:
            # Make sure we are joinable
            # (best effort: q may be None, so failures are ignored).
            try:
                q.task_done()
            except Exception:
                pass

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # Maps each dataset where the column list changes to its new header
    # list. Stays empty without -H or when columns were given explicitly.
    headers = {}
    if args.headers:
        if columns:
            current_headers = columns
        else:
            current_headers = None
            for ds in datasets:
                candidate_headers = sorted(ds.columns)
                if candidate_headers != current_headers:
                    headers[ds] = current_headers = candidate_headers
            # The first dataset's headers are printed right away below.
            current_headers = headers.pop(datasets[0])

        def show_headers(headers):
            """Print a header line (nothing at all for json output)."""
            if args.format != 'json':
                show_items = headers_prefix + headers
                if escape_item:
                    show_items = list(map(escape_item, show_items))
                print(
                    separate(map(colour.blue, show_items),
                             map(len, show_items)))

        show_headers(current_headers)

    queues = []
    children = []
    if not args.ordered:
        # One child process per wanted slice except the first, which is
        # handled by this process in the loop below.
        q = None
        wait_for = set(headers)
        for sliceno in want_slices[1:]:
            if wait_for:
                # Headers will change, so the child needs a queue to
                # synchronise on. The first token lets it start at once.
                q = JoinableQueue()
                q.put(None)
                queues.append(q)
            p = Process(
                target=one_slice,
                args=(sliceno, q, wait_for),
                name='slice-%d' % (sliceno, ),
            )
            p.daemon = True
            p.start()
            children.append(p)
        want_slices = want_slices[:1]

    try:
        for ds in datasets:
            if ds in headers:
                # Wait for every child to finish the previous datasets,
                # print the new headers, then let the children continue.
                for q in queues:
                    q.join()
                show_headers(headers.pop(ds))
                for q in queues:
                    q.put(None)
            for sliceno in want_slices:
                grep(ds, sliceno)
        for c in children:
            c.join()
    except KeyboardInterrupt:
        print()
示例#2
0
def main():
    """Run the shell front end: expand aliases, pick a command and call it.

    Returns whatever the command function returns, 1 after printing a
    UserError, or None when a debug command hits a closed pipe. Exits via
    sys.exit for --version and for a missing or unknown command.
    """
    # Since python 3.8 macOS defaults to the 'spawn' start method, which
    # doesn't work for us. 'fork' is fairly unsafe on macOS, but still
    # better than not working at all. For more information see
    # https://bugs.python.org/issue33725
    import multiprocessing
    if hasattr(multiprocessing, 'set_start_method'):
        # Where possible, have the forkserver (used by database updates)
        # pre-import everything.
        if hasattr(multiprocessing, 'set_forkserver_preload'):
            preload = ['accelerator', 'accelerator.server']
            multiprocessing.set_forkserver_preload(preload)
        multiprocessing.set_start_method('fork')

    from accelerator import g
    g.running = 'shell'

    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)

    # Built in aliases first, then whatever the user config provides.
    aliases = {'cat': 'grep ""'}
    aliases.update(parse_user_config() or ())
    # An expansion may itself start with an alias, so keep going until
    # the first word no longer is one.
    while argv and argv[0] in aliases:
        alias_name = argv[0]
        alias_value = aliases[alias_name]
        try:
            alias_words = shlex.split(alias_value)
        except ValueError as e:
            raise ValueError('Failed to expand alias %s (%r): %s' %
                             (alias_name, alias_value, e))
        extra_main_argv, argv = split_args(alias_words + argv[1:])
        main_argv.extend(extra_main_argv)

    # The help epilog lists all commands (right aligned) and all aliases.
    epilog_lines = ['commands:', '']
    widest = max(len(name) for name in COMMANDS)
    row_fmt = '  %%%ds  %%s' % (widest, )
    epilog_lines.extend(
        row_fmt % (name, func.help) for name, func in sorted(COMMANDS.items()))
    epilog_lines += ['', 'aliases:']
    epilog_lines.extend('%s = %s' % item for item in sorted(aliases.items()))
    epilog_lines += ['', 'use %(prog)s <command> --help for <command> usage']

    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog_lines),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--config', metavar='CONFIG_FILE', help='configuration file')
    parser.add_argument(
        '--version', action='store_true', help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))

    if argv:
        args.command = argv.pop(0)
    else:
        args.command = None
    command = args.command
    if command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        print(file=sys.stderr)
        if command is not None:
            print('Unknown command "%s"' % (command, ), file=sys.stderr)
        sys.exit(2)

    # The init command passes False to setup instead of a config file name.
    config_fn = False if command == 'init' else args.config
    cmd = COMMANDS[command]
    debug_cmd = getattr(cmd, 'is_debug', False)
    try:
        setup(config_fn, debug_cmd)
        # The command gets "<program> <command>" as its argv[0].
        argv.insert(0, '%s %s' % (basename(sys.argv[0]), command))
        return cmd(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except IOError as e:
        # Only debug commands get to ignore a closed output pipe.
        if e.errno != errno.EPIPE or not debug_cmd:
            raise
        return
示例#3
0
def main(argv, cfg):
    """Grep one or more datasets for a regexp and write matching lines.

    argv[0] is consumed as the program name; the remainder is parsed as
    options, a pattern, at least one dataset and optional column names.
    cfg is only passed on to name2ds when resolving dataset names.

    Unless --ordered is given, every wanted slice except the first is
    handled in its own child process while the first one is handled here.

    Returns 1 after reporting a usage problem on stderr (bad pattern,
    unavailable slice, missing columns). Otherwise nothing is returned
    explicitly.
    """
    usage = "%(prog)s [options] pattern ds [ds [...]] [column [column [...]]"
    parser = ArgumentParser(usage=usage, prog=argv.pop(0))
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '-C',
        '--color',
        action='store_true',
        help="color matched text",
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-o',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument('-t',
                        '--separator',
                        help="field separator (default tab)",
                        default='\t')
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    parser.add_argument('pattern')
    parser.add_argument(
        'dataset', help='can be specified in the same ways as for "ax ds"')
    parser.add_argument('columns', nargs='*', default=[])
    args = parser.parse_intermixed_args(argv)

    # The pattern is compiled twice, once for text and once for bytes.
    # A malformed regexp is a usage error, not a reason for a traceback.
    re_flags = re.IGNORECASE if args.ignore_case else 0
    try:
        pat_s = re.compile(args.pattern, re_flags)
        pat_b = re.compile(args.pattern.encode('utf-8'), re_flags)
    except re.error as e:
        print("Bad pattern %r:\n%s" % (
            args.pattern,
            e,
        ), file=sys.stderr)
        return 1
    datasets = [name2ds(cfg, args.dataset)]
    columns = []

    separator_s = args.separator
    separator_b = separator_s.encode('utf-8')

    # The extra positionals are datasets up to the first one name2ds fails
    # to resolve; that one and everything after it are column names.
    for ds_or_col in args.columns:
        if columns:
            columns.append(ds_or_col)
        else:
            try:
                datasets.append(name2ds(cfg, ds_or_col))
            except Exception:
                columns.append(ds_or_col)

    if not datasets:
        parser.print_help(file=sys.stderr)
        return 1

    # Grepping exactly the displayed columns needs no separate iterator.
    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = None

    if args.slice:
        # Keep the order given on the command line, without duplicates.
        want_slices = []
        for s in args.slice:
            # Explicit check rather than assert, which "python -O" removes.
            if not 0 <= s < g.slices:
                print("Slice %d not available" % (s, ), file=sys.stderr)
                return 1
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    # Both the shown and the grepped columns have to exist in every
    # dataset, otherwise grep() would fail on ds.columns[col] later.
    # Report every dataset that lacks one before giving up.
    if columns or grep_columns:
        bad = False
        need_cols = set(columns)
        if grep_columns:
            need_cols.update(grep_columns)
        for ds in datasets:
            missing = need_cols - set(ds.columns)
            if missing:
                print('ERROR: %s does not have columns %r' % (
                    ds,
                    missing,
                ),
                      file=sys.stderr)
                bad = True
        if bad:
            return 1

    def grep(ds, sliceno):
        """Write every matching line of one slice of ds to fd 1."""
        # Use bytes for everything if anything is bytes, str otherwise. (For speed.)
        if any(ds.columns[col].backing_type == 'bytes'
               for col in (grep_columns or columns or ds.columns)):

            def strbytes(v):
                return str(v).encode('utf-8', 'replace')

            def mk_iter(col):
                """Return an iterator giving the values of col as bytes."""
                if ds.columns[col].backing_type in (
                        'bytes',
                        'unicode',
                        'ascii',
                ):
                    return ds._column_iterator(sliceno, col, _type='bytes')
                else:
                    return imap(strbytes, ds._column_iterator(sliceno, col))

            chk = pat_b.search
        else:

            def mk_iter(col):
                """Return an iterator giving the values of col as text."""
                if ds.columns[col].backing_type in (
                        'unicode',
                        'ascii',
                ):
                    return ds._column_iterator(sliceno, col, _type='unicode')
                else:
                    return imap(str, ds._column_iterator(sliceno, col))

            chk = pat_s.search

        def fmt(v):
            """Return v as bytes, whatever type it had."""
            if not isinstance(v, (unicode, bytes)):
                v = str(v)
            if isinstance(v, unicode):
                v = v.encode('utf-8', 'replace')
            return v

        def color(item):
            """Return item (bytes) with every match wrapped in the escape
            sequences \\x1b[31m and \\x1b[m."""
            pos = 0
            parts = []
            for m in pat_b.finditer(item):
                a, b = m.span()
                parts.extend((item[pos:a], b'\x1b[31m', item[a:b], b'\x1b[m'))
                pos = b
            parts.append(item[pos:])
            return b''.join(parts)

        prefix = []
        if args.show_dataset:
            prefix.append(ds.encode('utf-8'))
        if args.show_sliceno:
            prefix.append(str(sliceno).encode('utf-8'))
        prefix = tuple(prefix)

        def show(prefix, items):
            """Write one output line: prefix fields followed by items."""
            items = map(fmt, items)
            if args.color:
                items = map(color, items)
            # This will be atomic if the line is not too long
            # (at least up to PIPE_BUF bytes, should be at least 512).
            write(1, separator_b.join(prefix + tuple(items)) + b'\n')

        if grep_columns and grep_columns != set(columns or ds.columns):
            # Search other columns than the ones that are shown.
            grep_iter = izip(*(mk_iter(col) for col in grep_columns))
            lines_iter = ds.iterate(sliceno, columns)
        else:
            # Search the shown columns: grep_items is None on every line,
            # which makes the checks below fall back to items.
            grep_iter = repeat(None)
            lines_iter = izip(*(mk_iter(col)
                                for col in (columns or sorted(ds.columns))))
        lines = izip(grep_iter, lines_iter)
        # Two separate loops so that lines are only counted with -L.
        if args.show_lineno:
            for lineno, (grep_items, items) in enumerate(lines):
                if any(imap(chk, grep_items or items)):
                    show(prefix + (str(lineno).encode('utf-8'), ), items)
        else:
            for grep_items, items in lines:
                if any(imap(chk, grep_items or items)):
                    show(prefix, items)

    def one_slice(sliceno, q, wait_for):
        """Child process body: grep all datasets for a single slice.

        q is None or a JoinableQueue used to pause before each dataset
        in wait_for until the parent has printed the new headers.
        """
        try:
            if q:
                # Consume the token the parent queued before starting us.
                q.get()
            for ds in datasets:
                if ds in wait_for:
                    # Report that we are done with the previous headers,
                    # then block until the parent queues the next token.
                    q.task_done()
                    q.get()
                grep(ds, sliceno)
        except KeyboardInterrupt:
            return
        except IOError as e:
            # A closed output pipe just ends this slice quietly.
            if e.errno == errno.EPIPE:
                return
            else:
                raise
        finally:
            # Make sure we are joinable
            # (best effort: q may be None, so failures are ignored).
            try:
                q.task_done()
            except Exception:
                pass

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # Maps each dataset where the column list changes to its new header
    # list. Stays empty without -H or when columns were given explicitly.
    headers = {}
    if args.headers:
        if columns:
            current_headers = columns
        else:
            current_headers = None
            for ds in datasets:
                candidate_headers = sorted(ds.columns)
                if candidate_headers != current_headers:
                    headers[ds] = current_headers = candidate_headers
            # The first dataset's headers are printed right away below.
            current_headers = headers.pop(datasets[0])

        def show_headers(headers):
            """Print a header line wrapped in \\x1b[34m ... \\x1b[m."""
            print('\x1b[34m' + separator_s.join(headers_prefix + headers) +
                  '\x1b[m')

        show_headers(current_headers)

    queues = []
    children = []
    if not args.ordered:
        # One child process per wanted slice except the first, which is
        # handled by this process in the loop below.
        q = None
        wait_for = set(headers)
        for sliceno in want_slices[1:]:
            if wait_for:
                # Headers will change, so the child needs a queue to
                # synchronise on. The first token lets it start at once.
                q = JoinableQueue()
                q.put(None)
                queues.append(q)
            p = Process(
                target=one_slice,
                args=(sliceno, q, wait_for),
                name='slice-%d' % (sliceno, ),
            )
            p.daemon = True
            p.start()
            children.append(p)
        want_slices = want_slices[:1]

    try:
        for ds in datasets:
            if ds in headers:
                # Wait for every child to finish the previous datasets,
                # print the new headers, then let the children continue.
                for q in queues:
                    q.join()
                show_headers(headers.pop(ds))
                for q in queues:
                    q.put(None)
            for sliceno in want_slices:
                grep(ds, sliceno)
        for c in children:
            c.join()
    except KeyboardInterrupt:
        print()
示例#4
0
def main(argv, cfg):
    # -C overrides -A and -B (which in turn override -C)
    class ContextAction(Action):
        def __call__(self, parser, namespace, values, option_string=None):
            namespace.before_context = namespace.after_context = values

    parser = ArgumentParser(
        usage=
        "%(prog)s [options] [-e] pattern [...] [-d] ds [...] [[-n] column [...]]",
        description="""positional arguments:
  pattern               (-e, --regexp)
  dataset               (-d, --dataset) can be specified as for "ax ds"
  columns               (-n, --column)""",
        prog=argv.pop(0),
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument(
        '-c',
        '--chain',
        action='store_true',
        help="follow dataset chains",
    )
    parser.add_argument(
        '--colour',
        '--color',
        nargs='?',
        const='always',
        choices=['auto', 'never', 'always'],
        type=str.lower,
        help="colour matched text. can be auto, never or always",
        metavar='WHEN',
    )
    parser.add_argument(
        '-i',
        '--ignore-case',
        action='store_true',
        help="case insensitive pattern",
    )
    parser.add_argument(
        '-v',
        '--invert-match',
        action='store_true',
        help="select non-matching lines",
    )
    parser.add_argument(
        '-o',
        '--only-matching',
        action='store_true',
        help="only print matching part (or columns with -l)",
    )
    parser.add_argument(
        '-l',
        '--list-matching',
        action='store_true',
        help=
        "only print matching datasets (or slices with -S)\nwhen used with -o, only print matching columns",
    )
    parser.add_argument(
        '-H',
        '--headers',
        action='store_true',
        help="print column names before output (and on each change)",
    )
    parser.add_argument(
        '-O',
        '--ordered',
        action='store_true',
        help="output in order (one slice at a time)",
    )
    parser.add_argument(
        '-M',
        '--allow-missing-columns',
        action='store_true',
        help="datasets are allowed to not have (some) columns",
    )
    parser.add_argument(
        '-g',
        '--grep',
        action='append',
        help="grep this column only, can be specified multiple times",
        metavar='COLUMN')
    parser.add_argument(
        '-s',
        '--slice',
        action='append',
        help="grep this slice only, can be specified multiple times",
        type=int)
    parser.add_argument(
        '-D',
        '--show-dataset',
        action='store_true',
        help="show dataset on matching lines",
    )
    parser.add_argument(
        '-S',
        '--show-sliceno',
        action='store_true',
        help="show sliceno on matching lines",
    )
    parser.add_argument(
        '-L',
        '--show-lineno',
        action='store_true',
        help="show lineno (per slice) on matching lines",
    )
    supported_formats = (
        'csv',
        'raw',
        'json',
    )
    parser.add_argument(
        '-f',
        '--format',
        default='csv',
        choices=supported_formats,
        help="output format, csv (default) / " +
        ' / '.join(supported_formats[1:]),
        metavar='FORMAT',
    )
    parser.add_argument(
        '-t',
        '--separator',
        help="field separator, default tab / tab-like spaces",
    )
    parser.add_argument(
        '-T',
        '--tab-length',
        type=int,
        metavar='LENGTH',
        help="field alignment, always uses spaces as separator",
    )
    parser.add_argument(
        '-B',
        '--before-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of leading context",
    )
    parser.add_argument(
        '-A',
        '--after-context',
        type=int,
        default=0,
        metavar='NUM',
        help="print NUM lines of trailing context",
    )
    parser.add_argument(
        '-C',
        '--context',
        type=int,
        default=0,
        metavar='NUM',
        action=ContextAction,
        help="print NUM lines of context\n" +
        "context is only taken from the same slice of the same\n" +
        "dataset, and may intermix with output from other\n" +
        "slices. Use -O to avoid that, or -S -L to see it.",
    )
    parser.add_argument('-e',
                        '--regexp',
                        default=[],
                        action='append',
                        dest='patterns',
                        help=SUPPRESS)
    parser.add_argument('-d',
                        '--dataset',
                        default=[],
                        action='append',
                        dest='datasets',
                        help=SUPPRESS)
    parser.add_argument('-n',
                        '--column',
                        default=[],
                        action='append',
                        dest='columns',
                        help=SUPPRESS)
    parser.add_argument('words', nargs='*', help=SUPPRESS)
    args = parser.parse_intermixed_args(argv)

    if args.before_context < 0 or args.after_context < 0:
        print('Context must be >= 0', file=sys.stderr)
        return 1

    columns = args.columns

    try:
        args.datasets = [name2ds(cfg, ds) for ds in args.datasets]
    except NoSuchWhateverError as e:
        print(e, file=sys.stderr)
        return 1

    for word in args.words:
        if not args.patterns:
            args.patterns.append(word)
        elif columns and args.datasets:
            columns.append(word)
        else:
            try:
                args.datasets.append(name2ds(cfg, word))
            except NoSuchWhateverError as e:
                if not args.datasets:
                    print(e, file=sys.stderr)
                    return 1
                columns.append(word)

    if not args.patterns or not args.datasets:
        parser.print_help(file=sys.stderr)
        return 1

    datasets = args.datasets
    patterns = []
    for pattern in args.patterns:
        try:
            patterns.append(
                re.compile(pattern, re.IGNORECASE if args.ignore_case else 0))
        except re.error as e:
            print("Bad pattern %r:\n%s" % (
                pattern,
                e,
            ), file=sys.stderr)
            return 1

    grep_columns = set(args.grep or ())
    if grep_columns == set(columns):
        grep_columns = set()

    if args.slice:
        want_slices = []
        for s in args.slice:
            assert 0 <= s < g.slices, "Slice %d not available" % (s, )
            if s not in want_slices:
                want_slices.append(s)
    else:
        want_slices = list(range(g.slices))

    if len(want_slices) == 1:
        # it will be automatically ordered, so let's not work for it.
        args.ordered = False

    if args.only_matching:
        if args.list_matching:
            args.list_matching = False
            only_matching = 'columns'
        else:
            only_matching = 'part'
    else:
        only_matching = False

    if args.chain:
        datasets = list(chain.from_iterable(ds.chain() for ds in datasets))

    def columns_for_ds(ds, columns=columns):
        """Return the column names to use for ds.

        With a non-empty columns this is those names, in the given order,
        restricted to the ones ds actually has. With an empty columns it
        is every column of ds, sorted.
        """
        if not columns:
            return sorted(ds.columns)
        available = ds.columns
        return [name for name in columns if name in available]

    if columns or grep_columns:
        if args.allow_missing_columns:
            keep_datasets = []
            for ds in datasets:
                if not columns_for_ds(ds):
                    continue
                if grep_columns and not columns_for_ds(ds, grep_columns):
                    continue
                keep_datasets.append(ds)
            if not keep_datasets:
                return 0
            datasets = keep_datasets
        else:
            bad = False
            need_cols = set(columns)
            if grep_columns:
                need_cols.update(grep_columns)
            for ds in datasets:
                missing = need_cols - set(ds.columns)
                if missing:
                    print('ERROR: %s does not have columns %r' % (
                        ds,
                        missing,
                    ),
                          file=sys.stderr)
                    bad = True
            if bad:
                return 1

    # For the status reporting, this gives how many lines have been processed
    # when reaching each ds ix, per slice. Ends with an extra fictional ds,
    # i.e. the total number of lines for that slice. And then the same again,
    # to simplify the code in the status shower.
    total_lines_per_slice_at_ds = [[0] * g.slices]
    for ds in datasets:
        total_lines_per_slice_at_ds.append(
            [a + b for a, b in zip(total_lines_per_slice_at_ds[-1], ds.lines)])
    total_lines_per_slice_at_ds.append(total_lines_per_slice_at_ds[-1])
    status_interval = {
        # twice per percent, but not too often or too seldom
        sliceno: min(max(total_lines_per_slice_at_ds[-1][sliceno] // 200, 10),
                     5000)
        for sliceno in want_slices
    }

    # never and always override env settings, auto (default) sets from env/tty
    if args.colour == 'never':
        colour.disable()
        highlight_matches = False
    elif args.colour == 'always':
        colour.enable()
        highlight_matches = True
    else:
        args.colour = 'auto'
        highlight_matches = colour.enabled

    # Don't highlight everything when just trying to cat
    if args.patterns == ['']:
        highlight_matches = False
    # Don't highlight anything with -l
    if args.list_matching:
        highlight_matches = False

    if args.format == 'json':
        # headers was just a mistake, ignore it
        args.headers = False

    separator = args.separator
    if args.tab_length:
        separator = None
    elif separator is None and not sys.stdout.isatty():
        separator = '\t'

    if separator is None:
        # special case where we try to be like a tab, but with spaces.
        # this is useful because terminals typically don't style tabs.
        # and also so you can change the length of tabs.
        if (args.tab_length or 0) < 1:
            args.tab_length = 8

        def separate(items, lens):
            """Join items, padding each with spaces up to the next tab stop.

            lens gives the length to assume for each item when computing
            the padding. The padding is coloured as 'grep/separator', and
            nothing is added after the last item.
            """
            tab = args.tab_length
            pieces = []
            for text, width in zip(items, lens):
                padding = ' ' * (tab - width % tab)
                pieces += [text, colour(padding, 'grep/separator')]
            # drop the padding that followed the last item
            del pieces[-1:]
            return ''.join(pieces)

        separator = '\t'
    else:
        separator_coloured = colour(separator, 'grep/separator')

        def separate(items, lens):
            """Join items with the coloured separator.

            lens is accepted so both separate variants can be called the
            same way, it is not used here.
            """
            joined = separator_coloured.join(items)
            return joined

    def json_default(obj):
        """Fallback serialisation for values the JSON encoder can't handle.

        datetime, date and time values become their str(), complex numbers
        become a [real, imag] list, and anything else becomes its repr().
        """
        temporal_types = (datetime.datetime, datetime.date, datetime.time)
        if isinstance(obj, temporal_types):
            return str(obj)
        if isinstance(obj, complex):
            return [obj.real, obj.imag]
        return repr(obj)

    if args.format == 'csv':

        def escape_item(item):
            """Escape one item for csv output.

            Newlines are always written as a backslash followed by n.
            A non-empty item that contains the separator, or that starts
            or ends with a quote character, is also wrapped in double
            quotes with embedded double quotes doubled.
            """
            escaped = item.replace('\n', '\\n')
            needs_quoting = bool(item) and (
                separator in item
                or item[0] in '\'"'
                or item[-1] in '\'"'
            )
            if needs_quoting:
                return '"' + escaped.replace('"', '""') + '"'
            return escaped

        errors = 'surrogatepass'
    else:
        escape_item = None
        errors = 'replace' if PY2 else 'surrogateescape'

    # This is for the ^T handling. Each slice sends an update when finishing
    # a dataset, and every status_interval[sliceno] lines while iterating.
    # To minimise the data sent the only information sent over the queue
    # is (sliceno, finished_dataset).
    # Status printing is triggered by ^T (or SIGINFO if that is available)
    # or by SIGUSR1.
    # Pressing it again within two seconds prints stats per slice too.
    q_status = mp.LockFreeQueue()

    def status_collector():
        """Body of the status process: track progress, print it on request.

        Reads (sliceno, finished_dataset) messages from q_status and keeps,
        per wanted slice, the index of the dataset being processed and the
        number of lines known to be done. show is installed as the handler
        for SIGINFO and SIGUSR1 (whichever exist). When SIGINFO does not
        exist and stdin is a tty, stdin is watched as well so that ^T
        (byte 20) sends SIGUSR1 to this process.

        Returns when q_status raises QueueEmpty. Terminal settings that
        were changed are restored on the way out.
        """
        q_status.make_reader()
        status = {sliceno: [0, 0] for sliceno in want_slices}
        #            [ds_ix, done_lines]
        # Only count the slices we actually grep. progress_lines below is
        # collected over want_slices, so counting every slice here would
        # make the summary percentage too low whenever -s is used.
        total_lines = sum(
            total_lines_per_slice_at_ds[-1][sliceno] for sliceno in want_slices
        )
        previous = [0]
        # base colour conf on whether stderr is a tty, not stdout.
        if args.colour == 'auto':
            colour.configure_from_environ(stdout=sys.stderr)

        def show(sig, frame):
            """Signal handler: print a progress summary on fd 2.

            If called again within two seconds of the previous call, a
            line per slice is printed before the summary.
            """
            t = monotonic()
            verbose = (previous[0] + 2 > t)  # within 2 seconds of previous
            previous[0] = t
            ds_ixes = []
            progress_lines = []
            progress_fraction = []
            for sliceno in want_slices:
                ds_ix, done_lines = status[sliceno]
                ds_ixes.append(ds_ix)
                # Updates only arrive every status_interval lines (and at
                # the end of each dataset), so the real position is
                # somewhere between done_lines and max_possible.
                max_possible = min(
                    done_lines + status_interval[sliceno],
                    total_lines_per_slice_at_ds[ds_ix + 1][sliceno])
                done_lines = (done_lines +
                              max_possible) / 2  # middle of the possibilities
                progress_lines.append(done_lines)
                total = total_lines_per_slice_at_ds[-1][sliceno]
                if total == 0:
                    progress_fraction.append(1)
                else:
                    progress_fraction.append(done_lines / total)
            progress_total = sum(progress_lines) / (total_lines or 1)
            # Slices more than ten percentage points behind the total are
            # highlighted (and reported as "worst" in the summary).
            bad_cutoff = progress_total - 0.1
            if verbose:
                show_ds = (len(datasets) > 1 and min(ds_ixes) != max(ds_ixes))
                for sliceno, ds_ix, p in zip(want_slices, ds_ixes,
                                             progress_fraction):
                    if ds_ix == len(datasets):
                        msg = 'DONE'
                    else:
                        # int(): round() returns a float on python 2, which
                        # the 'd' format code refuses.
                        msg = '{0:d}% of {1:n} lines'.format(
                            int(round(p * 100)),
                            total_lines_per_slice_at_ds[-1][sliceno])
                        if show_ds:
                            msg = '%s (in %s)' % (
                                msg,
                                datasets[ds_ix].quoted,
                            )
                    msg = '%9d: %s' % (
                        sliceno,
                        msg,
                    )
                    if p < bad_cutoff:
                        msg = colour(msg, 'grep/infohighlight')
                    else:
                        msg = colour(msg, 'grep/info')
                    write(2, msg.encode('utf-8') + b'\n')
            msg = '{0:d}% of {1:n} lines'.format(
                int(round(progress_total * 100)), total_lines)
            if len(datasets) > 1:
                min_ds = min(ds_ixes)
                max_ds = max(ds_ixes)
                if min_ds < len(datasets):
                    ds_name = datasets[min_ds].quoted
                    extra = '' if min_ds == max_ds else ' ++'
                    msg = '%s (in %s%s)' % (
                        msg,
                        ds_name,
                        extra,
                    )
            worst = min(progress_fraction)
            if worst < bad_cutoff:
                msg = '%s, worst %d%%' % (
                    msg,
                    round(worst * 100),
                )
            msg = colour('  SUMMARY: %s' % (msg, ), 'grep/info')
            write(2, msg.encode('utf-8') + b'\n')

        for signame in ('SIGINFO', 'SIGUSR1'):
            if hasattr(signal, signame):
                sig = getattr(signal, signame)
                signal.signal(sig, show)
                if hasattr(signal, 'pthread_sigmask'):
                    signal.pthread_sigmask(signal.SIG_UNBLOCK, {sig})
        tc_original = None
        using_stdin = False
        if not hasattr(signal, 'SIGINFO') and sys.stdin.isatty():
            # ^T won't work automatically on this OS, so we need to handle it
            # as terminal input
            import termios
            from accelerator.compat import selectors
            sel = selectors.DefaultSelector()
            sel.register(0, selectors.EVENT_READ)
            sel.register(q_status.r, selectors.EVENT_READ)
            try:
                tc_original = termios.tcgetattr(0)
                tc_changed = list(tc_original)
                tc_changed[3] &= ~(termios.ICANON | termios.IEXTEN)
                termios.tcsetattr(0, termios.TCSADRAIN, tc_changed)
                using_stdin = True
            except Exception:
                # Best effort: if the terminal can't be reconfigured we
                # just don't watch stdin.
                pass
            # we can't set stdin nonblocking, because it's probably the same
            # file description as stdout, so work around that with alarms.
            def got_alarm(sig, frame):
                raise IOError()

            signal.signal(signal.SIGALRM, got_alarm)
        try:
            while True:
                if using_stdin:
                    do_q = False
                    for key, _ in sel.select():
                        if key.fd == 0:
                            try:
                                # in case something else read it we block
                                # for max 1 second
                                signal.alarm(1)
                                try:
                                    pressed = ord(os.read(0, 1))
                                finally:
                                    signal.alarm(0)
                                if pressed == 20:
                                    # "^T" shows in the terminal
                                    write(2, b'\n')
                                    os.kill(os.getpid(), signal.SIGUSR1)
                            except Exception:
                                # Best effort, a failed read (including the
                                # alarm above firing) is just ignored.
                                pass
                        elif key.fd == q_status.r:
                            do_q = True
                    if not do_q:
                        continue
                try:
                    sliceno, finished_dataset = q_status.get()
                except QueueEmpty:
                    return
                if finished_dataset:
                    # Move on to the next dataset, with everything up to
                    # there counted as done.
                    ds_ix = status[sliceno][0] + 1
                    status[sliceno] = [
                        ds_ix, total_lines_per_slice_at_ds[ds_ix][sliceno]
                    ]
                else:
                    status[sliceno][1] += status_interval[sliceno]
        finally:
            # tc_original is only set if termios was imported above.
            if tc_original is not None:
                try:
                    termios.tcsetattr(0, termios.TCSADRAIN, tc_original)
                except Exception:
                    pass

    status_process = mp.SimplifiedProcess(target=status_collector,
                                          name='ax grep status')
    # everything else will write, so make it a writer right away
    q_status.make_writer()

    # Output is only allowed while holding this lock, so that long lines
    # do not get intermixed. (Or when alone in producing output.)
    io_lock = Lock()

    # This contains some extra stuff to be a better base for the other
    # outputters.
    # When used directly it enforces no ordering, but merges smaller writes
    # to keep the number of syscalls down.

    class Outputter:
        """Unordered outputter that merges small writes.

        Also the base class of the synchronising outputters, which is why
        it keeps both queues and a buffer list that it never fills itself.
        """

        def __init__(self, q_in, q_out):
            self.q_in, self.q_out = q_in, q_out
            self.buffer = []
            self.merge_buffer = b''

        def put(self, data):
            """Add data to the pending output, flushing at 1024 bytes."""
            self.merge_buffer = self.merge_buffer + data
            if len(self.merge_buffer) < 1024:
                return
            self.move_merge()

        def move_merge(self):
            """Write the pending output (if any) while holding io_lock."""
            pending = self.merge_buffer
            if not pending:
                return
            with io_lock:
                write(1, pending)
            self.merge_buffer = b''

        def start(self, ds):
            """Called before each dataset, nothing to do without ordering."""
            return None

        def end(self, ds):
            """Called after each dataset, flushes the pending output."""
            self.move_merge()

        def finish(self):
            """Called after the last dataset, nothing to do here."""
            return None

        def full(self):
            """True once more than 5000 entries are held in the buffer."""
            held = len(self.buffer)
            return held > 5000

        def excite(self):
            """Flush pending output and, if anything is buffered, pump
            without waiting. (pump is provided by the subclasses.)"""
            self.move_merge()
            if not self.buffer:
                return
            self.pump(False)

    # Partially ordered output, each header change acts as a fence.
    # This is used in all slices except the first.
    #
    # The queue gets True when the previous slice is ready for the next
    # header change, and None when the header is printed (and it's ok
    # to resume output).

    class HeaderWaitOutputter(Outputter):
        """Outputter that holds back output at each dataset in headers.

        While self.buffer is non-empty this slice is waiting at a fence:
        the buffer starts with None (the fence), followed by the data
        collected since then, possibly followed by further fences.
        Messages from q_in are passed on to q_out; any message other than
        True releases the data up to the next fence.
        """

        def start(self, ds):
            # Only datasets in headers need a fence, for the others we just
            # take the chance to flush and check the queue.
            if ds in headers:
                self.add_wait()
            else:
                self.excite()

        def add_wait(self):
            # Each sync point is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def move_merge(self):
            # Take the pending output. It is written directly if we are not
            # (or no longer, after pumping) waiting at a fence, otherwise it
            # is appended to the buffer.
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            with io_lock:
                write(1, data)

        def pump(self, wait=None):
            # Handle one message from q_in (plus any True messages before it).
            # wait defaults to self.full(), it is passed to q_in.get
            # (presumably making the get block - confirm against the queue
            # implementation).
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # since pump is only called when we have outputted all
                # currently allowed output or when the next message is an
                # unblock for such output we can just unconditionally send
                # the True on to the next slice here.
                self.q_out.put(True)
                self.pump(wait)
                return
            else:
                # Output may resume: tell the next slice, then write what we
                # have been holding back.
                self.q_out.put(None)
                self.drain()

        def drain(self):
            # Write the buffered data up to the next fence (or all of it),
            # and remove what was written, including the leading fence.
            assert self.buffer[
                0] is None, 'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        break
                    elif data:
                        write(1, data)
                else:
                    # We did not reach the next fence, so last item is real data
                    # and needs to be removed. (The buffer will then be empty and
                    # output will continue directly until reaching the sync point.)
                    pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            # Keep pumping (with wait=True) until everything buffered has
            # been written.
            while self.buffer:
                self.pump(True)

    # Partially ordered output, each header change acts as a fence.
    # This is used only in the first slice, and outputs the headers.
    #
    # When it is ready to output headers it sends True in the queue.
    # When the True has travelled around the queue ring all slices are
    # ready, the headers are printed, and None is sent to let the other
    # slices resume output.
    # (When the None returns it is ignored, because output is resumed
    # as soon as the headers are printed.)

    class HeaderOutputter(HeaderWaitOutputter):
        """The HeaderWaitOutputter variant that also prints the headers.

        Puts True on q_out when it reaches a fence. When a True arrives on
        q_in the next entry from headers_iter is written, None is put on
        q_out, and the output held back at the fence is written.
        """

        def add_wait(self):
            # Only announce the fence right away if we are not already
            # waiting at an earlier one, drain announces the later ones.
            if not self.buffer:
                self.q_out.put(True)
            self.buffer.append(None)
            self.buffer.append(
                b'')  # Avoid need for special case in .drain/.put
            self.pump()

        def drain(self):
            # Same as in HeaderWaitOutputter, except that stopping at the
            # next fence also announces that fence on q_out.
            assert self.buffer[
                0] is None, 'The buffer must always stop at a sync point (or empty)'
            with io_lock:
                for pos, data in enumerate(self.buffer[1:], 1):
                    if data is None:
                        self.q_out.put(True)
                        break
                    elif data:
                        write(1, data)
                else:
                    # No further fence, so everything was written and the
                    # whole buffer is removed.
                    pos += 1
            self.buffer[:pos] = ()

        def pump(self, wait=None):
            # wait defaults to self.full(), it is passed to q_in.get
            # (presumably making the get block - confirm against the queue
            # implementation).
            if wait is None:
                wait = self.full()
            try:
                got = self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            if got is True:
                # The True we put in when reaching the fence has travelled
                # all the way around the queue ring, it's time to print the
                # new headers
                write(1, next(headers_iter))
                # and then unblock the other slices
                self.q_out.put(None)
                self.drain()
                # No else, when the None comes back we just drop it.
            if not wait:
                # When not waiting, keep going until the queue is empty.
                self.pump(False)

    # Fully ordered output, each slice waits for the previous slice.
    # For each ds, waits for None (anything really) before starting,
    # sends None when done.

    class OrderedOutputter(Outputter):
        """Outputter for --ordered: one slice at a time for each dataset.

        Every dataset starts by adding a None to the buffer. Each message
        received on q_in releases the output of one dataset, and a None
        is put on q_out when this slice is done with a dataset.
        """

        def start(self, ds):
            # Each ds is separated by None in the buffer
            self.buffer.append(None)
            self.buffer.append(b'')  # Avoid need for special case in .drain
            self.pump()

        def end(self, ds):
            self.move_merge()
            if not self.buffer:
                # We are done with this ds, so let next slice continue
                self.q_out.put(None)

        def pump(self, wait=None):
            # Receive one message from q_in, the value is ignored.
            # wait defaults to self.full(), it is passed to q_in.get
            # (presumably making the get block - confirm against the queue
            # implementation).
            if wait is None:
                wait = self.full()
            try:
                self.q_in.get(wait)
            except QueueEmpty:
                if wait:
                    # previous slice has exited without sending all messages
                    raise
                return
            self.drain()

        def move_merge(self):
            # Take the pending output. It is written directly if we are not
            # (or no longer, after pumping) waiting for our turn, otherwise
            # it is appended to the buffer.
            data = self.merge_buffer
            self.merge_buffer = b''
            if self.buffer:
                self.pump()
                if self.buffer:
                    self.buffer.append(data)
                    return
            # No need for a lock, the other slices aren't writing concurrently.
            write(1, data)

        def drain(self):
            # Write the buffered data for one ds and remove it (including
            # the leading None) from the buffer.
            assert self.buffer[0] is None
            for pos, data in enumerate(self.buffer[1:], 1):
                if data is None:
                    # We are done with this ds, so let next slice continue
                    self.q_out.put(None)
                    break
                elif data:
                    write(1, data)
            else:
                # We did not reach the next ds, so last item is real data and
                # needs to be removed. (The buffer will then be empty and
                # output will continue directly until reaching the next ds.)
                pos += 1
            self.buffer[:pos] = ()

        def finish(self):
            # If anything was still buffered the last ds has not been
            # reported as done (end only does that with an empty buffer),
            # so do it here once everything has been written.
            not_finished = bool(self.buffer)
            while self.buffer:
                self.pump(True)
            if not_finished:
                self.q_out.put(None)

    # Same as above but for the first slice so it prints headers when needed.

    class OrderedHeaderOutputter(OrderedOutputter):
        """OrderedOutputter that puts the headers first in the output for
        each ds in headers."""

        def start(self, ds):
            # A None in the buffer marks where each ds begins.
            self.buffer.append(None)
            # What follows is either the changed headers, or an empty
            # placeholder so .drain needs no special case.
            leading = next(headers_iter) if ds in headers else b''
            self.buffer.append(leading)
            self.pump()

    # Choose the right outputter for the kind of sync we need.
    def outputter(q_in, q_out, first_slice=False):
        """Return an instance of the outputter class the options call for.

        -l needs no sync at all, --ordered needs full ordering and header
        changes need fences. In the last two cases the first slice gets
        the class that also prints the headers.
        """
        if args.list_matching:
            chosen = Outputter
        elif args.ordered:
            chosen = OrderedHeaderOutputter if first_slice else OrderedOutputter
        elif headers:
            chosen = HeaderOutputter if first_slice else HeaderWaitOutputter
        else:
            chosen = Outputter
        return chosen(q_in, q_out)

    # Make printer for the selected output options
    def make_show(prefix, used_columns):
        """Return a show(lineno, items) function giving one encoded line.

        prefix       -- values to put before the data: a dict for the json
                        format, a sequence for the other formats
        used_columns -- the column names, used as keys in the json format
        """

        def matching_ranges(item):
            # Yield (start, stop) for all parts of item matched by any
            # pattern, in order, with overlapping/adjacent ranges merged.
            spans = sorted(
                m.span() for p in patterns for m in p.finditer(item))
            if not spans:
                return
            cur_start, cur_stop = spans[0]
            for a, b in spans[1:]:
                if a > cur_stop:
                    yield cur_start, cur_stop
                    cur_start, cur_stop = a, b
                else:
                    cur_stop = max(cur_stop, b)
            yield cur_start, cur_stop

        def filter_item(item):
            # Just the matched parts of item, '' if nothing matched.
            return ''.join([item[a:b] for a, b in matching_ranges(item)])

        if args.format == 'json':
            dumps = json.JSONEncoder(
                ensure_ascii=False,
                default=json_default,
            ).encode

            def show(lineno, items):
                if only_matching == 'part':
                    items = [filter_item(unicode(value)) for value in items]
                pairs = zip(used_columns, items)
                if only_matching == 'columns':
                    # keep only the columns where something matched
                    record = {}
                    for key, value in pairs:
                        if filter_item(unicode(value)):
                            record[key] = value
                else:
                    record = dict(pairs)
                if args.show_lineno:
                    prefix['lineno'] = lineno
                if prefix:
                    # with a prefix the columns go under the 'data' key
                    prefix['data'] = record
                    record = prefix
                payload = dumps(record)
                return payload.encode('utf-8', 'surrogatepass') + b'\n'
        else:

            def colour_item(item):
                # item with all matched parts highlighted
                pieces = []
                cursor = 0
                for a, b in matching_ranges(item):
                    pieces.append(item[cursor:a])
                    pieces.append(colour(item[a:b], 'grep/highlight'))
                    cursor = b
                pieces.append(item[cursor:])
                return ''.join(pieces)

            def show(lineno, items):
                leading = list(prefix)
                if args.show_lineno:
                    leading.append(unicode(lineno))
                texts = [unicode(value) for value in items]
                if only_matching == 'columns':
                    texts = [text if filter_item(text) else ''
                             for text in texts]
                elif only_matching:
                    texts = [filter_item(text) for text in texts]
                # The lengths are only computed if separate asks for them,
                # but the list in each of these generators is the one
                # built right here, not what texts is rebound to later.
                widths = (len(text) for text in leading + texts)
                if highlight_matches:
                    texts = [colour_item(text) for text in texts]
                if escape_item:
                    # Escaping changes the visible length (colouring does
                    # not), so adjust the lengths by the difference.
                    unescaped = (len(text) for text in leading + texts)
                    texts = [escape_item(text) for text in texts]
                    escaped = (len(text) for text in leading + texts)
                    widths = (
                        width + esc - unesc
                        for width, unesc, esc
                        in zip(widths, unescaped, escaped)
                    )
                leading.extend(texts)
                line = separate(leading, widths)
                return line.encode('utf-8', errors) + b'\n'

        return show

    # This is called for each slice in each dataset.
    # Each slice has a separate process (the same for all datasets).
    # The first slice runs in the main process (unless -l), everything
    # else runs from one_slice.

    def grep(ds, sliceno, out):
        """Grep one slice of one dataset, sending the lines to show to out.

        ds      -- the dataset to read
        sliceno -- the slice of ds to read
        out     -- the outputter, gets start(ds), put(line) for each line
                   to show and finally end(ds)

        When q_list is set (-l) no lines are output. Instead (ds, sliceno)
        is put on q_list at the first matching line, and the function
        returns right there (without calling out.end).
        """
        out.start(ds)
        # Avoid the any() overhead in the common single pattern case.
        if len(patterns) == 1:
            chk = patterns[0].search
        else:

            def chk(s):
                return any(p.search(s) for p in patterns)

        # Mutable so mk_iter can clear it: only the first iterator created
        # gets the status callback.
        first = [True]

        def mk_iter(col):
            # Iterator over column col in this slice, with bytes columns
            # decoded to text.
            kw = {}
            if first[0]:
                first[0] = False
                lines = ds.lines[sliceno]
                # No point in a callback that would never be called.
                if lines > status_interval[sliceno]:

                    def cb(n):
                        # Report progress, and let the outputter handle
                        # its queue while we are at it.
                        q_status.put((sliceno, False))
                        out.excite()

                    kw['callback'] = cb
                    kw['callback_interval'] = status_interval[sliceno]
            if ds.columns[col].type == 'ascii':
                kw['_type'] = 'unicode'
            it = ds._column_iterator(sliceno, col, **kw)
            if ds.columns[col].type == 'bytes':
                errors = 'replace' if PY2 else 'surrogateescape'
                if ds.columns[col].none_support:
                    it = (None if v is None else v.decode('utf-8', errors)
                          for v in it)
                else:
                    it = (v.decode('utf-8', errors) for v in it)
            return it

        used_columns = columns_for_ds(ds)
        used_grep_columns = grep_columns and columns_for_ds(ds, grep_columns)
        # Separate iterators for the grepped columns are only needed when
        # they are not the same set as the shown columns. Otherwise
        # grep_iter just gives None, and the shown items are grepped.
        if grep_columns and set(used_grep_columns) != set(used_columns):
            grep_iter = izip(*(mk_iter(col) for col in used_grep_columns))
        else:
            grep_iter = repeat(None)
        lines_iter = izip(*(mk_iter(col) for col in used_columns))
        # The last before_context non-shown lines, for output before a match.
        if args.before_context:
            before = deque((), args.before_context)
        else:
            before = None
        # The prefix is a dict for json and a tuple for the other formats.
        if args.format == 'json':
            prefix = {}
            if args.show_dataset:
                prefix['dataset'] = ds
            if args.show_sliceno:
                prefix['sliceno'] = sliceno
            show = make_show(prefix, used_columns)
        else:
            prefix = []
            if args.show_dataset:
                prefix.append(ds)
            if args.show_sliceno:
                prefix.append(str(sliceno))
            prefix = tuple(prefix)
            show = make_show(prefix, used_columns)
        if args.invert_match:
            maybe_invert = operator.not_
        else:
            maybe_invert = bool
        # How many more lines to show (the match itself + after context).
        to_show = 0
        for lineno, (grep_items,
                     items) in enumerate(izip(grep_iter, lines_iter)):
            if maybe_invert(
                    any(chk(unicode(item)) for item in grep_items or items)):
                if q_list:
                    q_list.put((ds, sliceno))
                    return
                # Output the saved before context first.
                while before:
                    out.put(show(*before.popleft()))
                to_show = 1 + args.after_context
            if to_show:
                out.put(show(lineno, items))
                to_show -= 1
            elif before is not None:
                before.append((lineno, items))
        out.end(ds)

    # This runs in a separate process for each slice except the first
    # one (unless -l), which is handled specially in the main process.

    def one_slice(sliceno, q_in, q_out, q_to_close):
        """Process entry point: grep slice sliceno of every dataset.

        q_in / q_out -- the queues from the previous and to the next slice
                        (may be None)
        q_to_close   -- a queue this process should just close (may be None)

        Exits with status 1 if a queue raises QueueEmpty.
        """
        # Set up our end of each queue that exists.
        if q_to_close:
            q_to_close.close()
        if q_in:
            q_in.make_reader()
        if q_out:
            q_out.make_writer()
        if q_list:
            q_list.make_writer()
        try:
            sink = outputter(q_in, q_out)
            for dataset in datasets:
                already_listed = seen_list is not None and dataset in seen_list
                if not already_listed:
                    grep(dataset, sliceno, sink)
                # The status process gets a "finished" message for every
                # dataset, skipped or not.
                q_status.put((sliceno, True))
            sink.finish()
        except QueueEmpty:
            # some other process died, no need to print an error here
            sys.exit(1)

    headers_prefix = []
    if args.show_dataset:
        headers_prefix.append('[DATASET]')
    if args.show_sliceno:
        headers_prefix.append('[SLICE]')
    if args.show_lineno:
        headers_prefix.append('[LINE]')

    # {ds: headers} for each ds where headers change (not including the first).
    # this is every ds where sync between slices has to happen when not --ordered.
    headers = OrderedDict()
    if args.headers:
        current_headers = None
        for ds in datasets:
            candidate_headers = columns_for_ds(ds)
            if candidate_headers != current_headers:
                headers[ds] = current_headers = candidate_headers

        def gen_headers(headers):
            """Return the encoded header line for the column names in
            headers, preceded by whatever headers_prefix holds."""
            names = headers_prefix + headers
            if escape_item:
                names = [escape_item(name) for name in names]
            styled = (colour(name, 'grep/header') for name in names)
            # the lengths are those of the names before colouring
            line = separate(styled, (len(name) for name in names))
            return line.encode('utf-8', 'surrogatepass') + b'\n'

        # remove the starting ds, so no header changes means no special handling.
        current_headers = headers.pop(datasets[0])
        if not args.list_matching:
            write(1, gen_headers(current_headers))
        headers_iter = iter(map(gen_headers, headers.values()))

    q_in = q_out = first_q_out = q_to_close = q_list = None
    children = [status_process]
    seen_list = None
    if args.list_matching:
        # in this case all slices get their own process
        # and the main process just prints the matching slices
        q_list = mp.LockFreeQueue()
        separate_process_slices = want_slices
        if not args.show_sliceno:
            seen_list = mp.MpSet()
    else:
        separate_process_slices = want_slices[1:]
        if args.ordered or headers:
            # needs to sync in some way
            q_in = first_q_out = mp.LockFreeQueue()
    for sliceno in separate_process_slices:
        if q_in:
            q_out = mp.LockFreeQueue()
        p = mp.SimplifiedProcess(
            target=one_slice,
            args=(
                sliceno,
                q_in,
                q_out,
                q_to_close,
            ),
            name='slice-%d' % (sliceno, ),
        )
        children.append(p)
        if q_in and q_in is not first_q_out:
            q_in.close()
        q_to_close = first_q_out
        q_in = q_out
    if q_in:
        q_out = first_q_out
        q_in.make_reader()
        q_out.make_writer()
        if args.ordered:
            q_in.put_local(None)
    del q_to_close
    del first_q_out

    try:
        if args.list_matching:
            if args.headers:
                headers_prefix = ['[DATASET]']
                if seen_list is None:
                    headers_prefix.append('[SLICE]')
                write(1, gen_headers([]))
            ordered_res = defaultdict(set)
            q_list.make_reader()
            if seen_list is None:
                used_columns = ['dataset', 'sliceno']
            else:
                used_columns = ['dataset']
            inner_show = make_show({} if args.format == 'json' else [],
                                   used_columns)

            def show(ds, sliceno=None):
                """Write one -l result line: ds, and sliceno if given."""
                items = [ds] if sliceno is None else [ds, sliceno]
                write(1, inner_show(None, items))

            while True:
                try:
                    ds, sliceno = q_list.get()
                except QueueEmpty:
                    break
                if seen_list is None:
                    if args.ordered:
                        ordered_res[ds].add(sliceno)
                    else:
                        show(ds, sliceno)
                elif ds not in seen_list:
                    seen_list.add(ds)
                    if not args.ordered:
                        show(ds)
            if args.ordered:
                for ds in datasets:
                    if seen_list is None:
                        for sliceno in sorted(ordered_res[ds]):
                            show(ds, sliceno)
                    else:
                        if ds in seen_list:
                            show(ds)
        else:
            out = outputter(q_in, q_out, first_slice=True)
            sliceno = want_slices[0]
            for ds in datasets:
                grep(ds, sliceno, out)
                q_status.put((sliceno, True))
            out.finish()
    except QueueEmpty:
        # don't print an error, probably a subprocess died from EPIPE before
        # the main process. (or the subprocess already printed an error.)
        return 1

    q_status.close()
    for c in children:
        c.join()
        if c.exitcode:
            return 1
示例#5
0
def main():
    """Entry point for the command line tool.

    Splits sys.argv into global options and a command (expanding aliases
    on the way), parses the global options, runs setup() and then calls
    the selected entry of COMMANDS with the remaining arguments.

    Returns the value returned by the command, or 1 if the command raised
    UserError or an OSError with errno EPIPE. Calls sys.exit() directly
    for --version and when the command is missing or unknown (status 2).
    """
    # SIGUSR1 is used by several commands, and its default action is to
    # kill the process. Deal with it first to keep the race window small:
    # block it where possible, otherwise ignore it.
    if hasattr(signal, 'pthread_sigmask'):
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR1})
    else:
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)

    # Since python 3.8 the default start_method on macOS is 'spawn', which
    # does not work for us. 'fork' is fairly unsafe on macOS, but that is
    # still better than not working at all. See
    # https://bugs.python.org/issue33725
    # for more information.
    import multiprocessing as mp
    if hasattr(mp, 'set_start_method'):
        if hasattr(mp, 'set_forkserver_preload'):
            # Let the forkserver (used by database updates) pre-import
            # everything when that is possible.
            mp.set_forkserver_preload(['accelerator', 'accelerator.server'])
        mp.set_start_method('fork')

    from accelerator import g
    g.running = 'shell'

    from accelerator.autoflush import AutoFlush
    main_argv, argv = split_args(sys.argv[1:])
    sys.stdout = AutoFlush(sys.stdout)
    sys.stderr = AutoFlush(sys.stderr)

    # Built-in defaults; both dicts are handed to parse_user_config before
    # they are used.
    aliases = {'cat': 'grep -e ""'}
    colour_d = {
        'warning': ('RED',),
        'highlight': ('BOLD',),
        'grep/highlight': ('RED',),
        'info': ('BRIGHTBLUE',),
        'infohighlight': ('BOLD', 'BRIGHTBLUE'),
        'separator': ('CYAN', 'UNDERLINE'),
        'header': ('BRIGHTBLUE', 'BOLD'),
    }
    parse_user_config(aliases, colour_d)
    colour._names.update(colour_d)

    # Expand aliases for as long as the first argument names one.
    seen_aliases = []
    while argv and argv[0] in aliases:
        name = argv[0]
        if name == 'noalias':  # save the user from itself
            break
        definition = aliases[name]
        try:
            words = shlex.split(definition)
        except ValueError as e:
            raise ValueError(
                'Failed to expand alias %s (%r): %s' % (name, definition, e))
        extra_main_argv, argv = split_args(words + argv[1:])
        main_argv.extend(extra_main_argv)
        # An alias that expands to itself just adds arguments; stop here.
        if words and words[0] == name:
            break
        if name in seen_aliases:
            raise ValueError('Alias loop: %r' % (seen_aliases + [name],))
        seen_aliases.append(name)

    # Drop any leading "noalias" markers before the real command.
    while argv and argv[0] == 'noalias':
        del argv[0]

    # Help epilog: the command list (right-aligned names), then the aliases.
    width = max(len(name) for name in COMMANDS)
    row = '  %%%ds  %%s' % (width,)
    epilog = ['commands:', '']
    epilog.extend(
        row % (name, func.help) for name, func in sorted(COMMANDS.items()))
    epilog += ['', 'aliases:']
    epilog.extend('  %s = %s' % item for item in sorted(aliases.items()))
    epilog.append('')
    help_hint = colour('%(prog)s <command> --help', 'help/highlight')
    epilog.append('use "' + help_hint + '" for <command> usage')
    intro_hint = colour('%(prog)s intro', 'help/highlight')
    epilog.append('try "' + intro_hint + '" for an introduction')

    parser = ArgumentParser(
        usage='%(prog)s [--config CONFIG_FILE] command [args]',
        epilog='\n'.join(epilog),
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--config', metavar='CONFIG_FILE', help='configuration file')
    parser.add_argument(
        '--version', action='store_true',
        help='alias for the version command')
    args = parser.parse_args(main_argv)
    if args.version:
        sys.exit(cmd_version(()))

    command = argv.pop(0) if argv else None
    if command not in COMMANDS:
        parser.print_help(file=sys.stderr)
        if command is not None:
            print(file=sys.stderr)
            print('Unknown command "%s"' % (command,), file=sys.stderr)
        sys.exit(2)

    # These commands get False instead of the --config value.
    if command in ('init', 'intro', 'version'):
        config_fn = False
    else:
        config_fn = args.config
    handler = COMMANDS[command]
    debug_cmd = getattr(handler, 'is_debug', False)
    try:
        setup(config_fn, debug_cmd)
        prog = '%s %s' % (basename(sys.argv[0]), command)
        argv.insert(0, prog)
        return handler(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    except OSError as e:
        # Only a broken pipe is handled quietly; anything else propagates.
        if e.errno != errno.EPIPE:
            raise
        return 1
    except KeyboardInterrupt:
        # Letting KeyboardInterrupt escape makes python print a traceback.
        # We don't want that, but we do want to die from SIGINT (so the
        # calling process can tell what happened): restore the default
        # handler and send the signal to ourselves.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        os.kill(os.getpid(), signal.SIGINT)
        # Still here? Then fall back to re-raising the KeyboardInterrupt.
        raise