def Run(self, cmd_val): # type: (cmd_value__Argv) -> int """ printf: printf [-v var] format [argument ...] """ arg_r = args.Reader(cmd_val.argv, spids=cmd_val.arg_spids) arg_r.Next() # skip argv[0] arg = PRINTF_SPEC.Parse(arg_r) fmt, fmt_spid = arg_r.ReadRequired2('requires a format string') varargs, spids = arg_r.Rest2() #log('fmt %s', fmt) #log('vals %s', vals) arena = self.parse_ctx.arena if fmt in self.parse_cache: parts = self.parse_cache[fmt] else: line_reader = reader.StringLineReader(fmt, arena) # TODO: Make public lexer = self.parse_ctx._MakeLexer(line_reader) p = _FormatStringParser(lexer) arena.PushSource(source.ArgvWord(fmt_spid)) try: parts = p.Parse() except error.Parse as e: self.errfmt.PrettyPrintError(e) return 2 # parse error finally: arena.PopSource() self.parse_cache[fmt] = parts if 0: print() for part in parts: part.PrettyPrint() print() out = [] arg_index = 0 num_args = len(varargs) backslash_c = False while True: for part in parts: if isinstance(part, printf_part.Literal): token = part.token if token.id == Id.Format_EscapedPercent: s = '%' else: s = word_compile.EvalCStringToken(token.id, token.val) out.append(s) elif isinstance(part, printf_part.Percent): flags = None if len(part.flags) > 0: flags = '' for flag_token in part.flags: flags += flag_token.val width = None if part.width: if part.width.id in (Id.Format_Num, Id.Format_Zero): width = part.width.val width_spid = part.width.span_id elif part.width.id == Id.Format_Star: if arg_index < num_args: width = varargs[arg_index] width_spid = spids[arg_index] arg_index += 1 else: width = '' width_spid = runtime.NO_SPID else: raise AssertionError() try: width = int(width) except ValueError: if width_spid == runtime.NO_SPID: width_spid = part.width.span_id self.errfmt.Print( "printf got invalid number %r for the width", s, span_id=width_spid) return 1 precision = None if part.precision: if part.precision.id == Id.Format_Dot: precision = '0' precision_spid = part.precision.span_id elif part.precision.id in (Id.Format_Num, Id.Format_Zero): precision = part.precision.val precision_spid = part.precision.span_id elif part.precision.id == Id.Format_Star: if arg_index < num_args: precision = varargs[arg_index] precision_spid = spids[arg_index] arg_index += 1 else: precision = '' precision_spid = runtime.NO_SPID else: raise AssertionError() try: precision = int(precision) except ValueError: if precision_spid == runtime.NO_SPID: precision_spid = part.precision.span_id self.errfmt.Print( "printf got invalid number %r for the precision", s, span_id=precision_spid) return 1 if arg_index < num_args: s = varargs[arg_index] word_spid = spids[arg_index] arg_index += 1 else: s = '' word_spid = runtime.NO_SPID typ = part.type.val if typ == 's': if precision is not None: s = s[:precision] # truncate elif typ == 'q': s = qsn.maybe_shell_encode(s) elif typ == 'b': # Process just like echo -e, except \c handling is simpler. parts = [] # type: List[str] lex = match.EchoLexer(s) while True: id_, value = lex.Next() if id_ == Id.Eol_Tok: # Note: This is really a NUL terminator break p = word_compile.EvalCStringToken(id_, value) # Unusual behavior: '\c' aborts processing! if p is None: backslash_c = True break parts.append(p) s = ''.join(parts) elif typ in 'diouxX' or part.type.id == Id.Format_Time: try: d = int(s) except ValueError: if len(s) >= 1 and s[0] in '\'"': # TODO: utf-8 decode s[1:] to be more correct. Probably # depends on issue #366, a utf-8 library. # Note: len(s) == 1 means there is a NUL (0) after the quote.. d = ord(s[1]) if len(s) >= 2 else 0 elif part.type.id == Id.Format_Time and len( s) == 0 and word_spid == runtime.NO_SPID: # Note: No argument means -1 for %(...)T as in Bash Reference # Manual 4.2 "If no argument is specified, conversion behaves # as if -1 had been given." d = -1 else: # This works around the fact that in the arg recycling case, you have no spid. if word_spid == runtime.NO_SPID: self.errfmt.Print( "printf got invalid number %r for this substitution", s, span_id=part.type.span_id) else: self.errfmt.Print( "printf got invalid number %r", s, span_id=word_spid) return 1 if typ in 'di': s = str(d) elif typ in 'ouxX': if d < 0: e_die( "Can't format negative number %d with %%%s", d, typ, span_id=part.type.span_id) if typ == 'u': s = str(d) elif typ == 'o': s = '%o' % d elif typ == 'x': s = '%x' % d elif typ == 'X': s = '%X' % d elif part.type.id == Id.Format_Time: # %(...)T # Initialize timezone: # `localtime' uses the current timezone information initialized # by `tzset'. The function `tzset' refers to the environment # variable `TZ'. When the exported variable `TZ' is present, # its value should be reflected in the real environment # variable `TZ' before call of `tzset'. # # Note: unlike LANG, TZ doesn't seem to change behavior if it's # not exported. # # TODO: In Oil, provide an API that doesn't rely on libc's # global state. tzcell = self.mem.GetCell('TZ') if tzcell and tzcell.exported and tzcell.val.tag_( ) == value_e.Str: tzval = cast(value__Str, tzcell.val) posix.putenv('TZ', tzval.s) time.tzset() # Handle special values: # User can specify two special values -1 and -2 as in Bash # Reference Manual 4.2: "Two special argument values may be # used: -1 represents the current time, and -2 represents the # time the shell was invoked." from # https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf if d == -1: # the current time d = time.time() elif d == -2: # the shell start time d = shell_start_time s = time.strftime(typ[1:-2], time.localtime(d)) if precision is not None: s = s[:precision] # truncate else: raise AssertionError() else: raise AssertionError() if width is not None: if flags: if '-' in flags: s = s.ljust(width, ' ') elif '0' in flags: s = s.rjust(width, '0') else: pass else: s = s.rjust(width, ' ') out.append(s) else: raise AssertionError() if backslash_c: # 'printf %b a\cb xx' - \c terminates processing! break if arg_index >= num_args: break # Otherwise there are more args. So cycle through the loop once more to # implement the 'arg recycling' behavior. result = ''.join(out) if arg.v: var_name = arg.v # Notes: # - bash allows a[i] here (as in unset and ${!x}), but we haven't # implemented it. # - TODO: get the span_id for arg.v! if not match.IsValidVarName(var_name): raise error.Usage('got invalid variable name %r' % var_name) state.SetStringDynamic(self.mem, var_name, result) else: sys.stdout.write(result) return 0
def Run(self, cmd_val): # type: (cmd_value__Argv) -> int """ printf: printf [-v var] format [argument ...] """ attrs, arg_r = flag_spec.ParseCmdVal('printf', cmd_val) arg = arg_types.printf(attrs.attrs) fmt, fmt_spid = arg_r.ReadRequired2('requires a format string') varargs, spids = arg_r.Rest2() #log('fmt %s', fmt) #log('vals %s', vals) arena = self.parse_ctx.arena if fmt in self.parse_cache: parts = self.parse_cache[fmt] else: line_reader = reader.StringLineReader(fmt, arena) # TODO: Make public lexer = self.parse_ctx._MakeLexer(line_reader) parser = _FormatStringParser(lexer) with alloc.ctx_Location(arena, source.ArgvWord(fmt_spid)): try: parts = parser.Parse() except error.Parse as e: self.errfmt.PrettyPrintError(e) return 2 # parse error self.parse_cache[fmt] = parts if 0: print() for part in parts: part.PrettyPrint() print() out = [] # type: List[str] arg_index = 0 num_args = len(varargs) backslash_c = False while True: for part in parts: UP_part = part if part.tag_() == printf_part_e.Literal: part = cast(printf_part__Literal, UP_part) token = part.token if token.id == Id.Format_EscapedPercent: s = '%' else: s = word_compile.EvalCStringToken(token) out.append(s) elif part.tag_() == printf_part_e.Percent: part = cast(printf_part__Percent, UP_part) flags = [] # type: List[str] if len(part.flags) > 0: for flag_token in part.flags: flags.append(flag_token.val) width = -1 # nonexistent if part.width: if part.width.id in (Id.Format_Num, Id.Format_Zero): width_str = part.width.val width_spid = part.width.span_id elif part.width.id == Id.Format_Star: if arg_index < num_args: width_str = varargs[arg_index] width_spid = spids[arg_index] arg_index += 1 else: width_str = '' # invalid width_spid = runtime.NO_SPID else: raise AssertionError() try: width = int(width_str) except ValueError: if width_spid == runtime.NO_SPID: width_spid = part.width.span_id self.errfmt.Print_("printf got invalid width %r" % width_str, span_id=width_spid) return 1 precision = -1 # nonexistent if part.precision: if part.precision.id == Id.Format_Dot: precision_str = '0' precision_spid = part.precision.span_id elif part.precision.id in (Id.Format_Num, Id.Format_Zero): precision_str = part.precision.val precision_spid = part.precision.span_id elif part.precision.id == Id.Format_Star: if arg_index < num_args: precision_str = varargs[arg_index] precision_spid = spids[arg_index] arg_index += 1 else: precision_str = '' precision_spid = runtime.NO_SPID else: raise AssertionError() try: precision = int(precision_str) except ValueError: if precision_spid == runtime.NO_SPID: precision_spid = part.precision.span_id self.errfmt.Print_( 'printf got invalid precision %r' % precision_str, span_id=precision_spid) return 1 #log('index=%d n=%d', arg_index, num_args) if arg_index < num_args: s = varargs[arg_index] word_spid = spids[arg_index] arg_index += 1 else: s = '' word_spid = runtime.NO_SPID typ = part.type.val if typ == 's': if precision >= 0: s = s[:precision] # truncate elif typ == 'q': s = qsn.maybe_shell_encode(s) elif typ == 'b': # Process just like echo -e, except \c handling is simpler. c_parts = [] # type: List[str] lex = match.EchoLexer(s) while True: id_, tok_val = lex.Next() if id_ == Id.Eol_Tok: # Note: This is really a NUL terminator break # TODO: add span_id from argv tok = Token(id_, runtime.NO_SPID, tok_val) p = word_compile.EvalCStringToken(tok) # Unusual behavior: '\c' aborts processing! if p is None: backslash_c = True break c_parts.append(p) s = ''.join(c_parts) elif typ in 'diouxX' or part.type.id == Id.Format_Time: try: d = int(s) except ValueError: if len(s) >= 1 and s[0] in '\'"': # TODO: utf-8 decode s[1:] to be more correct. Probably # depends on issue #366, a utf-8 library. # Note: len(s) == 1 means there is a NUL (0) after the quote.. d = ord(s[1]) if len(s) >= 2 else 0 elif part.type.id == Id.Format_Time and len( s) == 0 and word_spid == runtime.NO_SPID: # Note: No argument means -1 for %(...)T as in Bash Reference # Manual 4.2 "If no argument is specified, conversion behaves # as if -1 had been given." d = -1 else: if word_spid == runtime.NO_SPID: # Blame the format string blame_spid = part.type.span_id else: blame_spid = word_spid self.errfmt.Print_( 'printf expected an integer, got %r' % s, span_id=blame_spid) return 1 if typ in 'di': s = str(d) elif typ in 'ouxX': if d < 0: e_die( "Can't format negative number %d with %%%s", d, typ, span_id=part.type.span_id) if typ == 'u': s = str(d) elif typ == 'o': s = mylib.octal(d) elif typ == 'x': s = mylib.hex_lower(d) elif typ == 'X': s = mylib.hex_upper(d) elif part.type.id == Id.Format_Time: # %(...)T # Initialize timezone: # `localtime' uses the current timezone information initialized # by `tzset'. The function `tzset' refers to the environment # variable `TZ'. When the exported variable `TZ' is present, # its value should be reflected in the real environment # variable `TZ' before call of `tzset'. # # Note: unlike LANG, TZ doesn't seem to change behavior if it's # not exported. # # TODO: In Oil, provide an API that doesn't rely on libc's # global state. tzcell = self.mem.GetCell('TZ') if tzcell and tzcell.exported and tzcell.val.tag_( ) == value_e.Str: tzval = cast(value__Str, tzcell.val) posix.putenv('TZ', tzval.s) time_.tzset() # Handle special values: # User can specify two special values -1 and -2 as in Bash # Reference Manual 4.2: "Two special argument values may be # used: -1 represents the current time, and -2 represents the # time the shell was invoked." from # https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf if d == -1: # the current time ts = time_.time() elif d == -2: # the shell start time ts = self.shell_start_time else: ts = d s = time_.strftime(typ[1:-2], time_.localtime(ts)) if precision >= 0: s = s[:precision] # truncate else: raise AssertionError() else: raise AssertionError() if width >= 0: if len(flags): if '-' in flags: s = s.ljust(width, ' ') elif '0' in flags: s = s.rjust(width, '0') else: pass else: s = s.rjust(width, ' ') out.append(s) else: raise AssertionError() if backslash_c: # 'printf %b a\cb xx' - \c terminates processing! break if arg_index >= num_args: break # Otherwise there are more args. So cycle through the loop once more to # implement the 'arg recycling' behavior. result = ''.join(out) if arg.v is not None: # TODO: get the span_id for arg.v! v_spid = runtime.NO_SPID arena = self.parse_ctx.arena a_parser = self.parse_ctx.MakeArithParser(arg.v) with alloc.ctx_Location(arena, source.ArgvWord(v_spid)): try: anode = a_parser.Parse() except error.Parse as e: ui.PrettyPrintError(e, arena) # show parse error e_usage('Invalid -v expression', span_id=v_spid) lval = self.arith_ev.EvalArithLhs(anode, v_spid) if not self.exec_opts.eval_unsafe_arith( ) and lval.tag_() != lvalue_e.Named: e_usage( '-v expected a variable name. shopt -s eval_unsafe_arith allows expressions', span_id=v_spid) state.SetRef(self.mem, lval, value.Str(result)) else: mylib.Stdout().write(result) return 0
def _Format(self, parts, varargs, spids, out): # type: (List[printf_part_t], List[str], List[int], List[str]) -> int """Hairy printf formatting logic.""" arg_index = 0 num_args = len(varargs) backslash_c = False while True: # loop over arguments for part in parts: # loop over parsed format string UP_part = part if part.tag_() == printf_part_e.Literal: part = cast(printf_part__Literal, UP_part) token = part.token if token.id == Id.Format_EscapedPercent: s = '%' else: s = word_compile.EvalCStringToken(token) out.append(s) elif part.tag_() == printf_part_e.Percent: # Note: This case is very long, but hard to refactor because of the # error cases and "recycling" of args! (arg_index, return 1, etc.) part = cast(printf_part__Percent, UP_part) # TODO: These calculations are independent of the data, so could be # cached flags = [] # type: List[str] if len(part.flags) > 0: for flag_token in part.flags: flags.append(flag_token.val) width = -1 # nonexistent if part.width: if part.width.id in (Id.Format_Num, Id.Format_Zero): width_str = part.width.val width_spid = part.width.span_id elif part.width.id == Id.Format_Star: if arg_index < num_args: width_str = varargs[arg_index] width_spid = spids[arg_index] arg_index += 1 else: width_str = '' # invalid width_spid = runtime.NO_SPID else: raise AssertionError() try: width = int(width_str) except ValueError: if width_spid == runtime.NO_SPID: width_spid = part.width.span_id self.errfmt.Print_("printf got invalid width %r" % width_str, span_id=width_spid) return 1 precision = -1 # nonexistent if part.precision: if part.precision.id == Id.Format_Dot: precision_str = '0' precision_spid = part.precision.span_id elif part.precision.id in (Id.Format_Num, Id.Format_Zero): precision_str = part.precision.val precision_spid = part.precision.span_id elif part.precision.id == Id.Format_Star: if arg_index < num_args: precision_str = varargs[arg_index] precision_spid = spids[arg_index] arg_index += 1 else: precision_str = '' precision_spid = runtime.NO_SPID else: raise AssertionError() try: precision = int(precision_str) except ValueError: if precision_spid == runtime.NO_SPID: precision_spid = part.precision.span_id self.errfmt.Print_( 'printf got invalid precision %r' % precision_str, span_id=precision_spid) return 1 if arg_index < num_args: s = varargs[arg_index] word_spid = spids[arg_index] arg_index += 1 has_arg = True else: s = '' word_spid = runtime.NO_SPID has_arg = False typ = part.type.val if typ == 's': if precision >= 0: s = s[:precision] # truncate elif typ == 'q': # TODO: most shells give \' for single quote, while OSH gives $'\'' # this could matter when SSH'ing s = qsn.maybe_shell_encode(s) elif typ == 'b': # Process just like echo -e, except \c handling is simpler. c_parts = [] # type: List[str] lex = match.EchoLexer(s) while True: id_, tok_val = lex.Next() if id_ == Id.Eol_Tok: # Note: This is really a NUL terminator break # TODO: add span_id from argv tok = Token(id_, runtime.NO_SPID, tok_val) p = word_compile.EvalCStringToken(tok) # Unusual behavior: '\c' aborts processing! if p is None: backslash_c = True break c_parts.append(p) s = ''.join(c_parts) elif part.type.id == Id.Format_Time or typ in 'diouxX': # %(...)T and %d share this complex integer conversion logic try: d = int( s ) # note: spaces like ' -42 ' accepted and normalized except ValueError: # 'a is interpreted as the ASCII value of 'a' if len(s) >= 1 and s[0] in '\'"': # TODO: utf-8 decode s[1:] to be more correct. Probably # depends on issue #366, a utf-8 library. # Note: len(s) == 1 means there is a NUL (0) after the quote.. d = ord(s[1]) if len(s) >= 2 else 0 # No argument means -1 for %(...)T as in Bash Reference Manual # 4.2 "If no argument is specified, conversion behaves as if -1 # had been given." elif not has_arg and part.type.id == Id.Format_Time: d = -1 else: blame_spid = word_spid if has_arg else part.type.span_id self.errfmt.Print_( 'printf expected an integer, got %r' % s, span_id=blame_spid) return 1 if part.type.id == Id.Format_Time: # Initialize timezone: # `localtime' uses the current timezone information initialized # by `tzset'. The function `tzset' refers to the environment # variable `TZ'. When the exported variable `TZ' is present, # its value should be reflected in the real environment # variable `TZ' before call of `tzset'. # # Note: unlike LANG, TZ doesn't seem to change behavior if it's # not exported. # # TODO: In Oil, provide an API that doesn't rely on libc's # global state. tzcell = self.mem.GetCell('TZ') if tzcell and tzcell.exported and tzcell.val.tag_( ) == value_e.Str: tzval = cast(value__Str, tzcell.val) posix.putenv('TZ', tzval.s) time_.tzset() # Handle special values: # User can specify two special values -1 and -2 as in Bash # Reference Manual 4.2: "Two special argument values may be # used: -1 represents the current time, and -2 represents the # time the shell was invoked." from # https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf if d == -1: # the current time ts = time_.time() elif d == -2: # the shell start time ts = self.shell_start_time else: ts = d s = time_.strftime(typ[1:-2], time_.localtime(ts)) if precision >= 0: s = s[:precision] # truncate else: # typ in 'diouxX' # Disallowed because it depends on 32- or 64- bit if d < 0 and typ in 'ouxX': e_die( "Can't format negative number %d with %%%s", d, typ, span_id=part.type.span_id) if typ == 'o': s = mylib.octal(d) elif typ == 'x': s = mylib.hex_lower(d) elif typ == 'X': s = mylib.hex_upper(d) else: # diu s = str(d) # without spaces like ' -42 ' # There are TWO different ways to ZERO PAD, and they differ on # the negative sign! See spec/builtin-printf zero_pad = 0 # no zero padding if width >= 0 and '0' in flags: zero_pad = 1 # style 1 elif precision > 0 and len(s) < precision: zero_pad = 2 # style 2 if zero_pad: negative = (s[0] == '-') if negative: digits = s[1:] sign = '-' if zero_pad == 1: # [%06d] -42 becomes [-00042] (6 TOTAL) n = width - 1 else: # [%6.6d] -42 becomes [-000042] (1 for '-' + 6) n = precision else: digits = s sign = '' if zero_pad == 1: n = width else: n = precision s = sign + digits.rjust(n, '0') else: raise AssertionError() if width >= 0: if '-' in flags: s = s.ljust(width, ' ') else: s = s.rjust(width, ' ') out.append(s) else: raise AssertionError() if backslash_c: # 'printf %b a\cb xx' - \c terminates processing! break if arg_index >= num_args: break # Otherwise there are more args. So cycle through the loop once more to # implement the 'arg recycling' behavior. return 0