Пример #1
0
def _MakeAssignment(parse_ctx, assign_kw, suffix_words):
  """Build a command.Assignment node from an assignment keyword and words.

  suffix_words[0] is skipped; scanning starts at index 1.  The remaining
  words are read as a run of flags followed by bindings (x=1, x[y]=1) or bare
  variable names.

  Args:
    parse_ctx: passed through to _MakeAssignPair for each binding.
    assign_kw: stored as the first field of the resulting Assignment node.
    suffix_words: list of words.

  Returns:
    command.Assignment(assign_kw, flags, pairs)

  NOTE: Dynamic names are rejected with p_die, e.g.

    local $1

  That can be replaced with eval 'local $1'.
  """
  num_words = len(suffix_words)

  # Leading flags, e.g. -r -x -a -A.  None of the flags have arguments.
  flags = []
  pos = 1
  while pos < num_words:
    ok, text, quoted = word.StaticEval(suffix_words[pos])
    # The flag run ends at the first word that can't be statically evaluated,
    # is quoted, or doesn't start with a dash.  That word and the rest are
    # args.
    if not ok or quoted or not text.startswith('-'):
      break
    flags.append(text)
    pos += 1

  # Everything left is either a binding or a bare variable name.
  pairs = []
  for w in suffix_words[pos:]:
    # declare x[y]=1 is valid
    left_token, close_token, part_offset = word.DetectAssignment(w)
    if left_token:
      preparsed = (left_token, close_token, part_offset, w)
      pairs.append(_MakeAssignPair(parse_ctx, preparsed))
      continue

    # In aboriginal in variables/sources: export_if_blank does export "$1".
    # We should allow that.
    #
    # Parse this differently then?  # dynamic-export?  It sets global
    # variables.
    ok, name, quoted = word.StaticEval(w)
    if not ok or quoted:
      p_die("Variable names must be unquoted constants", word=w)
    if not match.IsValidVarName(name):
      p_die('Invalid variable name %r', name, word=w)

    lhs = lhs_expr.LhsName(name)
    lhs.spids.append(word.LeftMostSpanForWord(w))

    # No value is equivalent to ''
    pair = syntax_asdl.assign_pair(lhs, assign_op_e.Equal, None)
    pair.spids.append(word.LeftMostSpanForWord(w))
    pairs.append(pair)

  return command.Assignment(assign_kw, flags, pairs)
Пример #2
0
  def _Visit(self, node):
    """Record the programs used and functions defined by a single node.

    For a command.SimpleCommand whose first word can be statically evaluated,
    the command name is stored in self.progs_used unless it resolves to a
    special builtin, an assignment builtin, or a regular builtin.  When the
    command is 'sudo', the statically evaluated second word is stored in
    self.progs_used as well.

    For a command.FuncDef, the function name is stored in self.funcs_defined.

    Nodes of any other class are ignored.

    Args:
      node: the node to inspect.
    """
    # NOTE: The tags are not unique!!!  We would need this:
    # if isinstance(node, ast.command) and node.tag == command_e.SimpleCommand:
    # But it's easier to check the __class__ attribute.

    cls = node.__class__
    if cls is command.SimpleCommand:
      # Things to consider:
      # - source and .
      # - DONE builtins: get a list from builtin.py
      # - DONE functions: have to enter function definitions into a dictionary
      # - Commands that call others: sudo, su, find, xargs, etc.
      # - builtins that call others: exec, command
      #   - except not command -v!

      if not node.words:
        return

      w = node.words[0]
      ok, argv0, _ = word.StaticEval(w)
      if not ok:
        log("Couldn't statically evaluate %r", w)
        return

      # Only names that aren't builtins of any kind count as programs.
      if (builtin.ResolveSpecial(argv0) == builtin_e.NONE and
          builtin.ResolveAssign(argv0) == builtin_e.NONE and
          builtin.Resolve(argv0) == builtin_e.NONE):
        self.progs_used[argv0] = True

      # NOTE: If argv1 is $0, then we do NOT print a warning!
      if argv0 == 'sudo':
        if len(node.words) < 2:
          return
        w1 = node.words[1]
        ok, argv1, _ = word.StaticEval(w1)
        if not ok:
          # Report the word that actually failed (w1), not the 'sudo' word.
          log("Couldn't statically evaluate %r", w1)
          return

        # Should we mark them behind 'sudo'?  e.g. "sudo apt install"?
        self.progs_used[argv1] = True

    elif cls is command.FuncDef:
      self.funcs_defined[node.name] = True
Пример #3
0
def _ParseHereDocBody(parse_ctx, h, line_reader, arena):
  """Populate the body and end-span attributes of a pending here doc node.

  Args:
    parse_ctx: used to create a word parser for unquoted here doc bodies.
    h: here doc node; h.here_begin is read, h.stdin_parts and
      h.here_end_span_id are filled in.
    line_reader: source of the here doc lines, handed to _ReadHereLines.
    arena: receives the line span for the terminator line.
  """
  # POSIX: "If any character in word is quoted, the delimiter shall be formed
  # by performing quote removal on word, and the here-document lines shall not
  # be expanded. Otherwise, the delimiter shall be the word itself."
  # NOTE: \EOF counts, or even E\OF
  ok, delimiter, delim_quoted = word.StaticEval(h.here_begin)
  if not ok:
    p_die('Invalid here doc delimiter', word=h.here_begin)

  here_lines, last_line = _ReadHereLines(line_reader, h, delimiter)

  if not delim_quoted:
    # Unquoted delimiter: the body lines go through a word parser, which
    # fills in h.stdin_parts.
    body_reader = reader.VirtualLineReader(here_lines, arena)
    body_parser = parse_ctx.MakeWordParserForHereDoc(body_reader)
    body_parser.ReadHereDocBody(h.stdin_parts)
  else:
    # << 'EOF': a LiteralPart for each line.
    h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)

  # Create a span with the end terminator.  Maintains the invariant that the
  # spans "add up".
  end_line_id, end_line, end_pos = last_line
  terminator_span = syntax_asdl.line_span(end_line_id, end_pos, len(end_line))
  h.here_end_span_id = arena.AddLineSpan(terminator_span)
Пример #4
0
 def testStaticEvalWord(self):
     # A backslash-quoted here doc delimiter should statically evaluate to
     # its unquoted text and be reported as quoted.
     w_parser = _InitWordParser(r'\EOF')
     w = w_parser.ReadWord(lex_mode_e.Outer)

     ok, s, quoted = word.StaticEval(w)
     self.assertEqual((True, 'EOF', True), (ok, s, quoted))
Пример #5
0
    def _ReadPatSubVarOp(self, lex_mode):
        # type: (lex_mode_t) -> suffix_op__PatSub
        """Parse the pattern and optional replacement of ${x/pat/replace}.

        Grammar:

          Match     = ('/' | '#' | '%') WORD
          VarSub    = ...
                    | VarOf '/' Match '/' WORD

        Returns a suffix_op.PatSub holding the pattern, the replacement (None
        when the substitution ends right after the pattern), and the id of
        the leading / # % modifier (Id.Undefined_Tok when there is none).
        Malformed input is reported through p_die.
        """
        pat = self._ReadVarOpArg(lex_mode,
                                 eof_type=Id.Lit_Slash,
                                 empty_ok=False)
        assert isinstance(pat, word__CompoundWord)  # Because empty_ok=False

        if len(pat.parts) == 1:
            ok, text, quoted = word.StaticEval(pat)
            if ok and not quoted and text == '/':
                # Looks like ${a////c}: read one more token and keep it as
                # part of the pattern.
                self._Next(lex_mode)
                self._Peek()
                pat.parts.append(word_part.LiteralPart(self.cur_token))

        if not pat.parts:
            p_die('Pattern in ${x/pat/replace} must not be empty',
                  token=self.cur_token)

        # A leading / # % literal is a modifier, not part of the pattern.
        replace_mode = Id.Undefined_Tok
        head = pat.parts[0]
        if isinstance(head, word_part__LiteralPart):
            modifier = head.token.id
            if modifier in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
                pat.parts.pop(0)
                replace_mode = modifier

        # NOTE: If there is a modifier, the pattern can be empty, e.g.
        # ${s/#/foo} and ${a/%/foo}.

        if self.token_type == Id.Right_VarSub:
            # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
            return suffix_op.PatSub(pat, None, replace_mode)

        if self.token_type == Id.Lit_Slash:
            replacement = self._ReadVarOpArg(lex_mode)  # do not stop at /

            self._Peek()
            if self.token_type != Id.Right_VarSub:
                # NOTE: I think this never happens.
                # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and
                # everything there is Lit_ or Left_, except for }.
                p_die("Expected } after replacement string, got %s",
                      self.cur_token,
                      token=self.cur_token)

            return suffix_op.PatSub(pat, replacement, replace_mode)

        # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
        p_die("Expected } after pat sub, got %r",
              self.cur_token.val,
              token=self.cur_token)
Пример #6
0
    def testGitComment(self):
        # ;# is a comment!  Gah.
        # Conclusion: Comments are NOT LEXICAL.  They are part of word parsing.

        node = assert_ParseCommandList(
            self, """\
. "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
""")
        self.assertEqual(command_e.Sentence, node.tag)
        self.assertEqual(2, len(node.child.words))

        # This is NOT a comment
        node = assert_ParseCommandList(self, """\
echo foo#bar
""")
        self.assertEqual(command_e.SimpleCommand, node.tag)
        self.assertEqual(2, len(node.words))
        _, s, _ = word.StaticEval(node.words[1])
        self.assertEqual('foo#bar', s)

        # This is a comment
        node = assert_ParseCommandList(self, """\
echo foo #comment
""")
        self.assertEqual(command_e.SimpleCommand, node.tag)
        self.assertEqual(2, len(node.words))
        _, s, _ = word.StaticEval(node.words[1])
        self.assertEqual('foo', s)

        # Empty comment
        node = assert_ParseCommandList(self, """\
echo foo #
""")
        self.assertEqual(command_e.SimpleCommand, node.tag)
        self.assertEqual(2, len(node.words))
        _, s, _ = word.StaticEval(node.words[1])
        self.assertEqual('foo', s)
Пример #7
0
  def _ParseForEachLoop(self):
    """Parse a for-each loop, starting at the loop variable name.

    The current word must statically evaluate to an unquoted, valid variable
    name.  It may be followed by 'in' and a list of words, by ';', or directly
    by 'do'.  In the last two cases the loop iterates implicitly
    (node.do_arg_iter is set).

    Returns:
      A command.ForEach node with iter_name, do_arg_iter, iter_words (only
      when 'in' is present), spids [in_spid, semi_spid], and body filled in.
    """
    node = command.ForEach()
    node.do_arg_iter = False

    ok, name, quoted = word.StaticEval(self.cur_word)
    if quoted or not ok:
      p_die("Loop variable name should be a constant", word=self.cur_word)
    if not match.IsValidVarName(name):
      p_die("Invalid loop variable name", word=self.cur_word)
    node.iter_name = name
    self._Next()  # skip past name

    self._NewlineOk()

    in_spid = semi_spid = const.NO_INTEGER

    self._Peek()
    if self.c_id == Id.KW_In:
      self._Next()  # skip in

      in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
      iter_words, semi_spid = self.ParseForWords()
      assert iter_words is not None

      # Brace detection first, then tilde detection on its result.
      node.iter_words = word.TildeDetectAll(braces.BraceDetectAll(iter_words))

    elif self.c_id in (Id.Op_Semi, Id.KW_Do):
      node.do_arg_iter = True  # implicit for loop
      if self.c_id == Id.Op_Semi:
        self._Next()
      # For 'do', do not advance.

    else:  # for foo BAD
      p_die('Unexpected word after for loop variable', word=self.cur_word)

    node.spids.extend([in_spid, semi_spid])

    body_node = self.ParseDoGroup()
    assert body_node is not None
    node.body = body_node

    return node
Пример #8
0
    def DoCommand(self, node, local_symbols, at_top_level=False):
        """Translate one command node, dispatching on node.tag.

        Output is produced by writing replacement text to self.f and by moving
        self.cursor with PrintUntil() / SkipUntil() over the node's span ids.
        Compound commands recurse into their children.

        Args:
          node: command node to translate.
          local_symbols: symbol table passed through to the word, redirect and
            assignment handlers.  A fresh dict is used for the body of each
            FuncDef.
          at_top_level: forwarded to DoAssignment, and propagated only through
            CommandList children; every other recursive call uses the default.

        Raises:
          RuntimeError: a simple command starts with '[' but its last word is
            not ']'.
          AssertionError: a case arm has neither a ';;' span nor a last span,
            or the node type is not handled.
        """
        if node.tag == command_e.CommandList:
            # TODO: How to distinguish between echo hi; echo bye; and on separate
            # lines
            for child in node.children:
                self.DoCommand(child, local_symbols, at_top_level=at_top_level)

        elif node.tag == command_e.SimpleCommand:
            # How to preserve spaces between words?  Do you want to do it?
            # Well you need to test this:
            #
            # echo foo \
            #   bar

            # TODO: Need to print until the left most part of the phrase?  the phrase
            # is a word, binding, redirect.
            #self.cursor.PrintUntil()

            if node.more_env:
                (left_spid, ) = node.more_env[0].spids
                self.cursor.PrintUntil(left_spid)
                self.f.write('env ')

                # We only need to transform the right side, not left side.
                for pair in node.more_env:
                    self.DoWordInCommand(pair.val, local_symbols)

            # More translations:
            # - . to source
            # - eval to sh-eval

            if node.words:
                first_word = node.words[0]
                ok, val, quoted = word.StaticEval(first_word)
                word0_spid = word.LeftMostSpanForWord(first_word)
                if ok and not quoted:
                    if val == '[':
                        last_word = node.words[-1]
                        # Check if last word is ]
                        ok, val, quoted = word.StaticEval(last_word)
                        if ok and not quoted and val == ']':
                            # Replace [ with 'test'
                            self.cursor.PrintUntil(word0_spid)
                            self.cursor.SkipUntil(word0_spid + 1)
                            self.f.write('test')

                            for w in node.words[1:-1]:
                                self.DoWordInCommand(w, local_symbols)

                            # Now omit ]
                            last_spid = word.LeftMostSpanForWord(last_word)
                            self.cursor.PrintUntil(last_spid -
                                                   1)  # Get the space before
                            self.cursor.SkipUntil(last_spid +
                                                  1)  # ] takes one spid
                            return
                        else:
                            raise RuntimeError('Got [ without ]')

                    elif val == '.':
                        self.cursor.PrintUntil(word0_spid)
                        self.cursor.SkipUntil(word0_spid + 1)
                        self.f.write('source')
                        return

            for w in node.words:
                self.DoWordInCommand(w, local_symbols)

            # NOTE: This will change to "phrase"?  Word or redirect.
            for r in node.redirects:
                self.DoRedirect(r, local_symbols)

            # TODO: Print the terminator.  Could be \n or ;
            # Need to print env like PYTHONPATH = 'foo' && ls
            # Need to print redirects:
            # < > are the same.  << is here string, and >> is assignment.
            # append is >+

            # TODO: static_eval of simple command
            # - [ -> "test".  Eliminate trailing ].
            # - . -> source, etc.

        elif node.tag == command_e.Assignment:
            self.DoAssignment(node, at_top_level, local_symbols)

        elif node.tag == command_e.Pipeline:
            # Obscure: |& turns into |- or |+ for stderr.
            # TODO:
            # if ! true; then -> if not true {

            # if ! echo | grep; then -> if not { echo | grep } {
            # }
            # not is like do {}, but it negates the return value I guess.

            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif node.tag == command_e.AndOr:
            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif node.tag == command_e.Sentence:
            # 'ls &' to 'fork ls'
            # Keep ; the same.
            self.DoCommand(node.child, local_symbols)

        # This has to be different in the function case.
        elif node.tag == command_e.BraceGroup:
            # { echo hi; } -> do { echo hi }
            # For now it might be OK to keep 'do { echo hi; }
            #left_spid, right_spid = node.spids
            (left_spid, ) = node.spids

            self.cursor.PrintUntil(left_spid)
            self.cursor.SkipUntil(left_spid + 1)
            self.f.write('do {')

            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif node.tag == command_e.Subshell:
            # (echo hi) -> shell echo hi
            # (echo hi; echo bye) -> shell {echo hi; echo bye}

            (left_spid, right_spid) = node.spids

            self.cursor.PrintUntil(left_spid)
            self.cursor.SkipUntil(left_spid + 1)
            self.f.write('shell {')

            self.DoCommand(node.command_list, local_symbols)

            #self._DebugSpid(right_spid)
            #self._DebugSpid(right_spid + 1)

            #print('RIGHT SPID', right_spid)
            self.cursor.PrintUntil(right_spid)
            self.cursor.SkipUntil(right_spid + 1)
            self.f.write('}')

        elif node.tag == command_e.DParen:
            # (( a == 0 )) is sh-expr ' a == 0 '
            #
            # NOTE: (( n++ )) is auto-translated to sh-expr 'n++', but could be set
            # n++.
            left_spid, right_spid = node.spids
            self.cursor.PrintUntil(left_spid)
            self.cursor.SkipUntil(left_spid + 1)
            self.f.write("sh-expr '")
            self.cursor.PrintUntil(right_spid - 1)  # before ))
            self.cursor.SkipUntil(right_spid +
                                  1)  # after )) -- each one is a token
            self.f.write("'")

        elif node.tag == command_e.DBracket:
            # [[ 1 -eq 2 ]] to (1 == 2)
            self.DoBoolExpr(node.expr)

        elif node.tag == command_e.FuncDef:
            # TODO: skip name
            #self.f.write('proc %s' % node.name)

            # New symbol table for every function.
            new_local_symbols = {}

            # Should be the left most span, including 'function'
            self.cursor.PrintUntil(node.spids[0])

            self.f.write('proc ')
            self.f.write(node.name)
            self.cursor.SkipUntil(node.spids[1])

            if node.body.tag == command_e.BraceGroup:
                # Don't add "do" like a standalone brace group.  Just use {}.
                for child in node.body.children:
                    self.DoCommand(child, new_local_symbols)
            else:
                pass
                # Add {}.
                # proc foo {
                #   shell {echo hi; echo bye}
                # }
                #self.DoCommand(node.body)

        # NOTE: A second BraceGroup branch used to follow here.  It could never
        # run because the BraceGroup branch earlier in this chain matches
        # first, so it was removed.

        elif node.tag == command_e.DoGroup:
            do_spid, done_spid = node.spids
            self.cursor.PrintUntil(do_spid)
            self.cursor.SkipUntil(do_spid + 1)
            self.f.write('{')

            for child in node.children:
                self.DoCommand(child, local_symbols)

            self.cursor.PrintUntil(done_spid)
            self.cursor.SkipUntil(done_spid + 1)
            self.f.write('}')

        elif node.tag == command_e.ForEach:
            # Need to preserve spaces between words, because there can be line
            # wrapping.
            # for x in a b c \
            #    d e f; do

            in_spid, semi_spid = node.spids

            if in_spid == const.NO_INTEGER:
                #self.cursor.PrintUntil()  # 'for x' and then space
                self.f.write('for %s in @Argv ' % node.iter_name)
                self.cursor.SkipUntil(node.body.spids[0])
            else:
                self.cursor.PrintUntil(in_spid +
                                       1)  # 'for x in' and then space
                self.f.write('[')
                for w in node.iter_words:
                    self.DoWordInCommand(w, local_symbols)
                self.f.write(']')
                #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)

            if semi_spid != const.NO_INTEGER:
                self.cursor.PrintUntil(semi_spid)
                self.cursor.SkipUntil(semi_spid + 1)

            self.DoCommand(node.body, local_symbols)

        elif node.tag == command_e.ForExpr:
            # Change (( )) to ( ), and then _FixDoGroup
            pass

        elif node.tag == command_e.WhileUntil:

            # Skip 'until', and replace it with 'while not'
            if node.keyword.id == Id.KW_Until:
                kw_spid = node.keyword.span_id
                self.cursor.PrintUntil(kw_spid)
                self.f.write('while not')
                self.cursor.SkipUntil(kw_spid + 1)

            cond = node.cond
            # Skip the semi-colon in the condition, which is usually a Sentence
            if len(cond) == 1 and cond[0].tag == command_e.Sentence:
                self.DoCommand(cond[0].child, local_symbols)
                semi_spid = cond[0].terminator.span_id
                self.cursor.SkipUntil(semi_spid + 1)

            self.DoCommand(node.body, local_symbols)

        elif node.tag == command_e.If:
            else_spid, fi_spid = node.spids

            # if foo; then -> if foo {
            # elif foo; then -> } elif foo {
            for arm in node.arms:
                elif_spid, then_spid = arm.spids
                if elif_spid != const.NO_INTEGER:
                    self.cursor.PrintUntil(elif_spid)
                    self.f.write('} ')

                cond = arm.cond
                if len(cond) == 1 and cond[0].tag == command_e.Sentence:
                    sentence = cond[0]
                    self.DoCommand(sentence, local_symbols)

                    # Remove semi-colon
                    semi_spid = sentence.terminator.span_id
                    self.cursor.PrintUntil(semi_spid)
                    self.cursor.SkipUntil(semi_spid + 1)
                else:
                    for child in arm.cond:
                        self.DoCommand(child, local_symbols)

                self.cursor.PrintUntil(then_spid)
                self.cursor.SkipUntil(then_spid + 1)
                self.f.write('{')

                for child in arm.action:
                    self.DoCommand(child, local_symbols)

            # else -> } else {
            if node.else_action:
                self.cursor.PrintUntil(else_spid)
                self.f.write('} ')
                self.cursor.PrintUntil(else_spid + 1)
                self.f.write(' {')

                for child in node.else_action:
                    self.DoCommand(child, local_symbols)

            # fi -> }
            self.cursor.PrintUntil(fi_spid)
            self.cursor.SkipUntil(fi_spid + 1)
            self.f.write('}')

        elif node.tag == command_e.Case:
            case_spid, in_spid, esac_spid = node.spids
            self.cursor.PrintUntil(case_spid)
            self.cursor.SkipUntil(case_spid + 1)
            self.f.write('match')

            # Reformat "$1" to $1
            self.DoWordInCommand(node.to_match, local_symbols)

            self.cursor.PrintUntil(in_spid)
            self.cursor.SkipUntil(in_spid + 1)
            self.f.write('{')  # matchstr $var {

            # each arm needs the ) and the ;; node to skip over?
            for arm in node.arms:
                left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
                #print(left_spid, rparen_spid, dsemi_spid)

                self.cursor.PrintUntil(left_spid)
                # Hm maybe keep | because it's semi-deprecated?  You can use
                # reload|force-reload {
                # }
                # e/reload|force-reload/ {
                # }
                # / 'reload' or 'force-reload' / {
                # }
                #
                # Yeah it's the more abbreviated syntax.

                # change | to 'or'  (placeholder: not implemented yet)
                for pat in arm.pat_list:
                    pass

                self.f.write('with ')
                # Remove the )
                self.cursor.PrintUntil(rparen_spid)
                self.cursor.SkipUntil(rparen_spid + 1)

                for child in arm.action:
                    self.DoCommand(child, local_symbols)

                if dsemi_spid != const.NO_INTEGER:
                    # Remove ;;
                    self.cursor.PrintUntil(dsemi_spid)
                    self.cursor.SkipUntil(dsemi_spid + 1)
                elif last_spid != const.NO_INTEGER:
                    self.cursor.PrintUntil(last_spid)
                else:
                    raise AssertionError(
                        "Expected with dsemi_spid or last_spid in case arm")

            self.cursor.PrintUntil(esac_spid)
            self.cursor.SkipUntil(esac_spid + 1)
            self.f.write('}')  # strmatch $var {

        elif node.tag == command_e.NoOp:
            pass

        elif node.tag == command_e.ControlFlow:
            # No change for break / return / continue
            pass

        elif node.tag == command_e.TimeBlock:
            self.DoCommand(node.pipeline, local_symbols)

        else:
            #log('Command not handled: %s', node)
            raise AssertionError(node.__class__.__name__)
Пример #9
0
    def DoRedirect(self, node, local_symbols):
        """Translate one redirect node.

        Everything up to the redirect operator is printed first.  For a plain
        redirect, an explicit descriptor is written as '!N ', the operators
        '>' and '>&' are rewritten, and the argument word is then translated.
        For a here doc, the delimiters are replaced by triple quotes: single
        quotes when the original delimiter was quoted, double quotes
        otherwise.

        Args:
          node: redirect node, either a redir_e.Redir or a redir_e.HereDoc.
          local_symbols: symbol table passed on to the word handlers.

        Raises:
          AssertionError: node.tag is neither Redir nor HereDoc.
        """
        op = node.op
        op_spid = op.span_id
        op_id = op.id
        self.cursor.PrintUntil(op_spid)

        # TODO:
        # - Do < and <& the same way.
        # - How to handle here docs and here docs?
        # - >> becomes >+ or >-, or maybe >>>

        if node.tag == redir_e.Redir:
            if node.fd != const.NO_INTEGER:
                # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in
                # the formatter.
                self.f.write('!%d ' % node.fd)

            # The operator is handled the same way with or without a
            # descriptor.
            if op_id == Id.Redir_Great:
                self.f.write('>')  # Allow us to replace the operator
                self.cursor.SkipUntil(op_spid + 1)
            elif op_id == Id.Redir_GreatAnd:
                # Replace >& 2 with > !2, and 1>& 2 with !1 > !2
                self.f.write('> !')
                self.cursor.SkipUntil(word.LeftMostSpanForWord(node.arg_word))

            self.DoWordInCommand(node.arg_word, local_symbols)

        elif node.tag == redir_e.HereDoc:
            ok, _, delim_quoted = word.StaticEval(node.here_begin)
            if not ok:
                p_die('Invalid here doc delimiter', word=node.here_begin)

            if delim_quoted:
                opening, closing = " '''", "'''\n"
            else:
                opening, closing = ' """', '"""\n'

            # Turn everything into <<.  We just change the quotes
            self.f.write('<<')
            self.f.write(opening)

            # Skip past the original delimiter word.
            self.cursor.SkipUntil(
                word.RightMostSpanForWord(node.here_begin) + 1)

            # Now print the lines.  TODO: Have a flag to indent these to the
            # level of the owning command, e.g.
            #   cat <<EOF
            # EOF
            # Or since most here docs are the top level, you could just have a
            # hack for a fixed indent?  TODO: Look at real use cases.
            for part in node.stdin_parts:
                self.DoWordPart(part, local_symbols)

            self.cursor.SkipUntil(node.here_end_span_id + 1)
            self.f.write(closing)

        else:
            raise AssertionError(node.__class__.__name__)

        # Design notes on the target syntax:
        #
        # <<< 'here word'
        # << 'here word'
        #
        # 2> out.txt
        # !2 > out.txt
        #
        # cat 1<< EOF
        # hello $name
        # EOF
        # cat !1 << """
        # hello $name
        # """
        #
        # cat << 'EOF'
        # no expansion
        # EOF
        #   cat <<- 'EOF'
        #   no expansion and indented
        #
        # cat << '''
        # no expansion
        # '''
        #   cat << '''
        #   no expansion and indented
        #   '''
        #
        # Warn about multiple here docs on a line.
        # As an obscure feature, allow
        # cat << \'ONE' << \"TWO"
        # 123
        # ONE
        # 234
        # TWO
        # The _ is an indicator that it's not a string to be piped in.
Пример #10
0
  def ParseSimpleCommand(self, cur_aliases):
    """Parse a simple command, an assignment, or a control flow statement.

    Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

    io_file        : '<'       filename
                   | LESSAND   filename
                     ...

    io_here        : DLESS     here_end
                   | DLESSDASH here_end

    redirect       : IO_NUMBER (io_redirect | io_here)

    prefix_part    : ASSIGNMENT_WORD | redirect
    cmd_part       : WORD | redirect

    assign_kw      : Declare | Export | Local | Readonly

    # Without any words it is parsed as a command, not an assignment
    assign_listing : assign_kw

    # Now we have something to do (might be changing assignment flags too)
    # NOTE: any prefixes should be a warning, but they are allowed in shell.
    assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

    # an external command, a function call, or a builtin -- a "word_command"
    word_command   : prefix_part* cmd_part+

    simple_command : assign_listing
                   | assignment
                   | proc_command

    Simple imperative algorithm:

    1) Read a list of words and redirects.  Append them to separate lists.
    2) Look for the first non-assignment word.  If it's declare, etc., then
       keep parsing words AND assign words.  Otherwise, just parse words.
    3) If there are no non-assignment words, then it's a global assignment.

    { redirects, global assignments } OR
    { redirects, prefix_bindings, words } OR
    { redirects, ERROR_prefix_bindings, keyword, assignments, words }

    THEN CHECK that prefix bindings don't have any array literal parts!
    Global assignments and keyword assignments can have them, of course.
    Well, actually EXPORT shouldn't have them either -- WARNING

    3 cases we want to warn: prefix_bindings for assignment, and array literal
    in prefix bindings, or export

    A command can be an assignment word, word, or redirect on its own.

        ls
        >out.txt

        >out.txt FOO=bar   # this touches the file, and then

    Or any sequence:
        ls foo bar
        <in.txt ls foo bar >out.txt
        <in.txt ls >out.txt foo bar

    Or add one or more environment bindings:
        VAR=val env
        >out.txt VAR=val env

    here_end vs filename is a matter of whether we test that it's quoted.  e.g.
    <<EOF vs <<'EOF'.

    Args:
      cur_aliases: passed through to _MaybeExpandAliases.

    Returns:
      A command.SimpleCommand, command.Assignment or command.ControlFlow node,
      or whatever node alias expansion produced.
    """
    redirects, words = self._ScanSimpleCommand()

    if not words:  # e.g.  >out.txt  # redirect without words
      node = command.SimpleCommand()
      node.redirects = redirects
      return node

    preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)

    if not suffix_words:  # ONE=1 a[x]=1 TWO=2  (with no other words)
      if redirects:
        left_token, _, _, _ = preparsed_list[0]
        p_die("Global assignment shouldn't have redirects", token=left_token)

      pairs = [_MakeAssignPair(self.parse_ctx, p) for p in preparsed_list]

      node = command.Assignment(Id.Assign_None, [], pairs)
      # no keyword spid to skip past
      node.spids.append(word.LeftMostSpanForWord(words[0]))
      return node

    kind, kw_token = word.KeywordToken(suffix_words[0])

    if kind == Kind.Assign:
      # Here we StaticEval suffix_words[1] to see if we have an ASSIGNMENT
      # COMMAND like 'typeset -p', which lists variables -- a SimpleCommand
      # rather than an Assignment.
      #
      # Note we're not handling duplicate flags like 'typeset -pf'.  I see this
      # in bashdb (bash debugger) but it can just be changed to 'typeset -p
      # -f'.
      if len(suffix_words) > 1:
        ok, val, _ = word.StaticEval(suffix_words[1])
        if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS:
          # declare -f, declare -p, typeset -p, etc.
          return _MakeSimpleCommand(preparsed_list, suffix_words, redirects)

      if redirects:
        # Attach the error location to the keyword.
        p_die("Assignments shouldn't have redirects", token=kw_token)

      if preparsed_list:  # FOO=bar local spam=eggs not allowed
        # Use the location of the first value.  TODO: Use the whole word
        # before splitting.
        left_token, _, _, _ = preparsed_list[0]
        p_die("Assignments shouldn't have environment bindings", token=left_token)

      # declare str='', declare -a array=()
      node = _MakeAssignment(self.parse_ctx, kw_token.id, suffix_words)
      node.spids.append(kw_token.span_id)
      return node

    if kind == Kind.ControlFlow:
      if redirects:
        p_die("Control flow shouldn't have redirects", token=kw_token)

      if preparsed_list:  # FOO=bar local spam=eggs not allowed
        # TODO: Change location as above
        left_token, _, _, _ = preparsed_list[0]
        p_die("Control flow shouldn't have environment bindings",
              token=left_token)

      # At most one argument may follow the keyword.
      num_suffix = len(suffix_words)
      if num_suffix == 1:
        arg_word = None
      elif num_suffix == 2:
        arg_word = suffix_words[1]
      else:
        p_die('Unexpected argument to %r', kw_token.val, word=suffix_words[2])

      # Attach the token for errors.  (Assignment may not need it.)
      return command.ControlFlow(kw_token, arg_word)

    # If any expansions were detected, then parse again.
    expanded = self._MaybeExpandAliases(suffix_words, cur_aliases)
    if expanded:
      # NOTE: There are other types of nodes with redirects.  Do they matter?
      if expanded.tag == command_e.SimpleCommand:
        expanded.redirects = redirects
        _AppendMoreEnv(preparsed_list, expanded.more_env)
      return expanded

    # TODO check that we don't have env1=x x[1]=y env2=z here.

    # FOO=bar printenv.py FOO
    return _MakeSimpleCommand(preparsed_list, suffix_words, redirects)
Пример #11
0
  def _MaybeExpandAliases(self, words, cur_aliases):
    """Try to expand aliases.

    Our implementation of alias has two design choices:
    - Where to insert it in parsing.  We do it at the end of ParseSimpleCommand.
    - What grammar rule to parse the expanded alias buffer with.  In our case
      it's ParseCommand().

    This doesn't quite match what other shells do, but I can't figure out a
    better place.

    Most test cases pass, except for ones like:

    alias LBRACE='{'
    LBRACE echo one; echo two; }

    alias MULTILINE='echo 1
    echo 2
    echo 3'
    MULTILINE

    NOTE: dash handles aliases in a totally different way.  It has a global
    variable checkkwd in parser.c.  It assigns it all over the grammar, like
    this:

    checkkwd = CHKNL | CHKKWD | CHKALIAS;

    The readtoken() function checks (checkkwd & CHKALIAS) and then calls
    lookupalias().  This seems to provide a consistent behavior among shells,
    but it's less modular and testable.

    Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

    Args:
      words: The words of the command.  Leading words are looked up in
        self.aliases; the remaining ones are copied verbatim from the arena.
        An empty list simply yields None.
      cur_aliases: List of (alias name, word index) pairs that have already
        been expanded.  It is MUTATED: every expansion done here is appended,
        and the same list is passed on to the nested ParseCommand() call.

    Returns:
      A command node if any aliases were expanded, or None otherwise.

    Raises:
      Nothing is caught here: a failure to parse the expanded alias text
      (e.g. util.ParseError from ParseCommand) is fatal and propagates to the
      caller.
    """
    first_word_str = None  # for error message

    expanded = []
    i = 0
    n = len(words)

    while i < n:
      w = words[i]

      ok, word_str, quoted = word.StaticEval(w)
      if not ok or quoted:
        break  # Only unquoted, statically known words can name an alias.

      alias_exp = self.aliases.get(word_str)
      if alias_exp is None:
        break

      # Prevent infinite loops.  This is subtle: we want to prevent infinite
      # expansion of alias echo='echo x'.  But we don't want to prevent
      # expansion of the second word in 'echo echo', so we add 'i' to
      # "cur_aliases".
      if (word_str, i) in cur_aliases:
        break

      if i == 0:
        first_word_str = word_str  # for error message

      cur_aliases.append((word_str, i))
      expanded.append(alias_exp)
      i += 1

      if not alias_exp.endswith(' '):
        # alias e='echo [ ' is the same expansion as
        # alias e='echo ['
        # The trailing space indicates whether we should continue to expand
        # aliases; it's not part of it.
        expanded.append(' ')
        break  # No more expansions

    if not expanded:  # No expansions; caller does parsing.
      return None

    # We got some expansion.  Now copy the rest of the words.

    # We need each NON-REDIRECT word separately!  For example:
    # $ echo one >out two
    # dash/mksh/zsh go beyond the first redirect!
    for w in words[i:]:
      left_spid = word.LeftMostSpanForWord(w)
      right_spid = word.RightMostSpanForWord(w)

      # Adapted from tools/osh2oil.py Cursor.PrintUntil
      for span_id in xrange(left_spid, right_spid + 1):
        span = self.arena.GetLineSpan(span_id)
        line = self.arena.GetLine(span.line_id)
        expanded.append(line[span.col : span.col + span.length])

      expanded.append(' ')  # Put space back between words.

    code_str = ''.join(expanded)
    lines = code_str.splitlines(True)  # Keep newlines

    line_info = []
    # TODO: Add location information
    self.arena.PushSource(
        '<expansion of alias %r at line %d of %s>' %
        (first_word_str, -1, 'TODO'))
    try:
      # Lines of the expansion are numbered from 1.
      for line_num, line in enumerate(lines, 1):
        line_id = self.arena.AddLine(line, line_num)
        line_info.append((line_id, line, 0))
    finally:
      self.arena.PopSource()

    line_reader = reader.VirtualLineReader(line_info, self.arena)
    cp = self.parse_ctx.MakeOshParser(line_reader)

    # Failure to parse the alias expansion is a fatal error, so any parse
    # error is deliberately left to propagate.
    return cp.ParseCommand(cur_aliases=cur_aliases)
Пример #12
0
    def testPatSub(self):
        """Check how the ${var/pat/replace} family of substitutions is parsed."""
        # (source text, expected replace_mode) for the basic forms.
        basic_cases = [
            ('${var/pat/replace}', Id.Undefined_Tok),
            # '//' form: substitute all
            ('${var//pat/replace}', Id.Lit_Slash),
            # '%' form (bash anchors the match at the end, i.e. a suffix)
            ('${var/%pat/replace}', Id.Lit_Percent),
            # '#' form (bash anchors the match at the start, i.e. a prefix)
            ('${var/#pat/replace}', Id.Lit_Pound),
        ]
        for code, expected_mode in basic_cases:
            parsed = _assertReadWord(self, code)
            suffix_op = _GetSuffixOp(self, parsed)
            self.assertUnquoted('pat', suffix_op.pat)
            self.assertUnquoted('replace', suffix_op.replace)
            self.assertEqual(expected_mode, suffix_op.replace_mode)

        # No replacement part.  The first form only goes through
        # _assertReadWord; its result is not inspected any further.
        _assertReadWord(self, '${var/pat}')
        parsed = _assertReadWord(self, '${var//pat}')
        suffix_op = _GetSuffixOp(self, parsed)
        self.assertUnquoted('pat', suffix_op.pat)
        self.assertEqual(None, suffix_op.replace)
        self.assertEqual(Id.Lit_Slash, suffix_op.replace_mode)

        # The replacement itself consists of one or two unquoted slashes.
        slash_cases = [
            ('${var/pat//}', '/'),
            ('${var/pat///}', '//'),
        ]
        for code, expected_replace in slash_cases:
            parsed = _assertReadWord(self, code)
            suffix_op = _GetSuffixOp(self, parsed)
            self.assertUnquoted('pat', suffix_op.pat)
            self.assertUnquoted(expected_replace, suffix_op.replace)

        # Two slashes again, but this time inside double quotes.
        parsed = _assertReadWord(self, '${var/pat/"//"}')
        suffix_op = _GetSuffixOp(self, parsed)
        self.assertUnquoted('pat', suffix_op.pat)

        is_static, text, was_quoted = word.StaticEval(suffix_op.replace)
        self.assertTrue(is_static)
        self.assertEqual('//', text)
        self.assertTrue(was_quoted)

        # Real example found in the wild!
        # http://www.oilshell.org/blog/2016/11/07.html
        parsed = _assertReadWord(self, r'${var////\\/}')
        suffix_op = _GetSuffixOp(self, parsed)
        self.assertEqual(Id.Lit_Slash, suffix_op.replace_mode)

        self.assertUnquoted('/', suffix_op.pat)

        # Only the static value is checked here, not whether it was quoted.
        is_static, text, _ = word.StaticEval(suffix_op.replace)
        self.assertTrue(is_static)
        self.assertEqual(r'\/', text)
Пример #13
0
 def assertUnquoted(self, expected, w):
     """Assert that word `w` statically evaluates, unquoted, to `expected`."""
     is_static, text, was_quoted = word.StaticEval(w)
     # Order matters for the failure message: evaluability first, then the
     # value, then the quoting flag.
     self.assertTrue(is_static)
     self.assertEqual(expected, text)
     self.assertFalse(was_quoted)