Пример #1
0
    def _Visit(self, node):
        """Record programs used and functions defined by a single AST node.

        For an ast.SimpleCommand, the first word is statically evaluated.  If
        neither builtin.ResolveSpecial() nor builtin.Resolve() recognizes the
        result, the name is marked in self.progs_used.  When the command is
        'sudo', the second word is marked in self.progs_used as well.

        For an ast.FuncDef, the function name is marked in self.funcs_defined.
        Nodes of any other class are ignored.
        """
        #log('VISIT %s', node.__class__.__name__)

        # NOTE: The tags are not unique!!!  We would need this:
        # if isinstance(node, ast.command) and node.tag == command_e.SimpleCommand:
        # But it's easier to check the __class__ attribute.

        cls = node.__class__
        if cls is ast.SimpleCommand:
            # Things to consider:
            # - source and .
            # - DONE builtins: get a list from builtin.py
            # - DONE functions: have to enter function definitions into a dictionary
            # - Commands that call others: sudo, su, find, xargs, etc.
            # - builtins that call others: exec, command
            #   - except not command -v!

            if not node.words:
                return

            w = node.words[0]
            ok, argv0, _ = word.StaticEval(w)
            if not ok:
                log("Couldn't statically evaluate %r", w)
                return

            # Only names that are not builtins of either kind count as programs.
            if (builtin.ResolveSpecial(argv0) == builtin_e.NONE
                    and builtin.Resolve(argv0) == builtin_e.NONE):
                self.progs_used[argv0] = True

            # NOTE: If argv1 is $0, then we do NOT print a warning!
            if argv0 == 'sudo':
                if len(node.words) < 2:
                    return
                w1 = node.words[1]
                ok, argv1, _ = word.StaticEval(w1)
                if not ok:
                    # Report the word that actually failed (w1), not argv0.
                    log("Couldn't statically evaluate %r", w1)
                    return

                # Should we mark them behind 'sudo'?  e.g. "sudo apt install"?
                self.progs_used[argv1] = True

        elif cls is ast.FuncDef:
            self.funcs_defined[node.name] = True
Пример #2
0
  def ParseRedirect(self):
    """Parse a redirect operator and its operand at the current word.

    Returns:
      An ast.HereDoc for the << and <<- operators, an ast.Redirect for every
      other redirect operator, or None when self._Peek() fails or the operand
      is rejected.
    """
    if not self._Peek():
      return None
    assert self.c_kind == Kind.Redir, self.cur_word

    op_spid = self.cur_word.token.span_id

    # For now only a single leading digit is taken as the descriptor; -1
    # means that no descriptor was written.
    lead = self.cur_word.token.val[0]
    fd = int(lead) if lead.isdigit() else -1

    if self.c_id not in (Id.Redir_DLess, Id.Redir_DLessDash):
      # Ordinary redirect: the operand must be a word.
      redir = ast.Redirect()
      redir.op_id = self.c_id
      redir.fd = fd
      redir.spids.append(op_spid)
      self._Next()

      if not self._Peek():
        return None
      if self.c_kind != Kind.Word:
        self.AddErrorContext(
            'Expected word after redirect operator', word=self.cur_word)
        return None

      # Prefer the tilde-detected word when TildeDetect produced one.
      with_tilde = word.TildeDetect(self.cur_word)
      redir.arg_word = with_tilde or self.cur_word
      self._Next()
      return redir

    # Here doc
    here_doc = ast.HereDoc()
    here_doc.op_id = self.c_id
    here_doc.arg_word = None  # not read yet
    here_doc.fd = fd
    here_doc.was_filled = False
    here_doc.spids.append(op_spid)
    self._Next()

    if not self._Peek():
      return None
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, quoted = word.StaticEval(self.cur_word)
    here_doc.here_end = delimiter
    if not ok:
      self._BadWord('Error evaluating here doc delimiter: %s', self.cur_word)
      return None
    here_doc.do_expansion = not quoted
    self._Next()
    return here_doc
Пример #3
0
 def testStaticEvalWord(self):
   """A backslash-escaped word evaluates statically and is reported quoted."""
   # \EOF is one way to spell a quoted here doc delimiter.
   source = r'\EOF'
   parsed = InitWordParser(source).ReadWord(LexMode.OUTER)
   evaluated, text, was_quoted = word.StaticEval(parsed)
   self.assertEqual(True, evaluated)
   self.assertEqual('EOF', text)
   self.assertEqual(True, was_quoted)
Пример #4
0
  def _ReadPatSubVarOp(self, lex_mode):
    """Read the pattern substitution operator of a ${} expression.

    Match     = ('/' | '#' | '%') WORD
    VarSub    = ...
              | VarOf '/' Match '/' WORD

    Returns:
      An ast.PatSub holding the pattern, the replacement (None when there is
      none) and the do_all / do_prefix / do_suffix flags, or None on error.
    """
    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    if not pat:
      return None

    if len(pat.parts) == 1:
      ok, text, quoted = word.StaticEval(pat)
      if ok and text == '/' and not quoted:
        # Looks like ${a////c}: the pattern is a lone unquoted slash, so read
        # one more token and keep it as part of the pattern.
        self._Next(lex_mode)
        self._Peek()
        pat.parts.append(ast.LiteralPart(self.cur_token))

    if not pat.parts:
      self._BadToken("Pattern must not be empty: %r", token=self.cur_token)
      return None

    # A leading literal / # or % is a modifier, not part of the pattern.
    do_all = do_prefix = do_suffix = False
    head = pat.parts[0]
    if head.tag == word_part_e.LiteralPart:
      modifier = head.token.id
      if modifier == Id.Lit_Slash:
        do_all = True
      elif modifier == Id.Lit_Pound:
        do_prefix = True
      elif modifier == Id.Lit_Percent:
        do_suffix = True
      if do_all or do_prefix or do_suffix:
        pat.parts.pop(0)

    if self.token_type == Id.Right_VarSub:
      # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
      return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)

    if self.token_type != Id.Lit_Slash:
      self._BadToken("Expected } after pat sub, got %s", self.cur_token)
      return None

    replace = self._ReadVarOpArg(lex_mode)  # do not stop at /
    if not replace:
      return None

    self._Peek()
    if self.token_type != Id.Right_VarSub:
      self._BadToken("Expected } after pat sub, got %s", self.cur_token)
      return None

    return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
Пример #5
0
  def _ParseForEachLoop(self):
    """Parse a for-each loop, starting at the loop variable name.

    The current word must statically evaluate, unquoted, to a name matching
    VAR_NAME_RE.  After the name comes either 'in' plus a word list, or ';' /
    'do', in which case do_arg_iter is set on the node (implicit loop).

    Returns:
      An ast.ForEach node with iter_name, iter_words or do_arg_iter, spids
      and body filled in, or None after a parse error.
    """
    node = ast.ForEach()
    node.do_arg_iter = False

    ok, iter_name, quoted = word.StaticEval(self.cur_word)
    if not ok or quoted:
      self.AddErrorContext(
          "Invalid for loop variable", word=self.cur_word)
      return None
    if not VAR_NAME_RE.match(iter_name):
      self.AddErrorContext(
          "Invalid for loop variable name", word=self.cur_word)
      return None
    node.iter_name = iter_name
    self._Next()  # skip past name

    if not self._NewlineOk(): return None

    in_spid = const.NO_INTEGER
    semi_spid = const.NO_INTEGER

    if not self._Peek(): return None
    if self.c_id == Id.KW_In:
      self._Next()  # skip in

      in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
      x = self.ParseForWords()
      if x is None:
        return None
      iter_words, semi_spid = x

      # An empty list of words is OK, but None is an error.  This must be
      # checked BEFORE the words are handed to brace and tilde detection.
      if iter_words is None:
        return None

      words2 = braces.BraceDetectAll(iter_words)
      words3 = word.TildeDetectAll(words2)
      node.iter_words = words3

    elif self.c_id == Id.Op_Semi:
      node.do_arg_iter = True  # implicit for loop
      self._Next()

    elif self.c_id == Id.KW_Do:
      node.do_arg_iter = True  # implicit for loop
      # do not advance

    else:
      self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
          word=self.cur_word)
      return None

    node.spids.extend((in_spid, semi_spid))

    body_node = self.ParseDoGroup()
    if not body_node: return None

    node.body = body_node
    return node
Пример #6
0
    def _ReadPatSubVarOp(self, lex_mode):
        """Read the pattern substitution operator of a ${} expression.

        Match     = ('/' | '#' | '%') WORD
        VarSub    = ...
                  | VarOf '/' Match '/' WORD

        Returns an ast.PatSub built from the pattern, the replacement (None
        when there is none) and the modifier token id, which stays
        Id.Undefined_Tok when no modifier was written.  Errors are reported
        through p_die().
        """
        pat = self._ReadVarOpArg(
            lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)

        if len(pat.parts) == 1:
            ok, text, quoted = word.StaticEval(pat)
            if ok and text == '/' and not quoted:
                # Looks like ${a////c}: read one more token into the pattern.
                self._Next(lex_mode)
                self._Peek()
                pat.parts.append(ast.LiteralPart(self.cur_token))

        if not pat.parts:
            p_die('Pattern in ${x/pat/replace} must not be empty',
                  token=self.cur_token)

        # A leading literal / # or % is a modifier on the pattern.
        replace_mode = Id.Undefined_Tok
        head = pat.parts[0]
        if (head.tag == word_part_e.LiteralPart and
                head.token.id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent)):
            replace_mode = head.token.id
            pat.parts.pop(0)

        # NOTE: If there is a modifier, the pattern can be empty, e.g.
        # ${s/#/foo} and ${a/%/foo}.

        if self.token_type == Id.Right_VarSub:
            # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
            return ast.PatSub(pat, None, replace_mode)

        if self.token_type != Id.Lit_Slash:
            # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
            p_die("Expected } after pat sub, got %r",
                  self.cur_token.val,
                  token=self.cur_token)
            return None

        replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

        self._Peek()
        if self.token_type != Id.Right_VarSub:
            # NOTE: I think this never happens.
            # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and
            # everything there is Lit_ or Left_, except for }.
            p_die("Expected } after replacement string, got %s",
                  self.cur_token,
                  token=self.cur_token)

        return ast.PatSub(pat, replace, replace_mode)
Пример #7
0
    def testGitComment(self):
        """Check which occurrences of '#' are parsed as the start of a comment."""
        # ;# is a comment!  Gah.
        # Conclusion: Comments are NOT LEXICAL.  They are part of word parsing.
        sentence = assert_ParseCommandList(
            self,
            '. "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir\'s into trash\n')
        self.assertEqual(command_e.Sentence, sentence.tag)
        self.assertEqual(2, len(sentence.child.words))

        # Each case is (code, expected static value of the second word).
        cases = [
            ('echo foo#bar\n', 'foo#bar'),  # This is NOT a comment
            ('echo foo #comment\n', 'foo'),  # This is a comment
            ('echo foo #\n', 'foo'),  # Empty comment
        ]
        for code_str, expected_arg in cases:
            cmd = assert_ParseCommandList(self, code_str)
            self.assertEqual(command_e.SimpleCommand, cmd.tag)
            self.assertEqual(2, len(cmd.words))
            _, arg, _ = word.StaticEval(cmd.words[1])
            self.assertEqual(expected_arg, arg)
Пример #8
0
  def _ParseForEachLoop(self):
    """Parse a for-each loop, starting at the loop variable name.

    The current word must statically evaluate, unquoted, to the variable
    name.  After it comes either 'in' plus a word list, or ';' / 'do', in
    which case do_arg_iter is set on the node (implicit loop).

    Returns:
      An ast.ForEach node, or None after a parse error.
    """
    loop = ast.ForEach()
    loop.do_arg_iter = False

    ok, name, quoted = word.StaticEval(self.cur_word)
    if quoted or not ok:
      self.AddErrorContext(
          "Invalid for loop variable: %s", self.cur_word, word=self.cur_word)
      return None
    loop.iter_name = name
    self._Next()  # skip past name

    if not self._NewlineOk():
      return None

    # -1 marks a span that was not seen.
    in_spid = semi_spid = -1

    if not self._Peek():
      return None

    token_id = self.c_id
    if token_id == Id.KW_In:
      self._Next()  # skip in

      in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
      iter_words, semi_spid = self.ParseForWords()
      if iter_words is None:  # empty list of words is OK
        return None
      loop.iter_words = iter_words

    elif token_id in (Id.Op_Semi, Id.KW_Do):
      loop.do_arg_iter = True  # implicit for loop
      if token_id == Id.Op_Semi:
        self._Next()  # 'do' is deliberately not consumed here

    else:
      self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
          word=self.cur_word)
      return None

    loop.spids.extend((in_spid, semi_spid))

    body = self.ParseDoGroup()
    if not body:
      return None

    loop.body = body
    return loop
Пример #9
0
  def _MakeAssignment(self, assign_kw, suffix_words):
    """Build an ast.Assignment from the words of an assignment command.

    Args:
      assign_kw: passed through as the first argument of ast.Assignment.
      suffix_words: the command's words; the first one (local, export, etc.)
        is skipped.

    Returns:
      An ast.Assignment, or None when a word that does not look like an
      assignment cannot be statically evaluated or is quoted.
    """
    bindings = []
    for w in suffix_words[1:]:  # skip over local, export, etc.
      left_spid = word.LeftMostSpanForWord(w)

      kv = word.LooksLikeAssignment(w)
      if not kv:
        # In aboriginal in variables/sources: export_if_blank does export "$1".
        # We should allow that.
        ok, name, quoted = word.StaticEval(w)
        if quoted or not ok:
          self.AddErrorContext(
              'Variable names must be constant strings, got %s', w, word=w)
          return None
        bindings.append((name, None, left_spid))  # No value is equivalent to ''
        continue

      key, val = kv
      # Either way the right-hand side is an unevaluated word; the
      # tilde-detected one (with a TildeSubPart) wins when there is one.
      tilde_word = word.TildeDetect(val)
      bindings.append((key, tilde_word or val, left_spid))

    pairs = []
    for lhs, rhs, spid in bindings:
      pair = ast.assign_pair(ast.LeftVar(lhs), rhs)
      pair.spids.append(spid)
      pairs.append(pair)

    return ast.Assignment(assign_kw, pairs)
Пример #10
0
    def ParseFactor(self):
        """Parse one factor of a boolean expression.

        Factor  : WORD
                | UNARY_OP WORD
                | WORD BINARY_OP WORD
                | '(' Expr ')'

        Returns:
          An ast.BoolUnary, ast.BoolBinary or ast.WordTest node, the node
          produced by self.ParseExpr() for a parenthesized expression, or
          None when self._Next() fails or a regex operand is invalid.

        Raises:
          RuntimeError: a parenthesized expression is not followed by ')'.
          AssertionError: the current token cannot start a factor.
        """
        #print('ParseFactor %s %s' % (self.b_kind, IdName(self.op_id)))
        if self.b_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            op = self.op_id
            if not self._Next(): return None
            w = self.cur_word
            if not self._Next(): return None
            node = ast.BoolUnary(op, w)
            return node

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            t2 = self._LookAhead()
            t2_op_id = word.BoolId(t2)
            t2_b_kind = LookupKind(t2_op_id)

            # Redir PUN for < and >
            if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
                left = self.cur_word

                if not self._Next(): return None
                op = self.op_id

                # TODO: Need to change to LexMode.BASH_REGEX.
                # _Next(lex_mode) then?
                is_regex = t2_op_id == Id.BoolBinary_EqualTilde
                if is_regex:
                    if not self._Next(lex_mode=LexMode.BASH_REGEX): return None
                else:
                    if not self._Next(): return None

                right = self.cur_word
                if is_regex:
                    ok, regex_str, unused_quoted = word.StaticEval(right)
                    # Only a statically known regex (one that doesn't contain
                    # $foo, etc.) can be validated at parse time.
                    if ok and not libc.regex_parse(regex_str):
                        self.AddErrorContext("Invalid regex: %r" % regex_str,
                                             word=right)
                        return None

                if not self._Next(): return None
                return ast.BoolBinary(op, left, right)
            else:
                # [[ foo ]]
                w = self.cur_word
                if not self._Next(): return None
                return ast.WordTest(w)

        if self.op_id == Id.Op_LParen:
            if not self._Next(): return None
            node = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                # Interpolate the word into the message.  Passing it as a
                # second argument would leave '%s' unformatted in the error.
                raise RuntimeError("Expected ), got %s" % self.cur_word)
            if not self._Next(): return None
            return node

        # TODO: A proper error, e.g. for "&&"
        raise AssertionError("Unexpected token: %s" % self.cur_word)
Пример #11
0
  def ParseSimpleCommand(self):
    """Parse a simple command, an assignment, or a control flow statement.

    Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

    io_file        : '<'       filename
                   | LESSAND   filename
                     ...

    io_here        : DLESS     here_end
                   | DLESSDASH here_end

    redirect       : IO_NUMBER (io_redirect | io_here)

    prefix_part    : ASSIGNMENT_WORD | redirect
    cmd_part       : WORD | redirect

    assign_kw      : Declare | Export | Local | Readonly

    # Without any words it is parsed as a command, not an assignment
    assign_listing : assign_kw

    # Now we have something to do (might be changing assignment flags too)
    # NOTE: any prefixes should be a warning, but they are allowed in shell.
    assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

    # an external command, a function call, or a builtin -- a "word_command"
    word_command   : prefix_part* cmd_part+

    simple_command : assign_listing
                   | assignment
                   | word_command

    Simple imperative algorithm:

    1) Read a list of words and redirects.  Append them to separate lists.
    2) Look for the first non-assignment word.  If it's declare, etc., then
    keep parsing words AND assign words.  Otherwise, just parse words.
    3) If there are no non-assignment words, then it's a global assignment.

    { redirects, global assignments } OR
    { redirects, prefix_bindings, words } OR
    { redirects, ERROR_prefix_bindings, keyword, assignments, words }

    THEN CHECK that prefix bindings don't have any array literal parts!
    Global assignments and keyword assignments can have them of course.
    Well, actually EXPORT shouldn't have them either -- WARNING

    3 cases we want to warn: prefix_bindings for assignment, and array literal
    in prefix bindings, or export

    A command can be an assignment word, word, or redirect on its own.

        ls
        >out.txt

        >out.txt FOO=bar   # this touches the file, and then

    Or any sequence:
        ls foo bar
        <in.txt ls foo bar >out.txt
        <in.txt ls >out.txt foo bar

    Or add one or more environment bindings:
        VAR=val env
        >out.txt VAR=val env

    here_end vs filename is a matter of whether we test that it's quoted.  e.g.
    <<EOF vs <<'EOF'.

    Returns:
      An ast.SimpleCommand, ast.Assignment or ast.ControlFlow node, or None
      after an error.
    """
    scanned = self._ScanSimpleCommand()
    if not scanned:
      return None
    redirects, words = scanned

    if not words:  # e.g.  >out.txt  # redirect without words
      bare = ast.SimpleCommand()
      bare.redirects = redirects
      return bare

    prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words)

    if not suffix_words:  # ONE=1 TWO=2  (with no other words)
      if redirects:
        _, _, _, first_spid = prefix_bindings[0]
        self.AddErrorContext('Got redirects in global assignment',
                             span_id=first_spid)
        return None

      pairs = []
      for lhs, op, rhs, spid in prefix_bindings:
        pair = ast.assign_pair(ast.LhsName(lhs), op, rhs)
        pair.spids.append(spid)
        pairs.append(pair)

      assignment = ast.Assignment(Id.Assign_None, [], pairs)
      # no keyword spid to skip past
      assignment.spids.append(word.LeftMostSpanForWord(words[0]))
      return assignment

    kind, kw_token = word.KeywordToken(suffix_words[0])

    if kind == Kind.Assign:
      # Here we StaticEval suffix_words[1] to see if it's a command like
      # 'typeset -p'.  Then it becomes a SimpleCommand node instead of an
      # Assignment.  Note we're not handling duplicate flags like 'typeset
      # -pf'.  I see this in bashdb (bash debugger) but it can just be changed
      # to 'typeset -p -f'.
      is_command = False
      if len(suffix_words) > 1:
        ok, val, _ = word.StaticEval(suffix_words[1])
        is_command = bool(ok and (kw_token.id, val) in self._ASSIGN_COMMANDS)

      if is_command:  # declare -f, declare -p, typeset -p, etc.
        return self._MakeSimpleCommand(prefix_bindings, suffix_words,
                                       redirects)

      # declare str='', declare -a array=()
      if redirects:
        # Attach the error location to the keyword.
        self.AddErrorContext('Got redirects in assignment', token=kw_token)
        return None

      if prefix_bindings:  # FOO=bar local spam=eggs not allowed
        # Use the location of the first value.  TODO: Use the whole word before
        # splitting.
        _, _, first_val, _ = prefix_bindings[0]
        self.AddErrorContext(
            'Invalid prefix bindings in assignment: %s', prefix_bindings,
            word=first_val)
        return None

      assignment = self._MakeAssignment(kw_token.id, suffix_words)
      if not assignment:
        return None
      assignment.spids.append(kw_token.span_id)
      return assignment

    if kind == Kind.ControlFlow:
      if redirects:
        self.AddErrorContext('Got redirects in control flow: %s', redirects)
        return None

      if prefix_bindings:  # FOO=bar local spam=eggs not allowed
        # Use the location of the first value.  TODO: Use the whole word before
        # splitting.
        _, _, first_val, _ = prefix_bindings[0]
        self.AddErrorContext(
            'Invalid prefix bindings in control flow: %s', prefix_bindings,
            word=first_val)
        return None

      # The keyword takes at most one argument word.  suffix_words is
      # non-empty here, so there are either 1, 2 or too many words.
      num_words = len(suffix_words)
      if num_words > 2:
        self.AddErrorContext('Too many arguments')
        return None
      arg_word = suffix_words[1] if num_words == 2 else None

      # Attach the token for errors.  (Assignment may not need it.)
      return ast.ControlFlow(kw_token, arg_word)

    return self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects)
Пример #12
0
  def _MakeAssignment(self, assign_kw, suffix_words):
    """Build an ast.Assignment from the words of an assignment command.

    The words after the keyword are read in two phases: leading words that
    statically evaluate, unquoted, to something starting with '-' are
    collected as flags; every remaining word is a binding or a bare name.

    Args:
      assign_kw: passed through as the first argument of ast.Assignment.
      suffix_words: the command's words, starting with the keyword itself.

    Returns:
      An ast.Assignment, or None when a bare name cannot be statically
      evaluated, is quoted, or does not match VAR_NAME_RE.
    """
    # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments.
    flags = []
    num_words = len(suffix_words)
    pos = 1
    while pos < num_words:
      ok, text, quoted = word.StaticEval(suffix_words[pos])
      # Stop at the first word that can't be statically evaluated or that is
      # not a flag; the rest are args.
      if not ok or quoted or not text.startswith('-'):
        break
      flags.append(text)
      pos += 1

    # Now parse bindings or variable names
    assignments = []
    for w in suffix_words[pos:]:
      left_spid = word.LeftMostSpanForWord(w)
      kov = word.LooksLikeAssignment(w)
      if not kov:
        # In aboriginal in variables/sources: export_if_blank does export "$1".
        # We should allow that.

        # Parse this differently then?
        # dynamic-export?
        # It sets global variables.
        ok, name, quoted = word.StaticEval(w)
        if quoted or not ok:
          self.AddErrorContext(
              'Variable names must be constant strings, got %s', w, word=w)
          return None

        if not VAR_NAME_RE.match(name):
          self.AddErrorContext('Invalid variable name %r', name, word=w)
          return None

        # No value is equivalent to ''
        assignments.append((name, assign_op_e.Equal, None, left_spid))
        continue

      key, op, val = kov
      # Either way the right-hand side is an unevaluated word; the
      # tilde-detected one (with a TildeSubPart) wins when there is one.
      tilde_word = word.TildeDetect(val)
      assignments.append((key, op, tilde_word or val, left_spid))

    # TODO: Also make with LhsIndexedName
    pairs = []
    for lhs, op, rhs, spid in assignments:
      pair = ast.assign_pair(ast.LhsName(lhs), op, rhs)
      pair.spids.append(spid)
      pairs.append(pair)

    return ast.Assignment(assign_kw, flags, pairs)
Пример #13
0
  def testPatSub(self):
    """Check pattern substitution: modifiers, pattern and replacement."""
    w = _assertReadWord(self, '${var/pat/replace}')
    op = _GetSuffixOp(self, w)
    self.assertFalse(op.do_all)
    self.assertFalse(op.do_prefix)
    self.assertFalse(op.do_suffix)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('replace', op.replace)

    w = _assertReadWord(self, '${var//pat/replace}')  # sub all
    op = _GetSuffixOp(self, w)
    self.assertTrue(op.do_all)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('replace', op.replace)

    # % anchors the pattern at the end of the value, i.e. a suffix.
    w = _assertReadWord(self, '${var/%pat/replace}')  # suffix
    op = _GetSuffixOp(self, w)
    self.assertTrue(op.do_suffix)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('replace', op.replace)

    # # anchors the pattern at the beginning of the value, i.e. a prefix.
    w = _assertReadWord(self, '${var/#pat/replace}')  # prefix
    op = _GetSuffixOp(self, w)
    self.assertTrue(op.do_prefix)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('replace', op.replace)

    w = _assertReadWord(self, '${var/pat}')  # no replacement
    op = _GetSuffixOp(self, w)
    self.assertFalse(op.do_all)
    self.assertUnquoted('pat', op.pat)
    self.assertEqual(None, op.replace)

    w = _assertReadWord(self, '${var//pat}')  # no replacement
    op = _GetSuffixOp(self, w)
    self.assertTrue(op.do_all)
    self.assertUnquoted('pat', op.pat)
    self.assertEqual(None, op.replace)

    # replace with slash
    w = _assertReadWord(self, '${var/pat//}')
    op = _GetSuffixOp(self, w)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('/', op.replace)

    # replace with two slashes unquoted
    w = _assertReadWord(self, '${var/pat///}')
    op = _GetSuffixOp(self, w)
    self.assertUnquoted('pat', op.pat)
    self.assertUnquoted('//', op.replace)

    # replace with two slashes quoted
    w = _assertReadWord(self, '${var/pat/"//"}')
    op = _GetSuffixOp(self, w)
    self.assertUnquoted('pat', op.pat)

    ok, s, quoted = word.StaticEval(op.replace)
    self.assertTrue(ok)
    self.assertEqual('//', s)
    self.assertTrue(quoted)

    # Real example found in the wild!
    # http://www.oilshell.org/blog/2016/11/07.html
    w = _assertReadWord(self, r'${var////\\/}')
    op = _GetSuffixOp(self, w)
    self.assertTrue(op.do_all)

    self.assertUnquoted('/', op.pat)

    ok, s, quoted = word.StaticEval(op.replace)
    self.assertTrue(ok)
    self.assertEqual(r'\/', s)
Пример #14
0
 def assertUnquoted(self, expected, w):
   """Assert that w statically evaluates to expected and is not quoted."""
   evaluated, text, was_quoted = word.StaticEval(w)
   self.assertTrue(evaluated)
   self.assertEqual(expected, text)
   self.assertFalse(was_quoted)
Пример #15
0
    def ParseFactor(self):
        """Parse one factor of a boolean expression.

        Factor  : WORD
                | UNARY_OP WORD
                | WORD BINARY_OP WORD
                | '(' Expr ')'

        Returns:
          An ast.BoolUnary, ast.BoolBinary or ast.WordTest node, the node
          produced by self.ParseExpr() for a parenthesized expression, or
          None after an error.
        """
        if self.b_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            unary_op = self.op_id
            if not self._Next():
                return None
            operand = self.cur_word
            if not self._Next():
                return None
            return ast.BoolUnary(unary_op, operand)

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            next_word = self._LookAhead()
            next_op_id = word.BoolId(next_word)
            next_kind = LookupKind(next_op_id)

            #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
            # Redir pun for < and >, -a and -o pun
            if next_kind not in (Kind.BoolBinary, Kind.Redir):
                # [[ foo ]]
                operand = self.cur_word
                if not self._Next():
                    return None
                return ast.WordTest(operand)

            left = self.cur_word

            if not self._Next():
                return None
            binary_op = self.op_id

            # TODO: Need to change to lex_mode_e.BASH_REGEX.
            # _Next(lex_mode) then?
            is_regex = next_op_id == Id.BoolBinary_EqualTilde
            if is_regex:
                advanced = self._Next(lex_mode=lex_mode_e.BASH_REGEX)
            else:
                advanced = self._Next()
            if not advanced:
                return None

            right = self.cur_word
            if is_regex:
                # TODO: Quoted parts need to be regex-escaped, e.g. [[ $a =~ "{" ]].
                # I don't think libc has a function to do this.  Escape these
                # characters:
                # https://www.gnu.org/software/sed/manual/html_node/ERE-syntax.html

                ok, regex_str, unused_quoted = word.StaticEval(right)

                # Only a statically known regex (one that doesn't contain
                # $foo, etc.) can be validated here.
                if ok and not libc.regex_parse(regex_str):
                    self.AddErrorContext("Invalid regex: %r" % regex_str,
                                         word=right)
                    return None

            if not self._Next():
                return None
            return ast.BoolBinary(binary_op, left, right)

        if self.op_id == Id.Op_LParen:
            if not self._Next():
                return None
            inner = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                self.AddErrorContext('Expected ), got %s',
                                     self.cur_word,
                                     word=self.cur_word)
                return None
            if not self._Next():
                return None
            return inner

        # TODO: A proper error, e.g. for [[ && ]] or [[ ]]
        self.AddErrorContext('Unexpected token: %s' % self.cur_word,
                             word=self.cur_word)
        return None
Пример #16
0
  def DoCommand(self, node, local_symbols, at_top_level=False):
    if node.tag == command_e.CommandList:
      # TODO: How to distinguish between echo hi; echo bye; and on separate
      # lines
      for child in node.children:
        self.DoCommand(child, local_symbols, at_top_level=at_top_level)

    elif node.tag == command_e.SimpleCommand:
      # How to preserve spaces between words?  Do you want to do it?
      # Well you need to test this:
      #
      # echo foo \
      #   bar

      # TODO: Need to print until the left most part of the phrase?  the phrase
      # is a word, binding, redirect.
      #self.cursor.PrintUntil()

      if node.more_env:
        (left_spid,) = node.more_env[0].spids
        self.cursor.PrintUntil(left_spid)
        self.f.write('env ')

        # We only need to transform the right side, not left side.
        for pair in node.more_env:
          self.DoWordInCommand(pair.val, local_symbols)

      # More translations:
      # - . to source
      # - eval to sh-eval

      if node.words:
        first_word = node.words[0]
        ok, val, quoted = word.StaticEval(first_word)
        word0_spid = word.LeftMostSpanForWord(first_word)
        if ok and not quoted:
          if val == '[':
            last_word = node.words[-1]
            # Check if last word is ]
            ok, val, quoted = word.StaticEval(last_word)
            if ok and not quoted and val == ']':
              # Replace [ with 'test'
              self.cursor.PrintUntil(word0_spid)
              self.cursor.SkipUntil(word0_spid + 1)
              self.f.write('test')

              for w in node.words[1:-1]:
                self.DoWordInCommand(w, local_symbols)

              # Now omit ]
              last_spid = word.LeftMostSpanForWord(last_word)
              self.cursor.PrintUntil(last_spid - 1)  # Get the space before
              self.cursor.SkipUntil(last_spid + 1)  # ] takes one spid
              return
            else:
              raise RuntimeError('Got [ without ]')

          elif val == '.':
            self.cursor.PrintUntil(word0_spid)
            self.cursor.SkipUntil(word0_spid + 1)
            self.f.write('source')
            return

      for w in node.words:
        self.DoWordInCommand(w, local_symbols)

      # NOTE: This will change to "phrase"?  Word or redirect.
      for r in node.redirects:
        self.DoRedirect(r, local_symbols)

      # TODO: Print the terminator.  Could be \n or ;
      # Need to print env like PYTHONPATH = 'foo' && ls
      # Need to print redirects:
      # < > are the same.  << is here string, and >> is assignment.
      # append is >+

      # TODO: static_eval of simple command
      # - [ -> "test".  Eliminate trailing ].
      # - . -> source, etc.

    elif node.tag == command_e.Assignment:
      self.DoAssignment(node, at_top_level, local_symbols)

    elif node.tag == command_e.Pipeline:
      # Obscure: |& turns into |- or |+ for stderr.
      # TODO:
      # if ! true; then -> if not true {

      # if ! echo | grep; then -> if not { echo | grep } {
      # }
      # not is like do {}, but it negates the return value I guess.

      for child in node.children:
        self.DoCommand(child, local_symbols)

    elif node.tag == command_e.AndOr:
      for child in node.children:
        self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Sentence:
      # 'ls &' to 'fork ls'
      # Keep ; the same.
      self.DoCommand(node.child, local_symbols)

    # This has to be different in the function case.
    elif node.tag == command_e.BraceGroup:
      # { echo hi; } -> do { echo hi }
      # For now it might be OK to keep 'do { echo hi; }
      #left_spid, right_spid = node.spids
      (left_spid,) = node.spids

      self.cursor.PrintUntil(left_spid)
      self.cursor.SkipUntil(left_spid + 1)
      self.f.write('do {')

      for child in node.children:
        self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Subshell:
      # (echo hi) -> shell echo hi
      # (echo hi; echo bye) -> shell {echo hi; echo bye}

      (left_spid, right_spid) = node.spids

      self.cursor.PrintUntil(left_spid)
      self.cursor.SkipUntil(left_spid + 1)
      self.f.write('shell {')

      self.DoCommand(node.child, local_symbols)

      #self._DebugSpid(right_spid)
      #self._DebugSpid(right_spid + 1)

      #print('RIGHT SPID', right_spid)
      self.cursor.PrintUntil(right_spid)
      self.cursor.SkipUntil(right_spid + 1)
      self.f.write('}')

    elif node.tag == command_e.DParen:
      # (( a == 0 )) is sh-expr ' a == 0 '
      #
      # NOTE: (( n++ )) is auto-translated to sh-expr 'n++', but could be set
      # n++.
      left_spid, right_spid = node.spids
      self.cursor.PrintUntil(left_spid)
      self.cursor.SkipUntil(left_spid + 1)
      self.f.write("sh-expr '")
      self.cursor.PrintUntil(right_spid - 1)  # before ))
      self.cursor.SkipUntil(right_spid + 1)  # after )) -- each one is a token
      self.f.write("'")

    elif node.tag == command_e.DBracket:
      # [[ 1 -eq 2 ]] to (1 == 2)
      self.DoBoolExpr(node.expr)

    elif node.tag == command_e.FuncDef:
      # TODO: skip name
      #self.f.write('proc %s' % node.name)

      # New symbol table for every function.
      new_local_symbols = {}

      # Should be the left most span, including 'function'
      self.cursor.PrintUntil(node.spids[0])

      self.f.write('proc ')
      self.f.write(node.name)
      self.cursor.SkipUntil(node.spids[1])

      if node.body.tag == command_e.BraceGroup:
        # Don't add "do" like a standalone brace group.  Just use {}.
        for child in node.body.children:
          self.DoCommand(child, new_local_symbols)
      else:
        pass
        # Add {}.
        # proc foo {
        #   shell {echo hi; echo bye}
        # }
        #self.DoCommand(node.body)

    elif node.tag == command_e.BraceGroup:
      for child in node.children:
        self.DoCommand(child, local_symbols)

    elif node.tag == command_e.DoGroup:
      do_spid, done_spid = node.spids
      self.cursor.PrintUntil(do_spid)
      self.cursor.SkipUntil(do_spid + 1)
      self.f.write('{')

      for child in node.children:
        self.DoCommand(child, local_symbols)

      self.cursor.PrintUntil(done_spid)
      self.cursor.SkipUntil(done_spid + 1)
      self.f.write('}')

    elif node.tag == command_e.ForEach:
      # Need to preserve spaces between words, because there can be line
      # wrapping.
      # for x in a b c \
      #    d e f; do

      in_spid, semi_spid = node.spids

      if in_spid == const.NO_INTEGER:
        #self.cursor.PrintUntil()  # 'for x' and then space
        self.f.write('for %s in @Argv ' % node.iter_name)
        self.cursor.SkipUntil(node.body.spids[0])
      else:
        self.cursor.PrintUntil(in_spid + 1)  # 'for x in' and then space
        self.f.write('[')
        for w in node.iter_words:
          self.DoWordInCommand(w, local_symbols)
        self.f.write(']')
        #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)

      if semi_spid != const.NO_INTEGER:
        self.cursor.PrintUntil(semi_spid)
        self.cursor.SkipUntil(semi_spid + 1)

      self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.ForExpr:
      # Change (( )) to ( ), and then _FixDoGroup
      pass

    elif node.tag == command_e.WhileUntil:

      # Skip 'until', and replace it with 'while not'
      if node.keyword.id == Id.KW_Until:
        kw_spid = node.keyword.span_id
        self.cursor.PrintUntil(kw_spid)
        self.f.write('while not')
        self.cursor.SkipUntil(kw_spid + 1)

      cond = node.cond
      # Skip the semi-colon in the condition, which is ususally a Sentence
      if len(cond) == 1 and cond[0].tag == command_e.Sentence:
        self.DoCommand(cond[0].child, local_symbols)
        semi_spid = cond[0].terminator.span_id
        self.cursor.SkipUntil(semi_spid + 1)

      self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.If:
      else_spid, fi_spid = node.spids

      # if foo; then -> if foo {
      # elif foo; then -> } elif foo {
      for arm in node.arms:
        elif_spid, then_spid = arm.spids
        if elif_spid != const.NO_INTEGER:
          self.cursor.PrintUntil(elif_spid)
          self.f.write('} ')

        cond = arm.cond
        if len(cond) == 1 and cond[0].tag == command_e.Sentence:
          sentence = cond[0]
          self.DoCommand(sentence, local_symbols)

          # Remove semi-colon
          semi_spid = sentence.terminator.span_id
          self.cursor.PrintUntil(semi_spid)
          self.cursor.SkipUntil(semi_spid + 1)
        else:
          for child in arm.cond:
            self.DoCommand(child, local_symbols)

        self.cursor.PrintUntil(then_spid)
        self.cursor.SkipUntil(then_spid + 1)
        self.f.write('{')

        for child in arm.action:
          self.DoCommand(child, local_symbols)

      # else -> } else {
      if node.else_action:
        self.cursor.PrintUntil(else_spid)
        self.f.write('} ')
        self.cursor.PrintUntil(else_spid + 1)
        self.f.write(' {')

        for child in node.else_action:
          self.DoCommand(child, local_symbols)

      # fi -> }
      self.cursor.PrintUntil(fi_spid)
      self.cursor.SkipUntil(fi_spid + 1)
      self.f.write('}')

    elif node.tag == command_e.Case:
      case_spid, in_spid, esac_spid = node.spids
      self.cursor.PrintUntil(case_spid)
      self.cursor.SkipUntil(case_spid + 1)
      self.f.write('match')

      # Reformat "$1" to $1
      self.DoWordInCommand(node.to_match, local_symbols)

      self.cursor.PrintUntil(in_spid)
      self.cursor.SkipUntil(in_spid + 1)
      self.f.write('{')  # matchstr $var {

      # each arm needs the ) and the ;; node to skip over?
      for arm in node.arms:
        left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
        #print(left_spid, rparen_spid, dsemi_spid)

        self.cursor.PrintUntil(left_spid)
        # Hm maybe keep | because it's semi-deprecated?  You acn use
        # reload|force-relaod {
        # }
        # e/reload|force-reload/ {
        # }
        # / 'reload' or 'force-reload' / {
        # }
        #
        # Yeah it's the more abbreviated syntax.

        # change | to 'or'
        for pat in arm.pat_list:
          pass

        self.f.write('with ')
        # Remove the )
        self.cursor.PrintUntil(rparen_spid)
        self.cursor.SkipUntil(rparen_spid + 1)

        for child in arm.action:
          self.DoCommand(child, local_symbols)

        if dsemi_spid != const.NO_INTEGER:
          # Remove ;;
          self.cursor.PrintUntil(dsemi_spid)
          self.cursor.SkipUntil(dsemi_spid + 1)
        elif last_spid != const.NO_INTEGER:
          self.cursor.PrintUntil(last_spid)
        else:
          raise AssertionError(
              "Expected with dsemi_spid or last_spid in case arm")

      self.cursor.PrintUntil(esac_spid)
      self.cursor.SkipUntil(esac_spid + 1)
      self.f.write('}')  # strmatch $var {

    elif node.tag == command_e.NoOp:
      pass

    elif node.tag == command_e.ControlFlow:
      # No change for break / return / continue
      pass

    elif node.tag == command_e.TimeBlock:
      self.DoCommand(node.pipeline, local_symbols)

    else:
      #log('Command not handled: %s', node)
      raise AssertionError(node.__class__.__name__)
Пример #17
0
  def DoRedirect(self, node, local_symbols):
    """Write the translated form of one redirect node.

    The cursor is first advanced with PrintUntil() to the span of the redirect
    operator.  The rest depends on node.tag:

    - redir_e.Redir: when node.fd is set, '!<fd> ' is written first.  A '>'
      operator is then re-emitted as '>' and a '>&' operator as '> !'; any
      other operator is not rewritten here.  Finally the target word is handed
      to DoWordInCommand().
    - redir_e.HereDoc: '<<' plus an opening triple quote is written (single
      quotes if the delimiter was quoted, double quotes otherwise), the cursor
      skips past the delimiter word, each body part is handed to DoWordPart(),
      and the cursor skips past the closing delimiter before the matching
      triple quote and a newline are written.

    Args:
      node: The redirect node.  Reads node.op and node.tag, plus node.fd and
        node.arg_word (Redir) or node.here_begin, node.stdin_parts and
        node.here_end_span_id (HereDoc).
      local_symbols: Forwarded unchanged to DoWordInCommand() / DoWordPart().

    Raises:
      AssertionError: If node.tag is neither Redir nor HereDoc.
    """
    op_spid = node.op.span_id
    op_id = node.op.id
    self.cursor.PrintUntil(op_spid)

    # TODO:
    # - Treat < and <& the same way as > and >&.
    # - Decide how here docs and here strings should be handled.
    # - >> becomes >+ or >-, or maybe >>>

    if node.tag == redir_e.Redir:
      # An explicit descriptor comes first, e.g. 2> err.txt -> !2 > err.txt.
      # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be left to the
      # formatter.
      if node.fd != const.NO_INTEGER:
        self.f.write('!%d ' % node.fd)

      if op_id == Id.Redir_Great:
        # Re-emit the operator ourselves so that it can be replaced later.
        self.f.write('>')
        self.cursor.SkipUntil(op_spid + 1)
      elif op_id == Id.Redir_GreatAnd:
        # >& 2 -> > !2, and 1>& 2 -> !1 > !2.  Skip straight to the target
        # word.
        self.f.write('> !')
        target_spid = word.LeftMostSpanForWord(node.arg_word)
        self.cursor.SkipUntil(target_spid)

      self.DoWordInCommand(node.arg_word, local_symbols)
      return

    if node.tag == redir_e.HereDoc:
      ok, delimiter, delim_quoted = word.StaticEval(node.here_begin)
      if not ok:
        p_die('Invalid here doc delimiter', word=node.here_begin)

      # Every here doc becomes <<; only the kind of triple quote varies with
      # whether the original delimiter was quoted.
      if delim_quoted:
        triple_quote = "'''"
      else:
        triple_quote = '"""'

      self.f.write('<<')
      self.f.write(' ' + triple_quote)

      # Drop the original delimiter word.
      self.cursor.SkipUntil(word.RightMostSpanForWord(node.here_begin) + 1)

      # Now print the lines.  TODO: Have a flag to indent these to the level
      # of the owning command, e.g.
      #   cat <<EOF
      # EOF
      # Since most here docs are at the top level, a fixed-indent hack might
      # be enough.  TODO: Look at real use cases.
      for body_part in node.stdin_parts:
        self.DoWordPart(body_part, local_symbols)

      # Drop the closing delimiter and close the triple-quoted string.
      self.cursor.SkipUntil(node.here_end_span_id + 1)
      self.f.write(triple_quote + '\n')
      return

    raise AssertionError(node.__class__.__name__)

    # Design notes on the target syntax (source form, then translated form):
    #
    # <<< 'here word'
    # << 'here word'
    #
    # 2> out.txt
    # !2 > out.txt
    #
    # cat 1<< EOF
    # hello $name
    # EOF
    # cat !1 << """
    # hello $name
    # """
    #
    # cat << 'EOF'
    # no expansion
    # EOF
    #   cat <<- 'EOF'
    #   no expansion and indented
    #
    # cat << '''
    # no expansion
    # '''
    #   cat << '''
    #   no expansion and indented
    #   '''
    #
    # Warn about multiple here docs on a line.
    # As an obscure feature, allow
    # cat << \'ONE' << \"TWO"
    # 123
    # ONE
    # 234
    # TWO
    # The _ is an indicator that it's not a string to be piped in.