示例#1
0
 def __init__(self, printfunc=print):
     """Create the evaluator state that has to outlive a single evaluation.

     All structures are created eagerly so that they are retained between
     shell inputs, which arrive split over several ast.Programs.

     Everything reachable through a name/identifier lives either in
     globals (modules, typedefs, functions) or in locals. Top-level
     statements behave as if wrapped in an implicit main() function, so
     their variables are locals, not globals.

     Args:
         printfunc: callable stored for producing output; defaults to the
             builtin print.
     """
     self.printfunc = printfunc
     # Stack of local variables. A plain append() on every declaration
     # would work too, but addressing every variable uniformly as
     # self.locals[symbol_tree.get_index()] reads more high-level; to
     # allow that when declaring variables, the list grows automatically.
     self.locals = StackList()
     # Modules/imports, custom types and function definitions are reached
     # through the ast.TranslationUnit directly; the evaluator reads its
     # symbol table to identify things. It starts out empty and is filled
     # with content when evaluating.
     empty_unit = ast.TranslationUnit(
         [], collections.OrderedDict(), collections.OrderedDict(), [], {})
     self.current_unit = TranslationUnitRef(empty_unit)
     # All imported modules.
     self.modules: typing.Dict[str, ast.TranslationUnit] = dict()
     # Name -> stack index bookkeeping for self.locals.
     self.symbol_tree = SymbolTree()
示例#2
0
 def __init__(
     self,
     symbol_table_snapshot=None,
     modules: typing.Optional[typing.Dict[str,
                                          ast.TranslationUnit]] = None):
     """Set up the symbol tree and the table of known modules.

     Args:
         symbol_table_snapshot: passed straight to SymbolTree(); None is
             forwarded unchanged.
         modules: mapping from module key to its ast.TranslationUnit. Any
             argument that is not a dict (including None) is replaced by a
             fresh empty dict; a dict is stored as-is, not copied.
     """
     self.symbol_tree = SymbolTree(symbol_table_snapshot)
     if isinstance(modules, dict):
         known_modules = modules
     else:
         known_modules = {}
     self.modules: typing.Dict[str, ast.TranslationUnit] = known_modules
示例#3
0
 def parse_function_definition(self) -> ast.FunctionDefinition:
     """Parse `func NAME ( PARAMS ) [: TYPE {, TYPE}] { BODY }`.

     The function name is registered in self.symbols_global (with an
     UnknownType placeholder) before the parameters are parsed, so that no
     parameter can reuse the function's own name. The body is parsed
     against a fresh SymbolTree that contains only the parameters; the
     tree that was active on entry is always restored afterwards, also
     when parsing fails.

     Returns:
         The ast.FunctionDefinition, including a snapshot of the symbol
         tree taken after the parameters were registered and before the
         body was parsed.

     Raises:
         Exception: if the current token is not `func` (callers are
             expected to check this beforehand).
         ParseException: on any syntax error, a function name that already
             exists, or a duplicated parameter name.
     """
     toks = TokenList()
     # FUNC foo (bar : int) : str { ... }
     if not toks.add(self.match(token.FUNC)):
         raise Exception("Expected function definition.")
     # func FOO (bar : int) : str { ... }
     if not toks.add(self.match(token.IDENTIFIER)):
         raise ParseException("Expected function name.")
     name = self.peek(-1).lexeme
     if name in self.symbols_global:
         raise ParseException(f"Name '{name}' already exists in symbol table. Function definition impossible.")
     # Register function name before parsing parameter names (no parameter name should have the function name!)
     self.symbols_global[name] = bongtypes.UnknownType()
     # (
     if not toks.add(self.match(token.LPAREN)):
         raise ParseException("Expected ( to start the parameter list.")
     # Parameters
     parameter_names, parameter_types = self.parse_parameters()
     # )
     if not toks.add(self.match(token.RPAREN)):
         raise ParseException("Expected ) to end the parameter list.")
     # Return types
     return_types : typing.List[ast.BongtypeIdentifier] = []
     if toks.add(self.match(token.COLON)):
         self.check_eof("Return type list expected.")
         return_types.append(self.parse_type())
         while toks.add(self.match(token.COMMA)):
             return_types.append(self.parse_type())
     # {
     if not self.peek().type == token.LBRACE:
         raise ParseException("Expected function body.")
     # New local symbol table (tree) for statement block.
     # We could just store the global symbol table in the object because
     # it will always be the same. But remembering the previous symbol
     # table here theoretically allows to parse function definitions inside
     # other functions (the local symbol table would be properly restored
     # then).
     global_symbol_tree = self.symbol_tree
     self.symbol_tree = SymbolTree()
     try:
         # Parameters (zip keeps the original pairing semantics; the type
         # itself is resolved later, so only the name is registered here).
         for param, _ in zip(parameter_names, parameter_types):
             if param in self.symbol_tree:
                 raise ParseException(f"Argument name '{param}' appears twice in function definition")
             self.symbol_tree.register(param, bongtypes.UnknownType())
         # Snapshot before block is parsed (this changes the state of the tree)
         func_symbol_tree_snapshot = self.symbol_tree.take_snapshot()
         # Function body
         body = self.block_stmt()
     finally:
         # Restore symbol table/tree even if a ParseException escapes, so
         # the parser is not left pointing at the function-local tree.
         self.symbol_tree = global_symbol_tree
     return ast.FunctionDefinition(toks, name, parameter_names, parameter_types, return_types, body, func_symbol_tree_snapshot)
示例#4
0
    def __init__(self, lexer, snapshot=None, basepath=None):
        """Create a parser reading from `lexer`.

        Args:
            lexer: token source; stored as self.lexer.
            snapshot: optional pair whose first item is a dict of global
                symbols and whose second item is a symbol tree snapshot.
                When given, parsing continues from that state; when
                omitted, the global symbol table is seeded with the
                builtin functions and basic types.
            basepath: stored as self.basepath; defaults to the current
                working directory.
        """
        self.lexer = lexer

        self.basepath = basepath if basepath is not None else os.getcwd()

        self.symbols_global : typing.Dict[str, bongtypes.BaseNode] = {}
        self.symbol_tree = SymbolTree()
        if snapshot is not None:
            # When restoring the global dictionary, we need to copy the dict.
            # Otherwise, we change the snapshot that the caller (the repl)
            # will (most probably) reuse.
            self.symbols_global = snapshot[0].copy() # overwrite
            self.symbol_tree.restore_snapshot(snapshot[1]) # restore
        else:
            # Only when initializing symbol tables for the first time, register
            # builtin stuff.
            for bfuncname, bfunc in bong_builtins.functions.items():
                # Item 1 of the builtin entry is handed to BuiltinFunction;
                # presumably the function's type information - TODO confirm.
                self.symbols_global[bfuncname] = bongtypes.BuiltinFunction(bfunc[1])
            for btypename, btype in bongtypes.basic_types.items():
                self.symbols_global[btypename] = bongtypes.Typedef(btype())
示例#5
0
 def resolve_function_interface(self, function: ast.FunctionDefinition,
                                unit: ast.TranslationUnit):
     """Resolve a function's parameter and return types and publish them.

     Every declared parameter type is resolved with self.resolve_type and
     stored under the parameter's name in a SymbolTree built from the
     function's symbol tree snapshot. The resulting bongtypes.Function
     (parameters, returns) is then stored under the function's name in
     unit.symbols_global.

     Args:
         function: the definition whose interface is resolved.
         unit: the translation unit the function belongs to.
     """
     param_types = bongtypes.TypeList([])
     result_types = bongtypes.TypeList([])
     declared_params = zip(function.parameter_names,
                           function.parameter_types)
     for name, declared in declared_params:
         resolved = self.resolve_type(declared, unit, function)
         param_types.append(resolved)
         scope = SymbolTree(function.symbol_tree_snapshot)
         scope[name] = resolved
     for declared in function.return_types:
         resolved = self.resolve_type(declared, unit, function)
         result_types.append(resolved)
     signature = bongtypes.Function(param_types, result_types)
     unit.symbols_global[function.name] = signature
示例#6
0
class Eval:
    # Defined here so that it can be used by the parser
    BUILTIN_ENVIRONMENT = {"sys_argv": sys.argv}

    def __init__(self, printfunc=print):
        """Create the evaluator state that has to outlive a single evaluation.

        All structures are created eagerly so that they are retained
        between shell inputs, which arrive split over several ast.Programs.

        Everything reachable through a name/identifier lives either in
        globals (modules, typedefs, functions) or in locals. Top-level
        statements behave as if wrapped in an implicit main() function, so
        their variables are locals, not globals.

        Args:
            printfunc: callable that receives the evaluated value of an
                ast.Print statement; defaults to the builtin print.
        """
        self.printfunc = printfunc
        # Stack of local variables. A plain append() on every declaration
        # would work too, but addressing every variable uniformly as
        # self.locals[symbol_tree.get_index()] reads more high-level; to
        # allow that when declaring variables, the list grows automatically.
        self.locals = StackList()
        # Modules/imports, custom types and function definitions are
        # reached through the ast.TranslationUnit directly; the evaluator
        # reads its symbol table to identify things. It starts out empty
        # and is filled with content when evaluating.
        empty_unit = ast.TranslationUnit(
            [], collections.OrderedDict(), collections.OrderedDict(), [], {})
        self.current_unit = TranslationUnitRef(empty_unit)
        # All imported modules.
        self.modules: typing.Dict[str, ast.TranslationUnit] = dict()
        # Name -> stack index bookkeeping for self.locals.
        self.symbol_tree = SymbolTree()

    def restore_symbol_tree(self, node: SymbolTreeNode):
        self.symbol_tree.restore_snapshot(node)

    def evaluate(self, node: ast.BaseNode) -> ValueList:
        """Evaluate an AST node and return the values it produces.

        Dispatches on the concrete type of `node` through an isinstance
        chain. Branches without their own return statement (ast.Print,
        ast.Let) fall through to the final `return ValueList([])`.

        A `return` statement is signalled by a ValueList whose
        `unwind_return` flag is set; blocks and while loops stop as soon as
        `returned()` is true for a statement result, a function call clears
        the flag again before handing the result to its caller, and a
        return that reaches an ast.TranslationUnit terminates the process
        through sys.exit().

        Raises:
            Exception: for unknown node types, unknown operators, unknown
                identifiers, malformed pipelines, and let statements whose
                number of names and values differ.
        """
        if isinstance(node, ast.Program):
            # Register all imported modules
            for k, m in node.modules.items():
                self.modules[k] = m
            # Then evaluate the main module/file/input
            return self.evaluate(node.main_unit)
        elif isinstance(node, ast.TranslationUnit):
            # First, retain/copy all function definitions. The other stuff seems
            # not to be required currently.
            # Here, we can not just set the current unit to node to retain
            # function definitions across evaluations in shell mode.
            for k, f in node.function_definitions.items():
                self.current_unit.unit.function_definitions[k] = f
            # Set the current symbol table (which could be a reused one)
            self.current_unit.unit.symbols_global = node.symbols_global
            # Afterwards, run all non-function statements
            res = ValueList([])
            for stmt in node.statements:
                res = self.evaluate(stmt)
                if res.returned():
                    # ast.Program is the top-level-node, return means exit then
                    # https://docs.python.org/3/library/sys.html#sys.exit says:
                    # int -> int, Null -> 0, other -> 1
                    # This behaviour seems reasonable here
                    sys.exit(res[0] if len(res) > 0 else None)
            # The result of the last statement is the result of the unit.
            return res
        elif isinstance(node, ast.Block):
            # Names declared inside the block are dropped again by restoring
            # the symbol tree to the state it had on entry.
            symtree = self.symbol_tree.take_snapshot()
            result = ValueList([])
            for stmt in node.stmts:
                result = self.evaluate(stmt)
                if result.returned():
                    break
            self.symbol_tree.restore_snapshot(symtree)
            return result
        elif isinstance(node, ast.Return):
            # The second constructor argument / unwind_return marks the value
            # list as the result of a return statement.
            if node.result == None:
                return ValueList([], True)
            result = self.evaluate(node.result)
            result.unwind_return = True
            return result
        elif isinstance(node, ast.IfElseStatement):
            cond = node.cond
            if isTruthy(self.evaluate(cond)):
                return self.evaluate(node.thn)
            elif isinstance(node.els, ast.BaseNode):
                return self.evaluate(node.els)
            # Condition false and no else branch
            return ValueList([])
        elif isinstance(node, ast.WhileStatement):
            ret = ValueList([])
            while isTruthy(self.evaluate(node.cond)):
                ret = self.evaluate(node.t)
                if ret.returned():
                    break
            return ret
        elif isinstance(node, ast.AssignOp):
            # An assignment evaluates to the assigned values.
            values = self.evaluate(node.rhs)
            self.assign(node.lhs, values)
            return values
        elif isinstance(node, ast.BinOp):
            op = node.op
            # Both operands are evaluated before the operator is inspected,
            # so "&&" and "||" do not short-circuit operand evaluation.
            lhs = self.evaluate(node.lhs)[0]
            rhs = self.evaluate(node.rhs)[0]
            if op == "+":
                res = lhs + rhs
            elif op == "-":
                res = lhs - rhs
            elif op == "*":
                res = lhs * rhs
            elif op == "/":
                # An integer lhs selects floor division.
                if isinstance(lhs, int):
                    res = lhs // rhs
                else:
                    res = lhs / rhs
            elif op == "%":
                res = lhs % rhs
            elif op == "^":
                res = lhs**rhs
            elif op == "&&":
                res = lhs and rhs
            elif op == "||":
                res = lhs or rhs
            elif op == "==":
                res = lhs == rhs
            elif op == "!=":
                res = lhs != rhs
            elif op == "<":
                res = lhs < rhs
            elif op == ">":
                res = lhs > rhs
            elif op == "<=":
                res = lhs <= rhs
            elif op == ">=":
                res = lhs >= rhs
            else:
                raise Exception("unrecognised operator: " + str(node.op))
            return ValueList([res])
        elif isinstance(node, ast.UnaryOp):
            op = node.op
            if op == "!":
                val = not self.evaluate(node.rhs)[0]
            elif op == "-":
                val = -self.evaluate(node.rhs)[0]
            else:
                raise Exception("unrecognised unary operator: " + str(node.op))
            return ValueList([val])
        elif isinstance(node, ast.Integer):
            return ValueList([node.value])
        elif isinstance(node, ast.Float):
            return ValueList([node.value])
        elif isinstance(node, ast.String):
            return ValueList([node.value])
        elif isinstance(node, ast.Bool):
            return ValueList([node.value])
        elif isinstance(node, ast.SysCall):
            # NOTE(review): callprogram returns its result unwrapped (e.g. a
            # plain return code), not a ValueList - confirm callers cope.
            return self.callprogram(node)
        elif isinstance(node, ast.Pipeline):
            if len(node.elements) < 2:
                raise Exception(
                    "Pipelines should have more than one element. This seems to be a parser bug."
                )
            syscalls = []
            # First pipeline element: First syscall or stdin
            if isinstance(node.elements[0], ast.SysCall):
                syscalls.append(node.elements[0])
                stdin = None
            else:
                stdin = self.evaluate(node.elements[0])
            # Other pipeline elements until last: syscalls
            for sc in node.elements[1:-1]:
                assert (isinstance(sc, ast.SysCall))
                syscalls.append(sc)
            # Last pipeline element: Last syscall or stdout (+stderr)
            if isinstance(node.elements[-1], ast.SysCall):
                syscalls.append(node.elements[-1])
                assignto = None
            else:
                assignto = node.elements[-1]
            # Special case: piping an ordinary expression into a variable
            if len(syscalls) == 0:
                raise Exception("The special case, assigning regular values"
                                " via pipelines, is not supported currently.")
                # Unreachable: disabled implementation kept as a bare string
                # literal after the raise.
                """
                if assignto == None:
                    raise Exception("Assertion error: Whenever a pipeline has no syscalls, it should consist of an expression that is assigned to something. No assignment was found here.")
                self.assign(assignto, stdin)
                return stdin
                """
            processes = []
            # All syscalls but the last are chained stdout -> stdin.
            for syscall in syscalls[:-1]:
                # True is passed as numOutputPipes; callprogram only compares
                # it with > 0 and > 1, so this requests a stdout pipe only.
                process = self.callprogram(syscall, stdin, True)
                processes.append(process)
                stdin = process.stdout
            numOutputPipes = 0 if assignto == None else self.numInputsExpected(
                assignto)
            lastProcess = self.callprogram(syscalls[-1], stdin, numOutputPipes)
            # So, there is this single case that is different from everything else
            # and that needs special treatment:
            # Whenever the first process is opened with stdin=PIPE, we must
            # close its stdin except when this is the only process, then we
            # must not close the stdin, because then communicate() will fail.
            if not isinstance(node.elements[0],
                              ast.SysCall) and len(processes):
                processes[0].stdin.close()
            outstreams = lastProcess.communicate()
            for process in processes:
                process.wait()
            # Assign stdout,stderr to variables
            #results = ValueList(outstreams[:numOutputPipes])
            results = ValueList([])
            # Captured output arrives as bytes and is decoded as UTF-8.
            for o in outstreams[:numOutputPipes]:
                results.append(o.decode('utf-8'))
            if isinstance(assignto, ast.PipelineLet):  # copied from ast.Let
                if len(assignto.names) != len(results):
                    raise Exception(
                        "number of expressions between rhs and lhs do not match"
                    )
                self.symbol_tree.restore_snapshot(
                    assignto.symbol_tree_snapshot)
                for name, result in zip(assignto.names, results):
                    index = self.symbol_tree.get_index(name)
                    self.locals[index] = result
            elif isinstance(assignto, ast.ExpressionList):
                self.assign(assignto, results)
            elif isinstance(assignto, ast.BaseNode):
                # A single target is wrapped so that assign() can treat it
                # like a one-element expression list.
                self.assign(ast.ExpressionList(assignto.tokens, [assignto]),
                            results)
            # Return exitcode of subprocess
            return ValueList([lastProcess.returncode])
        elif isinstance(node, ast.Identifier):
            if node.name in self.symbol_tree:
                index = self.symbol_tree.get_index(node.name)
                return ValueList([self.locals[index]])
            elif node.name in self.current_unit.unit.symbols_global:
                pass
                # TODO Add global environment
            # Reached for unknown names and, until the TODO above is done,
            # also for names that only exist in the global symbol table.
            raise Exception(
                f"Unknown identifier '{node.name}' specified. TODO: global environment."
            )
        elif isinstance(node, ast.IndexAccess):
            # The index expression is evaluated before the indexed value.
            index = self.evaluate(node.rhs)[0]
            lhs = self.evaluate(node.lhs)[0]
            return ValueList([lhs[index]])
        elif isinstance(node, ast.DotAccess):
            # The following is only used for StructValue, modules are only used
            # for module- and function-access which is handled in FunctionCall below.
            val = self.evaluate(node.lhs)[0][node.rhs]
            return ValueList([val])
        elif isinstance(node, ast.FunctionCall):
            # node.name should either be an ast.Identifier, then we call a function
            # in the current module/unit, or an ast.DotAccess, then we call a function
            # in the specified module/unit.
            if isinstance(node.name, ast.Identifier):
                unit = self.current_unit.unit
                funcname = node.name.name
            elif isinstance(node.name, ast.DotAccess):
                unit = self.get_module(node.name.lhs)
                funcname = node.name.rhs
            else:
                raise Exception(
                    "Identifier or DotAccess for function name expected.")
            # Change (PUSH) the current unit. We also do this if we call a function
            # in the current unit/module because then we do not have to decide
            # afterwards if we have to pop the translation unit back, we just do it.
            self.current_unit = TranslationUnitRef(unit, self.current_unit)
            try:
                # Evaluate arguments (with old scope)
                # TODO Here, we can maybe use args = self.evaluate(ExprList)
                # instead of building a new array from scratch?
                args = []
                for a in node.args:
                    args.append(self.evaluate(a)[0])
                # Call by value!
                args = copy.deepcopy(args)
                # Call function, either builtin or defined
                if isinstance(unit.symbols_global[funcname],
                              bongtypes.Function):
                    # Bong function
                    function = unit.function_definitions[funcname]
                    # The callee runs with its own symbol tree and a fresh
                    # locals stack; both are put back in the finally below.
                    symbol_tree_snapshot = self.symbol_tree.take_snapshot()
                    self.symbol_tree.restore_snapshot(
                        function.symbol_tree_snapshot)
                    local_env_snapshot = self.locals
                    self.locals = StackList()
                    try:
                        # Add arguments to new local environment, then eval func
                        for name, arg in zip(function.parameter_names, args):
                            index = self.symbol_tree.get_index(name)
                            self.locals[index] = arg
                        result = self.evaluate(function.body)
                    finally:
                        self.symbol_tree.restore_snapshot(symbol_tree_snapshot)
                        self.locals = local_env_snapshot
                    # The return has been consumed by this call; clear the flag
                    # so that the caller's block does not unwind as well.
                    if result.returned():
                        result.unwind_return = False
                        return result
                    return result
                else:
                    # Builtin function
                    return bong_builtins.functions[funcname][0](args)
            finally:
                # Change back (POP) the current unit
                self.current_unit = self.current_unit.parent
        elif isinstance(node, ast.Print):
            # No return here: falls through to the final return below.
            self.printfunc(self.evaluate(node.expr))
        elif isinstance(node, ast.Let):
            # First, evaluate all rhses (those are possibly encapsulated in an
            # ExpressionList, so no need to iterate here
            results = self.evaluate(node.expr)
            # Then, assign results. This order of execution additionally prevents
            # the rhs of a let statement to use the variables declared on the
            # left side.
            if len(node.names) != len(results):
                raise Exception(
                    "number of expressions between rhs and lhs do not match")
            self.symbol_tree.restore_snapshot(node.symbol_tree_snapshot)
            for name, result in zip(node.names, results):
                index = self.symbol_tree.get_index(name)
                self.locals[index] = result
        elif isinstance(node, ast.Array):
            elements = []
            for e in node.elements:
                elements.append(self.evaluate(e)[0])
            # The whole array is a single value inside the ValueList.
            return ValueList([elements])
        elif isinstance(node, ast.StructValue):
            assert (isinstance(node.name, ast.Identifier)
                    or isinstance(node.name, ast.DotAccess))
            structval = StructValue(node.name)
            for name, expr in node.fields.items():
                structval[name] = self.evaluate(expr)[0]
            return ValueList([structval])
        elif isinstance(node, ast.ExpressionList):
            results = ValueList([])
            for exp in node.elements:
                # ValueList is a FlatList and an append to FlatList is
                # automatically flattened. Not indexing into the result
                # of evaluate() here is crucial because the result could be
                # an empty ValueList (e.g. function calls)
                results.append(self.evaluate(exp))
            return results
        else:
            raise Exception("unknown ast node")
        return ValueList([])  # Satisfy mypy

    def assign(self, lhs: ast.ExpressionList, rhs: ValueList):
        if len(rhs) != len(lhs):
            raise Exception("number of elements on lhs and rhs does not match")
        for l, value in zip(lhs, rhs):
            # lhs evaluation: The lhs can be a variable assignment, an
            # index access, a DotAccess
            if isinstance(l, ast.Identifier):
                name = l.name
                stack_index = self.symbol_tree.get_index(name)
                self.locals[stack_index] = value
            elif isinstance(l, ast.IndexAccess):
                index_access_index = self.evaluate(l.rhs)[0]
                array = self.evaluate(l.lhs)[0]
                array[index_access_index] = value
            elif isinstance(l, ast.DotAccess):
                struct = self.evaluate(l.lhs)[0]
                struct[l.rhs] = value
            else:
                raise Exception(
                    "Can only assign to variable or indexed variable")

    def numInputsExpected(self, assignto):
        """Return how many values the pipeline target `assignto` takes.

        An ast.PipelineLet takes one value per declared name, an
        ast.ExpressionList one per element; anything else is treated as a
        single variable (currently only used in pipelines).
        """
        if isinstance(assignto, ast.PipelineLet):
            receivers = assignto.names
        elif isinstance(assignto, ast.ExpressionList):
            receivers = assignto.elements
        else:
            return 1
        return len(receivers)

    def callprogram(self, program, stdin=None, numOutputPipes=0):
        # TODO We pass a whole ast.SysCall object to callprogram, only the args
        # list would be enough. Should we change that? This would simplify this
        # method itself and calling builtin functions.
        #
        # Before doing anything, expand ~ to user's home directory
        cmd = []
        home_directory = os.path.expanduser("~")
        for arg in program.args:
            if arg.startswith("~"):
                arg = home_directory + arg[1:]
            cmd.append(arg)
        # Check bong builtins first. Until now, only 'cd' defined
        if cmd[0] == "cd":
            if stdin != None or numOutputPipes != 0:
                print("bong: cd: can not be piped")
                # TODO Here, the calling pipe will crash :( return something
                # usable instead!
                return None
            return self.call_cd(cmd)
        path_var = os.environ['PATH'].split(':')
        # Special case: Syscalls with relative or absolute path ('./foo', '../foo', '/foo/bar', 'foo/bar')
        if (cmd[0].startswith('./') or cmd[0].startswith('../')
                or cmd[0].startswith('/') or '/' in cmd[0]):
            path_var = [""]
        for path in path_var:
            if len(path) > 0:
                if not path.endswith('/'):
                    path += "/"
                filepath = path + cmd[0]
            else:
                filepath = cmd[0]
            if os.path.isfile(filepath) and os.access(filepath, os.X_OK):
                # Simple syscall
                if stdin == None and numOutputPipes == 0:
                    compl = subprocess.run(cmd)
                    return compl.returncode
                # Piped syscall
                # lhs = subprocess.Popen(["ls"], stdout=subprocess.PIPE)
                # rhs = subprocess.Popen(["grep", "foo"], stdin=lhs.stdout)
                # lhs.stdout.close()
                # rhs.communicate()
                # -> I call stdout.close() on all but the last subprocesses
                # -> I call communicate() only on the last subprocess
                # TODO Is that actually the right approach?
                else:
                    # a) this is the leftmost syscall of a pipe or
                    # -> Create the process with stdin=None
                    # b) the previous step of the pipe was a syscall
                    # -> Create the process with stdin=stdin
                    # c) lhs of the pipe was variable or function
                    # -> Create the process with stdin=PIPE and write the value into stdin
                    case_a = stdin == None
                    case_b = isinstance(stdin,
                                        io.BufferedReader)  # _io.Buff...?
                    case_c = not (case_a or case_b)
                    stdin_arg = stdin if not case_c else subprocess.PIPE
                    stdout_arg = subprocess.PIPE if numOutputPipes > 0 else None
                    stderr_arg = subprocess.PIPE if numOutputPipes > 1 else None
                    proc = subprocess.Popen(cmd,
                                            stdin=stdin_arg,
                                            stdout=stdout_arg,
                                            stderr=stderr_arg)
                    if case_c:
                        # Prevent possible bytestreams from being interpreted
                        # as strings. Currently 2019-12-22, this is not strictly
                        # required because we don't have bytestreams yet and
                        # everything is nicely decoded to strings but in the
                        # future, we have to do this here!
                        if type(stdin) == bytes:
                            proc.stdin.write(stdin)
                        else:
                            proc.stdin.write(str(stdin).encode("utf-8"))
                        #proc.stdin.close()
                    # Now, after having created this process, we can run the
                    # stdout.close() on the previous process (if there was one)
                    # stdout of the previous is stdin here.
                    if isinstance(stdin, io.BufferedReader):  # _io.Buff...?
                        stdin.close()
                    return proc
        print("bong: {}: command not found".format(cmd[0]))

    def call_cd(self, args):
        if len(args) > 2:
            print("bong: cd: too many arguments")
            return 1
        try:
            if len(args) > 1:
                if (args[1] == "-"
                    ):  # Everything bash can do, we can do better.
                    if hasattr(self, "prev_directory"):
                        self.change_dir(self.prev_directory)
                else:
                    self.change_dir(args[1])
            else:
                self.change_dir(os.path.expanduser('~'))
            return 0
        except Exception as e:
            print("bong: cd: {}".format(str(e)))
            return 1

    def change_dir(self, new_dir):
        prev_dir = os.getcwd()
        os.chdir(
            new_dir)  # This can fail so everything else happens afterwards
        self.prev_directory = prev_dir
        # Now, we send the escape codes to tell the terminal (emulator) the
        # new directory
        current_dir = os.getcwd()
        sys.stdout.write("\x1b]7;file:" + current_dir +
                         "\x07")  # Tell the cwd to our terminal (emulator)
        home_directory = os.path.expanduser("~")
        if current_dir.startswith(home_directory):
            window_title = "~" + current_dir[len(
                home_directory):]  # ~ + home-dir skipped in current dir
        else:
            window_title = current_dir
        sys.stdout.write("\x1b]2;bong " + window_title +
                         "\x07")  # Set the window title

    # Takes an Identifier or DotAccess which should describe a module
    # and returns the corresponding ast.TranslationUnit. The search
    # is started at self.current_unit's symbol table. For each resolution
    # step, another (the next) symbol table is used.
    def get_module(
        self, name: ast.BaseNode
    ) -> ast.TranslationUnit:  # name should be Identifier (returns current_unit) or DotAccess (returns resolved DotAccess.lhs)
        """Resolve a module expression to its ast.TranslationUnit.

        An ast.Identifier is looked up in the global symbol table of the
        current unit. For an ast.DotAccess the left side is resolved
        recursively first and the right side is then looked up in the
        global symbol table of the unit found that way.

        Raises:
            Exception: if `name` is neither Identifier nor DotAccess, or
                if the symbol found is not a bongtypes.Module.
        """
        if isinstance(name, ast.Identifier):
            symbols = self.current_unit.unit.symbols_global
            key = name.name
        elif isinstance(name, ast.DotAccess):
            symbols = self.get_module(name.lhs).symbols_global
            key = name.rhs
        else:
            raise Exception("Identifier or DotAccess expected.")
        module = symbols[key]
        if isinstance(module, bongtypes.Module):
            # Imported units are registered under the module's path.
            return self.modules[module.path]
        raise Exception("Module expected.")
示例#7
0
class TypeChecker:
    def __init__(
        self,
        symbol_table_snapshot=None,
        modules: typing.Optional[typing.Dict[str,
                                             ast.TranslationUnit]] = None):
        """Create a typechecker.

        symbol_table_snapshot is handed to the SymbolTree constructor
        unchanged. modules maps import paths to already known translation
        units; a dict that is passed in is used as is (not copied), anything
        else (in particular the default None) is replaced by a new, empty
        dict.
        """
        self.symbol_tree = SymbolTree(symbol_table_snapshot)
        if not isinstance(modules, dict):
            modules = {}
        self.modules: typing.Dict[str, ast.TranslationUnit] = modules

    def checkprogram(
            self,
            main_unit: ast.TranslationUnit) -> typing.Optional[ast.Program]:
        """Typecheck `main_unit` and report type errors instead of raising.

        Delegates to checkprogram_uncaught() and returns its result. A
        TypecheckException is caught, printed to stderr in the form
        "TypecheckError[ in <file>, line .. col .. to line .. col ..]: <msg>"
        and turned into a None result. Any other exception propagates.
        """
        try:
            return self.checkprogram_uncaught(main_unit)
        except TypecheckException as e:
            # Identity test on purpose: whether a node is attached must not
            # depend on how the node class implements __eq__/__ne__.
            if e.node is not None:
                loc = e.node.get_location()
                posstring = f" in {loc[0]}, line {loc[1]} col {loc[2]} to line {loc[3]} col {loc[4]}"
            else:
                # Errors without a node are reported without a position.
                posstring = ""
            print(f"TypecheckError{posstring}: {str(e.msg)}", file=sys.stderr)
            return None

    # The typechecker has to assign types in the symbol table for
    # - function definitions (parameter types, return types)
    # - struct definitions
    # - let statements
    # Since assigning types for let statements requires a full
    # ast pass, we do all type assignments in the typechecker.
    # Like this, it is not split on several components.
    #
    # Anyways, resolving custom types has to be done first so that
    # types are available for subsequent steps.
    # Next, function interfaces can/must be resolved so that their
    # type requirements are available for function calls.
    # Finally, everything else can be checked (function bodies
    # can only be checked here, not earlier).
    # This pattern resembles how the evaluator handles
    # FunctionDefinitions and all other statements differently.
    #
    # If we want to be insane, we could do all of this in one
    # single pass: Collect all type- and function-definitions first,
    # then resolve types and function interfaces as needed (check
    # the symbol-table if this has to be done yet).
    # Anyways, it can safely be assumed that the split approach
    # is more maintainable, debuggable, understandable.
    # If you want to try out the insane approach, just insert
    # resolve_type() and resolve_function_interface() at the
    # appropriate places when check()ing the ast.
    def checkprogram_uncaught(self,
                              main_unit: ast.TranslationUnit) -> ast.Program:
        """Run all typechecking passes; TypecheckExceptions are not caught.

        Passes, in this order:
        1. resolve the imports of main_unit (fills self.modules),
        2. resolve struct types, main unit first, then every module,
        3. resolve function interfaces, main unit first, then every module,
        4. check function bodies, modules first, then the main unit,
        5. check the top-level statements of the main unit.

        While a unit is being processed, self.symbols_global refers to that
        unit's global symbols. Returns the ast.Program created from
        self.modules and main_unit. Raises TypecheckException if a top-level
        statement can return something that is not a single Integer; errors
        raised by the individual passes propagate.
        """
        # Theoretically, everything is reachable through the main unit.
        # For convenience, the frequently used parts are stored on self.
        self.main_unit = main_unit
        self.symbols_global = main_unit.symbols_global
        # The dict handed over here is the one parse_imports() fills below.
        program = ast.Program(self.modules, main_unit)
        self.parse_imports(main_unit)
        # Types have to be known before function interfaces can be
        # resolved, therefore each pass is finished for all units before
        # the next one starts.
        for resolve in (self.resolve_types, self.resolve_function_interfaces):
            for unit in (main_unit, *self.modules.values()):
                self.symbols_global = unit.symbols_global
                resolve(unit)
        # Function bodies of the modules (this also assigns variable types)
        for unit in self.modules.values():
            self.symbols_global = unit.symbols_global
            for func in unit.function_definitions.values():
                self.check(func)
        # Function bodies of the main unit
        self.symbols_global = main_unit.symbols_global
        for func in main_unit.function_definitions.values():
            self.check(func)
        # Top-level statements of the main unit
        for stmt in main_unit.statements:
            result, hint = self.check(stmt)
            if hint == Return.NO:
                continue
            # The statement may return, so its value must be an int.
            int_only = bongtypes.TypeList([bongtypes.Integer()])
            if not result.sametype(int_only):
                raise TypecheckException(
                    "Return type of program does not evaluate to int.",
                    stmt)
        return program

    def parse_imports(self, parent_unit: ast.TranslationUnit):
        """Load and register all modules imported by `parent_unit`.

        For every import statement whose path is not yet a key of
        self.modules, the file is read, lexed and parsed, the resulting
        ast.TranslationUnit is stored in self.modules under that path and
        its own imports are processed recursively. The unit is stored before
        the recursion starts, so a path that is reached again during the
        recursion is not parsed a second time.

        In any case, the import name is entered into
        parent_unit.symbols_global as a bongtypes.Module that carries the
        path.

        Raises TypecheckException (located at the import statement) if the
        file can not be read or if parsing does not produce an
        ast.TranslationUnit. A read failure is chained as the cause.
        """
        for imp_stmt in parent_unit.import_statements:
            if imp_stmt.path not in self.modules:
                # Parse
                # TODO this should be encapsulated more nicely. Currently, same code
                # as in main.py
                try:
                    with open(imp_stmt.path) as f:
                        code = f.read()
                except Exception as e:
                    # Chain explicitly so that the original I/O error is
                    # kept as the cause of the typecheck error.
                    raise TypecheckException(
                        f"Importing {imp_stmt.path} impossible:"
                        f" '{e}'", imp_stmt) from e
                module_lexer = lexer.Lexer(code, imp_stmt.path)
                module_parser = parser.Parser(module_lexer)
                child_unit = module_parser.compile()
                # Anything but a TranslationUnit means that parsing failed
                if not isinstance(child_unit, ast.TranslationUnit):
                    raise TypecheckException(
                        f"Importing {imp_stmt.path}"
                        " failed.", imp_stmt)
                # Register first, then follow the imports of the new module
                self.modules[imp_stmt.path] = child_unit
                self.parse_imports(child_unit)
            # Add to symbol table
            parent_unit.symbols_global[imp_stmt.name] = bongtypes.Module(
                imp_stmt.path)

    def resolve_types(self, unit: ast.TranslationUnit):
        """Resolve every struct definition of `unit` via resolve_type()."""
        for struct_name, definition in unit.struct_definitions.items():
            # Ask for the struct by its bare name, without array levels.
            plain_identifier = ast.BongtypeIdentifier([struct_name], 0)
            self.resolve_type(plain_identifier, unit, definition)

    # Resolve a given BongtypeIdentifier to an actual type. For custom types,
    # this method will not return the bongtypes.Typedef, but the value type instead,
    # i.e. the Typedefs will be unpacked.
    # It can crash whenever an inner type in a struct, a type hint in a function
    # interface or a type hint in a let statement uses a typename that is not defined.
    # Currently, recursive types are prevented. But actually, it would be
    # possible to instantiate a type that refers to itself via an array because
    # that array could be empty. Therefore, it would be nice if we could:
    # 1. Allow recursive types first.
    # 2. Check if there is a recursive circle without arrays
    # struct T { x : T } is an error because it is infinite
    # struct T { x : []T } is OK
    def resolve_type(self, identifier: ast.BongtypeIdentifier,
                     unit: ast.TranslationUnit,
                     node: ast.BaseNode) -> bongtypes.ValueType:
        """Resolve `identifier` to a value type, starting the lookup in `unit`.

        - Each array level of the identifier wraps the result in one
          bongtypes.Array.
        - If the typename has more than one element, the first element must
          name a bongtypes.Module in unit.symbols_global whose path is a key
          of self.modules; the remaining elements are resolved in that
          module's unit.
        - Otherwise the single name is looked up in unit.symbols_global. A
          Typedef is unpacked to its value type. A symbol that is still of
          unknown type is built from unit.struct_definitions, and the result
          is stored back into unit.symbols_global as a Typedef.

        `node` is only used as the location of raised errors.

        Raises TypecheckException for unknown modules or types, for symbols
        of the wrong kind and for recursive struct definitions.
        """
        # Arrays: strip one level per recursion step
        if identifier.num_array_levels > 0:
            element_identifier = ast.BongtypeIdentifier(
                identifier.typename, identifier.num_array_levels - 1)
            return bongtypes.Array(
                self.resolve_type(element_identifier, unit, node))
        # Qualified names: hand the rest of the name over to the module
        if len(identifier.typename) > 1:
            modulename = identifier.typename[0]
            # The checks are done one by one, which also keeps mypy happy
            if modulename not in unit.symbols_global:
                raise TypecheckException(
                    f"Module {modulename} not found in"
                    " symbol table.", node)
            module_sym = unit.symbols_global[modulename]
            if not isinstance(module_sym, bongtypes.Module):
                raise TypecheckException(
                    f"Symbol {modulename} is not a module,"
                    f" instead it is {module_sym}.", node)
            if module_sym.path not in self.modules:
                raise TypecheckException(
                    f"Module {module_sym} not found"
                    " in module dictionary.", node)
            return self.resolve_type(
                ast.BongtypeIdentifier(identifier.typename[1:], 0),
                self.modules[module_sym.path], node)
        # From here on, the typename consists of exactly one name
        typename = identifier.typename[0]
        if typename not in unit.symbols_global:
            raise TypecheckException(
                f"Type {typename} can not be"
                " resolved.", node)
        known = unit.symbols_global[typename]
        if not known.sametype(bongtypes.UnknownType()):
            # The placeholder (see below) is still present: we arrived here
            # while resolving the fields of this very type.
            if isinstance(known, bongtypes.UnfinishedType):
                raise TypecheckException(
                    f"Type {typename} is recursive."
                    " This is currently not allowed for several reasons.",
                    node)
            if isinstance(known, bongtypes.Typedef):
                return known.value_type  # unpack
            raise TypecheckException(
                f"Type {typename} can not be"
                " resolved.", node)
        # Still unknown: it has to be a struct whose fields get resolved now
        if typename not in unit.struct_definitions:
            raise TypecheckException(
                f"Type {typename} can not be"
                " resolved.", node)
        struct_def = unit.struct_definitions[typename]
        # Placeholder that marks this type as "currently being resolved"
        unit.symbols_global[typename] = bongtypes.UnfinishedType()
        fields: typing.Dict[str, bongtypes.ValueType] = {
            field_name: self.resolve_type(field_type, unit, struct_def)
            for field_name, field_type in struct_def.fields.items()
        }
        value_type = bongtypes.Struct(typename, fields)
        unit.symbols_global[typename] = bongtypes.Typedef(value_type)
        return value_type

    def resolve_function_interfaces(self, unit: ast.TranslationUnit):
        """Resolve the interface of every function defined in `unit`."""
        for definition in unit.function_definitions.values():
            self.resolve_function_interface(definition, unit)

    def resolve_function_interface(self, function: ast.FunctionDefinition,
                                   unit: ast.TranslationUnit):
        """Resolve parameter and return types of `function`.

        Every parameter type is resolved with resolve_type() relative to
        `unit` and stored under the parameter's name in a SymbolTree created
        from function.symbol_tree_snapshot. Afterwards the return types are
        resolved, and the resulting bongtypes.Function is entered into
        unit.symbols_global under the function's name.
        """
        param_types = bongtypes.TypeList([])
        declared_params = zip(function.parameter_names,
                              function.parameter_types)
        for name, type_identifier in declared_params:
            resolved = self.resolve_type(type_identifier, unit, function)
            param_types.append(resolved)
            # Make the parameter known in the scope of the function
            function_scope = SymbolTree(function.symbol_tree_snapshot)
            function_scope[name] = resolved
        return_types = bongtypes.TypeList([])
        for type_identifier in function.return_types:
            return_types.append(
                self.resolve_type(type_identifier, unit, function))
        unit.symbols_global[function.name] = bongtypes.Function(
            param_types, return_types)

    def is_writable(self, node: ast.BaseNode):
        """Tell whether `node` may be used as the target of a write.

        - Identifier: writable iff its name is found in self.symbol_tree.
          Modules, function names and types are identifiers as well, but
          only variables are expected to be stored there.
        - IndexAccess / DotAccess: writable iff the lhs is writable, e.g.
            foo().bar          not writable
            foo.bar[0]         writable if foo is a writable variable
            foo.bar()[0].baz   not writable (result of a function call)
            mod.foo            not writable if mod is a module
        - ExpressionList: writable iff all inner nodes are writable.
        - Everything else (function calls, blocks, ...): not writable.
        """
        if isinstance(node, ast.Identifier):
            return node.name in self.symbol_tree
        if isinstance(node, (ast.IndexAccess, ast.DotAccess)):
            return self.is_writable(node.lhs)
        if isinstance(node, ast.ExpressionList):
            return all(self.is_writable(inner) for inner in node.inner_nodes)
        return False

    # Determine the type of the ast node.
    # This method returns the TypeList (0, 1 or N elements) that the node will
    # evaluate to and a return hint that tells us if the node contains a
    # return statement and if it is sure that this return will be invoked. This
    # information is required to check/guarantee the return type of function
    # definitions.
    def check(self, node: ast.BaseNode) -> typing.Tuple[TypeList, Return]:
        if isinstance(node, ast.Block):
            symbol_tree_snapshot = self.symbol_tree.take_snapshot()
            # All return statements in a whole block must match so that the
            # whole block is consistent.
            block_return: typing.Tuple[TypeList,
                                       Return] = (TypeList([]), Return.NO)
            for stmt in node.stmts:
                stmt_return = self.check(stmt)
                if stmt_return[1] != Return.NO:
                    if block_return[1] == Return.NO:
                        # initialize
                        block_return = stmt_return
                    else:
                        # ensure that all return types are the same
                        if not block_return[0].sametype(stmt_return[0]):
                            raise TypecheckException(
                                "Return type does not match previous return type in block.",
                                stmt)
                        # If at least one statement in the block definitely
                        # returns, the whole block definitely returns
                        # -> a YES overwrites a MAYBE
                        if stmt_return[1] == Return.YES:
                            block_return = stmt_return  # block_return[1] = Return.YES
                            # Here, we could theoretically break from the
                            # loop because subsequent statements will not
                            # be executed. But no break has the benefit
                            # that the following code is already typechecked.
                            # When the return is removed, the typechecker
                            # result will not change.
            # Restore scope
            self.symbol_tree.restore_snapshot(symbol_tree_snapshot)
            return block_return
        if isinstance(node, ast.Return):
            if node.result == None:
                return bongtypes.TypeList([]), Return.YES
            res, turn = self.check(node.result)  # turn should be false here
            return res, Return.YES
        if isinstance(node, ast.IfElseStatement):
            cond, turn = self.check(node.cond)
            if len(cond) == 0 or type(cond[0]) != bongtypes.Boolean:
                raise TypecheckException(
                    "If statement requires boolean condition.", node.cond)
            a, aturn = self.check(node.thn)
            if isinstance(node.els, ast.BaseNode):
                b, bturn = self.check(node.els)
            else:
                b, bturn = a, Return.NO  # if there is no else, it won't return
            # 1. if { } else { } -> OK
            # 2. if { return } else { } -> OK
            # 3. if { } else { return } -> OK
            # 4. if { return } else { return } -> returns should match!
            # If there is no 'else', this is covered by 1. and 2.
            if aturn != Return.NO and bturn != Return.NO:  # 4
                if not a.sametype(b):
                    raise TypecheckException(
                        "'If' and 'Else' branch's return type do not match.",
                        node)
                # Here, only if both are YES, the whole if-else is YES
                if aturn == Return.YES and bturn == Return.YES:
                    return a, Return.YES
                return a, Return.MAYBE
            if aturn != Return.NO:  # 2
                return a, Return.MAYBE
            if bturn != Return.NO:  # 3
                return b, Return.MAYBE
            return TypeList([]), Return.NO  # 1
        if isinstance(node, ast.WhileStatement):
            types, turn = self.check(node.cond)
            if len(types) != 1:
                raise TypecheckException(
                    "While statement requires a single"
                    " boolean value as condition.", node.cond)
            if type(types[0]) != bongtypes.Boolean:
                raise TypecheckException(
                    "While statement requires boolean condition.", node.cond)
            types, turn = self.check(node.t)
            if turn != Return.NO:
                return types, Return.MAYBE
            return types, turn
        if isinstance(node, ast.AssignOp):
            rhs, turn = self.check(node.rhs)
            lhs, turn = self.check(node.lhs)
            match_types(lhs, rhs, node,
                        ("Variable and expression types in assignment do"
                         f" not match. Lhs expects '{lhs}' but rhs evaluates"
                         f" to '{rhs}'"))
            if not self.is_writable(node.lhs):
                raise TypecheckException(
                    "Lhs of assignment is no writable variable!", node.lhs)
            return lhs, Return.NO
        if isinstance(node, ast.BinOp):
            op = node.op
            # For BinOps, most bongtypes' operators are overloaded
            # Not overloaded: 'and' and 'or'
            lhslist, turn = self.check(node.lhs)
            rhslist, turn = self.check(node.rhs)
            assert len(lhslist) == 1 and len(rhslist) == 1
            lhstyp = lhslist[0]
            rhstyp = rhslist[0]
            try:  # Catch all BongtypeExceptions
                if op == "+":
                    # TODO "+" is a valid operator for arrays but we do not do
                    # the proper empty-array check with match_types() here. Should
                    # we do that?
                    return TypeList([lhstyp + rhstyp]), Return.NO
                if op == "-":
                    return TypeList([lhstyp - rhstyp]), Return.NO
                if op == "*":
                    return TypeList([lhstyp * rhstyp]), Return.NO
                if op == "/":
                    return TypeList([lhstyp / rhstyp]), Return.NO
                if op == "%":
                    return TypeList([lhstyp % rhstyp]), Return.NO
                if op == "^":
                    return TypeList([lhstyp**rhstyp]), Return.NO
                if op == "&&":
                    if type(lhstyp) != bongtypes.Boolean:
                        raise TypecheckException(
                            "Logical 'and' expects boolean operands. Left operand is not boolean.",
                            node.lhs)
                    if type(rhstyp) != bongtypes.Boolean:
                        raise TypecheckException(
                            "Logical 'and' expects boolean operands. Right operand is not boolean.",
                            node.rhs)
                    return TypeList([bongtypes.Boolean()]), Return.NO
                if op == "||":
                    if type(lhstyp) != bongtypes.Boolean:
                        raise TypecheckException(
                            "Logical 'or' expects boolean operands. Left operand not boolean.",
                            node.lhs)
                    if type(rhstyp) != bongtypes.Boolean:
                        raise TypecheckException(
                            "Logical 'or' expects boolean operands. Right operand is not boolean.",
                            node.rhs)
                    return TypeList([bongtypes.Boolean()]), Return.NO
                if op == "==":
                    return TypeList([lhstyp.eq(rhstyp)]), Return.NO
                if op == "!=":
                    return TypeList([lhstyp.ne(rhstyp)]), Return.NO
                if op == "<":
                    return TypeList([lhstyp < rhstyp]), Return.NO
                if op == ">":
                    return TypeList([lhstyp > rhstyp]), Return.NO
                if op == "<=":
                    return TypeList([lhstyp <= rhstyp]), Return.NO
                if op == ">=":
                    return TypeList([lhstyp >= rhstyp]), Return.NO
                else:
                    raise Exception("unrecognised binary operator: " +
                                    str(node.op))
            except BongtypeException as e:  # ... and transform to TypecheckExc
                raise TypecheckException(e.msg, node)
        elif isinstance(node, ast.UnaryOp):
            try:  # Catch all BongtypeExceptions ...
                op = node.op
                if op == "!":
                    rhs, turn = self.check(node.rhs)
                    if len(rhs) != 1 or type(rhs[0]) != bongtypes.Boolean:
                        raise TypecheckException(
                            "Logical 'not' expects boolean operand.", node)
                    return TypeList([bongtypes.Boolean()]), Return.NO
                if op == "-":
                    rhstype, turn = self.check(node.rhs)
                    if len(rhstype) != 1 or not (
                            type(rhstype[0]) == bongtypes.Integer
                            or type(rhstype[0]) == bongtypes.Float):
                        raise TypecheckException("Negate expects number.",
                                                 node)
                    return rhstype, Return.NO
                raise Exception("unrecognised unary operator: " + str(node.op))
            except BongtypeException as e:  # ... and transform to TypecheckExc
                raise TypecheckException(e.msg, node)
        elif isinstance(node, ast.Integer):
            return TypeList([bongtypes.Integer()]), Return.NO
        elif isinstance(node, ast.Float):
            return TypeList([bongtypes.Float()]), Return.NO
        elif isinstance(node, ast.String):
            return TypeList([bongtypes.String()]), Return.NO
        elif isinstance(node, ast.Bool):
            return TypeList([bongtypes.Boolean()]), Return.NO
        elif isinstance(node, ast.SysCall):
            return TypeList([bongtypes.Integer()]), Return.NO
        elif isinstance(node, ast.Pipeline):
            # Also see evaluator -> ast.Pipeline, it is very similar
            if len(node.elements) < 2:
                raise TypecheckException(
                    "Pipelines should have more than one element. This seems to be a parser bug.",
                    node)
            programcalls = []
            strtype = TypeList([bongtypes.String()
                                ])  # used for checking stdin and stdout
            # Check pipeline input types
            if isinstance(node.elements[0], ast.SysCall):
                programcalls.append(node.elements[0])
            else:
                stdin, turn = self.check(node.elements[0])  # turn == NO
                if not stdin.sametype(strtype):
                    raise TypecheckException(
                        "The input to a pipeline should evaluate to a string, {} was found instead."
                        .format(stdin), node.elements[0])
            # Collect programcalls
            for elem in node.elements[1:-1]:
                if not isinstance(elem, ast.SysCall):
                    raise TypecheckException(
                        "The main part of a pipeline (all"
                        " elements but the first and last) should only consist"
                        f" of program calls, '{elem}' found instead.", elem)
                programcalls.append(elem)
            # Check pipeline output types
            if isinstance(node.elements[-1], ast.SysCall):
                programcalls.append(node.elements[-1])
            else:
                assignto = node.elements[-1]
                # Either the assignto is a PipelineLet, then check it manually,
                # or the assignto is something else, then do the same checks as for assignments.
                if isinstance(assignto, ast.PipelineLet):
                    names = assignto.names
                    if len(names) > 2 or len(names) == 0:
                        raise TypecheckException(
                            "The output of a pipeline can only be written to one or two string variables, let with {} variables  was found instead."
                            .format(len(names)), assignto)
                    for name, type_identifier in zip(assignto.names,
                                                     assignto.types):
                        if isinstance(type_identifier, ast.BongtypeIdentifier):
                            typ = self.resolve_type(type_identifier,
                                                    self.main_unit, assignto)
                            if not typ.sametype(bongtypes.String()):
                                raise TypecheckException(
                                    "The output of a pipeline"
                                    " can only be written to string variables, let"
                                    f" with explicit type '{typ}' was found instead.",
                                    assignto)
                        else:
                            pass
                        self.symbol_tree.restore_snapshot(
                            assignto.symbol_tree_snapshot)
                        self.symbol_tree[name] = bongtypes.String()
                else:
                    output, turn = self.check(assignto)
                    writable = self.is_writable(assignto)
                    if (not writable or
                        (not output.sametype(strtype) and not output.sametype(
                            TypeList([bongtypes.String(),
                                      bongtypes.String()])))):
                        raise TypecheckException(
                            "The output of a pipeline can only"
                            f" be written to string variables, {assignto} found"
                            " instead.", assignto)
            # Check that everything in between actually is a program call
            for pcall in programcalls:
                if not isinstance(pcall, ast.SysCall):
                    raise TypecheckException(
                        "Everything in the center of a pipeline must be a programmcall, '{}' was found instead."
                        .format(pcall), pcall)
            return TypeList([bongtypes.Integer()]), Return.NO
        elif isinstance(node, ast.Identifier):
            if node.name in self.symbol_tree:
                return TypeList([self.symbol_tree[node.name]]), Return.NO
            elif node.name in self.symbols_global:
                return TypeList([self.symbols_global[node.name]]), Return.NO
            raise TypecheckException(f"{node.name} is undefined.", node)
        elif isinstance(node, ast.IndexAccess):
            index, turn = self.check(node.rhs)
            if len(index) != 1 or type(index[0]) != bongtypes.Integer:
                raise TypecheckException("Indexing requires Integer.",
                                         node.rhs)
            lhs, turn = self.check(node.lhs)
            if len(lhs) != 1:
                raise TypecheckException(
                    "Indexing requires a single variable.", node.lhs)
            if isinstance(lhs[0], bongtypes.String):  # bong string
                return lhs, Return.NO
            if isinstance(lhs[0], bongtypes.Array):  # bong array
                return TypeList([lhs[0].contained_type]), Return.NO
            raise TypecheckException("IndexAccess with unsupported type.",
                                     node.lhs)
        elif isinstance(node, ast.DotAccess):
            lhs, turn = self.check(node.lhs)
            if len(lhs) != 1:
                raise TypecheckException(
                    "DotAccess requires a single variable/identifier.",
                    node.lhs)
            if isinstance(lhs[0], bongtypes.Struct):  # bong struct
                if node.rhs not in lhs[0].fields:
                    raise TypecheckException(
                        f"Name '{node.rhs}' not found in"
                        f" struct '{node.lhs}'.", node)
                value_type = lhs[0].fields[node.rhs]
                return TypeList([value_type]), Return.NO
            elif isinstance(lhs[0], bongtypes.Module):  # module
                modulepath = lhs[0].path
                if not modulepath in self.modules:
                    raise TypecheckException(
                        f"Module {node.lhs} can not be"
                        " resolved.", node)
                module = self.modules[modulepath]
                if node.rhs not in module.symbols_global:
                    raise TypecheckException(
                        f"Name '{node.rhs}' not found in"
                        f" module '{node.lhs}' which resolved to '{lhs[0]}'.",
                        node)
                return TypeList([module.symbols_global[node.rhs]]), Return.NO
            raise TypecheckException("DotAccess with unsupported type.",
                                     node.lhs)
        elif isinstance(node, ast.FunctionDefinition):
            # The function interface should already be completely in the symbol table.
            # Here, we only check that the function block is valid and that it returns
            # what we expect!
            func = self.symbols_global[node.name]  # bongtypes.Function
            assert (isinstance(func, bongtypes.Function))
            # Before function body checking, save/restore symbol table
            # The current snapshot should be empty here because function
            # definitions are typechecked before the main statements are
            # checked. But anyways, logically, this is the right approach!
            symbol_tree_snapshot = self.symbol_tree.take_snapshot()
            self.symbol_tree.restore_snapshot(node.symbol_tree_snapshot)
            # Compare expected with actual result/return
            expect = func.return_types
            actual, turn = self.check(node.body)
            match_types(
                expect, actual, node, "Function return type does not"
                f" match function declaration. Declared '{expect}' but"
                f" returned '{actual}'.")
            # Enforce that there is a return statement if we require it
            if len(expect) > 0:  # Return required
                if turn != Return.YES:  # Return not guaranteed
                    raise TypecheckException("Point of no return reached!",
                                             node)
                    raise TypecheckException(
                        "Function declaration expects return type"
                        f" '{expect}' but a return statement that"
                        " will definitely be invoked is missing.", node)
            # Restore symbol table before function call
            self.symbol_tree.restore_snapshot(symbol_tree_snapshot)
            return TypeList(
                []), Return.NO  # FunctionDefinition itself returns nothing
        if isinstance(node, ast.FunctionCall):
            funcs, turn = self.check(node.name)
            if len(funcs) != 1:
                raise TypecheckException(
                    f"'{node.name}' does not resolve to a function.",
                    node.name)
            func = funcs[0]
            if type(func) != bongtypes.Function and type(
                    func) != bongtypes.BuiltinFunction:
                raise TypecheckException(f"'{node.name}' is not a function.",
                                         node)
            argtypes, turn = self.check(node.args)
            # Check builtin functions
            if isinstance(func, bongtypes.BuiltinFunction):
                try:
                    return func.check(argtypes), Return.NO
                except BongtypeException as e:  # Convert to TypecheckException
                    raise TypecheckException(e.msg, node)
            # Otherwise, it is a bong function that has well-defined parameter types
            assert (isinstance(func, bongtypes.Function))
            match_types(
                func.parameter_types, argtypes, node,
                (f"Function '{node.name}' expects parameters of type "
                 f"'{func.parameter_types}' but '{argtypes}' were given."))
            # If everything goes fine (function can be called), it returns
            # whatever the function declaration says \o/
            return func.return_types, Return.NO
        elif isinstance(node, ast.Print):
            self.check(node.expr)  # We can print anything but don't care
            return TypeList([]), Return.NO
        elif isinstance(node, ast.Let):
            # Check rhs expression
            results, turn = self.check(node.expr)
            if len(node.names) != len(results):
                raise TypecheckException(
                    "Number of expressions on rhs of let statement does not match the number of variables.",
                    node)
            # Before handling the lhs of the let statement, set the correct
            # scope symbol table. This is necessary so that all symbol table
            # interaction affects the variables that are declared by the
            # let statement
            self.symbol_tree.restore_snapshot(node.symbol_tree_snapshot)
            # Then, check the type information and store to symbol table
            for name, type_identifier, result in zip(node.names, node.types,
                                                     results):
                if isinstance(type_identifier, ast.BongtypeIdentifier):
                    typ = self.resolve_type(type_identifier, self.main_unit,
                                            node)
                    result = merge_types(
                        typ, result, node,
                        "Assignment in let statement impossible: '{}' has type '{}' but expression has type '{}'."
                        .format(name, typ, result))
                else:
                    if not is_specific_type(result):
                        raise TypecheckException(
                            "Automatic type for variable '{}' but rhs is no definitive type either, '{}' found instead."
                            .format(name, result), node)
                self.symbol_tree[name] = result
            return TypeList([]), Return.NO
        elif isinstance(node, ast.Array):
            # Super complicated things can happen here:
            # Imagine the array contains function calls like
            # arr = [foonc(), baar()]
            # and those functions return multiple values.
            # Then, check(ast.Array.elements : ExpressionList) -- see below -- creates
            # a TypeList and the multiple return values from the functions are TypeLists
            # themselves. When those are added to the main TypeList, it is flattened
            # automatically. In the result, we just have a List of types. And here, we
            # just have to check that all those types are equal.
            # I'm fascinated how everything magically works automatically. Isn't that beautiful?
            types, turn = self.check(node.elements)
            inner_type: bongtypes.ValueType = bongtypes.AutoType()
            # Otherwise, all contained types should match
            for i, typ in enumerate(types):
                inner_type = merge_types(inner_type, typ, node)
            return TypeList([bongtypes.Array(inner_type)]), Return.NO
        elif isinstance(node, ast.StructValue):
            struct_types, turn = self.check(node.name)
            if len(struct_types) != 1:
                raise TypecheckException(
                    f"'{node.name}' does not resolve to a (single) struct.",
                    node.name)
            struct_type = struct_types[0]
            if (type(struct_type) != bongtypes.Typedef
                    or type(struct_type.value_type) != bongtypes.Struct):
                raise TypecheckException(
                    f"'{node.name}' is not a struct type.", node)
            fields: typing.Dict[str, bongtypes.ValueType] = {}
            for name, value in node.fields.items():
                argtypes, turn = self.check(value)
                if len(argtypes) != 1:
                    raise TypecheckException(
                        "Expression does not evaluate"
                        " to a single value.", value)
                # Duplicates are caught in the parser, we can just assign here.
                if not isinstance(argtypes[0], bongtypes.ValueType):
                    raise TypecheckException("ValueType expected", value)
                fields[name] = argtypes[0]
            # TODO See issue #27: Currently, we only write the resolved struct
            # type's name into the struct value here.
            struct_val = bongtypes.Struct(struct_type.value_type.name, fields)
            typ = merge_types(struct_type.value_type, struct_val, node)
            return TypeList([typ]), Return.NO
        elif isinstance(node, ast.ExpressionList):
            types = bongtypes.TypeList([])
            for exp in node:
                typlist, turn = self.check(exp)
                types.append(typlist)  # TypeLists are automatically flattened
            return types, Return.NO
        else:
            raise Exception("unknown ast node")
        return None
示例#8
0
class Parser:
    def __init__(self, lexer, snapshot=None, basepath=None):
        """Create a parser that reads its tokens from ``lexer``.

        Args:
            lexer: Token source; stored as ``self.lexer``.
            snapshot: Optional pair ``(global symbols, symbol tree snapshot)``
                in the shape returned by ``take_snapshot()``. When given,
                the parser starts from that state instead of a fresh table
                containing only the builtins.
            basepath: Directory that relative import paths are joined to.
                Defaults to the current working directory.
        """
        self.lexer = lexer

        # None is tested by identity so that falsy-but-valid values (e.g. an
        # empty string) are never confused with "not given".
        self.basepath = basepath if basepath is not None else os.getcwd()

        self.symbols_global : typing.Dict[str, bongtypes.BaseNode] = {}
        self.symbol_tree = SymbolTree()
        if snapshot is not None:
            # When restoring the global dictionary, we need to copy the dict.
            # Otherwise, we change the snapshot that the caller (the repl)
            # will (most probably) reuse.
            self.symbols_global = snapshot[0].copy() # overwrite
            self.symbol_tree.restore_snapshot(snapshot[1]) # restore
        else:
            # Only when initializing symbol tables for the first time, register
            # builtin stuff
            for bfuncname, bfunc in bong_builtins.functions.items():
                self.symbols_global[bfuncname] = bongtypes.BuiltinFunction(bfunc[1])
            for btypename, btype in bongtypes.basic_types.items():
                self.symbols_global[btypename] = bongtypes.Typedef(btype())

    # TODO Somehow, the Parser is re-initialized each input round, the
    # evaluator is not. This is somehow the reason why snapshots have to be
    # taken and restored on the parser.
    # I guess this design can be revised, too.
    def take_snapshot(self) -> typing.Tuple[typing.Dict[str, bongtypes.BaseType], SymbolTreeNode]:
        return self.symbols_global, self.symbol_tree.take_snapshot()

    def compile(self) -> typing.Optional[ast.TranslationUnit]:
        """Parse the complete input and report errors on stderr.

        Returns:
            The translation unit produced by ``compile_uncaught()``, or
            ``None`` when a ``lexer.TokenizeException`` or a
            ``ParseException`` was caught and printed.
        """
        try:
            return self.compile_uncaught()
        except lexer.TokenizeException as e:
            print(f"LexerError in {e.filepath}, line {e.line},"
                    f" column {e.col}: {e.msg}", file=sys.stderr)
        except ParseException as e:
            # The exception's offset selects the token the message refers to.
            t = self.peek(e.offset)
            # Tokens without a lexeme are described by their type instead.
            lexeme = t.lexeme if t.lexeme is not None else t.type
            print(f"ParseError: Token '{lexeme}' found in {t.filepath},"
                    f" line {t.line}, column {t.col}: {e.msg}",
                    file=sys.stderr) # t.length unused
        return None
    def compile_uncaught(self) -> ast.TranslationUnit:
        """Parse top-level statements until EOF and sort them by kind.

        Imports, struct definitions and function definitions are collected
        separately from all remaining statements. Lexer and parse errors
        are not caught here; they propagate to the caller.

        Raises:
            Exception: if two struct definitions with the same name were
                produced.
        """
        # Token access is set up here rather than in the constructor
        # because init_token_access() can throw EofException.
        self.init_token_access()
        imports : typing.List[ast.Import] = []
        structs : collections.OrderedDict[str, ast.StructDefinition] = collections.OrderedDict()
        functions : collections.OrderedDict[str, ast.FunctionDefinition] = collections.OrderedDict()
        others : typing.List[ast.BaseNode] = []
        while self.peek().type != token.EOF:
            node = self.top_level_stmt()
            if isinstance(node, ast.Import):
                imports.append(node)
                continue
            if isinstance(node, ast.StructDefinition):
                if node.name in structs:
                    raise Exception("Struct Definition with same name generated twice.")
                structs[node.name] = node
                continue
            if isinstance(node, ast.FunctionDefinition):
                functions[node.name] = node
                continue
            # Everything else is an ordinary top-level statement.
            others.append(node)
        return ast.TranslationUnit(imports, structs, functions,
                others, self.symbols_global)

    def top_level_stmt(self) -> ast.BaseNode:
        """Parse one top-level statement.

        Imports, struct definitions and function definitions are selected
        by their leading token; anything else is parsed as an ordinary
        statement via ``stmt()``.
        """
        kind = self.peek().type
        # Checked in this order: import, struct, func.
        top_level_parsers = (
            (token.IMPORT, self.parse_import),
            (token.STRUCT, self.parse_struct_definition),
            (token.FUNC, self.parse_function_definition),
        )
        for expected, parse in top_level_parsers:
            if kind == expected:
                return parse()
        return self.stmt()

    def stmt(self) -> ast.BaseNode:
        """Parse one statement, chosen by looking at the upcoming tokens.

        Statements identified by their first token (print, let, if, return,
        while, block) are tried first, then anything that can start an
        expression, and finally path-like system calls.

        Raises:
            ParseException: if the upcoming tokens start no known statement.
        """
        kind = self.peek().type
        # Statements that are recognizable from their first token alone.
        leading_token_parsers = (
            (token.PRINT, self.print_stmt),
            (token.LET, self.let_stmt),
            (token.IF, self.if_stmt),
            (token.RETURN, self.return_stmt),
            (token.WHILE, self.while_stmt),
            (token.LBRACE, self.block_stmt),
        )
        for expected, parse in leading_token_parsers:
            if kind == expected:
                return parse()
        # Token types that are accepted as the start of an expression.
        expression_starts = (
            token.IDENTIFIER,
            token.INT_VALUE,
            token.FLOAT_VALUE,
            token.BOOL_VALUE,
            token.LPAREN,
            token.OP_SUB,
            token.OP_NEG,
            token.LBRACKET,
            token.STRING,
        )
        if any(kind == start for start in expression_starts):
            return self.expression_stmt()
        # Special cases: Syscalls in current directory like './foo' or with
        # absolute path like '/foo/bar'
        first = self.peek(0).type
        # '/' followed by an identifier
        if first == token.OP_DIV and self.peek(1).type == token.IDENTIFIER:
            return self.expression_stmt()
        if first == token.DOT:
            second = self.peek(1).type
            # './'
            if second == token.OP_DIV:
                return self.expression_stmt()
            # '../'
            if second == token.DOT and self.peek(2).type == token.OP_DIV:
                return self.expression_stmt()
        raise ParseException("Unknown statement found.")

    def parse_import(self):
        """Parse ``import "<path>" as <name>`` with an optional semicolon.

        A relative path is joined to ``self.basepath``. The alias is
        registered in the global symbol table with an unknown type.

        Returns:
            An ``ast.Import`` built from the consumed tokens, the alias and
            the path.

        Raises:
            Exception: if the current token is not the import keyword.
            ParseException: if the path string, ``as`` or the alias is
                missing, or if the alias already exists in the global
                symbol table.
        """
        consumed = TokenList()
        keyword_found = consumed.add(self.match(token.IMPORT))
        if not keyword_found:
            raise Exception("Expected import statement.")
        path_found = consumed.add(self.match(token.STRING))
        if not path_found:
            raise ParseException("Expected module path as string.")
        # The token matched last is the path string.
        module_path = self.peek(-1).lexeme
        as_found = consumed.add(self.match(token.AS))
        if not as_found:
            raise ParseException("Expected as")
        alias_found = consumed.add(self.match(token.IDENTIFIER))
        if not alias_found:
            raise ParseException("Expected module alias name.")
        name = self.peek(-1).lexeme
        # The trailing semicolon is optional.
        consumed.add(self.match(token.SEMICOLON))
        module_path = (module_path if os.path.isabs(module_path)
                else os.path.join(self.basepath, module_path))
        if name in self.symbols_global:
            raise ParseException(f"Name '{name}' already exists in global symbol table. Import impossible.")
        self.symbols_global[name] = bongtypes.UnknownType()
        return ast.Import(consumed, name, module_path)

    def parse_function_definition(self) -> ast.FunctionDefinition:
        """Parse ``func name(params) [: type, ...] { body }``.

        The function name is registered in the global symbol table (with an
        unknown type) before the parameters are parsed. The body is parsed
        against a fresh local symbol tree that contains only the
        parameters; a snapshot of that tree, taken before the body is
        parsed, is stored in the returned node. The previously active
        symbol tree is restored afterwards, also when parsing fails.

        Returns:
            The ``ast.FunctionDefinition`` for the parsed function.

        Raises:
            Exception: if the current token is not the func keyword.
            ParseException: on a malformed definition, a name that already
                exists in the global symbol table, or a duplicate
                parameter name.
        """
        toks = TokenList()
        # FUNC foo (bar : int) : str { ... }
        if not toks.add(self.match(token.FUNC)):
            raise Exception("Expected function definition.")
        # func FOO (bar : int) : str { ... }
        if not toks.add(self.match(token.IDENTIFIER)):
            raise ParseException("Expected function name.")
        name = self.peek(-1).lexeme
        if name in self.symbols_global:
            raise ParseException(f"Name '{name}' already exists in symbol table. Function definition impossible.")
        # Register function name before parsing parameter names (no parameter name should have the function name!)
        self.symbols_global[name] = bongtypes.UnknownType()
        # (
        if not toks.add(self.match(token.LPAREN)):
            raise ParseException("Expected ( to start the parameter list.")
        # Parameters
        parameter_names, parameter_types = self.parse_parameters()
        # )
        if not toks.add(self.match(token.RPAREN)):
            raise ParseException("Expected ) to end the parameter list.")
        # Return types
        return_types : typing.List[ast.BongtypeIdentifier] = []
        if toks.add(self.match(token.COLON)):
            self.check_eof("Return type list expected.")
            return_types.append(self.parse_type())
            while toks.add(self.match(token.COMMA)):
                return_types.append(self.parse_type())
        # {
        if self.peek().type != token.LBRACE:
            raise ParseException("Expected function body.")
        # New local symbol table (tree) for statement block
        # We could just store the global symbol table in the object because
        # it will always be the same. But remembering the previous symbol
        # table here theoretically allows to parse function definitions inside
        # other functions (the local symbol table would be properly restored
        # then).
        global_symbol_tree = self.symbol_tree
        self.symbol_tree = SymbolTree()
        try:
            # Parameters
            for param in parameter_names:
                if param in self.symbol_tree:
                    raise ParseException(f"Argument name '{param}' appears twice in function definition")
                self.symbol_tree.register(param, bongtypes.UnknownType())
            # Snapshot before block is parsed (this changes the state of the tree)
            func_symbol_tree_snapshot = self.symbol_tree.take_snapshot()
            # Function body
            body = self.block_stmt()
        finally:
            # Restore symbol table/tree. Doing this in a finally clause
            # keeps the parser from being left with the function-local tree
            # when a parameter or the body fails to parse.
            self.symbol_tree = global_symbol_tree
        return ast.FunctionDefinition(toks, name, parameter_names, parameter_types, return_types, body, func_symbol_tree_snapshot)

    def parse_struct_definition(self) -> ast.StructDefinition:
        """Parse ``struct name { field : type, ... }``.

        The struct name is registered in the global symbol table (with an
        unknown type) only after the whole definition parsed successfully.

        Returns:
            The ``ast.StructDefinition`` holding the consumed tokens, the
            struct name and a dict mapping field names to their type
            identifiers.

        Raises:
            Exception: if the current token is not the struct keyword.
            ParseException: on a malformed definition, a name that already
                exists in the global symbol table, an empty field list, or
                a field name that occurs more than once.
        """
        toks = TokenList()
        # STRUCT foo {bar : int, ...}
        if not toks.add(self.match(token.STRUCT)):
            raise Exception("Expected struct definition.")
        # struct FOO {bar : int, ...}
        if not toks.add(self.match(token.IDENTIFIER)):
            raise ParseException("Expected struct name.")
        name = self.peek(-1).lexeme
        if name in self.symbols_global:
            raise ParseException(f"Name '{name}' already exists in global symbol table. Struct definition impossible.")
        # {
        if not toks.add(self.match(token.LBRACE)):
            raise ParseException("Expected { to start the field list.")
        # Fields
        field_names, field_types = self.parse_parameters()
        if not field_names:
            raise ParseException(f"Struct {name} is empty.")
        fields : typing.Dict[str, ast.BongtypeIdentifier] = {}
        for field_name, field_type in zip(field_names, field_types):
            if field_name in fields:
                # Both halves of the message must be f-strings; otherwise
                # the struct name placeholder is printed literally.
                raise ParseException(f"Field '{field_name}' found multiple times"
                        f" in struct '{name}'.")
            fields[field_name] = field_type
        # If } occurs on its own line, an implicit semicolon is inserted
        # after the fields
        self.match(token.SEMICOLON)
        # }
        self.check_eof("Expected } to end the field list.")
        if not toks.add(self.match(token.RBRACE)):
            raise ParseException("Expected } to end the field list.")
        # If everything went fine, register the struct name
        self.symbols_global[name] = bongtypes.UnknownType()
        return ast.StructDefinition(toks, name, fields)

    # Used by parse_function_definition() and parse_struct_definition()
    def parse_parameters(self) -> typing.Tuple[typing.List[str],typing.List[ast.BongtypeIdentifier]]:
        """Parse a comma-separated, possibly empty list of ``name : type``.

        Returns:
            Two parallel lists: the parameter names and their type
            identifiers. Both are empty when the upcoming token is not an
            identifier.
        """
        names : typing.List[str] = []
        types_ : typing.List[ast.BongtypeIdentifier] = []
        self.check_eof("Parameter list expected")
        if self.peek().type == token.IDENTIFIER:
            # At least one parameter; keep going while commas follow.
            while True:
                param_name, param_type = self.parse_parameter()
                names.append(param_name)
                types_.append(param_type)
                if not self.match(token.COMMA):
                    break
        return (names, types_)
    def parse_parameter(self) -> typing.Tuple[str,ast.BongtypeIdentifier]:
        """Parse a single ``name : type`` pair.

        Returns:
            The parameter name and its type identifier.

        Raises:
            ParseException: if the name or the colon is missing.
        """
        self.check_eof("Another parameter expected")
        name_matched = self.match(token.IDENTIFIER)
        if not name_matched:
            raise ParseException("Expected identifier as parameter name.")
        # The identifier that was just consumed is the parameter name.
        param_name = self.peek(-1).lexeme
        if self.match(token.COLON):
            return (param_name, self.parse_type())
        raise ParseException("Expected type hint for function parameter.")
    # Used by function definition and let statement
    def parse_type(self) -> ast.BongtypeIdentifier:
        """Parse a type: zero or more ``[]`` pairs, then a dotted name.

        Returns:
            An ``ast.BongtypeIdentifier`` built from the list of name
            components and the number of leading ``[]`` pairs.

        Raises:
            ParseException: if a ``[`` is not followed by ``]`` or an
                identifier is missing.
        """
        array_depth = 0
        # Each leading '[' ']' pair adds one array level.
        while True:
            if not self.match(token.LBRACKET):
                break
            if not self.match(token.RBRACKET):
                raise ParseException("Expected closing bracket ']' in type specification.")
            array_depth += 1
        if not self.match(token.IDENTIFIER):
            raise ParseException("Expected identifier as module or type.")
        name_parts = [self.peek(-1).lexeme]
        # Further components are separated by dots.
        while self.match(token.DOT):
            if self.match(token.IDENTIFIER):
                name_parts.append(self.peek(-1).lexeme)
            else:
                raise ParseException("Expected identifier as module or type.")
        return ast.BongtypeIdentifier(name_parts, array_depth)

    def return_stmt(self) -> ast.Return:
        """Parse ``return`` with an optional list of result expressions.

        Returns:
            An ``ast.Return`` holding the consumed tokens and, unless the
            keyword is directly followed by a semicolon, the parsed
            expressions.

        Raises:
            Exception: if the current token is not the return keyword.
        """
        consumed = TokenList()
        keyword_found = consumed.add(self.match(token.RETURN))
        if not keyword_found:
            raise Exception("Expected return statement.")
        # A semicolon right after the keyword means there are no values.
        bare_return = consumed.add(self.match(token.SEMICOLON))
        if not bare_return:
            values = self.parse_commata_expressions()
            # The closing semicolon is optional here.
            consumed.add(self.match(token.SEMICOLON))
            return ast.Return(consumed, values)
        return ast.Return(consumed)

    def expression_stmt(self) -> ast.BaseNode:
        """Parse an expression used as a statement.

        The node comes from ``assignment()``; if a semicolon follows, its
        token is appended to the node's token list.
        """
        node = self.assignment()
        semicolon = self.match(token.SEMICOLON)
        if semicolon:
            node.tokens.append(semicolon)
        return node