Exemplo n.º 1
0
    def __init__(self):
        self.setUpLogging()

        # Program counter: position of the next instruction to execute.
        self.PC = 0
        self.endAddress = 0

        # Structure of the stores used in the vm.
        self.variables = {}
        self.code = []
        self.rulesCode = []
        self.macrosCode = []
        self.preprocessCode = []
        self.trie = SystemTrie()

        # Current code section in execution (a macro, a rule, ...).
        self.currentCodeSection = self.code

        # Execution state of the vm.
        self.status = VM_STATUS.HALTED

        # Transfer stage (chunker, interchunk or postchunk).
        self.transferStage = None
        # Chunker mode to process in shallow mode or advanced transfer.
        self.chunkerMode = None

        # Input will be divided in words with their patterns information.
        self.words = []
        self.superblanks = []
        self.lastSuperblank = -1
        self.currentWords = []
        self.nextPattern = 0

        # Components used by the vm.
        self.tokenizer = None
        self.callStack = CallStack(self)
        self.stack = SystemStack()
        self.loader = None
        self.interpreter = Interpreter(self)

        # Components used only in debug mode.
        self.debugger = None
        self.debugMode = False

        self.input = sys.stdin
        # We use 'buffer' to get a stream of bytes, not str, because we want to
        # encode it using utf-8 (just for safety).
        self.output = sys.stdout.buffer
Exemplo n.º 2
0
 def __init__(self, parser: Parser):
     self.parser = parser
     self.analyzer = SemanticAnalyzer()
     self.callstack = CallStack()
Exemplo n.º 3
0
class Interpreter(Visitor):
    """
    Interpreter inherit from Visitor and interpret it when visiting the abstract syntax tree
    """
    def __init__(self, parser: Parser):
        self.parser = parser
        self.analyzer = SemanticAnalyzer()
        self.callstack = CallStack()

    def error(self, error_code: ErrorCode, token):
        raise RuntimeError(
            error_code=error_code,
            token=token,
            message=f'{error_code.value} -> {token}',
        )

    def log(self, msg):
        print(msg)

    def visit_binop(self, node: BinOp):
        left_val = self.visit(node.left)
        right_val = self.visit(node.right)
        # todo type checker
        if node.op.type is TokenType.PLUS:
            return left_val + right_val
        elif node.op.type is TokenType.MINUS:
            return left_val - right_val
        elif node.op.type is TokenType.MUL:
            return left_val * right_val
        elif node.op.type is TokenType.INTEGER_DIV:
            return left_val // right_val
        elif node.op.type is TokenType.FLOAT_DIV:
            return left_val / right_val
        elif node.op.type is TokenType.MOD:
            return left_val % right_val
        elif node.op.type is TokenType.AND:
            return left_val and right_val
        elif node.op.type is TokenType.OR:
            return left_val or right_val
        elif node.op.type is TokenType.EQUALS:
            return left_val == right_val
        elif node.op.type is TokenType.NOT_EQUALS:
            return left_val != right_val
        elif node.op.type is TokenType.GREATER:
            return left_val > right_val
        elif node.op.type is TokenType.GREATER_EQUALS:
            return left_val >= right_val
        elif node.op.type is TokenType.LESS:
            return left_val < right_val
        elif node.op.type is TokenType.LESS_EQUALS:
            return left_val <= right_val

    def visit_num(self, node: Num):
        return node.value

    def visit_boolean(self, node: Boolean):
        return node.value

    def visit_unaryop(self, node: UnaryOp):
        if node.op.type is TokenType.PLUS:
            return +self.visit(node.factor)
        if node.op.type is TokenType.MINUS:
            return -self.visit(node.factor)
        if node.op.type is TokenType.NOT:
            return not self.visit(node.factor)

    def visit_compound(self, node: Compound):
        for child in node.childrens:
            self.visit(child)

    def visit_var(self, node: Var):
        current_frame: Frame = self.callstack.peek()
        # get value by variable's name
        val = current_frame.get_value(node.name)
        return val

    def visit_assign(self, node: Assign):
        var_name = node.left.name  # get variable's name
        var_value = self.visit(node.right)
        current_frame: Frame = self.callstack.peek()
        if current_frame.type is FrameType.FUNCTION and current_frame.name == var_name:
            current_frame.return_val = var_value
        else:
            current_frame.set_value(var_name, var_value)

    def visit_program(self, node: Program):
        program_name = node.name

        self.log(f'ENTER: PROGRAM {program_name}')

        frame = Frame(name=program_name, type=FrameType.PROGRAM)

        self.callstack.push(frame)
        self.visit(node.block)

        self.log(str(self.callstack))

        self.callstack.pop()
        self.log(f'LEAVE: PROGRAM {program_name}')

    def visit_block(self, node: Block):
        for declaration in node.declarations:
            self.visit(declaration)
        self.visit(node.compound_statement)

    def visit_vardecl(self, node: VarDecl):
        var_name = node.var_node.name
        current_frame: Frame = self.callstack.peek()
        current_frame.define(var_name)

    def visit_procdecl(self, node: ProcedureDecl):
        proc_name = node.token.value
        current_frame: Frame = self.callstack.peek()
        current_frame.define(proc_name)
        current_frame.set_value(proc_name, node)

    def visit_proccall(self, node: ProcedureCall):
        proc_name = node.proc_name
        current_frame = self.callstack.peek()
        proc_node: ProcedureDecl = current_frame.get_value(proc_name)

        self.log(f'ENTER: PROCEDURE {proc_name}')

        # get actual params values
        actual_param_values = [
            self.visit(actual_param) for actual_param in node.actual_params
        ]

        proc_frame = Frame(name=proc_name, type=FrameType.PROCEDURE)

        self.callstack.push(proc_frame)
        current_frame: Frame = self.callstack.peek()

        # map actual params to formal params
        for (formal_param, actual_param_value) in zip(proc_node.params,
                                                      actual_param_values):
            current_frame.define(formal_param.var_node.name)
            current_frame.set_value(formal_param.var_node.name,
                                    actual_param_value)

        self.visit(proc_node.block)
        self.log(str(self.callstack))

        self.callstack.pop()
        self.log(f'LEAVE: PROCEDURE {proc_name}')

    def visit_funcdecl(self, node: FunctionDecl):
        func_name = node.token.value
        current_frame: Frame = self.callstack.peek()
        current_frame.define(func_name)
        current_frame.set_value(func_name, node)

    def visit_funccall(self, node: FunctionCall):
        current_frame = self.callstack.peek()
        func_name = node.func_name
        func_node: FunctionDecl = current_frame.get_value(func_name)

        self.log(f'ENTER: FUNCTION {func_name}')
        func_frame = Frame(name=func_name, type=FrameType.FUNCTION)
        self.callstack.push(func_frame)
        current_frame: Frame = self.callstack.peek()

        # get actual params values to formal params
        actual_param_values = [
            self.visit(actual_param) for actual_param in node.actual_params
        ]

        for (formal_param, actual_param_value) in zip(func_node.params,
                                                      actual_param_values):
            current_frame.define(formal_param.var_node.name)
            current_frame.set_value(formal_param.var_node.name,
                                    actual_param_value)

        self.visit(func_node.block)
        self.log(str(self.callstack))
        self.log(f'LEAVE: FUNCTION {func_name}')

        return_val = current_frame.return_val
        self.callstack.pop()
        if return_val is None:
            self.error(error_code=ErrorCode.MISSING_RETURN, token=node.token)
        return return_val

    def visit_condition(self, node: Condition):
        if self.visit(node.condition_node):
            self.visit(node.then_node)
        elif node.else_node is not None:
            self.visit(node.else_node)

    def visit_then(self, node: Then):
        self.visit(node.child)

    def visit_else(self, node: Else):
        self.visit(node.child)

    def visit_while(self, node: WhileLoop):
        while self.visit(node.conditon_node) is True:
            try:
                self.visit(node.body_node)
            except ContinueError:
                continue
            except BreakError:
                break

    def visit_continue(self, node: Continue):
        raise ContinueError()

    def visit_break(self, node: Break):
        raise BreakError()

    def interpret(self):
        ast = self.parser.parse()
        self.analyzer.visit(ast)
        self.visit(ast)
Exemplo n.º 4
0
class VM:
    """This class encapsulates all the VM processing."""

    def __init__(self):
        self.setUpLogging()

        # Program counter: position of the next instruction to execute.
        self.PC = 0
        self.endAddress = 0

        # Structure of the stores used in the vm.
        self.variables = {}
        self.code = []
        self.rulesCode = []
        self.macrosCode = []
        self.preprocessCode = []
        self.trie = SystemTrie()

        # Current code section in execution (a macro, a rule, ...).
        self.currentCodeSection = self.code

        # Execution state of the vm.
        self.status = VM_STATUS.HALTED

        # Transfer stage (chunker, interchunk or postchunk).
        self.transferStage = None
        # Chunker mode to process in shallow mode or advanced transfer.
        self.chunkerMode = None

        # Input will be divided in words with their patterns information.
        self.words = []
        self.superblanks = []
        self.lastSuperblank = -1
        self.currentWords = []
        self.nextPattern = 0

        # Components used by the vm.
        self.tokenizer = None
        self.callStack = CallStack(self)
        self.stack = SystemStack()
        self.loader = None
        self.interpreter = Interpreter(self)

        # Components used only in debug mode.
        self.debugger = None
        self.debugMode = False

        self.input = sys.stdin
        # We use 'buffer' to get a stream of bytes, not str, because we want to
        # encode it using utf-8 (just for safety).
        self.output = sys.stdout.buffer

    def setUpLogging(self):
        """Set at least an error through stderr logger"""

        self.formatStr = "%(levelname)s: %(filename)s[%(lineno)d]:\t%(message)s"
        self.logger = logging.getLogger("vm")

        errorHandler = logging.StreamHandler(sys.stderr)
        errorHandler.setFormatter(logging.Formatter(self.formatStr))
        errorHandler.setLevel(logging.ERROR)
        self.logger.addHandler(errorHandler)

    def setDebugMode(self):
        """Set the debug mode, creating a debugger an setting it up as a proxy."""
        self.debugMode = True
        self.debugger = Debugger(self, self.interpreter)
        # Set the debugger as a proxy.
        self.interpreter = self.debugger

        # Create a logging handler for debugging messages.
        debugHandler = logging.StreamHandler(sys.stdout)
        debugHandler.setFormatter(logging.Formatter(self.formatStr))
        debugHandler.setLevel(logging.DEBUG)
        self.logger.addHandler(debugHandler)

    def setLoader(self, header, t1xFile):
        """Set the loader to use depending on the header of the code file."""

        if "assembly" in header:
            self.loader = AssemblyLoader(self, t1xFile)
        else:
            return False
        return True

    def setTransferStage(self, transferHeader):
        """Set the transfer stage to process by the vm."""

        if "transfer" in transferHeader:
            self.transferStage = TRANSFER_STAGE.CHUNKER
            self.tokenizer = TransferWordTokenizer()
            # Set chunker mode, by default 'lu'.
            if "chunk" in transferHeader:
                self.chunkerMode = CHUNKER_MODE.CHUNK
            else:
                self.chunkerMode = CHUNKER_MODE.LU
        elif "interchunk" in transferHeader:
            self.transferStage = TRANSFER_STAGE.INTERCHUNK
            self.tokenizer = ChunkWordTokenizer()
        elif "postchunk" in transferHeader:
            self.transferStage = TRANSFER_STAGE.POSTCHUNK
            self.tokenizer = ChunkWordTokenizer(solveRefs=True, parseContent=True)

    def tokenizeInput(self):
        """Call to the tokenizer to divide the input in tokens."""

        self.words, self.superblanks = self.tokenizer.tokenize(self.input)

    def initializeVM(self):
        """Execute code to initialize the VM, e.g. default values for vars."""

        self.PC = 0
        self.status = VM_STATUS.RUNNING
        while self.status == VM_STATUS.RUNNING and self.PC < len(self.code):
            self.interpreter.execute(self.code[self.PC])

    def getSourceWord(self, pos):
        """Get the part of a source word needed for pattern matching, depending
           on the transfer stage."""

        if self.transferStage == TRANSFER_STAGE.CHUNKER:
            return self.words[pos].source.lu
        elif self.transferStage == TRANSFER_STAGE.INTERCHUNK:
            word = self.words[pos].chunk
            return word.attrs["lem"] + word.attrs["tags"]
        else:
            return self.words[pos].chunk.attrs["lem"]

    def getNextInputPattern(self):
        """Get the next input pattern to analyze, lowering the lemma first."""

        try:
            pattern = self.getSourceWord(self.nextPattern)
            tag = pattern.find("<")
            pattern = pattern[:tag].lower() + pattern[tag:]
            self.nextPattern += 1
        except IndexError:
            return None

        return pattern

    def getUniqueSuperblank(self, pos):
        """Get the superblank at pos avoiding duplicates."""

        try:
            if pos != self.lastSuperblank:
                self.lastSuperblank = pos
                return self.superblanks[pos]
        except IndexError:
            pass

        return ""

    def selectNextRule(self):
        """Select the next rule to execute depending on the transfer stage."""

        if self.transferStage == TRANSFER_STAGE.POSTCHUNK:
            self.selectNextRulePostChunk()
        else:
            self.selectNextRuleLRLM()

    def selectNextRulePostChunk(self):
        """Select the next rule trying to match patterns one by one."""

        # Go through all the patterns until one matches a rule.
        while self.nextPattern < len(self.words):
            startPatternPos = self.nextPattern
            pattern = self.getNextInputPattern()
            ruleNumber = self.trie.getRuleNumber(pattern)
            if ruleNumber is not None:
                #                print('Pattern "{}" match rule: {}'.format(pattern, ruleNumber))
                self.setRuleSelected(ruleNumber, startPatternPos, pattern)
                return
            else:
                self.processUnmatchedPattern(self.words[startPatternPos])

        # if there isn't any rule at all to execute, stop the vm.
        self.status = VM_STATUS.HALTED

    def selectNextRuleLRLM(self):
        """Select the next rule to execute matching the LRLM pattern."""

        longestMatch = None
        nextPatternToProcess = self.nextPattern

        # Go through all the patterns until one matches a rule.
        while self.nextPattern < len(self.words):
            startPatternPos = self.nextPattern
            # Get the next pattern to process
            pattern = self.getNextInputPattern()
            curNodes = self.trie.getPatternNodes(pattern)
            nextPatternToProcess += 1

            # Get the longest match, left to right
            fullPattern = pattern
            while len(curNodes) > 0:
                # Update the longest match if needed.
                ruleNumber = self.trie.getRuleNumber(fullPattern)
                if ruleNumber is not None:
                    longestMatch = ruleNumber
                    nextPatternToProcess = self.nextPattern

                # Continue trying to match current pattern + the next one.
                pattern = self.getNextInputPattern()
                if pattern:
                    fullPattern += pattern
                nextNodes = []
                for node in curNodes:
                    nextNodes.extend(self.trie.getPatternNodes(pattern, node))
                curNodes = nextNodes

            # If the pattern doesn't match, we will continue with the next one.
            # If there is a match of a group of patterns, we will continue with
            # the last unmatched pattern.
            self.nextPattern = nextPatternToProcess

            # Get the full pattern matched by the rule.
            if self.nextPattern < len(self.words):
                end = fullPattern.find(self.getSourceWord(self.nextPattern))
                if end > 0:
                    fullPattern = fullPattern[:end]

            # If there is a longest match, set the rule to process
            if longestMatch is not None:
                #                print('Pattern "{}" match rule: {}'.format(fullPattern, longestMatch))
                self.setRuleSelected(longestMatch, startPatternPos, fullPattern)
                return
            # Otherwise, process the unmatched pattern.
            else:
                self.processUnmatchedPattern(self.words[self.nextPattern - 1])

            longestMatch = None

        # if there isn't any rule at all to execute, stop the vm.
        self.status = VM_STATUS.HALTED

    def setRuleSelected(self, ruleNumber, startPos, pattern):
        """Set a rule and its words as current ones."""

        # Output the leading superblank of the matched pattern.
        self.writeOutput(self.getUniqueSuperblank(startPos))

        # Add only a reference to the index pos of words, to avoid copying them.
        wordsIndex = []
        while startPos != self.nextPattern:
            wordsIndex.append(startPos)
            startPos += 1

        # Create an entry in the call stack with the rule to execute.
        self.callStack.push("rules", ruleNumber, wordsIndex)

        if self.debugMode:
            self.debugger.ruleSelected(pattern, ruleNumber)

    def processRuleEnd(self):
        """Do all the processing needed when rule ends."""

        # Output the trailing superblank of the matched pattern.
        self.writeOutput(self.getUniqueSuperblank(self.nextPattern))

    def processUnmatchedPattern(self, word):
        """Output unmatched patterns as the default form."""
        default = ""

        # Output the leading superblank of the unmatched pattern.
        default += self.getUniqueSuperblank(self.nextPattern - 1)

        # For the chunker, output the default version of the unmatched pattern.
        if self.transferStage == TRANSFER_STAGE.CHUNKER:
            # If the target word is empty, we don't need to output anything.
            if word.target.lu != "":
                wordTL = "^" + word.target.lu + "$"
                if self.chunkerMode == CHUNKER_MODE.CHUNK:
                    if wordTL[1] == "*":
                        default += "^unknown<unknown>{" + wordTL + "}$"
                    else:
                        default += "^default<default>{" + wordTL + "}$"
                else:
                    default += wordTL

        # For the interchunk stage only need to output the complete chunk.
        elif self.transferStage == TRANSFER_STAGE.INTERCHUNK:
            default += "^" + word.chunk.lu + "$"

        # Lastly, for the postchunk stage output the lexical units inside chunks
        # with the case of the chunk pseudolemma.
        else:
            default += word.chunk.attrs["chcontent"][1:-1]

        # Output the trailing superblank of the matched pattern.
        default += self.getUniqueSuperblank(self.nextPattern)

        self.writeOutput(default)

    def terminateVM(self):
        """Do all the processing needed when the vm is being turned off."""

        pass

    def writeOutput(self, string):
        """A single entry point to write strings to the output."""

        self.output.write(string.encode("utf-8"))

    def run(self):
        """Load, preprocess and execute the contents of the files."""

        try:
            self.loader.load()
            self.interpreter.preprocess()
            self.initializeVM()
            self.tokenizeInput()

            if self.debugMode:
                self.debugger.start()

            # Select the first rule. If there isn't one, the vm work has ended.
            if self.status == VM_STATUS.RUNNING:
                self.selectNextRule()
            while self.status == VM_STATUS.RUNNING:
                # Execute the rule selected until it ends.
                while self.status == VM_STATUS.RUNNING and self.PC < self.endAddress:
                    self.interpreter.execute(self.currentCodeSection[self.PC])

                self.processRuleEnd()
                # Select the next rule to execute.
                if self.status == VM_STATUS.RUNNING:
                    self.selectNextRule()

        except (Exception) as e:
            self.logger.exception(e)
            exit(1)

        self.terminateVM()

    def printCodeSections(self):
        """Print all the code sections for information or debugging purposes."""

        self.loader.printSection(self.code, "Code")
        self.loader.printSection(self.preprocessCode, "Preprocess")
        self.loader.printSection(self.rulesCode, "Rules", enum=True)
        self.loader.printSection(self.macrosCode, "Macros", enum=True)