def __init__(self): self.setUpLogging() # Program counter: position of the next instruction to execute. self.PC = 0 self.endAddress = 0 # Structure of the stores used in the vm. self.variables = {} self.code = [] self.rulesCode = [] self.macrosCode = [] self.preprocessCode = [] self.trie = SystemTrie() # Current code section in execution (a macro, a rule, ...). self.currentCodeSection = self.code # Execution state of the vm. self.status = VM_STATUS.HALTED # Transfer stage (chunker, interchunk or postchunk). self.transferStage = None # Chunker mode to process in shallow mode or advanced transfer. self.chunkerMode = None # Input will be divided in words with their patterns information. self.words = [] self.superblanks = [] self.lastSuperblank = -1 self.currentWords = [] self.nextPattern = 0 # Components used by the vm. self.tokenizer = None self.callStack = CallStack(self) self.stack = SystemStack() self.loader = None self.interpreter = Interpreter(self) # Components used only in debug mode. self.debugger = None self.debugMode = False self.input = sys.stdin # We use 'buffer' to get a stream of bytes, not str, because we want to # encode it using utf-8 (just for safety). self.output = sys.stdout.buffer
def __init__(self, parser: Parser):
    """Store the parser and create a fresh semantic analyzer and call stack.

    Args:
        parser: object kept as 'self.parser' for later use.
    """
    self.parser = parser
    # Each instance owns its own analyzer and call stack.
    self.analyzer = SemanticAnalyzer()
    self.callstack = CallStack()
class Interpreter(Visitor):
    """Tree-walking interpreter.

    Inherits from Visitor and executes the program while visiting its
    abstract syntax tree. Runtime state lives in frames kept on
    'self.callstack'; every visit method reads or writes the frame on top.
    """

    def __init__(self, parser: Parser):
        """Store the parser and create the semantic analyzer and call stack."""
        self.parser = parser
        self.analyzer = SemanticAnalyzer()
        self.callstack = CallStack()

    def error(self, error_code: ErrorCode, token):
        """Raise a runtime error built from 'error_code' and 'token'."""
        # NOTE(review): the keyword arguments imply that 'RuntimeError' is a
        # project-defined exception shadowing the builtin (the builtin rejects
        # keyword arguments) -- confirm against the module imports.
        raise RuntimeError(
            error_code=error_code,
            token=token,
            message=f'{error_code.value} -> {token}',
        )

    def log(self, msg):
        """Print a trace message."""
        print(msg)

    def visit_binop(self, node: BinOp):
        """Evaluate both operands, then apply the binary operator.

        Both sides are always evaluated (no short-circuit for AND / OR).
        Returns None when the operator type is not one of those listed.
        """
        left_val = self.visit(node.left)
        right_val = self.visit(node.right)
        # todo type checker
        op_type = node.op.type
        if op_type is TokenType.PLUS:
            return left_val + right_val
        elif op_type is TokenType.MINUS:
            return left_val - right_val
        elif op_type is TokenType.MUL:
            return left_val * right_val
        elif op_type is TokenType.INTEGER_DIV:
            return left_val // right_val
        elif op_type is TokenType.FLOAT_DIV:
            return left_val / right_val
        elif op_type is TokenType.MOD:
            return left_val % right_val
        elif op_type is TokenType.AND:
            return left_val and right_val
        elif op_type is TokenType.OR:
            return left_val or right_val
        elif op_type is TokenType.EQUALS:
            return left_val == right_val
        elif op_type is TokenType.NOT_EQUALS:
            return left_val != right_val
        elif op_type is TokenType.GREATER:
            return left_val > right_val
        elif op_type is TokenType.GREATER_EQUALS:
            return left_val >= right_val
        elif op_type is TokenType.LESS:
            return left_val < right_val
        elif op_type is TokenType.LESS_EQUALS:
            return left_val <= right_val

    def visit_num(self, node: Num):
        """Return the literal value stored in the node."""
        return node.value

    def visit_boolean(self, node: Boolean):
        """Return the literal value stored in the node."""
        return node.value

    def visit_unaryop(self, node: UnaryOp):
        """Apply unary plus, minus or NOT to the evaluated operand.

        Returns None when the operator type is none of the three.
        """
        if node.op.type is TokenType.PLUS:
            return +self.visit(node.factor)
        if node.op.type is TokenType.MINUS:
            return -self.visit(node.factor)
        if node.op.type is TokenType.NOT:
            return not self.visit(node.factor)

    def visit_compound(self, node: Compound):
        """Visit every child statement in order."""
        for child in node.childrens:
            self.visit(child)

    def visit_var(self, node: Var):
        """Return the value bound to the variable in the top frame."""
        current_frame: Frame = self.callstack.peek()
        # get value by variable's name
        val = current_frame.get_value(node.name)
        return val

    def visit_assign(self, node: Assign):
        """Evaluate the right-hand side and store it in the top frame.

        Inside a function frame, assigning to the function's own name sets
        the frame's return value instead of a variable.
        """
        var_name = node.left.name  # get variable's name
        var_value = self.visit(node.right)
        current_frame: Frame = self.callstack.peek()
        if current_frame.type is FrameType.FUNCTION and current_frame.name == var_name:
            current_frame.return_val = var_value
        else:
            current_frame.set_value(var_name, var_value)

    def visit_program(self, node: Program):
        """Run the program block inside a PROGRAM frame."""
        program_name = node.name
        self.log(f'ENTER: PROGRAM {program_name}')
        frame = Frame(name=program_name, type=FrameType.PROGRAM)
        self.callstack.push(frame)
        self.visit(node.block)
        self.log(str(self.callstack))
        self.callstack.pop()
        self.log(f'LEAVE: PROGRAM {program_name}')

    def visit_block(self, node: Block):
        """Visit the declarations first, then the compound statement."""
        for declaration in node.declarations:
            self.visit(declaration)
        self.visit(node.compound_statement)

    def visit_vardecl(self, node: VarDecl):
        """Define the declared variable in the top frame."""
        var_name = node.var_node.name
        current_frame: Frame = self.callstack.peek()
        current_frame.define(var_name)

    def visit_procdecl(self, node: ProcedureDecl):
        """Bind the procedure name to its declaration node in the top frame."""
        proc_name = node.token.value
        current_frame: Frame = self.callstack.peek()
        current_frame.define(proc_name)
        current_frame.set_value(proc_name, node)

    def visit_proccall(self, node: ProcedureCall):
        """Call a procedure: evaluate arguments, push a frame, run the body."""
        proc_name = node.proc_name
        current_frame = self.callstack.peek()
        proc_node: ProcedureDecl = current_frame.get_value(proc_name)
        self.log(f'ENTER: PROCEDURE {proc_name}')
        # get actual params values (evaluated in the caller's frame, i.e.
        # before the callee frame is pushed)
        actual_param_values = [
            self.visit(actual_param) for actual_param in node.actual_params
        ]
        proc_frame = Frame(name=proc_name, type=FrameType.PROCEDURE)
        self.callstack.push(proc_frame)
        current_frame: Frame = self.callstack.peek()
        # map actual params to formal params
        for (formal_param, actual_param_value) in zip(proc_node.params, actual_param_values):
            current_frame.define(formal_param.var_node.name)
            current_frame.set_value(formal_param.var_node.name, actual_param_value)
        self.visit(proc_node.block)
        self.log(str(self.callstack))
        self.callstack.pop()
        self.log(f'LEAVE: PROCEDURE {proc_name}')

    def visit_funcdecl(self, node: FunctionDecl):
        """Bind the function name to its declaration node in the top frame."""
        func_name = node.token.value
        current_frame: Frame = self.callstack.peek()
        current_frame.define(func_name)
        current_frame.set_value(func_name, node)

    def visit_funccall(self, node: FunctionCall):
        """Call a function and return the value assigned to its name.

        Raises the MISSING_RETURN error (through 'self.error') when the body
        finished with the frame's return value still None.
        """
        caller_frame = self.callstack.peek()
        func_name = node.func_name
        func_node: FunctionDecl = caller_frame.get_value(func_name)
        self.log(f'ENTER: FUNCTION {func_name}')
        # Evaluate the actual params while the caller's frame is still on top,
        # exactly as visit_proccall does. Evaluating them after the push would
        # look argument variables up in the new, still empty, function frame.
        actual_param_values = [
            self.visit(actual_param) for actual_param in node.actual_params
        ]
        func_frame = Frame(name=func_name, type=FrameType.FUNCTION)
        self.callstack.push(func_frame)
        current_frame: Frame = self.callstack.peek()
        # map actual params to formal params
        for (formal_param, actual_param_value) in zip(func_node.params, actual_param_values):
            current_frame.define(formal_param.var_node.name)
            current_frame.set_value(formal_param.var_node.name, actual_param_value)
        self.visit(func_node.block)
        self.log(str(self.callstack))
        self.log(f'LEAVE: FUNCTION {func_name}')
        # Read the result before the frame is discarded.
        return_val = current_frame.return_val
        self.callstack.pop()
        if return_val is None:
            self.error(error_code=ErrorCode.MISSING_RETURN, token=node.token)
        return return_val

    def visit_condition(self, node: Condition):
        """Run the THEN branch when the condition is truthy, else the ELSE
        branch if there is one."""
        if self.visit(node.condition_node):
            self.visit(node.then_node)
        elif node.else_node is not None:
            self.visit(node.else_node)

    def visit_then(self, node: Then):
        """Visit the statement held by the THEN node."""
        self.visit(node.child)

    def visit_else(self, node: Else):
        """Visit the statement held by the ELSE node."""
        self.visit(node.child)

    def visit_while(self, node: WhileLoop):
        """Run the loop body while the condition evaluates to True.

        ContinueError and BreakError raised from the body implement the
        'continue' and 'break' statements.
        """
        # NOTE(review): the loop continues only when the condition is exactly
        # 'True' (identity check), whereas visit_condition accepts any truthy
        # value -- confirm the difference is intended.
        # NOTE(review): 'conditon_node' (sic) presumably matches the attribute
        # name declared on WhileLoop -- verify before renaming.
        while self.visit(node.conditon_node) is True:
            try:
                self.visit(node.body_node)
            except ContinueError:
                continue
            except BreakError:
                break

    def visit_continue(self, node: Continue):
        """Signal 'continue' to the enclosing visit_while."""
        raise ContinueError()

    def visit_break(self, node: Break):
        """Signal 'break' to the enclosing visit_while."""
        raise BreakError()

    def interpret(self):
        """Parse the source, run the semantic analyzer, then execute the tree."""
        ast = self.parser.parse()
        self.analyzer.visit(ast)
        self.visit(ast)
class VM:
    """This class encapsulates all the VM processing."""

    def __init__(self):
        """Initialise the VM state and create the components it drives."""
        self.setUpLogging()

        # Program counter: position of the next instruction to execute.
        self.PC = 0
        self.endAddress = 0

        # Structure of the stores used in the vm.
        self.variables = {}
        self.code = []
        self.rulesCode = []
        self.macrosCode = []
        self.preprocessCode = []
        self.trie = SystemTrie()

        # Current code section in execution (a macro, a rule, ...).
        self.currentCodeSection = self.code

        # Execution state of the vm.
        self.status = VM_STATUS.HALTED

        # Transfer stage (chunker, interchunk or postchunk).
        self.transferStage = None
        # Chunker mode to process in shallow mode or advanced transfer.
        self.chunkerMode = None

        # Input will be divided in words with their patterns information.
        self.words = []
        self.superblanks = []
        self.lastSuperblank = -1
        self.currentWords = []
        self.nextPattern = 0

        # Components used by the vm.
        self.tokenizer = None
        self.callStack = CallStack(self)
        self.stack = SystemStack()
        self.loader = None
        self.interpreter = Interpreter(self)

        # Components used only in debug mode.
        self.debugger = None
        self.debugMode = False

        self.input = sys.stdin
        # We use 'buffer' to get a stream of bytes, not str, because we want to
        # encode it using utf-8 (just for safety).
        self.output = sys.stdout.buffer

    def setUpLogging(self):
        """Set at least an error through stderr logger"""
        self.formatStr = "%(levelname)s: %(filename)s[%(lineno)d]:\t%(message)s"
        self.logger = logging.getLogger("vm")

        errorHandler = logging.StreamHandler(sys.stderr)
        errorHandler.setFormatter(logging.Formatter(self.formatStr))
        errorHandler.setLevel(logging.ERROR)
        self.logger.addHandler(errorHandler)

    def setDebugMode(self):
        """Set the debug mode, creating a debugger and setting it up as a proxy."""
        self.debugMode = True
        self.debugger = Debugger(self, self.interpreter)
        # Set the debugger as a proxy.
        self.interpreter = self.debugger

        # Create a logging handler for debugging messages.
        debugHandler = logging.StreamHandler(sys.stdout)
        debugHandler.setFormatter(logging.Formatter(self.formatStr))
        debugHandler.setLevel(logging.DEBUG)
        self.logger.addHandler(debugHandler)

    def setLoader(self, header, t1xFile):
        """Set the loader to use depending on the header of the code file.

        Returns True when a loader was set, False when the header is not
        recognised (only "assembly" headers are handled).
        """
        if "assembly" in header:
            self.loader = AssemblyLoader(self, t1xFile)
        else:
            return False
        return True

    def setTransferStage(self, transferHeader):
        """Set the transfer stage to process by the vm.

        Also creates the tokenizer for that stage. When the header matches
        none of the known stages, nothing is changed.
        """
        if "transfer" in transferHeader:
            self.transferStage = TRANSFER_STAGE.CHUNKER
            self.tokenizer = TransferWordTokenizer()
            # Set chunker mode, by default 'lu'.
            if "chunk" in transferHeader:
                self.chunkerMode = CHUNKER_MODE.CHUNK
            else:
                self.chunkerMode = CHUNKER_MODE.LU
        elif "interchunk" in transferHeader:
            self.transferStage = TRANSFER_STAGE.INTERCHUNK
            self.tokenizer = ChunkWordTokenizer()
        elif "postchunk" in transferHeader:
            self.transferStage = TRANSFER_STAGE.POSTCHUNK
            self.tokenizer = ChunkWordTokenizer(solveRefs=True, parseContent=True)

    def tokenizeInput(self):
        """Call to the tokenizer to divide the input in tokens."""
        self.words, self.superblanks = self.tokenizer.tokenize(self.input)

    def initializeVM(self):
        """Execute code to initialize the VM, e.g. default values for vars."""
        self.PC = 0
        self.status = VM_STATUS.RUNNING
        # The PC is not advanced here; the loop relies on the interpreter
        # moving it (or changing the status) while executing.
        while self.status == VM_STATUS.RUNNING and self.PC < len(self.code):
            self.interpreter.execute(self.code[self.PC])

    def getSourceWord(self, pos):
        """Get the part of a source word needed for pattern matching, depending
        on the transfer stage.

        Raises IndexError when 'pos' is past the end of the words.
        """
        if self.transferStage == TRANSFER_STAGE.CHUNKER:
            return self.words[pos].source.lu
        elif self.transferStage == TRANSFER_STAGE.INTERCHUNK:
            word = self.words[pos].chunk
            return word.attrs["lem"] + word.attrs["tags"]
        else:
            return self.words[pos].chunk.attrs["lem"]

    @staticmethod
    def _lowerLemma(pattern):
        """Lowercase the lemma of a pattern, leaving its tags untouched.

        The lemma is everything before the first "<". A pattern without any
        tag is all lemma, so it is lowercased completely.
        """
        tag = pattern.find("<")
        if tag == -1:
            # Without this guard the slices below would use -1 as the cut
            # point and leave the last character in its original case.
            return pattern.lower()
        return pattern[:tag].lower() + pattern[tag:]

    def getNextInputPattern(self):
        """Get the next input pattern to analyze, lowering the lemma first.

        Returns None, without advancing, when there are no more words.
        """
        try:
            pattern = self.getSourceWord(self.nextPattern)
        except IndexError:
            return None
        self.nextPattern += 1
        return self._lowerLemma(pattern)

    def getUniqueSuperblank(self, pos):
        """Get the superblank at pos avoiding duplicates.

        Returns "" when 'pos' was the last position requested or when there
        is no superblank at 'pos'.
        """
        try:
            if pos != self.lastSuperblank:
                # Remembered even if the lookup below fails.
                self.lastSuperblank = pos
                return self.superblanks[pos]
        except IndexError:
            pass

        return ""

    def selectNextRule(self):
        """Select the next rule to execute depending on the transfer stage."""
        if self.transferStage == TRANSFER_STAGE.POSTCHUNK:
            self.selectNextRulePostChunk()
        else:
            self.selectNextRuleLRLM()

    def selectNextRulePostChunk(self):
        """Select the next rule trying to match patterns one by one."""
        # Go through all the patterns until one matches a rule.
        while self.nextPattern < len(self.words):
            startPatternPos = self.nextPattern
            pattern = self.getNextInputPattern()
            ruleNumber = self.trie.getRuleNumber(pattern)
            if ruleNumber is not None:
                # print('Pattern "{}" match rule: {}'.format(pattern, ruleNumber))
                self.setRuleSelected(ruleNumber, startPatternPos, pattern)
                return
            else:
                self.processUnmatchedPattern(self.words[startPatternPos])

        # if there isn't any rule at all to execute, stop the vm.
        self.status = VM_STATUS.HALTED

    def selectNextRuleLRLM(self):
        """Select the next rule to execute matching the LRLM pattern."""
        longestMatch = None
        nextPatternToProcess = self.nextPattern

        # Go through all the patterns until one matches a rule.
        while self.nextPattern < len(self.words):
            startPatternPos = self.nextPattern
            # Get the next pattern to process
            pattern = self.getNextInputPattern()
            curNodes = self.trie.getPatternNodes(pattern)
            nextPatternToProcess += 1

            # Get the longest match, left to right
            fullPattern = pattern
            while len(curNodes) > 0:
                # Update the longest match if needed.
                ruleNumber = self.trie.getRuleNumber(fullPattern)
                if ruleNumber is not None:
                    longestMatch = ruleNumber
                    nextPatternToProcess = self.nextPattern

                # Continue trying to match current pattern + the next one.
                pattern = self.getNextInputPattern()
                if pattern:
                    fullPattern += pattern
                nextNodes = []
                for node in curNodes:
                    nextNodes.extend(self.trie.getPatternNodes(pattern, node))
                curNodes = nextNodes

            # If the pattern doesn't match, we will continue with the next one.
            # If there is a match of a group of patterns, we will continue with
            # the last unmatched pattern.
            self.nextPattern = nextPatternToProcess

            # Get the full pattern matched by the rule.
            if self.nextPattern < len(self.words):
                end = fullPattern.find(self.getSourceWord(self.nextPattern))
                if end > 0:
                    fullPattern = fullPattern[:end]

            # If there is a longest match, set the rule to process
            if longestMatch is not None:
                # print('Pattern "{}" match rule: {}'.format(fullPattern, longestMatch))
                self.setRuleSelected(longestMatch, startPatternPos, fullPattern)
                return
            # Otherwise, process the unmatched pattern.
            else:
                self.processUnmatchedPattern(self.words[self.nextPattern - 1])

            longestMatch = None

        # if there isn't any rule at all to execute, stop the vm.
        self.status = VM_STATUS.HALTED

    def setRuleSelected(self, ruleNumber, startPos, pattern):
        """Set a rule and its words as current ones."""
        # Output the leading superblank of the matched pattern.
        self.writeOutput(self.getUniqueSuperblank(startPos))

        # Add only a reference to the index pos of words, to avoid copying them.
        # 'range' yields an empty list if startPos is already past nextPattern.
        wordsIndex = list(range(startPos, self.nextPattern))

        # Create an entry in the call stack with the rule to execute.
        self.callStack.push("rules", ruleNumber, wordsIndex)

        if self.debugMode:
            self.debugger.ruleSelected(pattern, ruleNumber)

    def processRuleEnd(self):
        """Do all the processing needed when rule ends."""
        # Output the trailing superblank of the matched pattern.
        self.writeOutput(self.getUniqueSuperblank(self.nextPattern))

    def processUnmatchedPattern(self, word):
        """Output unmatched patterns as the default form."""
        default = ""

        # Output the leading superblank of the unmatched pattern.
        default += self.getUniqueSuperblank(self.nextPattern - 1)

        # For the chunker, output the default version of the unmatched pattern.
        if self.transferStage == TRANSFER_STAGE.CHUNKER:
            # If the target word is empty, we don't need to output anything.
            if word.target.lu != "":
                wordTL = "^" + word.target.lu + "$"
                if self.chunkerMode == CHUNKER_MODE.CHUNK:
                    # wordTL[1] is the first character of the target word.
                    if wordTL[1] == "*":
                        default += "^unknown<unknown>{" + wordTL + "}$"
                    else:
                        default += "^default<default>{" + wordTL + "}$"
                else:
                    default += wordTL

        # For the interchunk stage only need to output the complete chunk.
        elif self.transferStage == TRANSFER_STAGE.INTERCHUNK:
            default += "^" + word.chunk.lu + "$"

        # Lastly, for the postchunk stage output the lexical units inside chunks
        # with the case of the chunk pseudolemma.
        else:
            default += word.chunk.attrs["chcontent"][1:-1]

        # Output the trailing superblank of the matched pattern.
        default += self.getUniqueSuperblank(self.nextPattern)

        self.writeOutput(default)

    def terminateVM(self):
        """Do all the processing needed when the vm is being turned off."""
        pass

    def writeOutput(self, string):
        """A single entry point to write strings to the output."""
        self.output.write(string.encode("utf-8"))

    def run(self):
        """Load, preprocess and execute the contents of the files.

        Any exception is logged with its traceback and the process exits
        with status 1.
        """
        try:
            self.loader.load()
            self.interpreter.preprocess()
            self.initializeVM()
            self.tokenizeInput()

            if self.debugMode:
                self.debugger.start()

            # Select the first rule. If there isn't one, the vm work has ended.
            if self.status == VM_STATUS.RUNNING:
                self.selectNextRule()
            while self.status == VM_STATUS.RUNNING:
                # Execute the rule selected until it ends.
                while self.status == VM_STATUS.RUNNING and self.PC < self.endAddress:
                    self.interpreter.execute(self.currentCodeSection[self.PC])

                self.processRuleEnd()
                # Select the next rule to execute.
                if self.status == VM_STATUS.RUNNING:
                    self.selectNextRule()

        except Exception as e:
            self.logger.exception(e)
            # sys.exit rather than the 'exit' helper, which only exists when
            # the 'site' module has been loaded.
            sys.exit(1)

        self.terminateVM()

    def printCodeSections(self):
        """Print all the code sections for information or debugging purposes."""
        self.loader.printSection(self.code, "Code")
        self.loader.printSection(self.preprocessCode, "Preprocess")
        self.loader.printSection(self.rulesCode, "Rules", enum=True)
        self.loader.printSection(self.macrosCode, "Macros", enum=True)