def match(self, lexer: Lexer, debug: int = 0, partial=True): """ try to match lexer's current token with a production list in productions return AST instance: match SUCCESS with a production list in productions return None: match FAILED with whole productions """ tree = Ast(self.name, lexer.current_position(), grammars=[]) recursive_productions = [] for production_list in self.productions: lexer.anchor() if debug: print(' ' * ( debug - 1) + f'### {self.name}.match() with production_list: {production_list}') # productions: [[G1, G2], [G3, G4], ...] <-> G1 G2 | G3 G4 | ... # # try to match all tokes with a production_list, mark the tracker # case 1: matched & break loop # case 2: unmatched, try next production_list in productions until # loop ends and function returns `None` success = self.build_ast(tree, lexer, production_list, recursive_productions, debug) if debug else self.build_ast(tree, lexer, production_list, recursive_productions) if success is True or success is None: # success case or Epsilon case if debug: print( ' ' * (debug - 1) + f'+++ {self.name}.match() SUCCESS') break else: # failed case continue else: if debug: print(' ' * (debug - 1) + f'--- {self.name}.match() FAILED') return None # one production_list is fully matched, pop anchor stack by one lexer.release_anchor() if lexer.current_token is None or tree.children or partial: return tree return None
def match(self, lexer: Lexer, debug: int = 0, partial=True):
    if debug:
        print(' ' * (debug - 1)
              + f'### Group {self.name}.match(), calling super\'s match()')
    tree = Ast(self.name, lexer.current_position(), grammars=[])
    if lexer.current_token is None\
            or lexer.current_token.spelling == '_EOF':
        if self.repeat[0] == 0:
            return tree
        return None
    lexer.anchor()
    repetition = 0
    if self.repeat[1] == -1:
        # can repeat for infinite times: grammar* | grammar+ | grammar{a,}
        while True:
            nodes = super().match(lexer, debug)
            if nodes is None:
                break
            tree.extend(nodes)
            repetition += 1
            if lexer.current_token is None:
                break
    else:
        # repeat for limited times: grammar{a, b} | grammar{a} | [grammar]
        while True:
            if repetition >= self.repeat[1]:
                break
            nodes = super().match(lexer, debug)
            if nodes is None:
                break
            tree.extend(nodes)
            repetition += 1
            if lexer.current_token is None:
                break
    if repetition < self.repeat[0]:
        # if actual repetition is smaller than minimum times
        if debug:
            print(' ' * (debug - 1)
                  + f'--- Group {self.name}.match() FAILED in minimal repetition')
        lexer.backward()
        if debug:
            print(f'<<< lexer backwarded, current token: {lexer.current_token}')
        return None
    if debug:
        print(' ' * (debug - 1) + f'+++ Group {self.name}.match() SUCCESS')
    lexer.release_anchor()
    return tree
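
# Repeat-bound sketch (illustrative; assumes self.repeat is a (min, max)
# tuple where -1 means "unbounded", which is what the two branches above
# imply). Hypothetical encodings:
#
#     grammar*        ->  repeat == (0, -1)   # zero or more
#     grammar+        ->  repeat == (1, -1)   # one or more
#     [grammar]       ->  repeat == (0, 1)    # optional
#     grammar{2, 4}   ->  repeat == (2, 4)    # bounded range
#
# With repeat == (1, -1) and no occurrence matched, repetition stays 0,
# which is below repeat[0], so the lexer is rewound and None is returned.
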
def match(self, lexer: Lexer, debug: int = 0):
    # the most fundamental match() function:
    # match current_token's spelling with Literal's regex text
    if lexer.current_token is None\
            or lexer.current_token.spelling == '_EOF':
        return None
    # reserve epsilon expression match to real 'EPSILON_EXPR' instance
    if self.name != 'EPSILON_EXPR'\
            and lexer.current_token.spelling == '_e':
        return None
    # if a TEXT grammar meets a text-literal token, return an ast node
    # directly
    if self.name == 'TEXT'\
            and lexer.current_token.type == TokenType.TEXTLITERAL:
        node = Ast(self.name, lexer.current_token.position,
                   grammar=lexer.current_token.spelling)
        if debug:
            print(' ' * (debug - 1)
                  + f'+++ {self.name}.match() {repr(self.regex)} with TEXT '
                    f'{lexer.current_token.spelling} finished')
        lexer.forward()
        if debug:
            print(f'>>> lexer forwarded, current token: {lexer.current_token}')
        return node
    # if a NAME grammar meets a token with a reserved word, return None
    # directly; a reserved word can only be matched by a STRING grammar
    if self.name == 'NAME'\
            and lexer.current_token.spelling in ReservedNames.names:
        return None
    # a text-literal token cannot match any other grammars
    if lexer.current_token.type != TokenType.TEXTLITERAL:
        regex_obj = re.compile(self.regex)
        match_result = regex_obj.match(lexer.current_token.spelling)
        if self.name == 'STRING' and not (
                match_result and match_result.span()[1] == len(
                    lexer.current_token.spelling)):
            # if atom `STRING` match failed, try a special case:
            match_result = re.match(
                '\\\'(\\\'[\\w\\W]*?\\\'|\\"[\\w\\W]*?\\")\\\'',
                lexer.current_token.spelling)
        if match_result and match_result.span()[1] == len(
                lexer.current_token.spelling):
            # matched, build AST node for this token's spelling, move
            # lexer to next token
            node = Ast(self.name, lexer.current_token.position,
                       grammar=lexer.current_token.spelling)
            if debug:
                print(' ' * (debug - 1)
                      + f'+++ {self.name}.match() {repr(self.regex)} with '
                        f'token <{lexer.current_token}> SUCCESS')
            lexer.forward()
            if debug:
                print(f'>>> lexer forwarded, current token: '
                      f'{lexer.current_token}')
            return node
    if debug:
        print(' ' * (debug - 1)
              + f'--- {self.name}.match() {repr(self.regex)} with '
                f'token <{lexer.current_token}> FAILED')
    return None
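
# Full-span sketch (illustrative): the regex only counts as a match when it
# covers the token's entire spelling, which is why the code compares
# match_result.span()[1] with len(spelling). For a hypothetical NUMBER
# literal with regex r'[0-9]+':
#
#     m = re.match(r'[0-9]+', '42')    # m.span() == (0, 2) -> full match
#     m = re.match(r'[0-9]+', '42x')   # m.span() == (0, 2) != len('42x'),
#                                      # so match() returns None
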
def build_ast(self, tree: Ast, lexer: Lexer, production_list: list,
              recursive_productions: list = None, debug: int = 0):
    """
    build ast tree on the given instance (parameter `tree`)

    return True: SUCCESS, tree is appended with nodes
    return False: FAILED, partial nodes are discarded (tree is emptied)
    return None: Epsilon
    """
    if debug:
        print(' ' * (debug - 1) + f'### {self.name}.build_ast()')
    for grammar in production_list:
        nodes = grammar.match(lexer, debug + 4) if debug else grammar.match(lexer)
        # this grammar not matched:
        # 1. abandon the whole production_list and skip the loop
        # 2. roll the lexer back (lexer.backward())
        if nodes is None:
            if debug:
                print(' ' * (debug - 1)
                      + f'--- {self.name}.build_ast() with grammar '
                        f'<{grammar}> FAILED')
            tree.empty()
            lexer.backward()
            if debug:
                print(f'<<< lexer backwarded, current token: '
                      f'{lexer.current_token}')
            return False
        if nodes == '_E':
            # Epsilon
            if debug:
                print(' ' * (debug - 1) + '+++ Epsilon match')
            tree.append(Ast('Epsilon', lexer.current_token.position,
                            grammar='_e'))
            return None
        if isinstance(grammar, Group):
            # grammar is a Group
            if not self.ignore_set:
                tree.extend(nodes)
            else:
                for node in nodes:
                    if node.name not in self.ignore_set:
                        tree.append(node)
        else:
            # grammar is a Token, only one production hence nodes is
            # actually a `node`
            if not self.ignore_set or nodes.name not in self.ignore_set:
                tree.append(nodes)
    # all grammars in the current production_list matched, operation SUCCESS
    if debug:
        print(' ' * (debug - 1) + f'+++ {self.name}.build_ast() SUCCESS')
    return True
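
# Return-value sketch (illustrative): build_ast() is tri-state, and the
# caller in match() treats None (Epsilon) like True, so an epsilon
# alternative ends the search just as a fully matched production_list does:
#
#     success = self.build_ast(tree, lexer, production_list, [])
#     if success is True or success is None:   # matched, or Epsilon
#         ...                                  # accept this alternative
#     else:                                    # success is False
#         ...                                  # try the next alternative
#
# Nodes whose name is in self.ignore_set (for example, grammars the grammar
# author chose to suppress, such as punctuation) are filtered out here, so
# they never appear as children of the resulting AST.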