def handle_newline(token):
    """Turn the indentation trailing a NEWLINE token into INDENT/DEDENT tokens.

    Queues the generated tokens on ``token_queue`` and returns ``token``
    unchanged.  Relies on enclosing/module scope for ``token_queue`` (a list),
    ``indent_level`` (a stack of indentation widths, bottom element 0),
    ``tab_len`` (spaces one tab counts for) and ``Token``.
    """
    text = token.getstr()
    # The new line's indentation is whatever follows the last '\n' in the
    # token text (the token may span several blank lines).
    indent_str = text.rsplit('\n', 1)[1]
    indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
    if indent > indent_level[-1]:
        # Deeper than the current block: open one new level.
        indent_level.append(indent)
        indent_token = Token('INDENT', indent_str)
        indent_token.source_pos = token.getsourcepos()
        token_queue.append(indent_token)
    else:
        # Shallower (or equal): close levels until we match.
        while indent < indent_level[-1]:
            indent_level.pop()
            dedent_token = Token('DEDENT', indent_str)
            # BUG FIX: DEDENT tokens previously carried no source_pos,
            # unlike the INDENT tokens above; set it so downstream error
            # reporting has a position for every synthetic token.
            dedent_token.source_pos = token.getsourcepos()
            token_queue.append(dedent_token)
    return token
def handle_newline(token):
    """Turn the indentation trailing a NEWLINE token into INDENT/DEDENT tokens.

    Queues the generated tokens on ``token_queue`` and returns ``token``
    unchanged.  Relies on enclosing/module scope for ``token_queue`` (a list),
    ``indent_level`` (a stack of indentation widths, bottom element 0),
    ``tab_len`` (spaces one tab counts for) and ``Token``.
    """
    text = token.getstr()
    # The new line's indentation is whatever follows the last '\n' in the
    # token text (the token may span several blank lines).
    indent_str = text.rsplit('\n', 1)[1]
    indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
    if indent > indent_level[-1]:
        # Deeper than the current block: open one new level.
        indent_level.append(indent)
        indent_token = Token('INDENT', indent_str)
        indent_token.source_pos = token.getsourcepos()
        token_queue.append(indent_token)
    else:
        # Shallower (or equal): close levels until we match.
        while indent < indent_level[-1]:
            indent_level.pop()
            dedent_token = Token('DEDENT', indent_str)
            # BUG FIX: DEDENT tokens previously carried no source_pos,
            # unlike the INDENT tokens above; set it so downstream error
            # reporting has a position for every synthetic token.
            dedent_token.source_pos = token.getsourcepos()
            token_queue.append(dedent_token)
    return token
def mod_lex(lexer, repl_mode=False):
    """Wrap ``lexer`` in a generator that adds indentation-based structure.

    Yields the underlying tokens, inserting INDENT/DEDENT tokens derived
    from the indentation after each NEWLINE, promoting NAME tokens that
    match a keyword rule in ``klg.rules``, and splitting a ``&name`` NAME
    token into AMP, COMMA, NAME.  NEWLINEs inside open parens/braces/
    brackets are ignored.  In ``repl_mode``, yields REPL_CONTINUE instead
    of closing still-open blocks so the caller can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}
    token_queue = []            # INDENT/DEDENT tokens pending emission
    indent_level = [0]          # stack of active indentation widths
    ignore_newline = False      # True while inside an open paren pair
    paren_level = 0
    tab_len = 4                 # one tab counts as 4 spaces of indentation

    def handle_newline(token):
        # Compare the indentation after the token's last '\n' with the top
        # of the indent stack; queue INDENT/DEDENT tokens accordingly and
        # return the NEWLINE token itself.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.append(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                token_queue.append(dedent_token)
        return token

    for token in lexer:
        # Drain any queued synthetic tokens before the next real one.
        while len(token_queue) > 0:
            queued_token = token_queue.pop()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            yield queued_token
        if token.name == 'NAME':
            # Promote a NAME that matches a keyword rule.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        elif token.gettokentype() == 'NEWLINE':
            if not ignore_newline:
                yield handle_newline(token)
            continue
        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            # Split '&name' into AMP, COMMA, NAME.
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # BUG FIX: this line previously re-assigned amp.source_pos,
            # leaving the COMMA token without a source position.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        # End of input: close every still-open block, then flush the queue.
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
        for token in token_queue:
            yield token
def mod_lex(lexer, repl_mode=False):
    """Wrap ``lexer`` in a generator that adds indentation-based structure.

    Yields the underlying tokens, inserting INDENT/DEDENT tokens derived
    from the indentation after each NEWLINE, promoting NAME tokens that
    match a keyword rule in ``klg.rules``, and splitting a ``&name`` NAME
    token into AMP, COMMA, NAME.  NEWLINEs inside open parens/braces/
    brackets are ignored.  In ``repl_mode``, yields REPL_CONTINUE instead
    of closing still-open blocks so the caller can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}
    token_queue = []            # INDENT/DEDENT tokens pending emission
    indent_level = [0]          # stack of active indentation widths
    ignore_newline = False      # True while inside an open paren pair
    paren_level = 0
    tab_len = 4                 # one tab counts as 4 spaces of indentation

    def handle_newline(token):
        # Compare the indentation after the token's last '\n' with the top
        # of the indent stack; queue INDENT/DEDENT tokens accordingly and
        # return the NEWLINE token itself.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.append(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                token_queue.append(dedent_token)
        return token

    for token in lexer:
        # Drain any queued synthetic tokens before the next real one.
        while len(token_queue) > 0:
            queued_token = token_queue.pop()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            yield queued_token
        if token.name == 'NAME':
            # Promote a NAME that matches a keyword rule.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        elif token.gettokentype() == 'NEWLINE':
            if not ignore_newline:
                yield handle_newline(token)
            continue
        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            # Split '&name' into AMP, COMMA, NAME.
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # BUG FIX: this line previously re-assigned amp.source_pos,
            # leaving the COMMA token without a source position.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        # End of input: close every still-open block, then flush the queue.
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
        for token in token_queue:
            yield token
def mod_lex(lexer, repl_mode=False):
    """Wrap ``lexer`` in a generator that adds indentation-based structure.

    Yields the underlying tokens, inserting INDENT/DEDENT tokens derived
    from the indentation after each NEWLINE, promoting NAME tokens that
    match a keyword rule in ``klg.rules``, and splitting ``&name`` NAME
    tokens into AMP, COMMA, NAME.  A NEWLINE is suppressed inside open
    parens/braces/brackets, directly after an infix operator, and when the
    next line starts with an infix operator or a DOT_NAME (continuation
    lines).  In ``repl_mode``, yields REPL_CONTINUE instead of closing
    still-open blocks so the caller can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}
    token_queue = Queue()       # tokens buffered by lookahead / newline handling
    indent_level = [0]          # stack of active indentation widths
    ignore_newline = False      # True while inside an open paren pair
    paren_level = 0
    tab_len = 4                 # one tab counts as 4 spaces of indentation

    def handle_newline(token):
        # Compare the indentation after the token's last '\n' with the top
        # of the indent stack; queue INDENT/DEDENT tokens accordingly and
        # return the NEWLINE token itself.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.put(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                token_queue.put(dedent_token)
        return token

    for token in lexer:
        # Drain any buffered tokens before processing the next real one.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            if (queued_token.gettokentype() == 'NAME'
                    and queued_token.getstr().startswith('&')):
                # Split '&name' into AMP, COMMA, NAME.
                amp = Token('AMP', '&')
                amp.source_pos = queued_token.getsourcepos()
                comma = Token('COMMA', ',')
                # BUG FIX: this line previously re-assigned amp.source_pos,
                # leaving the COMMA token without a source position.
                comma.source_pos = queued_token.getsourcepos()
                name = Token('NAME', queued_token.getstr()[1:])
                name.source_pos = queued_token.getsourcepos()
                yield amp
                yield comma
                yield name
            else:
                yield queued_token
        if token.name == 'NAME':
            # Promote a NAME that matches a keyword rule.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # Lookahead: a NEWLINE directly after an infix operator is
            # dropped so the expression may continue on the next line.
            # BUG FIX: a bare next(lexer) at end of input raises
            # StopIteration, which PEP 479 turns into RuntimeError inside
            # a generator; guard it.
            try:
                ahead_token = next(lexer)
            except StopIteration:
                ahead_token = None
            if ahead_token is not None and ahead_token.gettokentype() != 'NEWLINE':
                token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            try:
                ahead_token = next(lexer)
                _set_keyword(ahead_token)
                ahead_token_type = ahead_token.gettokentype()
            except StopIteration:
                ahead_token = None
                ahead_token_type = None
            # Suppress the NEWLINE inside parens, or when the next line
            # starts with an infix operator or '.name' continuation.
            if not (ignore_newline or ((ahead_token is not None) and
                                       ((ahead_token_type in INFIX_OPERATORS) or
                                        ahead_token_type == 'DOT_NAME'))):
                yield handle_newline(token)
            if ahead_token is not None:
                token_queue.put(ahead_token)
            continue
        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            # Split '&name' into AMP, COMMA, NAME.
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # BUG FIX: same mis-assignment as above; set comma's position.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        # End of input: flush buffered tokens, then close open blocks.
        while not token_queue.empty():
            yield token_queue.get()
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
def mod_lex(lexer, repl_mode=False):
    """Wrap ``lexer`` in a generator that adds indentation-based structure.

    Yields the underlying tokens, inserting INDENT/DEDENT tokens derived
    from the indentation after each NEWLINE, promoting NAME tokens that
    match a keyword rule in ``klg.rules``, and splitting ``&name`` NAME
    tokens into AMP, COMMA, NAME.  A NEWLINE is suppressed inside open
    parens/braces/brackets, directly after an infix operator, and when the
    next line starts with an infix operator or a DOT_NAME (continuation
    lines).  In ``repl_mode``, yields REPL_CONTINUE instead of closing
    still-open blocks so the caller can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}
    token_queue = Queue()       # tokens buffered by lookahead / newline handling
    indent_level = [0]          # stack of active indentation widths
    ignore_newline = False      # True while inside an open paren pair
    paren_level = 0
    tab_len = 4                 # one tab counts as 4 spaces of indentation

    def handle_newline(token):
        # Compare the indentation after the token's last '\n' with the top
        # of the indent stack; queue INDENT/DEDENT tokens accordingly and
        # return the NEWLINE token itself.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.put(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                token_queue.put(dedent_token)
        return token

    for token in lexer:
        # Drain any buffered tokens before processing the next real one.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            if (queued_token.gettokentype() == 'NAME'
                    and queued_token.getstr().startswith('&')):
                # Split '&name' into AMP, COMMA, NAME.
                amp = Token('AMP', '&')
                amp.source_pos = queued_token.getsourcepos()
                comma = Token('COMMA', ',')
                # BUG FIX: this line previously re-assigned amp.source_pos,
                # leaving the COMMA token without a source position.
                comma.source_pos = queued_token.getsourcepos()
                name = Token('NAME', queued_token.getstr()[1:])
                name.source_pos = queued_token.getsourcepos()
                yield amp
                yield comma
                yield name
            else:
                yield queued_token
        if token.name == 'NAME':
            # Promote a NAME that matches a keyword rule.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # Lookahead: a NEWLINE directly after an infix operator is
            # dropped so the expression may continue on the next line.
            # BUG FIX: a bare next(lexer) at end of input raises
            # StopIteration, which PEP 479 turns into RuntimeError inside
            # a generator; guard it.
            try:
                ahead_token = next(lexer)
            except StopIteration:
                ahead_token = None
            if ahead_token is not None and ahead_token.gettokentype() != 'NEWLINE':
                token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            try:
                ahead_token = next(lexer)
                _set_keyword(ahead_token)
                ahead_token_type = ahead_token.gettokentype()
            except StopIteration:
                ahead_token = None
                ahead_token_type = None
            # Suppress the NEWLINE inside parens, or when the next line
            # starts with an infix operator or '.name' continuation.
            if not (ignore_newline or ((ahead_token is not None) and
                                       ((ahead_token_type in INFIX_OPERATORS) or
                                        ahead_token_type == 'DOT_NAME'))):
                yield handle_newline(token)
            if ahead_token is not None:
                token_queue.put(ahead_token)
            continue
        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            # Split '&name' into AMP, COMMA, NAME.
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # BUG FIX: same mis-assignment as above; set comma's position.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        # End of input: flush buffered tokens, then close open blocks.
        while not token_queue.empty():
            yield token_queue.get()
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
def mod_lex(lexer, repl_mode=False):
    """Wrap ``lexer`` in a generator that normalizes its token stream.

    Promotes NAME/NAME_LPAREN tokens matching a keyword rule in
    ``klg.rules``, drops NEWLINE tokens entirely, discards a NEWLINE that
    directly follows an infix operator (line continuation), and splits
    ``&name`` NAME tokens into AMP, COMMA, NAME.  In ``repl_mode``, yields
    REPL_CONTINUE while parens are still open so the caller can prompt for
    more input.  (This version no longer emits INDENT/DEDENT tokens.)
    """
    paren_openers = {
        'LPAREN', 'LBRACE', 'LBRACK', 'NAME_LPAREN', 'DOT_NAME_LPAREN'
    }
    paren_closers = {
        'RPAREN', 'RBRACE', 'RBRACK', 'NAME_RPAREN', 'DOT_NAME_RPAREN'
    }
    token_queue = Queue()   # tokens buffered by infix-operator lookahead
    paren_level = 0

    for token in lexer:
        # Drain any buffered tokens before processing the next real one.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            if (queued_token.gettokentype() == 'NAME'
                    and queued_token.getstr().startswith('&')):
                # Split '&name' into AMP, COMMA, NAME.
                amp = Token('AMP', '&')
                amp.source_pos = queued_token.getsourcepos()
                comma = Token('COMMA', ',')
                # BUG FIX: this line previously re-assigned amp.source_pos,
                # leaving the COMMA token without a source position.
                comma.source_pos = queued_token.getsourcepos()
                name = Token('NAME', queued_token.getstr()[1:])
                name.source_pos = queued_token.getsourcepos()
                yield amp
                yield comma
                yield name
            else:
                yield queued_token
        if token.name in ('NAME', 'NAME_LPAREN'):
            # Promote a (possibly call-form) NAME that matches a keyword
            # rule.  The original had two identical loops, one per token
            # type; merged into one.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # Lookahead: a NEWLINE directly after an infix operator is
            # dropped so the expression may continue on the next line.
            # BUG FIX: a bare next(lexer) at end of input raises
            # StopIteration, which PEP 479 turns into RuntimeError inside
            # a generator; guard it.
            try:
                ahead_token = next(lexer)
            except StopIteration:
                ahead_token = None
            if ahead_token is not None:
                _set_keyword(ahead_token)
                if ahead_token.gettokentype() != 'NEWLINE':
                    token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            # Newlines carry no structure in this version; skip them.
            continue
        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            # Split '&name' into AMP, COMMA, NAME.
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # BUG FIX: same mis-assignment as above; set comma's position.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token
    if repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        # End of input: flush anything still buffered by lookahead.
        while not token_queue.empty():
            yield token_queue.get()