def _internal_tests():
    tests = [
        ("(", "LPAREN", "("),
        (")", "RPAREN", ")"),
        ("1234", "INT", 1234),
        ("-1234", "INT", -1234),
        ("1.", "FLOAT", 1.0),
        ("-1.", "FLOAT", -1.0),
        ("0.1", "FLOAT", 0.1),
        ("0.1e-1", "FLOAT", 0.1e-1),
        (r'""', "STRING", ""),
        (r'"foo"', "STRING", "foo"),
        (r'"\""', "STRING", '"'),
        (r'"\n\t\"\\"', "STRING", '\n\t"\\'),
        ("#1.0\n(", "LPAREN", "("),
        ("\n\n(", "LPAREN", "("),
        ("&1", "SYMBOL", "&1"),
        ("-", "SYMBOL", "-"),
    ]
    for i in tests:
        (s, type, value) = i
        lex.input(s)
        tok = lex.token()
        try:
            assert tok.type == type
            assert tok.value == value
            assert not lex.token()
        except AssertionError, e:
            sys.stderr.write("squeamish: test failed: %s led to %s\n" % (`i`, tok))
            raise AssertionError
def run_lexer():
    tokens_neglect = ('OBRACK', 'CBRACK', 'COMMA', 'SHARP', 'LBRACE', 'RBRACE')
    st = []
    lb = {}
    fpath = "test.deout"
    f = open(fpath, "r")
    file = f.read()
    lex.input(file)
    f.close()
    i = 0
    flag = 0
    while 1:
        token = lex.token()             # Get a token
        if not token:
            break                       # No more tokens
        if token.type == 'INSTRUCTION':
            st.append([token.value])
            flag = 1
            i += 1
        elif token.type == 'NEWLINE':
            if flag == 1:
                flag = 0
        elif token.type == 'LABEL':
            lb[token.value] = i
        elif token.type == 'NUMBER' or token.type == 'REGISTER' \
                or token.type == 'IDENTIFIER':
            if flag == 1:
                st[-1].append(token.value)
        elif token.type in tokens_neglect:
            pass
        else:
            pass
    return st, lb
def tokenize(self, sourcecode, filesource='<stdin>'):
    "Tokenize the given string of source code."
    self.errmsg = NCPTL_Error(filesource)

    # Keep track of all the comments we've encountered by storing
    # a mapping from line number to comment (including the initial
    # hash character).
    self.line2comment = {}

    # Initialize the lexer.
    lex.lex(module=self)

    # Repeatedly invoke the lexer and return all of the tokens it produces.
    self.lineno = 1
    lex.input(sourcecode)
    self.toklist = []
    while 1:
        # Acquire the next token and assign it a line number if necessary.
        token = lex.token()
        if not token:
            break
        if token.lineno < self.lineno:
            token.lineno = self.lineno

        # Hack: Disambiguate op_mult and star on the parser's behalf.
        if token.type in ["comma", "rparen"]:
            try:
                if self.toklist[-1].type == "op_mult":
                    self.toklist[-1].type = "star"
            except IndexError:
                pass

        # We now have one more valid token.
        self.toklist.append(token)
    return self.toklist
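# A minimal sketch (not from the original module) of how a caller might use the
# tokenize() method above; MyLexer is a hypothetical name for whatever class
# defines it, and the input string is illustrative only.
lexer_obj = MyLexer()
toklist = lexer_obj.tokenize("x = 1 + 2", filesource='<example>')
for tok in toklist:
    print tok.type, tok.value, tok.lineno   # each entry is a PLY LexToken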
def test_lex():
    import sys
    prog = sys.stdin.read()
    lex.input(prog)
    while True:
        token = lex.token()
        if not token:
            break
        print "(%s, '%s', %d)" % (token.type, token.value, token.lineno)
def get_tokens(String):
    # Previously this called lex.input(s): it did not crash only because "s"
    # happened to pick up a value from the caller's scope. Use the actual
    # parameter instead.
    lex.input(String)
    l = []
    while 1:
        token = lex.token()
        if not token:
            break
        l.append(token)
    return l
def get_tokens(string):
    lex.input(string)   # was lex.input(s); "s" is not defined in this scope
    l = []
    while 1:
        token = lex.token()
        if not token:
            break
        l.append(token)
    return l
def token_list(rnc):
    lex.lex()
    lex.input(rnc)
    ts = []
    while 1:
        t = lex.token()
        if t is None:
            break
        ts.append(t)
    return ts
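# A small sketch (assumed, not part of the original module) of calling
# token_list(); judging by the parameter name, rnc presumably holds RELAX NG
# compact-syntax text, and the token types printed depend on the module's own
# t_* rules.
if __name__ == '__main__':
    for t in token_list('element doc { text }'):
        print t.type, repr(t.value)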
def test_lex():
    import sys
    file = open(sys.argv[1])
    lines = file.readlines()
    file.close()
    prog = "".join(lines)
    lex.input(prog)
    while True:
        token = lex.token()
        if not token:
            break
        print "(%s, '%s', %d)" % (token.type, token.value, token.lineno)
def lexTheCode(code):
    lex.input(code)
    # DEBUG OUTPUT
    print "----------------"
    print "TOKENS:"
    print "----------------"
    while True:
        tok = lex.token()
        if not tok:
            break
        print tok
def lexer(self, data):
    """
    Helper method to drive the lexer.
    Returns the list of tokens produced from the data string.
    """
    lex.input(data)
    tokens = []
    while 1:
        tok = lex.token()
        if not tok:
            break
        tokens.append(tok)
    return tokens
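# Possible call site for the helper above (assumed; the owning class is not
# shown in this excerpt). Note that it returns a list of PLY LexToken objects,
# not a string:
#
#     toks = self.lexer("a = b + 1")
#     for tok in toks:
#         print tok.type, tok.value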
def _external_tests():
    while 1:
        try:
            s = raw_input("squeamish> ")
        except EOFError:
            break
        if not s:
            continue
        lex.input(s)
        while 1:
            tok = lex.token()
            if tok:
                print `tok`
            else:
                break
def run_lexer():
    """This is just a debugging function that prints out a list of tokens;
    it's not actually called by the compiler or anything."""
    import sys
    file = open(sys.argv[1])
    lines = file.readlines()
    file.close()
    strings = ""
    for i in lines:
        strings += i
    lex.input(strings)
    while 1:
        token = lex.token()             # Get a token
        if not token:
            break                       # No more tokens
        print "(%s,'%s',%d)" % (token.type, token.value, token.lineno)
def compute_string(s, debug=False):
    lex.input(s)
    if debug:
        while 1:
            tok = lex.token()
            if not tok:
                break
            if tok.type != 'NEWLINE':
                print "line %d:%s(%s)" % (tok.lineno, tok.type, tok.value)
            else:
                print "line %d:%s(\\n)" % (tok.lineno, tok.type)
    result = yacc.parse(s)  # , debug=2)
    print result.__class__
    print explore(result, 0)
    print "------------------ End Explore ------------------"
    r = compute(result)
    print "\nResult = %s of type %s" % (r, r.__class__)
    print "\nListing vars"
    for k in vars:
        print "%s:%s:%s" % (k, vars[k].__class__, vars[k])
    return r
    t.lexer.lineno += len(t.value)

def t_error(t):
    raise SyntaxError("syntax error on line %d near '%s'\n" % (t.lineno, t.value))

def t_FLOAT(t):
    r'\d+\.\d+'
    t.value = float(t.value)
    return t

def t_NUMBER(t):
    r'\d+'
    t.value = float(t.value)
    return t

def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')
    # t.value = (t.value, symbol_lookup(t.value))
    return t

if __name__ == '__main__':
    lexer = lex.lex()
    lex.input(sys.stdin.read())
    while True:
        t = lex.token()
        if not t:
            break
        print t.value, '\t:\t', t.type
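# Illustrative helper (assumed, not in the original source): PLY tries
# function-based rules in the order they are defined, so t_FLOAT (declared
# before t_NUMBER) wins for input such as "3.14", which would otherwise lex
# as NUMBER(3) followed by an unmatched ".".
def _demo_rule_order():
    lex.lex()
    lex.input("3.14 7")
    while True:
        t = lex.token()
        if not t:
            break
        print t.type, t.value   # expected: FLOAT 3.14, then NUMBER 7.0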
# Build the lexer
import lex
lex.lex()

source = """ [[this]junk [theother=something]the text to be marked u p[another=thingy]junk ] """

lex.input(source)

if 0:
    while 1:
        token = lex.token()
        if not token:
            break
        # print "TOKEN", token

# #############################################################################
#
# Parsing rules

precedence = (
    # ('left','PLUS','MINUS')
)

# dictionary of names
names = {}
t_ignore = ' \t\r'

# Error handling rule
def t_error(t):
    # print "Illegal character '%s' on line %d" % (t.value[0], t.lineno)
    t.value = t.value[0]
    t.skip(1)
    return t

# Build the lexer
lexer = lex.lex(optimize=1)

def get_lexer():
    global lexer
    lexer = lex.lex(optimize=1)
    return lexer

# debugging
if __name__ == '__main__':      # pragma: no cover
    # Test it out
    data = open(sys.argv[1], "r").read()
    # Give the lexer some input
    lex.input(data)
    # Tokenize
    while 1:
        tok = lex.token()
        if not tok:
            break               # No more input
        print tok
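# Usage sketch (assumed, not from the original module): get_lexer() rebuilds
# the module-level lexer with PLY's optimize=1 flag, which skips most rule
# validation and caches the generated tables in a lextab module, so it suits
# production runs better than active development of the token rules.
def _demo_get_lexer(text):
    lexer = get_lexer()
    lexer.input(text)
    return [tok for tok in iter(lexer.token, None)]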
def __init__(self, fileName):
    self.main_node = Node.MainNode("main-node", "tmp")
    self.element_parse_stack = []
    debug.debug("Parse file : '" + fileName + "'")
    self.header_file_name = fileName
    self.anon_union_counter = [-1, 0]
    # load all the file data :
    headerFileStr = tools.file_read_data(fileName)
    # Strip out template declarations
    # TODO : What is the real need ???
    #headerFileStr = re.sub("template[\t ]*<[^>]*>", "", headerFileStr)
    # remove all needed \r unneeded ==> this simplify next resExp ...
    headerFileStr = re.sub("\r", "\r\n", headerFileStr)
    headerFileStr = re.sub("\r\n\n", "\r\n", headerFileStr)
    headerFileStr = re.sub("\r", "", headerFileStr)
    # TODO : Can generate some error ...
    headerFileStr = re.sub("\#if 0(.*?)(\#endif|\#else)", "", headerFileStr, flags=re.DOTALL)
    headerFileafter = re.sub("\@interface(.*?)\@end", "", headerFileStr, flags=re.DOTALL)
    if headerFileStr != headerFileafter:
        debug.debug(" Objective C interface ... ==> not supported")
        return
    #debug.verbose(headerFileStr)
    # Filter out Extern "C" statements. These are order dependent
    headerFileStr = re.sub(r'extern( |\t)+"[Cc]"( |\t)*{', "{", headerFileStr)
    headerFileStr = re.sub(r'\\\n', "##multiline##", headerFileStr)
    headerFileStr += '\n'
    debug.debug(headerFileStr)
    ###### debug.info(headerFileStr)
    self.stack = []        # token stack to find the namespace and the element name ...
    self.previous = None
    self.name_stack = []   #
    self.brace_depth = 0
    self.brace_depth_type = []
    self.last_comment = []
    self.sub_module_count_brace = 0
    lex.lex()
    lex.input(headerFileStr)
    self.cur_line = 0
    self.cur_char = 0
    self.count_pthese = 0
    while True:
        tok = lex.token()
        if not tok:
            break
        debug.verbose("TOK: " + str(tok))
        self.stack.append(tok.value)
        self.cur_line = tok.lineno
        self.cur_char = tok.lexpos
        # special case to remove internal function define in header:
        if self.previous_is('function') == True:
            if tok.type == 'OPEN_BRACE':
                self.sub_module_count_brace += 1
                debug.verbose("openBrace " + str(self.sub_module_count_brace))
            elif tok.type == 'CLOSE_BRACE':
                self.sub_module_count_brace -= 1
                debug.verbose("closeBrace " + str(self.sub_module_count_brace) + " line:" + str(self.cur_line))
                if self.sub_module_count_brace <= 0:
                    self.brace_type_pop()
                    self.last_comment = []
            continue
        # normal case:
        if tok.type == 'PRECOMP':
            debug.debug("PRECOMP: " + str(tok))
            self.stack = []
            self.name_stack = []
            self.last_comment = []
            # Do nothing for macro ==> many time not needed ...
            continue
        if tok.type == 'COMMENT_SINGLELINE_DOC_PREVIOUS':
            if self.previous_is('enum') == True:
                if self.name_stack[-1] == ",":
                    self.name_stack[-1] = "//!< " + tok.value
                    self.name_stack.append(",")
                else:
                    self.name_stack.append("//!< " + tok.value)
            elif self.previous != None \
                 and self.previous.get_node_type() == 'variable':
                self.previous.add_doc([tok.value])
            else:
                #self.last_comment.append(tok.value)
                pass
        if tok.type == 'COMMENT_MULTILINE_DOC':
            self.last_comment.append(tok.value)
        if tok.type == 'COMMENT_SINGLELINE_DOC':
            self.last_comment.append(tok.value)
        if tok.type == 'OPEN_BRACE':
            if self.count_pthese >= 1:
                # special case of lamba declaration inside initialisation of constructor
                self.name_stack.append(tok.value)
                debug.info("plop 0 " + str(self.count_pthese))
            else:
                # When we open a brace, this is the time to parse the stack ...
                # Clean the stack : (remove \t\r\n , and concatenate the 'xx', ':', ':', 'yy' in 'xx::yy',
                self.name_stack = create_compleate_class_name(self.name_stack)
                if len(self.name_stack) <= 0:
                    # open brace with no name ...
                    self.brace_type_push('empty', [])
                elif is_a_function(self.name_stack):
                    # need to parse sub function internal description...
                    self.sub_module_count_brace = 1
                    self.brace_type_push('function', self.name_stack)
                    debug.verbose("openBrace *** " + str(self.name_stack))
                elif 'namespace' in self.name_stack:
                    self.brace_type_push('namespace', self.name_stack)
                elif 'class' in self.name_stack:
                    self.brace_type_push('class', self.name_stack)
                elif 'enum' in self.name_stack:
                    self.brace_type_push('enum', self.name_stack)
                elif 'struct' in self.name_stack:
                    self.brace_type_push('struct', self.name_stack)
                elif 'typedef' in self.name_stack:
                    self.brace_type_push('typedef', self.name_stack)
                elif 'using' in self.name_stack:
                    self.brace_type_push('using', self.name_stack)
                elif 'union' in self.name_stack:
                    self.brace_type_push('union', self.name_stack)
                else:
                    self.brace_type_push('unknow', self.name_stack)
                self.stack = []
                self.name_stack = []
                self.last_comment = []
        elif tok.type == 'CLOSE_BRACE':
            if self.count_pthese >= 1:
                debug.info("plop 2 " + str(self.count_pthese))
                # special case of lamba declaration inside initialisation of constructor
                self.name_stack.append(tok.value)
            else:
                if len(self.name_stack) != 0:
                    if self.previous_is('enum') == True:
                        self.brace_type_append('enum list', self.name_stack)
                    else:
                        debug.warning(self.gen_debug_space() + "end brace DROP : " + str(self.name_stack))
                self.stack = []
                self.name_stack = []
                self.last_comment = []
                self.brace_type_pop()
        self.name_stack = create_compleate_class_name(self.name_stack)
        if tok.type == 'OPEN_PAREN':
            self.count_pthese += 1
            self.name_stack.append(tok.value)
        elif tok.type == 'CLOSE_PAREN':
            self.count_pthese -= 1
            self.name_stack.append(tok.value)
        elif tok.type == 'OPEN_SQUARE_BRACKET':
            self.name_stack.append(tok.value)
        elif tok.type == 'CLOSE_SQUARE_BRACKET':
            self.name_stack.append(tok.value)
        elif tok.type == 'EQUALS':
            self.name_stack.append(tok.value)
        elif tok.type == 'COMMA':
            self.name_stack.append(tok.value)
        elif tok.type == 'BACKSLASH':
            self.name_stack.append(tok.value)
        elif tok.type == 'PIPE':
            self.name_stack.append(tok.value)
        elif tok.type == 'PERCENT':
            self.name_stack.append(tok.value)
        elif tok.type == 'CARET':
            self.name_stack.append(tok.value)
        elif tok.type == 'EXCLAMATION':
            self.name_stack.append(tok.value)
        elif tok.type == 'SQUOTE':
            self.name_stack.append(tok.value)
        elif tok.type == 'NUMBER':
            self.name_stack.append(tok.value)
        elif tok.type == 'MINUS':
            self.name_stack.append(tok.value)
        elif tok.type == 'PLUS':
            self.name_stack.append(tok.value)
        elif tok.type == 'STRING_LITERAL':
            self.name_stack.append(tok.value)
        elif tok.type == 'NAME' \
             or tok.type == 'AMPERSTAND' \
             or tok.type == 'ASTERISK' \
             or tok.type == 'CHAR_LITERAL':
            self.name_stack.append(tok.value)
        elif tok.type == 'COLON':
            if self.name_stack[0] in Node.access_list:
                debug.debug(self.gen_debug_space() + "change visibility : " + self.name_stack[0])
                self.brace_type_change_access(self.name_stack[0])
                self.name_stack = []
                self.stack = []
            else:
                self.name_stack.append(tok.value)
        elif tok.type == 'SEMI_COLON':
            if self.count_pthese >= 1:
                debug.info("plop 3 " + str(self.count_pthese))
                # special case of lamba declaration inside initialisation of constructor
                self.name_stack.append(tok.value)
            else:
                if len(self.name_stack) != 0:
                    self.name_stack = create_compleate_class_name(self.name_stack)
                    if is_a_function(self.name_stack):
                        self.brace_type_append('function', self.name_stack)
                    elif 'namespace' in self.name_stack:
                        debug.debug(self.gen_debug_space() + "find a namespace DECLARATION : " + str(self.name_stack))
                    elif 'class' in self.name_stack:
                        debug.debug(self.gen_debug_space() + "find a class DECLARATION : " + str(self.name_stack))
                    elif 'enum' in self.name_stack:
                        debug.debug(self.gen_debug_space() + "find a enum DECLARATION : " + str(self.name_stack))
                    elif 'struct' in self.name_stack:
                        debug.debug(self.gen_debug_space() + "find a struct DECLARATION : " + str(self.name_stack))
                    elif 'typedef' in self.name_stack:
                        debug.warning(self.gen_debug_space() + "find a typedef DECLARATION : " + str(self.name_stack) + " ==> lose it ...")
                        #self.brace_type_push('typedef', self.name_stack)
                    elif 'using' in self.name_stack:
                        debug.info(self.gen_debug_space() + "find a using DECLARATION : " + str(self.name_stack))
                        self.brace_type_append('using', self.name_stack)
                    elif 'union' in self.name_stack:
                        debug.debug(self.gen_debug_space() + "find a union DECLARATION : " + str(self.name_stack))
                    else:
                        if self.previous_is('enum') == True:
                            self.brace_type_append('enum list', self.name_stack)
                        else:
                            # TODO : Check if it is true in all case :
                            self.brace_type_append('variable', self.name_stack)
                            #debug.warning(self.gen_debug_space() + "variable : " + str(self.name_stack))
                self.stack = []
                self.name_stack = []
                self.last_comment = []
                      (t.lineno, t.value))

def t_FLOAT(t):
    r'\d+\.\d+'
    t.value = float(t.value)
    return t

def t_NUMBER(t):
    r'\d+'
    t.value = float(t.value)
    return t

def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')
    # t.value = (t.value, symbol_lookup(t.value))
    return t

if __name__ == '__main__':
    lexer = lex.lex()
    lex.input(sys.stdin.read())
    while True:
        t = lex.token()
        if not t:
            break
        print t.value, '\t:\t', t.type
def run_lexer(strings):
    lex.input(strings)
    while 1:
        token = lex.token()             # Get a token
        if not token:
            break                       # No more tokens
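# A minimal, self-contained sketch (assumed, not from the original project) of
# a token specification that run_lexer() above could drive if it lived in a
# module of its own; the token names and regexes are illustrative only.
import lex              # PLY's lex module, imported the same way as elsewhere in this file

tokens = ('NUMBER', 'PLUS')

t_PLUS = r'\+'
t_ignore = ' \t'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    t.lexer.skip(1)     # silently skip characters that match no rule

lex.lex()               # build the lexer from the rules in this module
run_lexer("1 + 2")      # consumes the tokens NUMBER(1), PLUS, NUMBER(2)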
def __init__(self, headerFileName, argType="file"):
    if argType == "file":
        self.headerFileName = os.path.expandvars(headerFileName)
        self.mainClass = os.path.split(self.headerFileName)[1][:-2]
        headerFileStr = ""
        # if headerFileName[-2:] != ".h":
        #     raise Exception("file must be a header file and end with .h")
    elif argType == "string":
        self.headerFileName = ""
        self.mainClass = "???"
        headerFileStr = headerFileName
    else:
        raise Exception("Arg type must be either file or string")
    self.curClass = ""
    self.classes = {}
    self.enums = []
    self.nameStack = []
    self.nameSpaces = []
    self.curAccessSpecifier = "private"
    if len(self.headerFileName):
        headerFileStr = "\n".join(open(self.headerFileName).readlines())
    self.braceDepth = 0
    lex.input(headerFileStr)
    curLine = 0
    curChar = 0
    # Add for Mediatek code change check
    global mtkChange
    mtkChange = False
    defs = " "
    macro = "ANDROID_DEFAULT_CODE"
    ignorePreComp = 0
    try:
        while True:
            tok = lex.token()
            # Example: LexToken(COLON,';',1,373)
            # where (tok.name, tok.value, ?, ?)
            if not tok:
                break
            curLine = tok.lineno
            curChar = tok.lexpos
            # For Mediatek code change check begin
            if tok.type == "PRECOMP_MACRO":
                if tok.value.startswith("#ifndef") or tok.value.startswith("#ifdef") or tok.value.startswith("#if"):
                    if macro in tok.value:
                        if tok.value.startswith("#ifndef") or tok.value.startswith("#ifdef"):
                            defs = tok.value[:7].split()
                        else:
                            if "!defined" in tok.value:
                                defs = "#ifndef"
                            else:
                                defs = "#ifdef"
                        if "#ifndef" in defs:
                            mtkChange = True
                        else:
                            mtkChange = False
                    elif defs != " ":
                        ignorePreComp = ignorePreComp + 1
                elif not ignorePreComp and tok.value.startswith("#else"):
                    if "#ifndef" in defs and mtkChange == True:
                        mtkChange = False
                    elif "#ifdef" in defs and mtkChange == False:
                        mtkChange = True
                elif tok.value.startswith("#endif"):
                    if ignorePreComp > 0:
                        ignorePreComp = ignorePreComp - 1
                    elif "#ifndef" in defs or "#ifdef" in defs:
                        mtkChange = False
                        defs = " "
                        ignorePreComp = 0
            # For Mediatek code change check end
            if tok.type == "OPEN_BRACE":
                if len(self.nameStack) and is_namespace(self.nameStack):
                    self.nameSpaces.append(self.nameStack[1])
                if len(self.nameStack) and not is_enum_namestack(self.nameStack):
                    self.evaluate_stack()
                else:
                    self.nameStack.append(tok.value)
                self.braceDepth += 1
            elif tok.type == "CLOSE_BRACE":
                if self.braceDepth == 0:
                    continue
                if self.braceDepth == len(self.nameSpaces):
                    tmp = self.nameSpaces.pop()
                if len(self.nameStack) and is_enum_namestack(self.nameStack):
                    self.nameStack.append(tok.value)
                elif self.braceDepth < 10:
                    self.evaluate_stack()
                else:
                    self.nameStack = []
                self.braceDepth -= 1
                ######if (self.braceDepth == 0):
                if self.braceDepth == 1:
                    self.curClass = ""
            if tok.type == "OPEN_PAREN":
                self.nameStack.append(tok.value)
            elif tok.type == "CLOSE_PAREN":
                self.nameStack.append(tok.value)
            elif tok.type == "EQUALS":
                self.nameStack.append(tok.value)
            elif tok.type == "COMMA":
                self.nameStack.append(tok.value)
            elif tok.type == "NUMBER":
                self.nameStack.append(tok.value)
            elif tok.type == "MINUS":
                self.nameStack.append(tok.value)
            elif tok.type == "PLUS":
                self.nameStack.append(tok.value)
            elif tok.type == "STRING_LITERAL":
                self.nameStack.append(tok.value)
            elif tok.type == "NAME" or tok.type == "AMPERSTAND" or tok.type == "ASTERISK":
                if tok.value == "class" or tok.value == "struct":
                    self.nameStack.append(tok.value)
                elif tok.value in supportedAccessSpecifier and self.braceDepth == len(self.nameSpaces) + 1:
                    self.curAccessSpecifier = tok.value
                else:
                    self.nameStack.append(tok.value)
            elif tok.type == "COLON":
                # Dont want colon to be first in stack
                if len(self.nameStack) == 0:
                    continue
                self.nameStack.append(tok.value)
            elif tok.type == "SEMI_COLON":
                if self.braceDepth < 10:
                    self.evaluate_stack()
    except:
        raise CppParseError(
            'Not able to parse %s on line %d evaluating "%s"\nError around: %s'
            % (self.headerFileName, tok.lineno, tok.value, " ".join(self.nameStack))
        )
def __init__(self, headerFileName, argType="file"):
    if (argType == "file"):
        self.headerFileName = os.path.expandvars(headerFileName)
        self.mainClass = os.path.split(self.headerFileName)[1][:-2]
        headerFileStr = ""
        # if headerFileName[-2:] != ".h":
        #     raise Exception("file must be a header file and end with .h")
    elif argType == "string":
        self.headerFileName = ""
        self.mainClass = "???"
        headerFileStr = headerFileName
    else:
        raise Exception("Arg type must be either file or string")
    self.curClass = ""
    self.classes = {}
    self.enums = []
    self.nameStack = []
    self.nameSpaces = []
    self.curAccessSpecifier = 'private'
    if (len(self.headerFileName)):
        headerFileStr = "\n".join(open(self.headerFileName).readlines())
    self.braceDepth = 0
    lex.input(headerFileStr)
    curLine = 0
    curChar = 0
    #Add for Mediatek code change check
    global mtkChange
    mtkChange = False
    defs = ' '
    macro = "ANDROID_DEFAULT_CODE"
    ignorePreComp = 0
    try:
        while True:
            tok = lex.token()
            # Example: LexToken(COLON,';',1,373)
            # where (tok.name, tok.value, ?, ?)
            if not tok:
                break
            curLine = tok.lineno
            curChar = tok.lexpos
            #For Mediatek code change check begin
            if (tok.type == 'PRECOMP_MACRO'):
                if (tok.value.startswith("#ifndef") or tok.value.startswith("#ifdef") or tok.value.startswith("#if")):
                    if (macro in tok.value):
                        if (tok.value.startswith("#ifndef") or tok.value.startswith("#ifdef")):
                            defs = tok.value[:7].split()
                        else:
                            if ("!defined" in tok.value):
                                defs = '#ifndef'
                            else:
                                defs = '#ifdef'
                        if ('#ifndef' in defs):
                            mtkChange = True
                        else:
                            mtkChange = False
                    elif (defs != ' '):
                        ignorePreComp = ignorePreComp + 1
                elif (not ignorePreComp and tok.value.startswith("#else")):
                    if ('#ifndef' in defs and mtkChange == True):
                        mtkChange = False
                    elif ('#ifdef' in defs and mtkChange == False):
                        mtkChange = True
                elif (tok.value.startswith("#endif")):
                    if (ignorePreComp > 0):
                        ignorePreComp = ignorePreComp - 1
                    elif ('#ifndef' in defs or '#ifdef' in defs):
                        mtkChange = False
                        defs = ' '
                        ignorePreComp = 0
            #For Mediatek code change check end
            if (tok.type == 'OPEN_BRACE'):
                if len(self.nameStack) and is_namespace(self.nameStack):
                    self.nameSpaces.append(self.nameStack[1])
                if len(self.nameStack) and not is_enum_namestack(self.nameStack):
                    self.evaluate_stack()
                else:
                    self.nameStack.append(tok.value)
                self.braceDepth += 1
            elif (tok.type == 'CLOSE_BRACE'):
                if self.braceDepth == 0:
                    continue
                if (self.braceDepth == len(self.nameSpaces)):
                    tmp = self.nameSpaces.pop()
                if len(self.nameStack) and is_enum_namestack(self.nameStack):
                    self.nameStack.append(tok.value)
                elif self.braceDepth < 10:
                    self.evaluate_stack()
                else:
                    self.nameStack = []
                self.braceDepth -= 1
                ######if (self.braceDepth == 0):
                if self.braceDepth == 1:
                    self.curClass = ""
            if (tok.type == 'OPEN_PAREN'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'CLOSE_PAREN'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'EQUALS'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'COMMA'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'NUMBER'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'MINUS'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'PLUS'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'STRING_LITERAL'):
                self.nameStack.append(tok.value)
            elif (tok.type == 'NAME' or tok.type == 'AMPERSTAND' or tok.type == 'ASTERISK'):
                if (tok.value == 'class' or tok.value == 'struct'):
                    self.nameStack.append(tok.value)
                elif (tok.value in supportedAccessSpecifier and self.braceDepth == len(self.nameSpaces) + 1):
                    self.curAccessSpecifier = tok.value
                else:
                    self.nameStack.append(tok.value)
            elif (tok.type == 'COLON'):
                #Dont want colon to be first in stack
                if len(self.nameStack) == 0:
                    continue
                self.nameStack.append(tok.value)
            elif (tok.type == 'SEMI_COLON'):
                if (self.braceDepth < 10):
                    self.evaluate_stack()
    except:
        raise CppParseError(
            "Not able to parse %s on line %d evaluating \"%s\"\nError around: %s"
            % (self.headerFileName, tok.lineno, tok.value, " ".join(self.nameStack))
        )
t_CHAR     = r'\'(\\.|[^\\\'])\''
t_LPAREN   = r'\('
t_RPAREN   = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE   = r'\{'
t_RBRACE   = r'\}'
t_COMMA    = r','
t_SEMI     = r';'
t_COLON    = r':'
t_CONDOP   = r'\?'
# t_ELLIPSIS = r'\.\.\.'

def t_status(t):
    print "illegal '%s' line %d" % (t.value[0], t.lineno)
    # exit(1)
    t.skip(1)

lex.lex()

if __name__ == '__main__':
    lex.input(data)
    while 1:
        tok = lex.token()
        if not tok:
            break               # No more input
        print tok
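# Illustrative check (assumed, not part of the original source): t_CHAR above
# matches a C-style character literal containing either one escaped character
# or one ordinary character between single quotes.
def _demo_t_char():
    import re
    pattern = re.compile(t_CHAR)
    print bool(pattern.match(r"'a'"))    # True
    print bool(pattern.match(r"'\n'"))   # True  (escaped character)
    print bool(pattern.match(r"'ab'"))   # False (more than one character)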
#            tfind = tfind+tofind[j]
        mappedstring[tofind] = fl.group(2)
        mappedlink[tofind] = fl.group(3)
#
# Read in file that is to be mapped
lines = sys.stdin.read()
lex.input(lines)

text = ''
bracket = 0
vbracket = 0
lstinline_bracket = 0
lstlisting_bracket = 0

while 1:
    token = lex.token()                 # Get a token
    if not token:
        break                           # No more tokens
    if token.type == 'NEWLINE':
        print text
        text = ''
    else:
        value = token.value
        # verbatim environment disables bracket count
        if value == '\\begin{verbatim}':
            vbracket = vbracket + 1
        # lstlisting environment disables bracket count
        if value == '\\begin{lstlisting}':
            lstlisting_bracket = lstlisting_bracket + 1
        # \href cannot be used in many places in Latex
        if value in ['\\href{', '\\findex{', '\\sindex{', '\\subsection{', '\\chapter{', '\\section{', '\\caption{'] and vbracket == 0 and lstlisting_bracket == 0:
            bracket = bracket + 1
# Read in file that is to be mapped
lines = sys.stdin.read()
lex.input(lines)

text = ''
bracket = 0
vbracket = 0
lstinline_bracket = 0
lstinlinemod_bracket = 0
lstlisting_bracket = 0
outputlisting_bracket = 0
bashlisting_bracket = 0
makelisting_bracket = 0
tikzpicture_bracket = 0

while 1:
    token = lex.token()                 # Get a token
    if not token:
        break                           # No more tokens
    if token.type == 'NEWLINE':
        print(text)
        text = ''
    else:
        value = token.value
        # various verbatim-style environments disable bracket count
        # Note that a closing bracket inside a \trl{} will break things
        #print value + " : " + str(bracket) + str(vbracket) + str(lstinline_bracket) + str(outputlisting_bracket) + str(bashlisting_bracket) + str(makelisting_bracket) + str(lstlisting_bracket) + str(tikzpicture_bracket)
        if value == '\\begin{verbatim}':
            vbracket = vbracket + 1
        if value == '\\begin{bashlisting}':
            bashlisting_bracket = bashlisting_bracket + 1
        if value == '\\begin{makelisting}':
            makelisting_bracket = makelisting_bracket + 1