def parseStream(content, uniqueId=""):
    tokens = []
    line = column = 1
    sol = 0  # index of start-of-line
    scanner = Scanner.LQueue(tokens_2_obj(content))
    scanner.content = content
    scanner.slice = scanner_slice
    for tok in scanner:
        # some initial values (tok is a Scanner.Token instance)
        token = {
            "source": tok.value,
            "detail": "",
            "line": line,
            "column": tok.spos - sol + 1,
            "id": uniqueId,
        }

        # white space
        if tok.name == "white":
            continue

        # end of file
        elif tok.name == "eof":
            token["type"] = "eof"

        # line break
        elif tok.name == "nl":
            token["type"] = "eol"
            token["source"] = ""      # that's the way the old tokenizer does it
            line += 1                 # increase line count
            sol = tok.spos + tok.len  # char pos of next line start

        # float
        elif tok.name == "float":
            token["type"] = "number"
            token["detail"] = "float"

        # hex integer
        elif tok.name == "hexnum":
            token["type"] = "number"
            token["detail"] = "int"

        # integer
        elif tok.name == "number":
            token["type"] = "number"
            token["detail"] = "int"

        # string
        elif tok.value in ('"', "'"):
            # accumulate strings
            token["type"] = "string"
            if tok.value == '"':
                token["detail"] = "doublequotes"
            else:
                token["detail"] = "singlequotes"
            try:
                token["source"] = parseString(scanner, tok.value)
            except SyntaxException, e:
                desc = e.args[0] + " starting with %r..." % (tok.value + e.args[1])[:20]
                raiseSyntaxException(token, desc)
            token["source"] = token["source"][:-1]
            # adapt line number -- this assumes multi-line strings are not generally out
            linecnt = len(re.findall("\n", token["source"]))
            if linecnt > 0:
                line += linecnt

        # identifier, operator
        elif tok.name in ("ident", "op", "mulop"):
            # JS operator symbols
            if tok.value in lang.TOKENS:
                # division, div-assignment, regexp
                if tok.value in ("/", "/="):
                    # accumulate regex literals
                    if len(tokens) == 0 or (
                        (tokens[-1]["type"] != "number")
                        and (tokens[-1]["detail"] != "RP")
                        and (tokens[-1]["detail"] != "RB")
                        and (tokens[-1]["type"] != "name")
                    ):
                        regexp = parseRegexp(scanner)
                        token["type"] = "regexp"
                        token["source"] = tok.value + regexp
                    else:
                        token["type"] = "token"
                        token["detail"] = lang.TOKENS[tok.value]

                # comment, inline
                elif tok.value == "//":
                    # accumulate inline comments
                    if len(tokens) == 0 or not is_last_escaped_token(tokens):
                        commnt = parseCommentI(scanner)
                        token["type"] = "comment"
                        token["source"] = tok.value + commnt
                        token["begin"] = not hasLeadingContent(tokens)
                        token["end"] = True
                        # "^//...\n i=1;" => comment *before* code; "i=1; //..." => comment *after* code
                        token["connection"] = "before" if token["begin"] else "after"
                        token["multiline"] = False
                        token["detail"] = "inline"
                    else:
                        print >> sys.stderr, "Inline comment out of context"

                # comment, multiline
                elif tok.value == "/*":
                    # accumulate multiline comments
                    if len(tokens) == 0 or not is_last_escaped_token(tokens):
                        token["type"] = "comment"
                        try:
                            commnt = parseCommentM(scanner)
                        except SyntaxException, e:
                            desc = e.args[0] + " starting with %r..." % (tok.value + e.args[1])[:20]
                            raiseSyntaxException(token, desc)
                        commnt = alignMultiLines(commnt, token["column"])
                        token["source"] = tok.value + commnt
                        token["detail"] = comment.getFormat(token["source"])
                        token["begin"] = not hasLeadingContent(tokens)
                        if restLineIsEmpty(scanner):
                            token["end"] = True
                        else:
                            token["end"] = False
                        if token["begin"]:
                            token["source"] = comment.outdent(token["source"], column - 1)
                        token["source"] = comment.correct(token["source"])
                        if token["end"] and not token["begin"]:
                            token["connection"] = "after"
                        else:
                            token["connection"] = "before"
                        # adapt line number
                        linecnt = len(re.findall("\n", token["source"]))
                        if linecnt > 0:
                            line += linecnt
                            token["multiline"] = True
                        else:
                            token["multiline"] = False
                    else:
                        print >> sys.stderr, "Multiline comment out of context"

                # every other operator goes as is
                else:
                    token["type"] = "token"
                    token["detail"] = lang.TOKENS[tok.value]

            # JS keywords
            elif tok.value in lang.RESERVED:
                token["type"] = "reserved"
                token["detail"] = lang.RESERVED[tok.value]

            # JS/BOM objects
            elif tok.value in lang.BUILTIN:
                token["type"] = "builtin"

            # identifier
            elif tok.value.startswith("__"):
                token["type"] = "name"
                token["detail"] = "private"
            elif tok.value.startswith("_"):
                token["type"] = "name"
                token["detail"] = "protected"
            else:
                token["type"] = "name"
                token["detail"] = "public"

        # unknown token
        else:
            print >> sys.stderr, "Unhandled lexem: %s" % tok

        tokens.append(token)

    return tokens
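# --- Usage sketch (illustrative only, not part of the original module) -----
# Assumes the helpers referenced above (Scanner, tokens_2_obj, scanner_slice,
# lang, comment, re, sys, parseString, parseRegexp, ...) are defined at module
# level; the sample input and the "demo.js" id are made up for the demo.
if __name__ == "__main__":
    demo = "var a = 1; // the answer\n"
    for t in parseStream(demo, uniqueId="demo.js"):
        # every token is a dict carrying type/detail/source/line/column/id
        print "%(line)s:%(column)s %(type)s/%(detail)s %(source)r" % t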
def parseStream(content, uniqueId=""):
    # make global variables available
    global parseLine
    global parseColumn
    global parseUniqueId

    # reset global stuff
    parseColumn = 1
    parseLine = 1
    parseUniqueId = uniqueId

    # prepare storage
    tokens = []
    content = protectEscape(content)

    # print " * searching for patterns..."
    try:
        all = R_ALL.findall(content)
    except RuntimeError:
        print "Could not parse file %s" % uniqueId
        print "Generally this means that there is a syntactic problem with your source code."
        print "Please avoid using nested comments like '/* foo /* bar */'."
        sys.exit(1)

    # print " * structuring..."
    # for item in all:
    #     if type(item) != types.TupleType:  # item's no longer a tuple!
    #         item = (item,)
    #     fragment = item[0]

    while content:
        mo = R_ALL.search(content)
        if mo:
            fragment = mo.group(0)
        else:
            break

        # print "Found: '%s'" % fragment

        # Handle block comment
        if comment.R_BLOCK_COMMENT.match(fragment):
            source = recoverEscape(fragment)
            format = comment.getFormat(source)
            multiline = comment.isMultiLine(source)
            # print "Type:MultiComment"
            content = parseFragmentLead(content, fragment, tokens)

            # sort of intelligent "pop"
            atBegin = not hasLeadingContent(tokens)
            if re.compile(r"^\s*\n").search(content):
                atEnd = True
            else:
                atEnd = False
            # print "Begin: %s, End: %s" % (atBegin, atEnd)

            # Fixing source content
            if atBegin:
                source = comment.outdent(source, parseColumn - 1)
            source = comment.correct(source)

            if atEnd and not atBegin:
                connection = "after"
            else:
                connection = "before"

            tokens.append({
                "type": "comment",
                "detail": format,
                "multiline": multiline,
                "connection": connection,
                "source": source,
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
                "begin": atBegin,
                "end": atEnd,
            })
            parseLine += len(fragment.split("\n")) - 1

        # Handle inline comment
        elif comment.R_INLINE_COMMENT.match(fragment):
            # print "Type:SingleComment"
            source = recoverEscape(fragment)
            content = parseFragmentLead(content, fragment, tokens)

            atBegin = hasLeadingContent(tokens)
            atEnd = True

            if atBegin:
                connection = "after"
            else:
                connection = "before"

            source = comment.correct(source)

            tokens.append({
                "type": "comment",
                "detail": "inline",
                "multiline": False,
                "connection": connection,
                "source": source,
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
                "begin": atBegin,
                "end": atEnd,
            })

        # Handle single-quoted string
        elif R_STRING_A.match(fragment):
            # print "Type:StringA: %s" % fragment
            content = parseFragmentLead(content, fragment, tokens)
            source = recoverEscape(fragment)[1:-1]
            tokens.append({
                "type": "string",
                "detail": "singlequotes",
                "source": source.replace("\\\n", ""),
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
            })
            newLines = source.count("\\\n")
            parseLine += newLines
            if newLines:
                parseColumn = len(source) - source.rfind("\\\n") + 2
            else:
                parseColumn += len(source) + 2

        # Handle double-quoted string
        elif R_STRING_B.match(fragment):
            # print "Type:StringB: %s" % fragment
            content = parseFragmentLead(content, fragment, tokens)
            source = recoverEscape(fragment)[1:-1]
            tokens.append({
                "type": "string",
                "detail": "doublequotes",
                "source": source.replace("\\\n", ""),
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
            })
            newLines = source.count("\\\n")
            parseLine += newLines
            if newLines:
                parseColumn = len(source) - source.rfind("\\\n") + 2
            else:
                parseColumn += len(source) + 2

        # Handle float num
        elif R_FLOAT.match(fragment):
            # print "Type:Float: %s" % fragment
            content = parseFragmentLead(content, fragment, tokens)
            tokens.append({
                "type": "number",
                "detail": "float",
                "source": fragment,
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
            })

        # Handle regexps
        # elif R_REGEXP.search(content[:content.index('\n')]):
        #     mo = R_REGEXP.search(content)
        #     regmatch = mo.group(0)
        #     content = parseFragmentLead(content, regmatch, tokens)
        #     tokens.append({ "type" : "regexp", "detail" : "", "source" : recoverEscape(regmatch), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
        #     parseColumn += len(regmatch)

        # Handle operator
        elif R_OPERATORS.match(fragment):
            # print "Type:Operator: %s" % fragment
            content = parseFragmentLead(content, fragment, tokens)
            tokens.append({
                "type": "token",
                "detail": lang.TOKENS[fragment],
                "source": fragment,
                "id": parseUniqueId,
                "line": parseLine,
                "column": parseColumn,
            })

        # Handle everything else
        else:
            fragresult = R_REGEXP.search(fragment)

            if fragresult:
                # print "Type:RegExp: %s" % fragresult.group(0)
                if (R_REGEXP_A.match(fragment) or R_REGEXP_B.match(fragment)
                        or R_REGEXP_C.match(fragment) or R_REGEXP_D.match(fragment)
                        or R_REGEXP_E.match(fragment)):
                    content = parseFragmentLead(content, fragresult.group(0), tokens)
                    tokens.append({
                        "type": "regexp",
                        "detail": "",
                        "source": recoverEscape(fragresult.group(0)),
                        "id": parseUniqueId,
                        "line": parseLine,
                        "column": parseColumn,
                    })
                else:
                    print "Bad regular expression: %s" % fragresult.group(0)
                    break  # nothing was consumed; bail out and let parsePart() handle the rest
            else:
                print "Type:None!"
                break  # same here: avoid looping forever on an unconsumed fragment

    # tokens.extend(parsePart(recoverEscape(content)))
    parsePart(recoverEscape(content), tokens)

    tokens.append({
        "type": "eof",
        "source": "",
        "detail": "",
        "id": parseUniqueId,
        "line": parseLine,
        "column": parseColumn,
    })

    return tokens