def parseCommentM(self, scanner):
    """Collect the text of a multi-line comment up to its closing ``*/``.

    Chunks are requested from ``scanner.next(r'\\*/')`` until one arrives
    that ``Scanner.is_last_escaped`` does not report as escaped.

    Returns the concatenation of all chunk values.
    Raises SyntaxException when the scanner signals StopIteration before
    such a chunk is seen.
    """
    chunks = []
    closed = False
    try:
        while not closed:
            # Passing the terminator pattern informs the low-level scanner
            # to switch to multi-line comment mode.
            chunk = scanner.next(r'\*/').value
            chunks.append(chunk)
            closed = not Scanner.is_last_escaped(chunk)
    except StopIteration:
        raise SyntaxException("Unterminated multi-line comment:\n '%s'" % u''.join(chunks))
    return u"".join(chunks)
def parseDelimited(scanner, terminator):
    """Consume tokens from ``scanner`` up to and including ``terminator``.

    A token whose ``value`` equals ``terminator`` ends the element unless
    ``is_last_escaped_tokobj`` reports the collected token list as escaped.

    Returns the list of consumed tokens (the terminator token included).
    Raises SyntaxException("Run-away element", text) when the scanner is
    exhausted first; ``text`` is the slice of scanner input covered by the
    consumed tokens, or an empty string when no token was consumed at all.
    """
    tokens = []
    for token in scanner:
        tokens.append(token)
        if token.value == terminator and not is_last_escaped_tokobj(tokens):
            break
    else:
        # The scanner ran dry without an unescaped terminator.  An empty
        # scanner leaves nothing to slice from, so report empty text rather
        # than failing with IndexError on tokens[0].
        if tokens:
            last = tokens[-1]
            res = scanner.slice(tokens[0].spos, last.spos + last.len)
        else:
            res = u""
        raise SyntaxException("Run-away element", res)
    return tokens
def parseString(self, scanner, sstart):
    """Collect the text of a string literal up to its closing quote.

    ``sstart`` is handed to ``scanner.next`` for every chunk request.
    Chunks are accumulated until one arrives that
    ``Scanner.is_last_escaped`` does not report as escaped (i.e. the quote
    ending the chunk is a real terminator, not an escaped quote).

    Returns the concatenation of all chunk values.
    Raises SyntaxException when the scanner signals StopIteration first.
    """
    pieces = []
    terminated = False
    try:
        while not terminated:
            piece = scanner.next(sstart).value
            pieces.append(piece)
            # an escaped quote does not end the literal
            terminated = not Scanner.is_last_escaped(piece)
    except StopIteration:
        raise SyntaxException("Unterminated string: '%s'" % u''.join(pieces))
    return u"".join(pieces)
def parseCommentM1(scanner):
    """Read token values from ``scanner`` until an unescaped ``*/`` token.

    Returns the joined text of every token consumed, the closing ``*/``
    included.
    Raises SyntaxException("Run-away comment", text) when the scanner is
    exhausted before the comment is closed.
    """
    pieces = []
    for token in scanner:
        value = token.value
        pieces.append(value)
        if value == '*/':
            text = u"".join(pieces)
            if not Scanner.is_last_escaped(text):
                return text
    # Reaching this point means we ran out of tokens without finishing
    # the comment.
    raise SyntaxException("Run-away comment", u"".join(pieces))
def parseString(scanner, sstart):
    """Read token values from ``scanner`` until an unescaped ``sstart`` token.

    ``sstart`` is the token value that closes the string literal.

    Returns the joined text of every token consumed, the closing token
    included.
    Raises SyntaxException("Non-terminated string", text) when the scanner
    is exhausted before the string is closed.
    """
    collected = []
    for token in scanner:
        value = token.value
        collected.append(value)
        if value == sstart:
            literal = u"".join(collected)
            # an escaped quote does not close the literal
            if not Scanner.is_last_escaped(literal):
                return literal
    # Reaching this point means we ran out of tokens without finishing
    # the string.
    raise SyntaxException("Non-terminated string", u"".join(collected))
def raiseSyntaxException(token, expectedDesc=None):
    """Raise a SyntaxException describing an unexpected token.

    token        -- mapping read for the keys "type", "detail", "source",
                    "id", "line" and "column"
    expectedDesc -- optional description of what was expected; when truthy
                    the message starts with "Expected <desc> but found ",
                    otherwise with "Unexpected "

    This function never returns normally.
    """
    if expectedDesc:
        prefix = "Expected " + expectedDesc + " but found "
    else:
        prefix = "Unexpected "

    parts = [prefix, token["type"]]
    detail = token["detail"]
    if detail:
        parts += ["/", detail]
    parts += [
        ": '", token["source"],
        "'. file:", token["id"],
        ", line:", str(token["line"]),
        ", column:", str(token["column"]),
    ]
    raise SyntaxException("".join(parts))
def __iter__(self):
    """Generate ``(group_name, text, start, length)`` tuples over ``self.inData``.

    Each step matches at the current cursor: with ``self.patt.match`` by
    default, or with ``self.stringEnd[delimiter].search`` when the consumer
    passed a truthy ``delimiter`` into the generator via ``send()``.  The
    yielded tuple holds the name of the last matched group, that group's
    text, the match start offset and the match length.

    Raises SyntaxException when nothing matches at the cursor, and
    RuntimeError when a match does not begin exactly at the cursor.
    """
    delimiter = None
    inData = self.inData
    lenData = len(inData)
    cursor = 0
    while cursor < lenData:
        if delimiter:
            mo = self.stringEnd[delimiter].search(inData, pos=cursor)
        else:
            mo = self.patt.match(inData, pos=cursor)
        if not mo:
            raise SyntaxException("Unable to tokenize text starting with: \"%s\"" % inData[cursor:cursor+200])

        mo_lastgroup = mo.lastgroup
        # When using the 'pos' parameter, mo.start/end refer to the
        # *entire* underlying string, so they compare directly to cursor.
        mstart = mo.start()
        mend = mo.end()
        mlength = mend - mstart
        if cursor != mstart:
            # Clamp the context window at 0: a negative slice start would
            # wrap around from the end of the data and show the wrong
            # (usually empty) text whenever cursor < 100.
            context_start = max(0, cursor - 100)
            raise RuntimeError("(This should never happen). There is a scan gap AFTER:\n \"%s\"\nAND BEFORE:\n \"%s\"" % (inData[context_start:cursor], inData[mstart:mstart+100]))
        cursor = mend
        # The value passed to send() (None for plain iteration) selects the
        # matcher for the next step.
        delimiter = (yield (mo_lastgroup, mo.group(mo_lastgroup), mstart, mlength))
def parseRegexp(self, scanner):
    """Collect the text of a regexp literal whose leading '/' is already consumed.

    Token values are accumulated until an unescaped '/' is seen outside a
    ``[...]`` character class.  If the token that follows is named
    "ident", it is appended as well (regexp modifiers).

    Returns the accumulated text, closing '/' and modifiers included.
    Raises SyntaxException when an 'nl' or 'eof' token is met, or the
    scanner is exhausted, before the literal is closed.
    """
    rexp = ""
    in_char_class = False
    try:
        # The first fetch is inside the try as well, so that an already
        # exhausted scanner is reported as an unterminated literal instead
        # of leaking a raw StopIteration to the caller.
        token = scanner.next()
        while True:
            rexp += token.value  # accumulate token strings
            # -- Check last token
            # character classes
            if token.value == "[":
                # i.e. not preceded by an odd number of "\"
                if not Scanner.is_last_escaped(rexp):
                    in_char_class = True
            elif token.value == "]" and in_char_class:
                if not Scanner.is_last_escaped(rexp):
                    in_char_class = False
            elif token.name in ['nl', 'eof']:
                # a regexp literal cannot span a line end; handled below
                raise StopIteration
            # check for termination of rexp
            # (rexp[-1] != token.value if token.value == "//")
            elif rexp[-1] == "/" and not in_char_class:
                if not Scanner.is_last_escaped(rexp):
                    break
            token = scanner.next()
    except StopIteration:
        raise SyntaxException("Unterminated regexp literal: '%s'" % rexp)

    # regexp modifiers
    try:
        if scanner.peek()[0].name == "ident":
            token = scanner.next()
            rexp += token.value
    except StopIteration:
        # nothing follows the literal; there are no modifiers to add
        pass

    return rexp
def raiseSyntaxException(token, desc=u""):
    """Raise a SyntaxException whose message is ``desc`` plus the token location.

    token -- mapping read for the keys 'id' and 'line'
    desc  -- message text placed before the " (<id>:<line>)" suffix

    This function never returns normally.
    """
    location = " (%s:%d)" % (token['id'], token['line'])
    raise SyntaxException(desc + location)
def lookbehind(self, n=1):
    """Return ``self.outData[n]``, provided ``n`` is within the look-behind limit.

    Raises SyntaxException when ``n`` exceeds ``self.max_look_behind``.
    """
    limit = self.max_look_behind
    if n <= limit:
        return self.outData[n]
    raise SyntaxException(
        "TokenStream: can only look %d elements behind" % limit)