Example #1
def get_name(kind):
    """ used for reporting errors from match() at the parsing stage """
    if 0 <= kind < len(Token.token_types):
        return Token.token_types[kind]
    raise ScannerException("Index out of bounds. Unknown token " + str(kind))
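
A minimal sketch of how get_name() gets exercised, assuming only that Token exposes an index-aligned token_types list (the names below are illustrative, not the real Ezhil token table):

class ScannerException(Exception):
    pass

class Token:
    token_types = ["EOF", "ID", "NUMBER", "STRING", "PLUS", "MINUS"]

    @staticmethod
    def get_name(kind):
        if 0 <= kind < len(Token.token_types):
            return Token.token_types[kind]
        raise ScannerException("Index out of bounds. Unknown token " + str(kind))

print(Token.get_name(2))  # -> NUMBER
# Token.get_name(99) would raise ScannerException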
Example #2
def match(self, tokval):
    """ if the token kind matches, consume and return the token """
    if self.peek().kind != tokval:
        raise ScannerException("cannot find token " + Token.get_name(tokval) +
                               " got " + str(self.peek()) + " instead!")
    return self.dequeue()
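
A hedged sketch of the match()/peek()/dequeue() contract this relies on, using plain (value, kind) tuples in place of real Lexeme objects:

class ScannerException(Exception):
    pass

class MiniLexer:
    def __init__(self, toks):
        # tokens are stored reversed, so the head of the Q is tokens[-1]
        self.tokens = list(reversed(toks))

    def peek(self):
        if len(self.tokens) == 0:
            raise ScannerException("tokens[] queue is empty ")
        return self.tokens[-1]

    def dequeue(self):
        return self.tokens.pop()

    def match(self, kind):
        if self.peek()[1] != kind:
            raise ScannerException("cannot find token " + kind +
                                   " got " + str(self.peek()) + " instead!")
        return self.dequeue()

lx = MiniLexer([("x", "ID"), ("=", "EQUALS"), ("1", "NUMBER")])
print(lx.match("ID"))  # -> ('x', 'ID'), consumed
print(lx.peek())       # -> ('=', 'EQUALS'), still queued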
Example #3
    def get_lexeme(self, chunks, pos):
        if chunks is None:
            return None
        if (self.debug): print("chunks", chunks)
        if chunks == "பதிப்பி":
            tval = EzhilLexeme(chunks, EzhilToken.PRINT)
        elif chunks == "தேர்ந்தெடு":
            tval = EzhilLexeme(chunks, EzhilToken.SWITCH)
        elif chunks == "தேர்வு":
            tval = EzhilLexeme(chunks, EzhilToken.CASE)
        elif chunks == "ஏதேனில்":
            tval = EzhilLexeme(chunks, EzhilToken.OTHERWISE)
        elif chunks == "ஆனால்":
            tval = EzhilLexeme(chunks, EzhilToken.IF)
        elif chunks == "இல்லைஆனால்":
            tval = EzhilLexeme(chunks, EzhilToken.ELSEIF)
        elif chunks == "இல்லை":
            tval = EzhilLexeme(chunks, EzhilToken.ELSE)
        elif chunks == "ஆக":
            tval = EzhilLexeme(chunks, EzhilToken.FOR)
        elif chunks == "ஒவ்வொன்றாக":
            tval = EzhilLexeme(chunks, EzhilToken.FOREACH)
        elif chunks == "இல்":
            tval = EzhilLexeme(chunks, EzhilToken.COMMA)
        elif chunks == "வரை":
            tval = EzhilLexeme(chunks, EzhilToken.WHILE)
        elif chunks == "செய்":
            tval = EzhilLexeme(chunks, EzhilToken.DO)
        elif chunks == "முடியேனில்":
            tval = EzhilLexeme(chunks, EzhilToken.DOWHILE)
        elif chunks == "பின்கொடு":
            tval = EzhilLexeme(chunks, EzhilToken.RETURN)
        elif chunks == "முடி":
            tval = EzhilLexeme(chunks, EzhilToken.END)
        elif chunks == "நிரல்பாகம்":
            tval = EzhilLexeme(chunks, EzhilToken.DEF)
        elif chunks == "தொடர்":
            tval = EzhilLexeme(chunks, EzhilToken.CONTINUE)
        elif chunks == "நிறுத்து":
            tval = EzhilLexeme(chunks, EzhilToken.BREAK)
        elif chunks == "@":
            tval = EzhilLexeme(chunks, EzhilToken.ATRATEOF)
        elif chunks == "=":
            tval = EzhilLexeme(chunks, EzhilToken.EQUALS)
        elif chunks == "-":
            tval = EzhilLexeme(chunks, EzhilToken.MINUS)
        elif chunks == "+":
            tval = EzhilLexeme(chunks, EzhilToken.PLUS)
        elif chunks == ">":
            tval = EzhilLexeme(chunks, EzhilToken.GT)
        elif chunks == "<":
            tval = EzhilLexeme(chunks, EzhilToken.LT)
        elif chunks == ">=":
            tval = EzhilLexeme(chunks, EzhilToken.GTEQ)
        elif chunks == "<=":
            tval = EzhilLexeme(chunks, EzhilToken.LTEQ)
        elif chunks == "==":
            tval = EzhilLexeme(chunks, EzhilToken.EQUALITY)
        elif chunks == "!=":
            tval = EzhilLexeme(chunks, EzhilToken.NEQ)
        elif chunks == "*":
            tval = EzhilLexeme(chunks, EzhilToken.PROD)
        elif chunks == "/":
            tval = EzhilLexeme(chunks, EzhilToken.DIV)
        elif chunks == ",":
            tval = EzhilLexeme(chunks, EzhilToken.COMMA)
        elif chunks == "(":
            tval = EzhilLexeme(chunks, EzhilToken.LPAREN)
        elif chunks == ")":
            tval = EzhilLexeme(chunks, EzhilToken.RPAREN)
        elif chunks == "[":
            tval = EzhilLexeme(chunks, EzhilToken.LSQRBRACE)
        elif chunks == "]":
            tval = EzhilLexeme(chunks, EzhilToken.RSQRBRACE)
        elif chunks == "{":
            tval = Lexeme(chunks, Token.LCURLBRACE)
        elif chunks == "}":
            tval = Lexeme(chunks, Token.RCURLBRACE)
        elif chunks == ":":
            tval = Lexeme(chunks, Token.COLON)
        elif chunks == "%":
            tval = EzhilLexeme(chunks, EzhilToken.MOD)
        elif chunks == "^":
            tval = EzhilLexeme(chunks, EzhilToken.EXP)
        elif chunks == "&&":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_AND)
        elif chunks == "&":
            tval = Lexeme(chunks, EzhilToken.BITWISE_AND)
        elif chunks == "||":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_OR)
        elif chunks == "|":
            tval = Lexeme(chunks, EzhilToken.BITWISE_OR)
        elif chunks == "!":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_NOT)
        elif (chunks[0] == "\"" and chunks[-1] == "\""):
            tval = EzhilLexeme(chunks[1:-1], EzhilToken.STRING)
        elif isdigit(chunks[0]) or chunks[0] == '+' or chunks[0] == '-':
            # deduce a float or an integer
            if (chunks.find('.') >= 0 or chunks.find('e') >= 0
                    or chunks.find('E') >= 0):
                tval = EzhilLexeme(float(chunks), EzhilToken.NUMBER)
            else:
                tval = EzhilLexeme(int(chunks), EzhilToken.NUMBER)
        elif isalpha(chunks[0]) or has_tamil(chunks) or chunks[0] == '_':
            ## check for tamil/english/mixed identifiers, even those starting with a lead '_'
            tval = EzhilLexeme(chunks, EzhilToken.ID)
        else:
            raise ScannerException("Lexical error: " + str(chunks) +
                                   " at Line , Col " +
                                   str(self.get_line_col(pos)) + " in file " +
                                   self.fname)

        [l, c] = self.get_line_col(pos)
        tval.set_line_col([l, c])
        tval.set_file_name(self.fname)
        self.tokens.append(tval)

        if (self.debug): print("Lexer token = ", str(tval))

        return l
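
The keyword portion of this elif-chain is a fixed string-to-token mapping, so a dict lookup expresses it more compactly. A sketch (token names mirror EzhilToken, but this table is illustrative, not part of Ezhil itself):

KEYWORDS = {
    "பதிப்பி": "PRINT",
    "ஆனால்": "IF",
    "இல்லைஆனால்": "ELSEIF",
    "இல்லை": "ELSE",
    "வரை": "WHILE",
    "முடி": "END",
}

def classify(chunk):
    # one O(1) lookup instead of a long chain of == comparisons
    return KEYWORDS.get(chunk, "ID")

print(classify("ஆனால்"))  # -> IF
print(classify("foo"))     # -> ID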
Example #4
    def tokenize(self, data=None):
        """ do the hard work of tokenizing and
        put EzhilLexemes into the tokens[] Q """
        if (self.stdin_mode):
            if (self.debug): print(self.tokens)
            ## in stdin_mode, clean any EOF left over from a previous session out of the Q
            if (len(self.tokens) != 0):
                self.match(EzhilToken.EOF)
            if (len(self.tokens) != 0):
                raise ScannerException(
                    "Lexer: token Q has tokens from a previous session")
            self.tokens = list()
        else:
            data = "".join(self.File.readlines())
        if (self.debug): print(data)
        idx = 0
        tok_start_idx = 0

        while (idx < len(data)):
            c = data[idx]
            if (c == ' ' or c == '\t' or c == '\n'):
                if (c == '\n'):
                    ##actual col = idx - col_idx
                    self.update_line_col(idx)
                idx = idx + 1
            elif (c == '\r'):
                idx = idx + 1
                continue
            elif (c == '#'):
                ## skip single-line comments, like Python/Octave
                start = idx
                while (idx < len(data) and not (data[idx] in ['\r', '\n'])):
                    idx = idx + 1
                if (idx < len(data) and data[idx] == '\r'):
                    idx = idx + 1
                end = idx
                self.comments[self.line] = data[start:end]
            elif (isdigit(c)):
                num = c
                tok_start_idx = idx
                idx = idx + 1
                ## note: a leading '+' or '-' is scanned as an operator, so
                ## +.xyz / -.xyz are not number literals; write 0.xyz instead.
                in_sci_notation = False
                while ((idx < len(data))
                       and (isdigit(data[idx])
                            or data[idx] in ['+', '-', 'e', 'E', '.'])):
                    if (data[idx] in ['+', '-'] and not in_sci_notation):
                        break
                    elif (data[idx] in ['e', 'E']):
                        in_sci_notation = True
                    num = num + data[idx]
                    idx = idx + 1
                self.get_lexeme(num, tok_start_idx)
            elif (c == "\""):
                tok_start_idx = idx
                s = c
                idx = idx + 1
                while (idx < len(data) and (data[idx] != '\"')):
                    if (data[idx] == '\\'):
                        idx = idx + 1
                        if (data[idx] == 'n'):
                            s = s + '\n'
                        elif (data[idx] == 't'):
                            s = s + '\t'
                        else:
                            s = s + data[idx]
                    else:
                        s = s + data[idx]
                    idx = idx + 1
                if (idx < len(data)):
                    s = s + data[idx]  # consume the closing quote
                    idx = idx + 1
                self.get_lexeme(s, tok_start_idx)
            elif (istamil(c) or isalpha(c) or c == '_'):
                tok_start_idx = idx
                s = c
                idx = idx + 1
                while (
                    (idx < len(data)) and
                    (not data[idx] in EzhilToken.FORBIDDEN_FOR_IDENTIFIERS)):
                    s = s + data[idx]
                    idx = idx + 1
                self.get_lexeme(s, tok_start_idx)
            elif (c in self.unary_binary_ops):
                tok_start_idx = idx
                if (len(data) > (1 + idx)
                        and data[idx + 1] in ['=', '|', '&']):
                    c = c + data[idx + 1]
                    idx = idx + 1
                self.get_lexeme(c, tok_start_idx)
                idx = idx + 1
            elif c == ";":
                # treat as newline
                idx = idx + 1
                continue
            else:
                tok_start_idx = idx
                idx = idx + 1
                self.get_lexeme(c, tok_start_idx)

        tok_start_idx = idx
        ## close the file if not stdin_mode
        if (not self.stdin_mode): self.File.close()

        ## and manually add an EOF token.
        eof_tok = EzhilLexeme("", EzhilToken.EOF)
        eof_tok.set_line_col(self.get_line_col(tok_start_idx))
        self.tokens.append(eof_tok)
        if (self.debug):
            print("before reverse")
            self.dump_tokens()
        self.tokens.reverse()
        if (self.debug):
            print("after reverse")
            self.dump_tokens()
        return
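
The number branch above can be read in isolation. A standalone sketch of the same loop, showing how '+'/'-' are accepted only once scientific notation has started:

def scan_number(data, idx):
    num = data[idx]
    idx += 1
    in_sci_notation = False
    while idx < len(data) and (data[idx].isdigit()
                               or data[idx] in ['+', '-', 'e', 'E', '.']):
        if data[idx] in ['+', '-'] and not in_sci_notation:
            break  # a sign here begins an operator token, not an exponent
        elif data[idx] in ['e', 'E']:
            in_sci_notation = True
        num += data[idx]
        idx += 1
    return num, idx

print(scan_number("2.5e-3 * x", 0))  # -> ('2.5e-3', 6)
print(scan_number("10+20", 0))       # -> ('10', 2); '+' is left for the operator scan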
Example #5
def peek(self):
    """ return the Lexeme at the head of the Q without removing it """
    if len(self.tokens) == 0:
        raise ScannerException("tokens[] queue is empty ")
    return self.tokens[-1]
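
Note the design choice this depends on: tokenize() reverses the token list, so the logical head of the queue sits at the end and both peek and dequeue are O(1). In list form:

tokens = ["EOF", "NUMBER", "ID"]  # reversed: "ID" was scanned first
print(tokens[-1])    # peek    -> ID
print(tokens.pop())  # dequeue -> ID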
Example #6
    def tokenize(self, data=None):
        """ do the hard work of tokenizing and
        put Lexemes into the tokens[] Q """
        if ( self.stdin_mode ):
            if ( self.debug ): print(self.tokens)
            ## in stdin_mode, clean any EOF left over from a previous session out of the Q
            if ( len(self.tokens) != 0 ):
                self.match( Token.EOF )
            if ( len(self.tokens) != 0 ):
                raise ScannerException("Lexer: token Q has tokens from a previous session")
            self.tokens = list()
        else:
            data = "".join(self.File.readlines())
        
        idx = 0 
        tok_start_idx = 0

        while ( idx < len( data ) ):
            c = data[idx]
            
            if ( c == ' ' or c == '\t' or c == '\n' ):
                if ( c == '\n' ):
                    ##actual col = idx - col_idx
                    self.update_line_col(idx)
                idx = idx + 1
            elif ( c == '#' ):
                ## skip single-line comments, like Python/Octave
                while ( idx < len( data ) and data[idx] != '\n' ):
                    idx = idx + 1
            elif ( isdigit(c) or c == '+' or c == '-'  ):
                num = c
                tok_start_idx = idx
                idx = idx + 1
                ## FIXME: this prevents you from +.xyz, or -.xyz use 0.xyz 
                ## instead. also may throw an error if we exceed 
                ## buffer-length.                
                if ( c in ['+','-']  and ( idx < len( data ) ) 
                     and not isdigit(data[idx]) ):
                    self.get_lexeme( c , idx )
                    continue
                while ( ( idx < len( data) )
                            and ( isdigit(data[idx]) or data[idx] == '.') ):
                    num = num + data[idx]
                    idx = idx + 1
                self.get_lexeme( num , tok_start_idx  )
            elif ( c == "\"" ):
                tok_start_idx = idx 
                s = c
                idx = idx + 1
                while ( idx < len( data ) and
                         ( data[idx] != '\"' ) ):
                    s = s + data[idx]
                    if ( data[idx] == '\\' and (idx + 1) < len( data ) ):
                        idx = idx + 1
                        s = s + data[idx]  # keep the escaped character too
                    idx = idx + 1
                if ( idx < len( data ) ):
                    s = s + data[idx]  # consume the closing quote
                    idx = idx + 1
                self.get_lexeme( s , tok_start_idx )
            elif ( isalpha( c ) ):
                tok_start_idx = idx 
                s = c
                idx = idx + 1
                while ( ( idx < len( data ) )
                            and ( isalpha(data[idx]) or isdigit( data[idx] )
                                  or data[idx] in [ "\"", "_" ] ) ):
                    s = s + data[idx]
                    idx = idx + 1
                self.get_lexeme( s , tok_start_idx )
            elif ( c in self.unary_binary_ops ):
                tok_start_idx = idx 
                if ( len(data) > ( 1 + idx  ) 
                     and data[idx+1] in ['=','|','&']  ):
                    c = c +data[idx+1]
                    idx = idx + 1
                self.get_lexeme(  c , tok_start_idx )
                idx = idx + 1
            else:
                tok_start_idx = idx 
                idx = idx + 1
                self.get_lexeme( c , tok_start_idx )
        
        tok_start_idx = idx 

        ## close the file if not stdin_mode
        if ( not self.stdin_mode ): self.File.close()

        ## and manually add an EOF token.
        eof_tok = Lexeme("", Token.EOF)
        eof_tok.set_line_col( self.get_line_col( tok_start_idx ) )
        self.tokens.append( eof_tok )

        self.tokens.reverse()
        return 
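
Unlike the Ezhil tokenize() in Example #4, this older scanner keeps escape sequences verbatim rather than decoding them. A standalone sketch of the raw pass-through (it assumes the same one-character escape convention):

def scan_string_raw(data, idx):
    s = data[idx]  # opening quote
    idx += 1
    while idx < len(data) and data[idx] != '"':
        s += data[idx]
        if data[idx] == '\\' and idx + 1 < len(data):
            idx += 1
            s += data[idx]  # keep the escaped character as well
        idx += 1
    if idx < len(data):
        s += data[idx]  # closing quote
        idx += 1
    return s, idx

print(scan_string_raw(r'"a\"b" + x', 0))  # -> ('"a\\"b"', 6)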
Example #7
    def get_lexeme(self, chunks, pos):
        if chunks is None:
            return None

        if chunks == "print":
            tval = Lexeme(chunks, Token.PRINT)
        elif chunks == "if":
            tval = Lexeme(chunks, Token.IF)
        elif chunks == "elseif":
            tval = Lexeme(chunks, Token.ELSEIF)
        elif chunks == "else":
            tval = Lexeme(chunks, Token.ELSE)
        elif chunks == "for":
            tval = Lexeme(chunks, Token.FOR)
        elif chunks == "while":
            tval = Lexeme(chunks, Token.WHILE)
        elif chunks == "do":
            tval = Lexeme(chunks, Token.DO)
        elif chunks == "return":
            tval = Lexeme(chunks, Token.RETURN)
        elif chunks == "end":
            tval = Lexeme(chunks, Token.END)
        elif chunks == "def":
            tval = Lexeme(chunks, Token.DEF)
        elif chunks == "continue":
            tval = Lexeme(chunks, Token.CONTINUE)
        elif chunks == "break":
            tval = Lexeme(chunks, Token.BREAK)
        elif chunks == "=":
            tval = Lexeme(chunks, Token.EQUALS)
        elif chunks == "-":
            tval = Lexeme(chunks, Token.MINUS)
        elif chunks == "+":
            tval = Lexeme(chunks, Token.PLUS)
        elif chunks == ">":
            tval = Lexeme(chunks, Token.GT)
        elif chunks == "<":
            tval = Lexeme(chunks, Token.LT)
        elif chunks == ">=":
            tval = Lexeme(chunks, Token.GTEQ)
        elif chunks == "<=":
            tval = Lexeme(chunks, Token.LTEQ)
        elif chunks == "==":
            tval = Lexeme(chunks, Token.EQUALITY)
        elif chunks == "!=":
            tval = Lexeme(chunks, Token.NEQ)
        elif chunks == "*":
            tval = Lexeme(chunks, Token.PROD)
        elif chunks == "/":
            tval = Lexeme(chunks, Token.DIV)
        elif chunks == ",":
            tval = Lexeme(chunks, Token.COMMA)
        elif chunks == "(":
            tval = Lexeme(chunks, Token.LPAREN)
        elif chunks == ")":
            tval = Lexeme(chunks, Token.RPAREN)
        elif chunks == "[":
            tval = Lexeme(chunks, Token.LSQRBRACE)
        elif chunks == "]":
            tval = Lexeme(chunks, Token.RSQRBRACE)
        elif chunks == "{":
            tval = Lexeme(chunks, Token.LCURLBRACE)
        elif chunks == "}":
            tval = Lexeme(chunks, Token.RCURLBRACE)
        elif chunks == ":":
            tval = Lexeme(chunks, Token.COLON)
        elif chunks == "%":
            tval = Lexeme(chunks, Token.MOD)
        elif chunks == "^":
            tval = Lexeme(chunks, Token.EXP)
        elif chunks == "&&":
            tval = Lexeme(chunks, Token.LOGICAL_AND)
        elif chunks == "&":
            tval = Lexeme(chunks, Token.BITWISE_AND)
        elif chunks == "||":
            tval = Lexeme(chunks, Token.LOGICAL_OR)
        elif chunks == "|":
            tval = Lexeme(chunks, Token.BITWISE_OR)
        elif (chunks[0] == "\"" and chunks[-1] == "\""):
            tval = Lexeme(chunks[1:-1], Token.STRING)
        elif isdigit(chunks[0]) or chunks[0] == '+' or chunks[0] == '-':
            # deduce a float or an integer
            if (chunks.find('.') >= 0 or chunks.find('e') >= 0
                    or chunks.find('E') >= 0):
                tval = Lexeme(float(chunks), Token.NUMBER)
            else:
                tval = Lexeme(int(chunks), Token.NUMBER)
        elif isalpha(chunks[0]):
            tval = Lexeme(chunks, Token.ID)
        else:
            raise ScannerException("Lexical error: " + str(chunks) +
                                   " at Line, Col " +
                                   str(self.get_line_col(pos)) +
                                   " in file " + self.fname)

        [l, c] = self.get_line_col(pos)
        tval.set_line_col([l, c])
        tval.set_file_name(self.fname)
        self.tokens.append(tval)
        return l
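
The float-vs-int deduction at the bottom of get_lexeme() is worth isolating. A minimal sketch of the same rule:

def deduce_number(chunk):
    # any '.', 'e' or 'E' makes it a float; otherwise an int
    if any(ch in chunk for ch in ".eE"):
        return float(chunk)
    return int(chunk)

print(deduce_number("42"))      # -> 42
print(deduce_number("1.5e-3"))  # -> 0.0015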
Example #8
    def get_lexeme(self, chunks, pos):
        if (self.debug):
            print(u"get_lexeme", chunks, pos)

        if chunks is None:
            return None

        if chunks == u"பதிப்பி":
            tval = EzhilLexeme(chunks, EzhilToken.PRINT)
        elif chunks == u"தேர்ந்தெடு":
            tval = EzhilLexeme(chunks, EzhilToken.SWITCH)
        elif chunks == u"தேர்வு":
            tval = EzhilLexeme(chunks, EzhilToken.CASE)
        elif chunks == u"ஏதேனில்":
            tval = EzhilLexeme(chunks, EzhilToken.OTHERWISE)
        elif chunks == u"ஆனால்":
            tval = EzhilLexeme(chunks, EzhilToken.IF)
        elif chunks == u"இல்லைஆனால்":
            tval = EzhilLexeme(chunks, EzhilToken.ELSEIF)
        elif chunks == u"இல்லை":
            tval = EzhilLexeme(chunks, EzhilToken.ELSE)
        elif chunks == u"ஆக":
            tval = EzhilLexeme(chunks, EzhilToken.FOR)
        elif chunks == u"ஒவ்வொன்றாக":
            tval = EzhilLexeme(chunks, EzhilToken.FOREACH)
        elif chunks == u"இல்":
            tval = EzhilLexeme(chunks, EzhilToken.COMMA)
        elif chunks == u"வரை":
            tval = EzhilLexeme(chunks, EzhilToken.WHILE)
        elif chunks == u"செய்":
            tval = EzhilLexeme(chunks, EzhilToken.DO)
        elif chunks == u"முடியேனில்":
            tval = EzhilLexeme(chunks, EzhilToken.DOWHILE)
        elif chunks == u"பின்கொடு":
            tval = EzhilLexeme(chunks, EzhilToken.RETURN)
        elif chunks == u"முடி":
            tval = EzhilLexeme(chunks, EzhilToken.END)
        elif chunks == u"நிரல்பாகம்":
            tval = EzhilLexeme(chunks, EzhilToken.DEF)
        elif chunks == u"தொடர்":
            tval = EzhilLexeme(chunks, EzhilToken.CONTINUE)
        elif chunks == u"நிறுத்து":
            tval = EzhilLexeme(chunks, EzhilToken.BREAK)
        elif chunks == u"@":
            tval = EzhilLexeme(chunks, EzhilToken.ATRATEOF)
        elif chunks == u"=":
            tval = EzhilLexeme(chunks, EzhilToken.EQUALS)
        elif chunks == u"-":
            tval = EzhilLexeme(chunks, EzhilToken.MINUS)
        elif chunks == u"+":
            tval = EzhilLexeme(chunks, EzhilToken.PLUS)
        elif chunks == u">":
            tval = EzhilLexeme(chunks, EzhilToken.GT)
        elif chunks == u"<":
            tval = EzhilLexeme(chunks, EzhilToken.LT)
        elif chunks == u">=":
            tval = EzhilLexeme(chunks, EzhilToken.GTEQ)
        elif chunks == u"<=":
            tval = EzhilLexeme(chunks, EzhilToken.LTEQ)
        elif chunks == u"==":
            tval = EzhilLexeme(chunks, EzhilToken.EQUALITY)
        elif chunks == u"!=":
            tval = EzhilLexeme(chunks, EzhilToken.NEQ)
        elif chunks == u"*":
            tval = EzhilLexeme(chunks, EzhilToken.PROD)
        elif chunks == u"/":
            tval = EzhilLexeme(chunks, EzhilToken.DIV)
        elif chunks == u",":
            tval = EzhilLexeme(chunks, EzhilToken.COMMA)
        elif chunks == u"(":
            tval = EzhilLexeme(chunks, EzhilToken.LPAREN)
        elif chunks == u")":
            tval = EzhilLexeme(chunks, EzhilToken.RPAREN)
        elif chunks == u"[":
            tval = EzhilLexeme(chunks, EzhilToken.LSQRBRACE)
        elif chunks == u"]":
            tval = EzhilLexeme(chunks, EzhilToken.RSQRBRACE)
        elif chunks == u"{":
            tval = Lexeme(chunks, Token.LCURLBRACE)
        elif chunks == u"}":
            tval = Lexeme(chunks, Token.RCURLBRACE)
        elif chunks == u":":
            tval = Lexeme(chunks, Token.COLON)
        elif chunks == u"%":
            tval = EzhilLexeme(chunks, EzhilToken.MOD)
        elif chunks == u"^":
            tval = EzhilLexeme(chunks, EzhilToken.EXP)
        elif chunks == u"&&":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_AND)
        elif chunks == u"&":
            tval = Lexeme(chunks, EzhilToken.BITWISE_AND)
        elif chunks == u"||":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_OR)
        elif chunks == u"|":
            tval = Lexeme(chunks, EzhilToken.BITWISE_OR)
        elif chunks == u"!":
            tval = Lexeme(chunks, EzhilToken.LOGICAL_NOT)
        elif (chunks[0] == u"\"" and chunks[-1] == u"\""):
            tval = EzhilLexeme(chunks[1:-1], EzhilToken.STRING)
        elif chunks[0].isdigit() or chunks[0] == '+' or chunks[0] == '-':
            # deduce a float or an integer
            if (chunks.find(u'.') >= 0 or chunks.find(u'e') >= 0
                    or chunks.find(u'E') >= 0):
                tval = EzhilLexeme(float(chunks), EzhilToken.NUMBER)
            else:
                tval = EzhilLexeme(int(chunks), EzhilToken.NUMBER)
        else:
            ## check for tamil/english/mixed identifiers, even those starting with a lead '_'
            match_obj = re.match(EzhilToken.RE_ALPHA_NUMERIC_, chunks)
            if match_obj:
                if len(match_obj.group(0)) != len(chunks):
                    raise ScannerException(
                        u"Lexical error: Invalid identifier name '" +
                        unicode(chunks) + u"' at Line , Col " +
                        unicode(self.get_line_col(pos)) + u" in file " +
                        self.fname)
                tval = EzhilLexeme(chunks, EzhilToken.ID)
            else:
                raise ScannerException(u"Lexical error: " + unicode(chunks) +
                                       u" at Line , Col " +
                                       unicode(self.get_line_col(pos)) +
                                       u" in file " + self.fname)

        [l, c] = self.get_line_col(pos)
        tval.set_line_col([l, c])
        tval.set_file_name(self.fname)
        self.tokens.append(tval)

        if (self.debug): print(u"Lexer token = ", tval)
        return l
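
The identifier branch insists that the regular expression match the whole chunk, not just a prefix. A sketch of that check; the real pattern lives in EzhilToken.RE_ALPHA_NUMERIC_, so this ASCII-only stand-in is an assumption:

import re

RE_ALPHA_NUMERIC_ = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

def is_valid_identifier(chunk):
    match_obj = RE_ALPHA_NUMERIC_.match(chunk)
    # a prefix-only match (e.g. "foo" inside "foo$bar") must be rejected
    return bool(match_obj) and len(match_obj.group(0)) == len(chunk)

print(is_valid_identifier("foo_1"))    # -> True
print(is_valid_identifier("foo$bar"))  # -> False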
Example #9
    def tokenize(self, data=None):
        """ do hard-work of tokenizing and
        put EzhilLexemes into the tokens[] Q """
        if (self.debug): print(u"Start of Ezhil lexer - begin tokenize")
        if (self.stdin_mode):
            if (self.debug): print(self.tokens)
            ## in stdin_mode, clean any EOF left over from a previous session out of the Q
            if (len(self.tokens) != 0):
                self.match(EzhilToken.EOF)
            if (len(self.tokens) != 0):
                raise ScannerException(
                    "Lexer: token Q has tokens from a previous session")
            self.tokens = list()
        else:
            if hasattr(self.File, 'data'):
                data = self.File.data
            else:
                data = u"".join(self.File.readlines())
        if (self.debug): print(data)
        idx = 0
        tok_start_idx = 0

        while (idx < len(data)):
            c = data[idx]
            if (self.debug): print(idx, c)
            if (istamil(c) or c.isalpha() or c == u'_'):
                tok_start_idx = idx
                s = c
                idx = idx + 1
                while ((idx < len(data))
                       and self.is_allowed_for_identifier(data[idx])):
                    s = s + data[idx]
                    idx = idx + 1
                if idx < len(data) and not data[idx].isspace():
                    if data[idx] in ['#', '$', '@', '\'', '"']:
                        raise ScannerException(
                            "Lexer: token %s is not valid for identifier, with prefix %s"
                            % (data[idx], s))
                self.get_lexeme(s, tok_start_idx)
            elif (c.isspace()):  # or c in u' 'or c == u'\t' or c == u'\n'
                if (c == u'\n'):
                    ##actual col = idx - col_idx
                    self.update_line_col(idx)
                idx = idx + 1
            elif (c == u'\r'):
                idx = idx + 1
                continue
            elif (c == u'#'):
                ## skip single-line comments, like Python/Octave
                start = idx
                while (idx < len(data) and not (data[idx] in [u'\r', u'\n'])):
                    idx = idx + 1
                if (idx < len(data) and data[idx] == u'\r'):
                    idx = idx + 1
                end = idx
                self.comments[self.line] = data[start:end]
            elif (c.isdigit()):
                num = c
                tok_start_idx = idx
                idx = idx + 1
                ## note: a leading '+' or '-' is scanned as an operator, so
                ## +.xyz / -.xyz are not number literals; write 0.xyz instead.
                in_sci_notation = False
                while ((idx < len(data))
                       and (data[idx].isdigit()
                            or data[idx] in [u'+', u'-', u'e', u'E', u'.'])):
                    if (data[idx] in [u'+', u'-'] and not in_sci_notation):
                        break
                    elif (data[idx] in [u'e', u'E']):
                        in_sci_notation = True
                    num = num + data[idx]
                    idx = idx + 1
                self.get_lexeme(num, tok_start_idx)
            elif (c == u"\""):
                tok_start_idx = idx
                s = c
                idx = idx + 1
                while (idx < len(data) and (data[idx] != u'\"')):
                    if (data[idx] == u'\\'):
                        idx = idx + 1
                        if (data[idx] == u'n'):
                            s = s + u'\n'
                        elif (data[idx] == u't'):
                            s = s + u'\t'
                        else:
                            s = s + data[idx]
                    else:
                        s = s + data[idx]
                    idx = idx + 1
                if (idx < len(data)):
                    s = s + data[idx]  # consume the closing quote
                    idx = idx + 1
                self.get_lexeme(s, tok_start_idx)
            elif (c in self.unary_binary_ops):
                tok_start_idx = idx
                if (len(data) > (1 + idx)
                        and data[idx + 1] in [u'=', u'|', u'&']):
                    c = c + data[idx + 1]
                    idx = idx + 1
                self.get_lexeme(c, tok_start_idx)
                idx = idx + 1
            elif c == u";":
                # treat as newline
                idx = idx + 1
                continue
            else:
                tok_start_idx = idx
                idx = idx + 1
                self.get_lexeme(c, tok_start_idx)

        tok_start_idx = idx
        ## close the file if not stdin_mode
        if (not self.stdin_mode): self.File.close()

        ## and manually add an EOF token.
        eof_tok = EzhilLexeme("", EzhilToken.EOF)
        eof_tok.set_line_col(self.get_line_col(tok_start_idx))
        self.tokens.append(eof_tok)
        if (self.debug):
            print(u"before reverse")
            self.dump_tokens()
        self.tokens.reverse()
        if (self.debug):
            print(u"after reverse")
            self.dump_tokens()
        return
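
Finally, the string branch of this tokenize() decodes escapes as it scans: '\n' and '\t' are translated, and anything else after a backslash is kept as-is. A standalone sketch of just that decoding step:

def decode_escapes(body):
    out, i = "", 0
    while i < len(body):
        if body[i] == '\\' and i + 1 < len(body):
            i += 1
            out += {'n': '\n', 't': '\t'}.get(body[i], body[i])
        else:
            out += body[i]
        i += 1
    return out

print(repr(decode_escapes(r"a\tb\n")))  # -> 'a\tb\n'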