Example #1
    def parse(self, tokens):
        from pas_parser_utils import parse_statement
        logger.debug("Parsing case statement")
        tokens.match_token(TokenKind.Identifier, 'case')
        self._expression = PascalExpression(self._block)
        self._expression.parse(tokens)
        # expression will consume the 'of' keyword
        while not (tokens.match_lookahead(TokenKind.Identifier, 'end')
                   or tokens.match_lookahead(TokenKind.Identifier, 'else')
                   or tokens.match_lookahead(TokenKind.Identifier, 'otherwise')):
            constant = None
            if tokens.match_lookahead(TokenKind.Number):
                constant = tokens.match_token(TokenKind.Number).value
            elif tokens.match_lookahead(TokenKind.String):
                constant = tokens.match_token(TokenKind.String).value
                if len(constant) > 1:
                    # a string case label must be a single (ordinal) character
                    raise_error(("Constant expected: %s" % tokens.next_token()), '', is_critical=False)
            elif tokens.match_lookahead(TokenKind.Identifier):
                constant = self._block.resolve_variable(tokens.match_token(TokenKind.Identifier).value)
            else:
                raise_error(("CaseStatement: ordinal type expected: %s" % tokens.next_token()), '', is_critical=False)

            tokens.match_token(TokenKind.Symbol, ':')

            statement = parse_statement(tokens, self._block)
            tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)
            self._case.append((constant, statement))
        if (tokens.match_lookahead(TokenKind.Identifier, 'else', consume=True)
                or tokens.match_lookahead(TokenKind.Identifier, 'otherwise', consume=True)):
            while not tokens.match_lookahead(TokenKind.Identifier, 'end', consume=True):
                self._else_statements.append(parse_statement(tokens, self._block))
        else:
            tokens.match_token(TokenKind.Identifier, 'end')
        tokens.match_token(TokenKind.Symbol, ';')

        logger.debug("Finished parsing case statement")
Example #2
    def parse(self, tokens, do_resolve=True):
        """
        parses a function call
        """
        from pas_arguments import PascalArguments

        self._identifier = tokens.match_token(TokenKind.Identifier).value
        logger.debug('Processing function call %s', self._identifier)

        self._arguments = PascalArguments(self._block)
        self._arguments.parse(tokens)
        if not self._inExpr:
            tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)
        if do_resolve:
            self._points_to = self._block.resolve_function_call(self)
            if self._points_to is self:
                # recursive call...
                # need to get the function I'm being called from...
                self._points_to = self._block._functions[self.name]
            elif self._points_to is None:
                raise_error(
                    ("Unable to resolve function call:  " + self.identifier),
                    '',
                    is_critical=False)

        logger.debug('Ended function call %s', self._identifier)
Example #3
    def parse(self, tokens):
        from pas_parser_utils import parse_statement
        logger.debug("Parsing while statement")
        tokens.match_token(TokenKind.Identifier, 'while')
        self._expression = PascalExpression(self._block)
        self._expression.parse(tokens)
        # expression will consume the 'do' keyword
        self._statement = parse_statement(tokens, self._block)
        logger.debug("Finished parsing while statement")
Example #4
    def parse(self, tokens):
        from pas_parser_utils import parse_statement
        logger.debug("Parsing repeat statement")
        tokens.match_token(TokenKind.Identifier, 'repeat')
        while not tokens.match_lookahead(TokenKind.Identifier, 'until'):
            self._statements.append(parse_statement(tokens, self._block))
        tokens.match_token(TokenKind.Identifier, 'until')
        self._expression = PascalExpression(self._block)
        self._expression.parse(tokens)
        # expression will consume the terminating ';' delimiter
        logger.debug("Finished parsing repeat statement")
Example #5
    def next_token(self):
        # return the next non-comment token, draining the lookahead buffer first
        current_token = None
        while current_token is None or current_token._kind == TokenKind.Comment:
            if len(self._lookahead_toks) > 0:
                current_token = self._lookahead_toks[0]
                self._lookahead_toks = self._lookahead_toks[1:]
            else:
                current_token = self._tokeniser.next_token()
            if current_token._kind == TokenKind.Comment:
                logger.debug('TokenStream   : Skipping comment: %s', current_token._value)
        return current_token
Example #6
    def lookahead(self, count=1):
        """
        Returns a list of at least `count` upcoming tokens without
        consuming them. Comments are skipped.
        """
        logger.debug('TokenStream    : Looking ahead %d', count)
        while len(self._lookahead_toks) < count:
            current_token = self._tokeniser.next_token()
            while current_token._kind == TokenKind.Comment:
                current_token = self._tokeniser.next_token()
            self._lookahead_toks.append(current_token)
        return self._lookahead_toks
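Taken together, `next_token` and `lookahead` implement a classic buffered stream: `lookahead` fills a queue from the tokeniser without consuming, and `next_token` drains that queue before pulling fresh tokens. A self-contained sketch of the same pattern, with plain strings standing in for Token objects (names here are illustrative, not the project's API):

class BufferedStream:
    # Minimal lookahead buffer over any iterator, mirroring the
    # TokenStream pattern above.
    def __init__(self, source):
        self._source = iter(source)
        self._buffer = []

    def lookahead(self, count=1):
        # Fill the buffer until it holds `count` items, without consuming them.
        while len(self._buffer) < count:
            self._buffer.append(next(self._source))
        return self._buffer[:count]

    def next_item(self):
        # Drain the buffer first, then fall back to the underlying source.
        return self._buffer.pop(0) if self._buffer else next(self._source)

stream = BufferedStream(['begin', 'x', ':=', '1', 'end'])
assert stream.lookahead(2) == ['begin', 'x']   # nothing consumed
assert stream.next_item() == 'begin'           # buffer drained first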
Example #9
def find_or_add_type(name):
    if name is None:
        return None
    from types.pas_type import PascalType

    name_key = name.lower()    # Pascal identifiers are case-insensitive

    if name_key in _loaded_types:
        return _loaded_types[name_key]
    else:
        logger.debug('Cache     : Created Type %s', name)
        result = PascalType(name)
        _loaded_types[name_key] = result
        return result
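The function is a plain intern table: a case-insensitive key, create-on-miss, so repeated lookups of the same type name always yield the same object. The same idea in a self-contained sketch (local names stand in for `_loaded_types` and `PascalType`):

_cache = {}

class NamedType:
    # Stand-in for PascalType; only the name matters for this sketch.
    def __init__(self, name):
        self.name = name

def find_or_add(name):
    if name is None:
        return None
    key = name.lower()    # Pascal identifiers are case-insensitive
    if key not in _cache:
        _cache[key] = NamedType(name)
    return _cache[key]

assert find_or_add('Integer') is find_or_add('INTEGER')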
Example #10
    def parse(self, tokens):
        from pas_parser_utils import parse_statement
        logger.debug("Parsing if statement")
        tokens.match_token(TokenKind.Identifier, 'if')
        self._expression = PascalExpression(self._block)
        self._expression.parse(tokens)
        # expression will consume the 'then' delimiter
        self._statement = parse_statement(tokens, self._block)
        if tokens.match_lookahead(TokenKind.Identifier, 'else', consume=True):
            self._else_statement = parse_statement(tokens, self._block)
        tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)

        logger.debug("Finished parsing if statement")
Example #11
    def match_lookahead(self, token_kind, token_value=None, consume=False):
        """
        Looks forward one position for a token of the given kind (and value,
        if one is supplied). If the token is found it returns True and
        optionally consumes it; otherwise it returns False without
        consuming anything.
        """
        logger.debug('TokenStream   : Looking to find %s (%s)%s', token_kind,
                     token_value if token_value is not None else 'any',
                     ' will consume' if consume else '')
        token = self.lookahead(1)[0]
        result = token._kind == token_kind and (token_value is None or token_value == token._value.lower())
        if consume and result:
            self.match_token(token_kind, token_value)
        return result
Example #14
    def parse(self, tokens):
        from pascal_parser.pas_parser_utils import parse_type
        from pascal_parser.pas_var import PascalVariable
        from pascal_parser.pas_param_declaration import PascalParameterDeclaration
        logger.debug("Parsing operator overload declaration")
        tokens.match_token(TokenKind.Identifier, 'operator')
        self._operator = tokens.match_token(TokenKind.Operator).value
        self._parameters = PascalParameterDeclaration(self._block)
        self._parameters.parse(tokens, self)

        name = tokens.match_token(TokenKind.Identifier)
        tokens.match_token(TokenKind.Symbol, ':')
        result_type = parse_type(tokens, self._block)   # avoid shadowing the built-in 'type'

        self._result = PascalVariable(name, result_type)
        tokens.match_token(TokenKind.Symbol, ';')
        logger.debug("Finished parsing operator declaration")
Example #16
    def tokenise(self, filename):
        '''Initialises the tokeniser with characters loaded from the specified
        filename. Call `next_token` to process each token.
        '''

        if isinstance(filename, list):
            logger.debug('Tokenising list')
            self.pas_lines = filename
        else:
            logger.debug('Tokenising %s', filename)
            self._filename = filename
            with open(filename) as f:
                self.pas_lines = f.readlines()

        self._char_no = -1
        self._line_no = 0  # starts at first line
        self._token_val = 'none'
Example #18
    def match_token(self, token_kind, token_value=None):
        """
        Looks at the next token, and if it is the same kind as 'token_kind' and
        has the same value as 'token_value' then it is returned. Otherwise an
        error occurs and the program stops.

        If 'token_value' is None then only the 'token_kind' is checked.
        """
        tok = self.next_token()

        if tok._kind != token_kind or (token_value is not None and token_value != tok._value.lower()):
            logger.error('TokenStream        %s: found a %s (%s) expected %s (%s)',
                         self._tokeniser.line_details(),
                         tok._kind, tok._value, token_kind, token_value)
            assert False

        logger.debug('TokenStream    : Matched token %s (%s)', tok._kind, tok._value)
        return tok
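`match_token` and `match_lookahead` form the classic expect/accept pair of recursive-descent parsing: accept tests (and optionally consumes) a token, expect demands it and stops on failure. A self-contained sketch of the pattern over plain strings (illustrative, not the project's classes):

class MiniStream:
    def __init__(self, toks):
        self._toks = list(toks)

    def accept(self, want):
        # Like match_lookahead(..., consume=True): consume only on a match.
        if self._toks and self._toks[0] == want:
            self._toks.pop(0)
            return True
        return False

    def expect(self, want):
        # Like match_token: a mismatch is a hard failure.
        if not self.accept(want):
            raise SyntaxError('expected %r, found %r' % (want, self._toks[:1]))

s = MiniStream(['if', 'x', 'then'])
s.expect('if')
assert not s.accept('then')    # next token is 'x'; nothing consumed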
Example #19
    def match_one_lookahead(self, token_lst, consume=False):
        """
        Tries to match the next token against a list of (kind, value) pairs.
        Returns True if a match is found, and False if there was no match.
        """
        tok = self.lookahead()[0]

        logger.debug('TokenStream   : Looking to find %s %s', token_lst,
                     ' will consume' if consume else '')

        for token_kind, token_value in token_lst:
            if tok._kind == token_kind and (token_value is None or token_value == tok._value):
                logger.debug('TokenStream    : Found %s (%s)', tok._kind, tok._value)
                if consume:
                    self.match_token(tok._kind, tok._value)
                return True

        return False
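In effect this is a first-set test: does the next token begin any of several alternatives? The matching rule, with None as a wildcard value, reduces to a sketch like this (tuples stand in for Token objects):

def matches_one(tok, alternatives):
    # tok is a (kind, value) pair; None in an alternative means 'any value'.
    kind, value = tok
    return any(kind == k and (v is None or v == value)
               for k, v in alternatives)

assert matches_one(('Symbol', ';'), [('Symbol', None), ('Identifier', 'end')])
assert not matches_one(('Identifier', 'begin'), [('Identifier', 'end')])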
Example #21
    def parse(self, tokens):
        from pas_parser_utils import parse_statement
        logger.debug("Parsing compound statement")
        if tokens.match_lookahead(TokenKind.Identifier, 'begin'):
            tokens.match_token(TokenKind.Identifier, 'begin')
        while True:
            # compound statement currently consumes the 'end' keyword, but not the symbol ';' or '.'
            if tokens.match_lookahead(TokenKind.Identifier, 'end'):
                tokens.match_token(TokenKind.Identifier, 'end')  # consume end token
                break
            elif tokens.match_lookahead(TokenKind.Symbol, ';') or tokens.match_lookahead(TokenKind.Symbol, '.'):
                # consumed end already -> somehow?
                tokens.match_token(TokenKind.Symbol)
                break
            elif tokens.match_lookahead(TokenKind.Identifier, 'until'):
                # repeat..until needs to see the 'until' token so that it knows it's at the end
                break

            else:
                self._statements.append(parse_statement(tokens, self._block))

        logger.debug("Finished parsing compound statement")
Example #25
    def parse(self, tokens):
        """
        this method parses the expression
        """
        from tokeniser.pas_token_kind import TokenKind
        logger.debug('Parsing %s expression' % ('inner' if self._innerExpr else ''))
        newContent = None
        while True:
            # expression over?
            if (tokens.match_lookahead(TokenKind.Symbol, ';')
                    or tokens.match_lookahead(TokenKind.Symbol, ',')
                    or tokens.match_lookahead(TokenKind.Identifier, 'then')
                    or tokens.match_lookahead(TokenKind.Identifier, 'do')
                    or tokens.match_lookahead(TokenKind.Identifier, 'of')
                    or tokens.match_lookahead(TokenKind.Symbol, ']')):
                if self._innerExpr:
                    raise_error(('Inner expression terminator expected, found %s' % tokens.next_token()), '', is_critical=False)
                tokens.next_token()  # consume the delimiter
                logger.debug('Expression ended')
                break
            # Inner expression ended
            elif tokens.match_lookahead(TokenKind.Symbol, ')'):
                if self._innerExpr:
                    tokens.match_token(TokenKind.Symbol, ')')  # consume the delimiter
                    logger.debug('Inner expression ended')
                break
            # starts with an Identifier
            elif tokens.match_lookahead(TokenKind.Identifier):
                # Function Call
                if tokens.lookahead(2)[1].value == '(':
                    newContent = PascalFunctionCall(self._block, inExpr=True)
                    newContent.parse(tokens)
                # Variable
                else:
                    newContent = PascalVariableReference(self._block)
                    newContent.parse(tokens)
            # Number
            elif tokens.match_lookahead(TokenKind.Number):
                newContent = PascalNumber(tokens.next_token().value)
            # String
            elif tokens.match_lookahead(TokenKind.String):
                newContent = PascalString(tokens.next_token().value)
            # Operator
            elif tokens.match_lookahead(TokenKind.Operator):
                newContent = PascalOperator(tokens.match_token(TokenKind.Operator).value)
            # inner expression
            elif tokens.match_lookahead(TokenKind.Symbol, '('):
                tokens.next_token()  # consume the delimiter
                newContent = PascalExpression(self._block, innerExpr=True)
                newContent.parse(tokens)
            # Boolean
            elif tokens.match_lookahead(TokenKind.Boolean):
                newContent = PascalBool(tokens.match_token(TokenKind.Boolean).value)

            else:
                raise_error(('Unknown expression token... %s' % str(tokens.next_token().value)), '', is_critical=False)

            self._contents.append(newContent)
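The result in `self._contents` is a flat list of operands and operators in source order, with each parenthesised group parsed recursively as a nested PascalExpression. For `1 + (2 * x)` the shape is roughly as below (plain lists stand in for the content classes):

# Illustrative shape only; the real elements are PascalNumber,
# PascalOperator, PascalVariableReference and nested PascalExpression objects.
contents = ['1', '+', ['2', '*', 'x']]    # inner list = inner expression

def to_source(expr):
    # Rebuild readable source text from the nested flat-list shape.
    return ' '.join('(%s)' % to_source(e) if isinstance(e, list) else e
                    for e in expr)

assert to_source(contents) == '1 + (2 * x)'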
Example #26
    def next_token(self):
        '''
        Find and return the next token     
        '''
        def num_match(cha, tmp):
            '''Checks for a number in format ##, ##.#. Returns False when at the 
            end of a number.'''
            if cha in '1234567890':
                return True
            elif cha == '.' and '.' not in tmp:
                return self._peek(1) in '1234567890'
            else:
                return False

        while (True):
            t = self._next_char()
            self._token_start = self._char_no

            kind = None
            value = ''

            # Ignore white space characters
            if t == ' ' or t == '\t':  #ignore white space
                pass
            # Move to next line (if at end of line)
            elif t == '\n':
                self._advance_line()
            # Numbers (int or float style format)
            elif t in '1234567890' or (t in '-+' and self._peek(1)
                                       in '1234567890'):  #is digit or +/-
                kind = TokenKind.Number
                char_number = self._char_no
                value = self._read_matching(t, num_match)
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            # Comment, single line // or meta comment line ///
            elif t == '/' and self._peek(1) == '/':  #start of comment
                if self._match_and_read('/'):
                    if self._match_and_read('/'):
                        self._meta_comment_start = self._char_no
                        self._meta_comment_line = self._line_no
                        kind = TokenKind.MetaComment
                        char_number = self._char_no
                        value = self.read_to_end_of_comment()
                    else:
                        kind = TokenKind.Comment
                        char_number = self._char_no
                        value = self.read_to_eol()
                        logger.debug('Tokeniser      : read %s (%s)', kind,
                                     value)
                    return Token(kind, value, self._line_no + 1,
                                 char_number + 1)
                else:
                    logger.error("Unexpected error: " + self.line_details)
            # Attribute identified by an @ symbol then a name
            elif t == '@':
                kind = TokenKind.Attribute
                char_number = self._char_no
                value = self._read_matching(
                    '', lambda cha, tmp: cha.isalnum() or cha == '_')
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            # Identifier (id) of alphanumeric characters, including underscores
            elif t.isalpha():
                char_number = self._char_no
                value = self._read_matching(
                    t, lambda cha, tmp: cha.isalnum() or cha == '_')
                if value.lower() in ['true', 'false']:
                    kind = TokenKind.Boolean
                elif value.lower() in [
                        'or', 'and', 'not', 'xor', 'mod', 'div', 'in'
                ]:
                    kind = TokenKind.Operator
                else:
                    kind = TokenKind.Identifier
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            #Bound Comment
            elif t == '{' or (t == '(' and self._peek(1) == '*'):
                if t == '(' and self._match_and_read('*'):
                    char_number = self._char_no
                    comment = self._read_until('',
                                               lambda temp: temp[-2:] == '*)')
                    kind = TokenKind.Comment
                    value = comment[:-2]
                    logger.debug('Tokeniser      : read %s (%s)', kind, value)
                    return Token(kind, value, self._line_no + 1,
                                 char_number + 1)
                elif t == '{':
                    comment = self._read_until('',
                                               lambda temp: temp[-1:] == '}')
                    kind = TokenKind.Comment
                    value = comment[:-1]
                    logger.debug('Tokeniser      : read %s (%s)', kind, value)
                    return Token(kind, value, self._line_no + 1,
                                 char_number + 1)
            # Operator
            elif (t == ':' and self._peek(1) == '=') or t in '=+-*/><':
                kind = TokenKind.Operator
                char_number = self._char_no
                if t == ':' and self._match_and_read('='):
                    value = ':='
                elif t in '+-/*' and self._match_and_read('='):
                    value = t + '='  # compound assignment: +=, -=, /=, *=
                elif t == '*' and self._match_and_read('*'):
                    value = '**'
                elif t == '<' and self._match_and_read('>'):
                    value = '<>'
                elif t in '<>' and self._match_and_read('='):
                    value = t + '='  # <= or >=
                else:
                    value = t
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            # Symbol
            elif t in '(),:;[].^':
                kind = TokenKind.Symbol
                char_number = self._char_no
                value = t
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            # Catch any single quotes inside a string value.
            elif t == "'":
                char_number = self._char_no
                string = self._read_until(
                    '', lambda temp:
                    (temp[-1:] == "'") and (not self._match_and_read("'")))
                kind = TokenKind.String
                value = string[:-1]
                logger.debug('Tokeniser      : read %s (%s)', kind, value)
                return Token(kind, value, self._line_no + 1, char_number + 1)
            # Hmm.. unknown token. What did we forget?
            else:
                logger.error("Unknown token type: " + t if t else 'NONE!')