def scan(self) -> Tuple[tokens.Token, ...]:
    self.tokens.append(tokens.StreamStartToken(self.line_number))
    while True:
        self.scan_until_token()
        ch = self.peek()
        if ch == "\0":
            self.tokens.append(CH_MAPPING[ch](self.line_number))
            break
        elif ch == ";":
            if self.peek_multiple(2) == ";;":
                self.advance(2)
                self.tokens.append(CH_MAPPING[ch](self.line_number))
        elif ch == '"':
            self.advance()
            self.tokens.append(self.scan_quoted_literal())
        elif ch in CH_MAPPING.keys():
            self.advance()
            self.tokens.append(CH_MAPPING[ch](self.line_number))
        elif self.check_for_expression_block(self.peek_multiple(25)):
            self.tokens.append(self.scan_literal())
            self.scan_until_token()
            self.advance()
            self.tokens.append(tokens.ValueToken(self.line_number))
            self.scan_until_token()
            self.tokens.append(self.scan_expression_block())
        else:
            # TODO: This should actually check for valid literals first
            # and throw an error if it doesn't match
            self.tokens.append(self.scan_literal())
    return tuple(self.tokens)
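For reference, CH_MAPPING pairs each single-character token with its token class. Its contents aren't shown here, but they can be reconstructed from the branch-per-character version of scan further down; a minimal sketch, assuming it covers exactly those characters:

# A sketch of what CH_MAPPING plausibly contains, reconstructed from the
# longhand scan below. Each key is a character that maps directly to a
# token class whose constructor takes only a line number.
CH_MAPPING = {
    "\0": tokens.StreamEndToken,
    "{": tokens.BlockStartToken,
    "}": tokens.BlockEndToken,
    "[": tokens.ListStartToken,
    "]": tokens.ListEndToken,
    ",": tokens.CommaToken,
    ":": tokens.ValueToken,
    ";": tokens.ExpressionBlockEndToken,
}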
def test_scan_with_non_expression_block_starting_with_html():
    text = "html_not_reserved_field: yes"
    output = lkml.Lexer(text).scan()
    assert output == (
        tokens.StreamStartToken(1),
        tokens.LiteralToken("html_not_reserved_field", 1),
        tokens.ValueToken(1),
        tokens.LiteralToken("yes", 1),
        tokens.StreamEndToken(1),
    )
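This test only passes if check_for_expression_block matches a complete key rather than the bare prefix "html". A plausible sketch, assuming the lexer keeps a tuple of expression-block keys and matches each key plus its trailing colon, which would also explain why scan peeks 25 characters ahead: just enough for "expression_custom_filter:".

# Hypothetical reconstruction: the keys and the "+ ':'" matching are
# inferred from the tests, not taken from the actual lkml source.
EXPR_BLOCK_KEYS = ("expression_custom_filter", "sql_distinct_key", "sql", "html")

def check_for_expression_block(self, string: str) -> bool:
    # Matching the full key plus ":" lets keys such as
    # "html_not_reserved_field" fall through to the literal branch.
    return any(string.startswith(key + ":") for key in EXPR_BLOCK_KEYS)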
def test_scan_comment_with_surrounding_whitespace():
    text = "\n# A comment\n "
    output = lkml.Lexer(text).scan()
    assert output == (
        tokens.StreamStartToken(1),
        tokens.WhitespaceToken("\n", 1),
        tokens.CommentToken("# A comment", 2),
        tokens.WhitespaceToken("\n ", 2),
        tokens.StreamEndToken(3),
    )
def test_scan_with_complex_sql_block():
    text = (
        "sql_distinct_key: concat(${orders.order_id}, '|', "
        "${orders__items.primary_key}) ;;"
    )
    output = lkml.Lexer(text).scan()
    assert output == (
        tokens.StreamStartToken(1),
        tokens.LiteralToken("sql_distinct_key", 1),
        tokens.ValueToken(1),
        tokens.ExpressionBlockToken(
            " concat(${orders.order_id}, '|', ${orders__items.primary_key}) ", 1
        ),
        tokens.ExpressionBlockEndToken(1),
        tokens.StreamEndToken(1),
    )
def scan(self) -> Tuple[tokens.Token, ...]:
    self.tokens.append(tokens.StreamStartToken(self.line_number))
    while True:
        self.scan_until_token()
        ch = self.peek()
        if ch == "\0":
            self.tokens.append(tokens.StreamEndToken(self.line_number))
            break
        elif ch == "{":
            self.advance()
            self.tokens.append(tokens.BlockStartToken(self.line_number))
        elif ch == "}":
            self.advance()
            self.tokens.append(tokens.BlockEndToken(self.line_number))
        elif ch == "[":
            self.advance()
            self.tokens.append(tokens.ListStartToken(self.line_number))
        elif ch == "]":
            self.advance()
            self.tokens.append(tokens.ListEndToken(self.line_number))
        elif ch == ",":
            self.advance()
            self.tokens.append(tokens.CommaToken(self.line_number))
        elif ch == ":":
            self.advance()
            self.tokens.append(tokens.ValueToken(self.line_number))
        elif ch == ";":
            if self.peek_multiple(2) == ";;":
                self.advance(2)
                self.tokens.append(tokens.ExpressionBlockEndToken(self.line_number))
        elif ch == '"':
            self.advance()
            self.tokens.append(self.scan_quoted_literal())
        elif (
            self.peek_multiple(3) == "sql"
            or self.peek_multiple(4) == "html"
            or self.peek_multiple(24) == "expression_custom_filter"
        ):
            self.tokens.append(self.scan_literal())
            self.scan_until_token()
            self.advance()
            self.tokens.append(tokens.ValueToken(self.line_number))
            self.scan_until_token()
            self.tokens.append(self.scan_expression_block())
        else:
            # TODO: This should actually check for valid literals first
            # and throw an error if it doesn't match
            self.tokens.append(self.scan_literal())
    return tuple(self.tokens)
def scan(self) -> Tuple[tokens.Token, ...]: """Tokenizes LookML into a sequence of tokens. This method skips through the text being lexed until it finds a character that indicates the start of a new token. It consumes the relevant characters and adds the tokens to a sequence until it reaches the end of the text. """ self.tokens.append(tokens.StreamStartToken(self.line_number)) while True: ch = self.peek() if ch == "\0": self.tokens.append(CHARACTER_TO_TOKEN[ch](self.line_number)) break elif ch in "\n\t ": self.tokens.append(self.scan_whitespace()) elif ch == "#": self.advance() self.tokens.append(self.scan_comment()) elif ch == ";": if self.peek_multiple(2) == ";;": self.advance(2) self.tokens.append(CHARACTER_TO_TOKEN[ch]( self.line_number)) elif ch == '"': self.advance() self.tokens.append(self.scan_quoted_literal()) elif ch in CHARACTER_TO_TOKEN.keys(): self.advance() self.tokens.append(CHARACTER_TO_TOKEN[ch](self.line_number)) elif self.check_for_expression_block(self.peek_multiple(25)): # TODO: Handle edges here with whitespace and comments self.tokens.append(self.scan_literal()) self.advance() self.tokens.append(tokens.ValueToken(self.line_number)) self.tokens.append(self.scan_expression_block()) else: # TODO: This should actually check for valid literals first # and throw an error if it doesn't match self.tokens.append(self.scan_literal()) return tuple(self.tokens)
def parser():
    stream = (
        tokens.StreamStartToken(1),
        tokens.LiteralToken("view", 1),
        tokens.ValueToken(1),
        tokens.LiteralToken("view_name", 1),
        tokens.BlockStartToken(1),
        tokens.LiteralToken("sql_table_name", 2),
        tokens.ValueToken(2),
        tokens.ExpressionBlockToken("schema.table_name", 2),
        tokens.ExpressionBlockEndToken(2),
        tokens.LiteralToken("drill_fields", 3),
        tokens.ValueToken(3),
        tokens.ListStartToken(3),
        tokens.LiteralToken("view_name.field_one", 3),
        tokens.CommaToken(3),
        tokens.LiteralToken("view_name.field_two", 3),
        tokens.ListEndToken(3),
        tokens.BlockEndToken(4),
        tokens.StreamEndToken(4),
    )
    return lkml.parser.Parser(stream)
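The stream above tokenizes LookML along these lines:

# view: view_name {
#   sql_table_name: schema.table_name ;;
#   drill_fields: [view_name.field_one, view_name.field_two]
# }

Under pytest, a function like this is typically registered as a fixture so tests can request it by parameter name. A hypothetical example, assuming the function above carries the @pytest.fixture decorator and that Parser exposes a parse entry point:

def test_parse_view_from_fixture(parser):
    # pytest injects the fixture above by matching the argument name.
    result = parser.parse()
    assert result is not None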