def INT_LITERAL(self, token: Token) -> Token:
    """Token rule to detect integer literals.

    An int literal may be a number in decimal/hex form, a number with a
    'K' suffix, or a single-quoted character sequence.

    Example:
        1024
        1K      # same as above
        -256
        0x25

    The matched string is converted into a number: a 'K' suffix multiplies
    the value by 1024, and a quoted string is interpreted as the big-endian
    integer of its UTF-8 bytes ('dude' -> 0x64756465).

    :param token: token matching integer literal pattern
    :return: Token representing integer literal
    """
    text = token.value
    if text.startswith("'") and text.endswith("'"):
        # Quoted characters: hex-encode the UTF-8 bytes, then parse.
        token.value = int("0x" + text[1:-1].encode("utf-8").hex(), 0)
    elif text.endswith("K"):
        # Kilo suffix: parse the prefix (base auto-detected) and scale.
        token.value = 1024 * int(text[:-1], 0)
    else:
        # Plain literal; base=0 auto-detects 0x/0o/0b prefixes.
        token.value = int(text, 0)
    return token
def IDENT(self, token: Token) -> Token:
    """Token rule to detect identifiers.

    A valid identifier starts with an underscore or a letter, followed by
    any number of underscores, letters and digits. If the matched name is
    a reserved keyword, the token type becomes that keyword; otherwise it
    stays 'IDENT'. TRUE/YES and FALSE/NO are folded into the integer
    literals 1 and 0 respectively. Finally, a value matching the name of a
    registered source is re-typed to 'SOURCE_NAME'.

    :param token: token matching an identifier pattern
    :return: Token representing identifier
    """
    # Remap keywords; anything not reserved stays a plain identifier.
    keyword = self.reserved.get(token.value, "IDENT")
    if keyword in ("TRUE", "YES"):
        token.type = "INT_LITERAL"
        token.value = 1
    elif keyword in ("FALSE", "NO"):
        token.type = "INT_LITERAL"
        token.value = 0
    else:
        token.type = keyword
    # Identifiers that name a known source become SOURCE_NAME tokens.
    if any(source.name == token.value for source in self._sources):
        token.type = "SOURCE_NAME"
    return token
def error(self, token: Token) -> Token:
    """Token error handler.

    Advances the lexing index by one so lexing can continue past the bad
    input, and shrinks the returned token down to the single character
    where the error was detected.

    :param token: invalid token.
    :return: the invalid token.
    """
    # token.value holds all remaining text; keep only the offending char.
    bad_char = token.value[0]
    self.index += 1
    token.value = bad_char
    return token
def BINARY_BLOB(self, token: Token) -> Token:
    """Token rule to detect binary blob.

    A binary blob is a sequence of hexadecimal bytes in double curly
    braces.

    Example:
        {{aa bb cc 1F 3C}}

    :param token: token matching binary blob pattern
    :return: Token representing binary blob
    """
    # Drop the surrounding '{{' and '}}', then remove all whitespace so
    # the value is one contiguous hex string.
    token.value = "".join(token.value[2:-2].split())
    return token
def post_lex(toks):
    """Tweak the token stream to simplify the grammar.

    Yields the tokens of ``toks`` unchanged, except that implicit
    statement terminators are inserted around '}' where the writer was
    allowed to omit them, and one final terminator ends the stream.

    :param toks: iterator of lexer tokens
    :return: generator of tokens with terminators filled in
    """
    # Single synthetic terminator token; it is re-yielded (and then
    # re-positioned) every time an implicit ';' is needed.
    term = Token()
    term.value = ";"
    term.type = "TERM"
    try:
        t = next(toks)
    except StopIteration:
        # Empty input: 'return' simply ends the generator; the [] value is
        # discarded by normal iteration (it only appears as
        # StopIteration.value).
        return []
    # Appending the terminator as a sentinel gives the last real token a
    # full look-ahead iteration, and makes the stream end with a TERM
    # (the trailing 'yield t' below).
    for next_tok in chain(toks, [term]):
        yield t
        # Keep the shared terminator's position in sync with the token it
        # follows. NOTE(review): the SAME 'term' object is yielded
        # repeatedly and mutated afterwards — consumers must not hold on
        # to it across iterations.
        term.lineno = t.lineno
        term.index = t.index
        # TERMs after blocks and after the last expression in a block are
        # optional. Fill them in here to make the grammar simpler.
        #
        # There are two places where '}' is used, and so there are two places
        # terminators must be consumed: block expressions and hashes.
        #
        # block: { a; b; c } -> { a; b; c; };
        #
        # hashes: { a: b, c: d } -> { a: b, c: d; };
        # Closing a block or hash.
        # NOTE(review): this compares against literal type ';' while the
        # injected terminators carry type 'TERM' — confirm the lexer really
        # emits ';'-typed tokens for semicolons, otherwise a '}' already
        # followed by a terminator would receive a second one.
        if t.type == "}" and next_tok.type != ";":
            yield term
        # Last expression in a block or hash.
        if next_tok.type == "}" and t.type != "TERM":
            yield term
        t = next_tok
    # 't' is now the sentinel itself, so every stream ends in a TERM.
    yield t
def __init__(self):
    # Initialize the base Token state first so our attribute is not
    # clobbered by the base initializer.
    Token.__init__(self)
    # Modifications recorded against this token; starts empty.
    # NOTE(review): element type is not visible here — confirm with callers.
    self.changes = []
def INTEGER(self, t: Token):
    """Token rule converting a matched integer's text into an int.

    :param t: token whose value is the decimal string matched by the rule
    :return: the same token with its value replaced by the parsed int
    """
    raw = t.value
    t.value = int(raw)
    return t
def LANGUAGE_CODE(self, t: Token):
    """Token rule converting a matched language-code token.

    Drops the first three characters of the matched text (presumably a
    fixed three-character prefix — confirm against the token's regex
    pattern) and wraps the remainder in a LanguageCode value.

    :param t: token matching the language-code pattern
    :return: the same token with its value replaced by a LanguageCode
    """
    t.value = LanguageCode(t.value[3:])
    return t