def IDENT(self, token: Token) -> Token:
    """Token rule matching identifiers.

    An identifier starts with an underscore or a letter, followed by any
    number of underscores, letters and digits.  If the matched name is one
    of the reserved keywords, the token type is remapped from ``IDENT`` to
    that keyword; the boolean-like keywords TRUE/YES and FALSE/NO are
    additionally rewritten to ``INT_LITERAL`` tokens with value 1 or 0.
    Finally, names that match a known source are retyped ``SOURCE_NAME``.

    :param token: token matching the identifier pattern
    :return: the (possibly retyped) token
    """
    # Remap keyword identifiers: unknown names stay plain IDENTs.
    kind = self.reserved.get(token.value, "IDENT")

    if kind in ("TRUE", "YES"):
        # Boolean-true keywords become the integer literal 1.
        token.type = "INT_LITERAL"
        token.value = 1
    elif kind in ("FALSE", "NO"):
        # Boolean-false keywords become the integer literal 0.
        token.type = "INT_LITERAL"
        token.value = 0
    else:
        token.type = kind

    # Identifiers naming a configured source are tagged SOURCE_NAME.
    if any(src.name == token.value for src in self._sources):
        token.type = "SOURCE_NAME"

    return token
def post_lex(toks):
    """Tweak the token stream to simplify the grammar.

    TERMs after blocks and after the last expression in a block are
    optional in the input; this pass fills them in so the grammar does
    not have to cope with their absence.  '}' closes two constructs,
    so terminators are injected in both places:

        block:  { a; b; c }    ->  { a; b; c; };
        hashes: { a: b, c: d } ->  { a: b, c: d; };
    """
    # Single shared synthetic terminator token; its position is updated
    # to the most recently emitted token before each injection.
    sentinel = Token()
    sentinel.value = ";"
    sentinel.type = "TERM"

    stream = iter(toks)
    try:
        current = next(stream)
    except StopIteration:
        return []

    # The sentinel is also appended as a final lookahead element, which
    # makes the loop emit a trailing terminator after the last token.
    for upcoming in chain(stream, [sentinel]):
        yield current
        sentinel.lineno = current.lineno
        sentinel.index = current.index

        # Closing a block or hash: insert a TERM after '}' unless the
        # input already provides one.
        if current.type == "}" and upcoming.type != ";":
            yield sentinel
        # Last expression in a block or hash: insert a TERM before '}'
        # unless one is already there.
        if upcoming.type == "}" and current.type != "TERM":
            yield sentinel

        current = upcoming

    yield current