def _tokenize(self):
    """
    Split the assignment text into its two sub-tokens.

    Whatever the reader returns for `consume_until(end='=')` becomes a
    T_AssignOperator; the remainder of the value becomes a T_Expression.
    Both are appended to `self.tokens`, in that order.
    """
    reader = CodeReader(self.value)

    # operator part
    operator = T_AssignOperator(reader.consume_until(end='='))
    self.tokens.append(operator)

    # everything that is left is the assigned expression
    expression = T_Expression(reader.consume_all())
    self.tokens.append(expression)
def __init__(self, value):
    """
    Parse an `#ifndef` directive.

    Args:
        value: source text of the directive, starting with `#ifndef`.

    The identifier following the keyword is stored in `self.name`.
    """
    super().__init__(value)

    reader = CodeReader(value)

    # skip the keyword and the inline whitespace after it
    reader.consume_exact('#ifndef')
    reader.consume_inline_whitespace()

    identifier = reader.consume_identifier()
    self.name = identifier
def __parse_body(self):
    """
    Parse the macro body and store its sub-tokens in `self.tokens`.

    Without arguments (`self.args is None`) the whole body becomes one
    DT_Code token. Otherwise the body is scanned: every identifier that is
    listed in `self.args` becomes a DT_Var token, and all text between such
    identifiers is collected into DT_Code tokens.
    """
    if self.args is None:
        self.tokens.append(DT_Code(self.body))
        return

    reader = CodeReader(self.body)
    pending = ''  # code fragment collected since the last emitted token

    while not reader.has_end():
        if reader.has_identifier():
            word = reader.consume_identifier()

            if word not in self.args:
                # ordinary identifier, part of the surrounding code
                pending += word
                continue

            # macro argument: flush the collected code first
            if pending:
                self.tokens.append(DT_Code(pending))
                pending = ''

            self.tokens.append(DT_Var(word))

        elif reader.has_string():
            # strings are consumed as a unit and copied into the fragment
            pending += reader.consume_string()

        elif reader.has_char():
            # same for char literals
            pending += reader.consume_char()

        else:
            # any other single character belongs to the current fragment
            pending += reader.consume()

    # trailing code fragment
    if pending:
        self.tokens.append(DT_Code(pending))
def __init__(self, value):
    """
    Parse an `#include` directive.

    Args:
        value: source text of the directive, starting with `#include`.

    The string that follows the keyword is stored in `self.file` with its
    first and last character (the quotes) removed.
    """
    super().__init__(value)

    reader = CodeReader(value)

    # skip the keyword and the inline whitespace after it
    reader.consume_exact('#include')
    reader.consume_inline_whitespace()

    quoted = reader.consume_string()
    self.file = quoted[1:-1]  # discard quotes
def _tokenize(self):
    """
    Parse the content of the bracket as a single index expression.

    The first and last character of `self.value` are dropped, the rest is
    stripped and read as one expression. The resulting T_Expression is
    appended to `self.tokens` and also stored in `self.index`.

    Raises:
        Exception: if anything is left in the reader after the expression.
    """
    inner = self.value[1:-1].strip()
    reader = CodeReader(inner)

    reader.sweep()
    index_expr = T_Expression(
        reader.consume_code(end=',', eof=True, keep_end=False)
    )
    self.tokens.append(index_expr)
    self.index = index_expr

    reader.sweep()
    if reader.has_end():
        return

    raise Exception(
        'Invalid array index (must be single expression).'
    )
def _tokenize(self):
    """
    Tokenize the paren content according to `self.ptype`.

    With ParenType.UNKNOWN a notice is printed and nothing is tokenized.
    Otherwise a reader over the stripped inner text (first and last
    character of `self.value` removed) is handed to the collector that
    matches the paren type.
    """
    kind = self.ptype

    if kind == ParenType.UNKNOWN:
        print('Paren has no type, cannot tokenize: ' + str(self))
        return

    reader = CodeReader(self.value[1:-1].strip())

    if kind == ParenType.EXPR:
        # single expression
        self._collect_expr(reader)
    elif kind == ParenType.ARGVALS:
        # comma-separated list of expressions, can be empty
        self._collect_argvals(reader)
    elif kind == ParenType.ARGNAMES:
        # comma-separated list of argument names
        self._collect_argnames(reader)
    elif kind == ParenType.FOR:
        # arguments for a FOR loop
        self._collect_for(reader)
def tokenize(self):
    """
    Convert the source to tokens and return the token list.

    The result is kept in `self.tokens`; when that is already set,
    it is returned directly without tokenizing again.

    Returns:
        A list of obtained tokens, recursively tokenized.
    """
    if self.tokens is not None:
        return self.tokens

    self.tokens = []
    reader = CodeReader(self.source, self.filename)

    while not reader.has_end():
        # discard garbage
        reader.sweep()

        # the sweep may have reached the end of the string
        if reader.has_end():
            break

        if reader.has_identifier():
            # <identifier>
            self._tokenize_identifier(reader)
        elif reader.has_code_block():
            # {...stuff...}
            self._add(T_CodeBlock(reader.consume_block()))
        elif reader.starts(';'):
            # ;
            self._collect_semicolon(reader)
        else:
            reader.error('Unexpected syntax here.')

    # tokenize all composite tokens
    for token in self.tokens:
        if token.is_composite():
            token.tokenize()

    return self.tokens
def _tokenize(self):
    """
    Parse expression sub-tokens.

    Reads `self.value` and appends to `self.tokens`, in source order:
    names (optionally followed by a T_Bracket index or a T_Paren typed as
    ARGVALS), parenthesised sub-expressions (T_Paren typed as EXPR),
    numbers, operators, char literals and string literals. A sign that
    directly follows an operator is handled as unary: `+` is dropped and
    `-` is rewritten to the tokens `-1`, `*`.

    Finally every composite token is tokenized recursively.

    Raises:
        Exception: when the reader is at something none of the branches
            recognise.
    """
    rd = CodeReader(self.value)

    while not rd.has_end():
        rd.sweep()

        # The sweep can consume everything that was left (trailing
        # whitespace or comments); stop instead of reporting a bogus
        # "unexpected token" at the end of the expression.
        if rd.has_end():
            break

        if rd.has_identifier():
            # an identifier
            # can be variable or a function call
            s = rd.consume_identifier()
            self.tokens.append(T_Name(s))

            rd.sweep()

            if rd.has_bracket():
                # array index
                s = rd.consume_block()
                self.tokens.append(T_Bracket(s))

            elif rd.has_paren():
                # paren with arguments for the function
                s = rd.consume_block()
                t = T_Paren(s)
                t.set_type(ParenType.ARGVALS)
                self.tokens.append(t)

        elif rd.has_paren():
            # Parenthesised sub-expression
            s = rd.consume_block()
            t = T_Paren(s)
            t.set_type(ParenType.EXPR)
            self.tokens.append(t)

        elif rd.has_number():
            # Number literal
            s = rd.consume_number()
            self.tokens.append(T_Number(s))

        elif (self.tokens
                and type(self.tokens[-1]) is T_Operator
                and rd.matches(r'[-+]\s*[0-9a-z_]+')):
            # Unary sign right after an operator.
            # NOTE(review): the pattern only accepts lowercase letters
            # after the sign - confirm whether that is intended.
            sign = rd.consume()
            rd.sweep()

            # unary plus is a no-op; unary minus becomes "-1 *"
            if sign == '-':
                self.tokens.append(T_Number('-1'))
                self.tokens.append(T_Operator('*'))

        elif rd.has_operator():
            # Operator
            s = rd.consume_operator()
            self.tokens.append(T_Operator(s))

        elif rd.has_char():
            # Char literal
            s = rd.consume_char()
            self.tokens.append(T_Char(s))

        elif rd.has_string():
            # String literal
            s = rd.consume_string()
            self.tokens.append(T_String(s))

        else:
            raise Exception(
                'Unexpected expression token near: ' + rd.peek(10)
            )

    # tokenize all composite tokens
    for t in self.tokens:
        if t.is_composite():
            t.tokenize()
def __init__(self, value):
    """
    Parse a `#pragma` directive into a name and a value.

    Args:
        value: source text of the directive, starting with `#pragma`.

    `self.name` receives the identifier after the keyword. `self.value`
    is taken from what follows the name:

    - an identifier is stored as a string (without quotes),
    - a number is converted to int,
    - a string is stored without its first and last character (quotes),
    - nothing recognised means a boolean flag, stored as True.

    String values equal to 'true' / 'false' (case-insensitive) are
    converted to the corresponding bool.
    """
    super().__init__(value)

    rd = CodeReader(value)
    rd.consume_exact('#pragma')
    rd.consume_inline_whitespace()

    self.name = rd.consume_identifier()

    rd.consume_inline_whitespace()

    if rd.has_identifier():
        # identifier without quotes
        self.value = rd.consume_identifier()

    elif rd.has_number():
        n = rd.consume_number()

        # Decimal first, then binary, then hex. Binary has to be tried
        # before hex: "0b101" consists of valid hex digits only, so
        # int("0b101", 16) succeeds and yields 45313 instead of 5.
        for base in (10, 2, 16):
            try:
                self.value = int(n, base)
                break
            except ValueError:
                continue
        else:
            rd.error('Could not parse number: %s' % n)

    elif rd.has_string():
        # crop quotes
        self.value = rd.consume_string()[1:-1]

    else:
        # boolean directive (flag)
        self.value = True

    # translate textual booleans, keep every other string as it is
    v = self.value
    if isinstance(v, str):
        self.value = {'true': True, 'false': False}.get(v.lower(), v)
def __init__(self, value):
    """
    Parse a `#define` directive.

    Args:
        value: source text of the directive, starting with `#define`.

    Sets:
        name: macro name.
        functionlike: True when the name is followed by a paren.
        arraylike: True when the name is followed by a bracket.
        args: None for a plain macro, otherwise the list of argument
            names (empty entries between commas are skipped).
        vararg_pos: index in `args` of the argument written with a
            trailing `...`, or None.
        body: the remaining text after the name / arguments.
        tokens: sub-tokens of the body, filled by `__parse_body()`.
    """
    super().__init__(value)

    reader = CodeReader(value)
    reader.consume_exact('#define')
    reader.consume_inline_whitespace()

    # macro name
    self.name = reader.consume_identifier()

    # kind flags
    self.arraylike = False
    self.functionlike = False

    # macro arguments, and which of them is variadic
    self.args = None
    self.vararg_pos = None

    if reader.has_paren():
        # function-like macro: NAME(a, b, c...)
        inner = reader.consume_block()[1:-1]
        self.args = []

        for raw in inner.split(','):
            arg = raw.strip()
            if not arg:
                continue

            if arg.endswith('...'):
                # variadic argument; its position is remembered and the
                # dots are removed from the stored name
                if self.vararg_pos is not None:
                    reader.error('Macro can have only one variadic argument!')

                self.vararg_pos = len(self.args)
                arg = arg[:-3].strip()

            self.args.append(arg)

        self.functionlike = True

    elif reader.has_bracket():
        # array-like macro: NAME[index]
        inner = reader.consume_block()[1:-1].strip()

        if re.match(r'\A[a-zA-Z_][a-zA-Z0-9_]*\Z', inner) is None:
            reader.error('Invalid argument format for macro "%s": %s' % (self.name, inner))

        self.args = [inner]
        self.arraylike = True

    reader.consume_inline_whitespace()

    # macro body and its tokens
    self.body = reader.consume_all()
    self.tokens = []
    self.__parse_body()
def apply_macros(self):
    """
    Recursively apply macros to the output of `process()`.

    To be called after `process()`. The `output` variable is overwritten
    with the result of each pass; passes are repeated until one of them
    applies no macro.

    Along the way adjacent string literals are joined into one, and
    inline and block comments are dropped.

    Returns:
        The final source code after applying all macro replacements.
        When `output` is empty, a notice is printed and None is returned.
    """
    if len(self.output) == 0:
        print('There is no source code.')
        return

    reader = CodeReader(self.output)
    replaced = 0   # number of macros applied in this pass
    pieces = []    # output fragments, joined at the end

    while not reader.has_end():
        pieces.append(self._handle_whitespace(reader))
        if reader.has_end():
            break

        if reader.has_identifier():
            name = reader.consume_identifier()
            gap = reader.consume_inline_whitespace()

            if name not in self.defines:
                # not a macro, give it back
                pieces.append(name + gap)
                continue

            candidates = self.defines[name]

            if reader.has_bracket():
                # array macro
                index = reader.consume_block()[1:-1]
                expansion = next(
                    (m.generate([index]) for m in candidates
                     if m.is_arraylike() and m.can_use_args([index])),
                    None
                )

                if expansion is None:
                    pieces.append(name + gap)
                    pieces.append('[%s]' % index)
                else:
                    pieces.append(expansion)
                    replaced += 1

            elif reader.has_paren():
                # func macro
                paren = reader.consume_block()

                # let the paren token split the argument values
                arg_list = T_Paren(paren)
                arg_list.set_type(ParenType.ARGVALS)
                arg_list.tokenize()
                args = [tok.value for tok in arg_list.tokens]

                expansion = next(
                    (m.generate(args) for m in candidates
                     if m.is_functionlike() and m.can_use_args(args)),
                    None
                )

                if expansion is None:
                    pieces.append(name + gap + paren)
                    print(
                        '[W] Macro "%s" defined, but can\'t use arguments (%s)'
                        % (name, ', '.join(args))
                    )
                else:
                    pieces.append(expansion)
                    replaced += 1

            else:
                # const macro
                expansion = next(
                    (m.generate(None) for m in candidates
                     if m.can_use_args(None)),
                    None
                )

                if expansion is None:
                    pieces.append(name + gap)
                else:
                    pieces.append(expansion + gap)
                    replaced += 1

        elif reader.has_string():
            # "...", and "sdgfsd""JOINED" "This too"
            # handle string concatenation
            joined = ''
            while reader.has_string():
                joined += reader.consume_string()[1:-1]  # drop quotes
                reader.sweep()
            pieces.append('"%s"' % joined)

        elif reader.has_inline_comment():
            # //...
            reader.consume_line()

        elif reader.has_block_comment():
            # /* ... */
            reader.consume_block_comment()

        else:
            # any char...
            pieces.append(reader.consume())

    result = ''.join(pieces)
    self.output = result

    # take care of macros in macros
    if replaced > 0:
        return self.apply_macros()

    return result
def _tokenize(self):
    """
    Parse expression sub-tokens.

    Reads `self.value` and appends to `self.tokens`, in source order:
    names (optionally followed by a T_Bracket index or a T_Paren typed as
    ARGVALS), parenthesised sub-expressions (T_Paren typed as EXPR),
    numbers, operators, char literals and string literals. A sign at the
    very start of the expression or directly after an operator is handled
    as unary: `+` is dropped and `-` is emitted as the operator `@-`.

    Finally every composite token is tokenized recursively.

    Raises:
        Exception: when the reader is at something none of the branches
            recognise.
    """
    rd = CodeReader(self.value)

    while not rd.has_end():
        rd.sweep()

        # The sweep can consume everything that was left (trailing
        # whitespace or comments); stop instead of reporting a bogus
        # "unexpected token" at the end of the expression.
        if rd.has_end():
            break

        if rd.has_identifier():
            # an identifier
            # can be variable or a function call
            s = rd.consume_identifier()
            self.tokens.append(T_Name(s))

            rd.sweep()

            if rd.has_bracket():
                # array index
                s = rd.consume_block()
                self.tokens.append(T_Bracket(s))

            elif rd.has_paren():
                # paren with arguments for the function
                s = rd.consume_block()
                t = T_Paren(s)
                t.set_type(ParenType.ARGVALS)
                self.tokens.append(t)

        elif rd.has_paren():
            # Parenthesised sub-expression
            s = rd.consume_block()
            t = T_Paren(s)
            t.set_type(ParenType.EXPR)
            self.tokens.append(t)

        elif rd.has_number():
            # Number literal
            s = rd.consume_number()
            self.tokens.append(T_Number(s))

        elif ((not self.tokens or type(self.tokens[-1]) is T_Operator)
                and rd.matches(r'[-+]\s*[0-9a-z_]+')):
            # Unary operator (first token, or right after an operator).
            # NOTE(review): the pattern only accepts lowercase letters
            # after the sign - confirm whether that is intended.
            sign = rd.consume()
            rd.sweep()

            # unary plus is a no-op; unary minus gets its own operator
            if sign == '-':
                self.tokens.append(T_Operator('@-'))

        elif rd.has_operator():
            # Operator
            s = rd.consume_operator()
            self.tokens.append(T_Operator(s))

        elif rd.has_char():
            # Char literal
            s = rd.consume_char()
            self.tokens.append(T_Char(s))

        elif rd.has_string():
            # String literal
            s = rd.consume_string()
            self.tokens.append(T_String(s))

        else:
            raise Exception(
                'Unexpected expression token near: ' + rd.peek(10)
            )

    # tokenize all composite tokens
    for t in self.tokens:
        if t.is_composite():
            t.tokenize()