Пример #1
0
	def _tokenize(self):
		""" Split the value into an assignment operator and an expression

		Whatever the reader returns for `consume_until(end='=')` becomes
		a T_AssignOperator, the rest of the value a T_Expression.
		Both tokens are appended to `self.tokens`, in that order.

		"""

		reader = CodeReader(self.value)

		# leading part: the assignment operator
		operator = T_AssignOperator(reader.consume_until(end='='))
		self.tokens.append(operator)

		# everything that is left: the assigned expression
		expression = T_Expression(reader.consume_all())
		self.tokens.append(expression)
Пример #2
0
	def _tokenize(self):
		""" Split the value into an assignment operator and an expression

		Whatever the reader returns for `consume_until(end='=')` becomes
		a T_AssignOperator, the rest of the value a T_Expression.
		Both tokens are appended to `self.tokens`, in that order.

		"""

		reader = CodeReader(self.value)

		# leading part: the assignment operator
		operator = T_AssignOperator(reader.consume_until(end='='))
		self.tokens.append(operator)

		# everything that is left: the assigned expression
		expression = T_Expression(reader.consume_all())
		self.tokens.append(expression)
Пример #3
0
	def __init__(self, value):
		""" Parse an `#ifndef` directive and remember the tested name

		Args:
			value: source text of the directive; passed to the parent
				constructor and then read with a CodeReader.

		"""

		super().__init__(value)

		reader = CodeReader(value)

		# skip the keyword and the inline whitespace behind it
		reader.consume_exact('#ifndef')
		reader.consume_inline_whitespace()

		# the identifier that follows the keyword
		self.name = reader.consume_identifier()
Пример #4
0
	def __parse_body(self):
		""" Split the macro body into sub-tokens stored in `self.tokens`

		A macro without an argument list (`self.args is None`) gets its
		whole body as a single DT_Code. Otherwise every identifier that
		is listed in `self.args` becomes a DT_Var, and the text between
		such identifiers is collected into DT_Code fragments.

		"""

		# no argument list: nothing can be substituted in the body
		if self.args is None:
			self.tokens.append(DT_Code(self.body))
			return

		reader = CodeReader(self.body)
		pending = ''  # code fragment collected since the last DT_Var

		while not reader.has_end():

			if reader.has_identifier():
				name = reader.consume_identifier()

				if name not in self.args:
					# ordinary identifier, part of the code fragment
					pending += name
					continue

				# a macro argument: emit the fragment collected so far...
				if pending:
					fragment = DT_Code(pending)
					pending = ''
					self.tokens.append(fragment)

				# ...and then the argument itself
				self.tokens.append(DT_Var(name))

			elif reader.has_string():
				# string literals are consumed in one piece
				pending += reader.consume_string()

			elif reader.has_char():
				# same for char literals
				pending += reader.consume_char()

			else:
				# anything else is taken over one consume() at a time
				pending += reader.consume()

		# whatever was collected after the last argument
		if pending:
			self.tokens.append(DT_Code(pending))
Пример #5
0
	def __init__(self, value):
		""" Parse an `#include` directive and store the included file

		Args:
			value: source text of the directive; passed to the parent
				constructor and then read with a CodeReader.

		"""

		super().__init__(value)

		reader = CodeReader(value)

		# skip the keyword and the inline whitespace behind it
		reader.consume_exact('#include')
		reader.consume_inline_whitespace()

		# the file is given as a string; its first and last
		# character (the quotes) are cut off
		quoted = reader.consume_string()
		self.file = quoted[1:-1]
Пример #6
0
	def _tokenize(self):
		""" Tokenize the single expression used as the array index

		The first and last character of the value are dropped and the
		rest is read as one T_Expression, which is appended to
		`self.tokens` and also stored as `self.index`.

		Raises:
			Exception: if the reader is not at its end once the
				expression has been read.

		"""

		inner = self.value[1:-1].strip()
		reader = CodeReader(inner)

		reader.sweep()

		# read code up to a ',' or the end of input
		expr = T_Expression(
			reader.consume_code(end=',', eof=True, keep_end=False)
		)
		self.tokens.append(expr)
		self.index = expr

		reader.sweep()

		# all consumed - exactly one expression, as required
		if reader.has_end():
			return

		raise Exception('Invalid array index (must be single expression).')
Пример #7
0
	def _tokenize(self):
		""" Tokenize the single expression used as the array index

		The first and last character of the value are dropped and the
		rest is read as one T_Expression, which is appended to
		`self.tokens` and also stored as `self.index`.

		Raises:
			Exception: if the reader is not at its end once the
				expression has been read.

		"""

		inner = self.value[1:-1].strip()
		reader = CodeReader(inner)

		reader.sweep()

		# read code up to a ',' or the end of input
		expr = T_Expression(
			reader.consume_code(end=',', eof=True, keep_end=False)
		)
		self.tokens.append(expr)
		self.index = expr

		reader.sweep()

		# all consumed - exactly one expression, as required
		if reader.has_end():
			return

		raise Exception('Invalid array index (must be single expression).')
Пример #8
0
	def _tokenize(self):
		""" Tokenize the paren content in the way `self.ptype` asks for

		A paren of type UNKNOWN is not tokenized; a notice is printed
		instead. For the other handled types the content (value without
		its first and last character, stripped) is given to the
		matching `_collect_*` method. Any other type collects nothing.

		"""

		kind = self.ptype

		if kind == ParenType.UNKNOWN:
			print('Paren has no type, cannot tokenize: ' + str(self))
			return

		reader = CodeReader(self.value[1:-1].strip())

		# single expression
		if kind == ParenType.EXPR:
			self._collect_expr(reader)
			return

		# comma-separated list of expressions, can be empty
		if kind == ParenType.ARGVALS:
			self._collect_argvals(reader)
			return

		# comma-separated list of argument names
		if kind == ParenType.ARGNAMES:
			self._collect_argnames(reader)
			return

		# arguments for a FOR loop
		if kind == ParenType.FOR:
			self._collect_for(reader)
Пример #9
0
	def tokenize(self):
		""" Build the token list for the source and return it

		The list is built on the first call only; later calls return
		the list stored in `self.tokens`.

		Returns:
			The list of tokens; composite tokens in it have been
			asked to tokenize themselves.

		"""

		# already done - hand out the stored list
		if self.tokens is not None:
			return self.tokens

		self.tokens = []

		reader = CodeReader(self.source, self.filename)

		while not reader.has_end():

			# skip whatever sweep() throws away
			reader.sweep()

			# the sweep may have reached the end of the source
			if reader.has_end():
				break

			# <identifier>
			if reader.has_identifier():
				self._tokenize_identifier(reader)
				continue

			# {...stuff...}
			if reader.has_code_block():
				block = reader.consume_block()
				self._add(T_CodeBlock(block))
				continue

			# ;
			if reader.starts(';'):
				self._collect_semicolon(reader)
				continue

			# nothing of the above
			reader.error('Unexpected syntax here.')

		# let the composite tokens tokenize their own content
		for token in self.tokens:
			if token.is_composite():
				token.tokenize()

		return self.tokens
Пример #10
0
	def _tokenize(self):
		""" Parse expression sub-tokens

		Reads `self.value` with a CodeReader and appends what it finds
		to `self.tokens`: names (optionally followed by a bracket, or
		by a paren typed ARGVALS), parens typed EXPR, numbers,
		operators, char literals and string literals.

		A sign that directly follows an operator token is treated as
		unary: '+' is dropped, '-' is emitted as the two tokens
		`-1` and `*`.

		When the input is exhausted, all composite tokens are asked to
		tokenize themselves.

		Raises:
			Exception: if the reader stands at something that is none
				of the token kinds listed above.

		"""

		rd = CodeReader(self.value)

		while not rd.has_end():
			rd.sweep()

			# The sweep may have consumed all that was left (e.g.
			# trailing whitespace). Stop here instead of falling
			# through to the "unexpected token" error below.
			if rd.has_end():
				break

			if rd.has_identifier():
				# an identifier
				# can be variable or a function call

				s = rd.consume_identifier()
				t = T_Name(s)
				self.tokens.append(t)

				rd.sweep()

				if rd.has_bracket():
					# array index
					s = rd.consume_block()
					t = T_Bracket(s)
					self.tokens.append(t)

				elif rd.has_paren():
					# paren with arguments for the function
					s = rd.consume_block()
					t = T_Paren(s)

					t.set_type(ParenType.ARGVALS)

					self.tokens.append(t)

			elif rd.has_paren():
				# Parenthesised sub-expression
				s = rd.consume_block()
				t = T_Paren(s)
				t.set_type(ParenType.EXPR)
				self.tokens.append(t)

			elif rd.has_number():
				# Number literal
				s = rd.consume_number()
				t = T_Number(s)
				self.tokens.append(t)

			elif (self.tokens
				and type(self.tokens[-1]) is T_Operator
				and rd.matches(r'[-+]\s*[0-9a-z_]+')):

				# Unary sign (the previous token is an operator)
				sign = rd.consume()

				rd.sweep()

				# unary plus needs no token; unary minus becomes "-1 *"
				if sign == '-':
					self.tokens.append(T_Number('-1'))
					self.tokens.append(T_Operator('*'))

			elif rd.has_operator():
				# Operator
				s = rd.consume_operator()
				t = T_Operator(s)
				self.tokens.append(t)

			elif rd.has_char():
				# Char literal
				s = rd.consume_char()
				t = T_Char(s)
				self.tokens.append(t)

			elif rd.has_string():
				# String literal
				s = rd.consume_string()
				t = T_String(s)
				self.tokens.append(t)

			else:
				raise Exception('Unexpected expression token near ' + rd.peek(10))

		# tokenize all composite tokens
		for t in self.tokens:
			if t.is_composite():
				t.tokenize()
Пример #11
0
	def __init__(self, value):
		""" Parse a `#pragma` directive into `self.name` and `self.value`

		What follows the pragma name decides the value:

		- an identifier is stored as a string,
		- a number is converted to int (decimal, binary or hex),
		- a string is stored without its first and last character,
		- nothing recognised makes the pragma a flag with value True.

		String values spelled `true` / `false` (in any letter case)
		are finally replaced by the matching boolean.

		Args:
			value: source text of the directive; passed to the parent
				constructor and then read with a CodeReader.

		"""

		super().__init__(value)

		rd = CodeReader(value)
		rd.consume_exact('#pragma')
		rd.consume_inline_whitespace()
		self.name = rd.consume_identifier()
		rd.consume_inline_whitespace()

		if rd.has_identifier():
			self.value = rd.consume_identifier()  # identifier without quotes

		elif rd.has_number():

			n = rd.consume_number()

			# Base 2 must be tried before base 16: every `0b...`
			# literal consists of valid hex digits only, so
			# int(n, 16) would accept it and produce a wrong number
			# (int('0b101', 16) == 45313, not 5).
			for base in (10, 2, 16):
				try:
					self.value = int(n, base)
					break
				except ValueError:
					continue
			else:
				# none of the bases worked
				rd.error('Could not parse number: %s' % n)

		elif rd.has_string():
			self.value = rd.consume_string()[1:-1]  # crop quotes

		else:
			self.value = True  # boolean directive (flag)

		# textual booleans become real booleans, other strings stay
		v = self.value
		if isinstance(v, str):
			self.value = {'true': True, 'false': False}.get(v.lower(), v)
Пример #12
0
	def __init__(self, value):
		""" Parse a `#define` directive

		Sets the macro `name`, the `functionlike` / `arraylike` flags,
		the argument names in `args` (None when the macro has no
		argument list), the index of a variadic argument in
		`vararg_pos` (None when there is none) and the macro `body`,
		then parses the body into `tokens`.

		Args:
			value: source text of the directive; passed to the parent
				constructor and then read with a CodeReader.

		"""

		super().__init__(value)

		reader = CodeReader(value)
		reader.consume_exact('#define')
		reader.consume_inline_whitespace()

		# macro name
		self.name = reader.consume_identifier()

		# kind of the macro; both flags stay False without arguments
		self.arraylike = False
		self.functionlike = False

		# argument names
		self.args = None

		# position of the variadic argument within args
		self.vararg_pos = None

		if reader.has_paren():
			# function-like macro: NAME(a, b, rest...)
			inner = reader.consume_block()[1:-1]
			self.args = []

			for arg in (piece.strip() for piece in inner.split(',')):
				if not arg:
					# empty pieces are skipped
					continue

				if arg.endswith('...'):
					# variadic argument - only one is allowed
					if self.vararg_pos is not None:
						reader.error('Macro can have only one variadic argument!')

					self.vararg_pos = len(self.args)
					arg = arg[:-3].strip()

				self.args.append(arg)

			self.functionlike = True

		elif reader.has_bracket():
			# array-like macro: NAME[arg], arg must be one identifier
			inner = reader.consume_block()[1:-1].strip()

			if re.match(r'\A[a-zA-Z_][a-zA-Z0-9_]*\Z', inner) is None:
				reader.error('Invalid argument format for macro "%s": %s' % (self.name, inner))

			self.args = [inner]
			self.arraylike = True

		reader.consume_inline_whitespace()

		# the rest of the directive is the macro body
		self.body = reader.consume_all()

		# sub-tokens of the body, filled by the body parser
		self.tokens = []

		self.__parse_body()
Пример #13
0
	def apply_macros(self):
		""" Recursively apply macros to the output of `process()`

		To be called after `process()`.
		The `output` variable is overwritten by this.

		One pass reads `output` from start to end and builds a new
		text. Identifiers found in `self.defines` are replaced using
		the first macro of matching kind that accepts the arguments
		(array-like for `NAME[...]`, function-like for `NAME(...)`,
		otherwise any macro usable without arguments). Consecutive
		string literals are joined into one, comments are left out.
		If at least one macro was applied, another pass is run on the
		result, so that macros produced by macros get replaced too.

		Returns:
			The final source code after applying all
			macro replacements, or None (after printing a notice)
			if `output` is empty.

		"""

		# nothing to work on
		if len(self.output) == 0:
			print('There is no source code.')
			return

		rd = CodeReader(self.output)

		# number of macros applied in this pass
		applied_count = 0
		# text produced by this pass
		out = ''
		while not rd.has_end():

			# whatever the whitespace handler returns goes to the output
			out += self._handle_whitespace(rd)
			if rd.has_end():
				break

			if rd.has_identifier():

				ident = rd.consume_identifier()
				# kept, so that it can be put back behind the identifier
				ident_whitesp = rd.consume_inline_whitespace()

				if ident in self.defines:

					# all macros defined under this name
					macros = self.defines[ident]

					# stays None if no macro can be used here
					replacement = None

					if rd.has_bracket():
						# array macro

						# content of the bracket, without the bracket itself
						bracket = rd.consume_block()[1:-1]

						# first array-like macro that accepts the argument
						for mm in macros:
							if mm.is_arraylike():
								if mm.can_use_args([bracket]):
									replacement = mm.generate([bracket])
									break

						if replacement is None:
							# no macro fits, put the original text back
							out += ident + ident_whitesp
							out += '[%s]' % bracket
						else:
							out += replacement
							applied_count += 1

					elif rd.has_paren():
						# func macro

						paren = rd.consume_block()

						# tokenize the paren as a list of argument values
						t = T_Paren(paren)
						t.set_type(ParenType.ARGVALS)
						t.tokenize()

						# the macro gets the values of the sub-tokens
						args = []
						for a in t.tokens:
							args.append(a.value)

						# first function-like macro that accepts the arguments
						for mm in macros:
							if mm.is_functionlike():
								if mm.can_use_args(args):
									replacement = mm.generate(args)
									break

						if replacement is None:
							# no macro fits, put the original text back and warn
							out += ident + ident_whitesp + paren
							print(
								'[W] Macro "%s" defined, but can\'t use arguments (%s)'
								% (ident, ', '.join(args) ))
						else:
							out += replacement
							applied_count += 1

					else:
						# const macro

						# first macro that can be used without arguments
						for mm in macros:
							if mm.can_use_args(None):
								replacement = mm.generate(None)
								break

						if replacement is None:
							out += ident + ident_whitesp
						else:
							# the whitespace behind the identifier is kept
							out += replacement + ident_whitesp
							applied_count += 1

				else:
					out += ident + ident_whitesp  # give it back

			# "...", and "sdgfsd""JOINED"  "This too"
			elif rd.has_string():
				# handle string concatenation
				s = ''
				while rd.has_string():
					s += rd.consume_string()[1:-1]  # drop quotes
					# skip to what follows, possibly another string
					rd.sweep()

				# one pair of quotes around the joined content
				out += '"%s"' % s

			# //...
			elif rd.has_inline_comment():
				# consumed, but not added to the output
				rd.consume_line()

			# /* ... */
			elif rd.has_block_comment():
				# consumed, but not added to the output
				rd.consume_block_comment()

			# any char...
			else:
				out += rd.consume()

		self.output = out

		# take care of macros in macros
		if applied_count > 0:
			# something was replaced, the result may contain macros again
			return self.apply_macros()
		else:
			return out
Пример #14
0
	def tokenize(self):
		""" Build the token list for the source and return it

		The list is built on the first call only; later calls return
		the list stored in `self.tokens`.

		Returns:
			The list of tokens; composite tokens in it have been
			asked to tokenize themselves.

		"""

		# already done - hand out the stored list
		if self.tokens is not None:
			return self.tokens

		self.tokens = []

		reader = CodeReader(self.source, self.filename)

		while not reader.has_end():

			# skip whatever sweep() throws away
			reader.sweep()

			# the sweep may have reached the end of the source
			if reader.has_end():
				break

			# <identifier>
			if reader.has_identifier():
				self._tokenize_identifier(reader)
				continue

			# {...stuff...}
			if reader.has_code_block():
				block = reader.consume_block()
				self._add(T_CodeBlock(block))
				continue

			# ;
			if reader.starts(';'):
				self._collect_semicolon(reader)
				continue

			# nothing of the above
			reader.error('Unexpected syntax here.')

		# let the composite tokens tokenize their own content
		for token in self.tokens:
			if token.is_composite():
				token.tokenize()

		return self.tokens
Пример #15
0
	def _tokenize(self):
		""" Parse expression sub-tokens

		Reads `self.value` with a CodeReader and appends what it finds
		to `self.tokens`: names (optionally followed by a bracket, or
		by a paren typed ARGVALS), parens typed EXPR, numbers,
		operators, char literals and string literals.

		A sign at the very start of the expression, or directly after
		an operator token, is treated as unary: '+' is dropped, '-' is
		emitted as the operator `@-`.

		When the input is exhausted, all composite tokens are asked to
		tokenize themselves.

		Raises:
			Exception: if the reader stands at something that is none
				of the token kinds listed above.

		"""

		rd = CodeReader(self.value)

		while not rd.has_end():
			rd.sweep()

			# The sweep may have consumed all that was left (e.g.
			# trailing whitespace). Stop here instead of falling
			# through to the "unexpected token" error below.
			if rd.has_end():
				break

			if rd.has_identifier():
				# an identifier
				# can be variable or a function call

				s = rd.consume_identifier()
				t = T_Name(s)
				self.tokens.append(t)

				rd.sweep()

				if rd.has_bracket():
					# array index
					s = rd.consume_block()
					t = T_Bracket(s)
					self.tokens.append(t)

				elif rd.has_paren():
					# paren with arguments for the function
					s = rd.consume_block()
					t = T_Paren(s)

					t.set_type(ParenType.ARGVALS)

					self.tokens.append(t)

			elif rd.has_paren():
				# Parenthesised sub-expression
				s = rd.consume_block()
				t = T_Paren(s)
				t.set_type(ParenType.EXPR)
				self.tokens.append(t)

			elif rd.has_number():
				# Number literal
				s = rd.consume_number()
				t = T_Number(s)
				self.tokens.append(t)

			elif ((not self.tokens
				or type(self.tokens[-1]) is T_Operator)
				and rd.matches(r'[-+]\s*[0-9a-z_]+')):

				# Unary operator (no operand precedes the sign)
				sign = rd.consume()

				rd.sweep()

				# unary plus needs no token; unary minus gets its own operator
				if sign == '-':
					self.tokens.append(T_Operator('@-'))

			elif rd.has_operator():
				# Operator
				s = rd.consume_operator()
				t = T_Operator(s)
				self.tokens.append(t)

			elif rd.has_char():
				# Char literal
				s = rd.consume_char()
				t = T_Char(s)
				self.tokens.append(t)

			elif rd.has_string():
				# String literal
				s = rd.consume_string()
				t = T_String(s)
				self.tokens.append(t)

			else:
				raise Exception('Unexpected expression token near ' + rd.peek(10))

		# tokenize all composite tokens
		for t in self.tokens:
			if t.is_composite():
				t.tokenize()