def _parse(self, inp) -> Tuple[str, List[str]]:
    character_stream = Stream(inp)
    lexer = JavaTokenizer(character_stream)
    log_string = ""
    arguments = []
    argument_mode = False
    first_token = True
    # We basically have two modes: parse string concatenation and parse arguments.
    # If we find a String.format we know what to look for. If we don't, we assume that the
    # first occurrence of ',' is the delimiter between the string concatenation and the
    # arguments for that string.
    while not lexer.eof():
        current_type, current_token = lexer.peek()
        if current_type == 'str':
            if first_token:
                argument_mode = True
            log_string += current_token
            lexer.next()
        elif current_type == 'op' and current_token == '+':
            lexer.next()
            current_type, current_token = lexer.peek()
            if current_type == 'str':
                log_string += current_token
                lexer.next()
            elif current_type == 'op' and not lexer.is_unary_ops(current_token):
                raise ValueError(f'Operator {current_token} may not follow a +')
            elif current_type == 'op':
                lexer.next()
            elif current_type == 'punc' and not current_token == '(':
                raise ValueError(f'"{current_token}" may not follow a +')
            elif current_type == 'punc' and current_token == '(':
                hints, _, string_only = self._read_expression(lexer)
                if string_only:
                    pass  # string-only expressions are not treated specially here
                if argument_mode:
                    log_string += '{}'
                    arguments.append(hints[0])
                else:
                    arguments.append(hints[0])
            elif current_type == 'var':
                variable = self._read_var(lexer)
                if argument_mode:
                    log_string += '{}'
                    arguments.append(variable)
                else:
                    arguments.append(variable)
        elif current_type == 'punc' and current_token == ',':
            argument_mode = False
            lexer.next()
        elif current_type == 'op' and lexer.is_unary_ops(current_token):
            lexer.next()
        elif current_type == 'var':
            _, expression, _ = self._read_expression(lexer)
            if 'String.format' in expression:
                expression = expression.replace("String.format(", '')
                expression = expression[:expression.rindex(')')]
                return self._parse(expression)
            # Plain expression (no String.format): handle it here.
            if argument_mode:
                log_string += '{}'
            else:
                arguments.append(expression)
        elif current_type == 'num':
            dtype, value = self._check_number(current_token)
            if argument_mode:
                log_string += '{}'
                arguments.append('{!Integer}' if dtype == 'int' else '{!Float}')
            else:
                arguments.append('{!Integer}' if dtype == 'int' else '{!Float}')
            lexer.next()
        elif current_type == 'punc' and current_token == '(':
            hints, output, string_only = self._read_expression(lexer)
            if string_only:
                stream = JavaTokenizer(Stream(output))
                constructed_token = ""
                while not stream.eof():
                    if (token := stream.next())[0] == 'str':
                        constructed_token += token[1]
                log_string += constructed_token
            elif argument_mode:
                log_string += '{}'
            else:
                arguments.append(hints[0])
        else:
            print(f'Weird behavior for token {current_token}<{current_type}>')
            lexer.next()
        # Only the very first token decides whether we start in argument mode.
        first_token = False
    return log_string, arguments
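
# ---------------------------------------------------------------------------
# Illustration only, not used by the parser: a minimal, self-contained sketch
# of the two-mode idea in _parse above. It assumes a simplified input in which
# string literals contain no escaped quotes and '+' / ',' only occur at the
# top level; the real implementation goes through Stream/JavaTokenizer
# instead. The helper name _sketch_split_log_call is made up for this example.
# ---------------------------------------------------------------------------
def _sketch_split_log_call(inp: str) -> Tuple[str, List[str]]:
    """E.g. '"got " + n + " rows", elapsed' -> ('got {} rows', ['n', 'elapsed'])."""
    message_part, _, argument_part = inp.partition(',')
    message, arguments = '', []
    for chunk in (c.strip() for c in message_part.split('+')):
        if chunk.startswith('"') and chunk.endswith('"'):
            message += chunk[1:-1]      # concatenated string literal: copy verbatim
        elif chunk:
            message += '{}'             # anything else becomes a placeholder ...
            arguments.append(chunk)     # ... and is also recorded as an argument
    arguments += [a.strip() for a in argument_part.split(',') if a.strip()]
    return message, arguments
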
def _parse_format(self, lexer: JavaTokenizer, params: List[str]):
    if not params:
        raise ValueError("Trying to parse format without arguments. Aborting...")
    param_offset = 0
    param_type = params[param_offset]
    message = ''
    variables = []
    statement_stack = []
    while not lexer.eof():
        token_type, token = lexer.peek()
        # Advance to the next declared parameter
        if token_type == 'punc' and token == ',' and param_type != '...':
            param_offset = self._increase_index(param_offset, len(params))
            param_type = params[param_offset]
        # Skip this argument, e.g. when it is the log level argument and the like
        elif param_type == 'skip':
            pass
        # New expression
        elif token_type == 'punc' and token == '(':
            statement_stack.append(token)
        # Closing expression
        elif token_type == 'punc' and token == ')':
            statement_stack.pop()
            # No expressions left
            if not statement_stack:
                break
        # String literal
        elif token_type == 'str' and param_type == '...':
            variables.append(token)
        elif token_type == 'str':
            message += self._parse_format_string(token)
        elif token_type == 'num' and param_type == 'str':
            message += str(token)
        elif token_type == 'num' and param_type == '...':
            variables.append(token)
        # Variable (possibly preceded by a unary operator)
        elif token_type == 'var' or (token_type == 'op' and lexer.is_unary_ops(token)):
            # Read the variable; it is either a simple name or a nested expression
            var_type, tokens, arguments = self._read_variable(lexer)
            if var_type == 'simple':
                variables.append(''.join(tokens))
                if param_type == 'str':
                    message += '{}'
            if var_type == 'nested':
                message += tokens
                variables.append(arguments)
        # Operator '+' on a string
        elif param_type == 'str' and token_type == 'op' and token == '+':
            lexer.next()
            token_type, token = lexer.peek()
            if token_type == 'str':
                message += token
            elif token_type == 'var':
                tmp_mode, tmp_message, tmp_variables = self._read_variable(lexer)
                if tmp_mode == 'simple' and tmp_message:
                    message += '{}'
                    variables += tmp_message
                elif tmp_mode == 'nested':
                    pass
        lexer.next()
    self._parse_format_string(message)
    return message, variables
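
# ---------------------------------------------------------------------------
# Illustration only: a small, self-contained sketch of how the parameter
# cursor in _parse_format behaves. The ',' branch above only advances
# param_offset while param_type != '...', so once the declared signature
# reaches its vararg tail every remaining call argument is matched against
# '...'. The helper name _sketch_param_cursor and the assumption that
# _increase_index simply steps the offset forward are illustrative, not taken
# from this file.
# ---------------------------------------------------------------------------
def _sketch_param_cursor(params: List[str], n_call_args: int):
    """E.g. list(_sketch_param_cursor(['skip', 'str', '...'], 4))
    == ['skip', 'str', '...', '...']."""
    offset = 0
    for _ in range(n_call_args):
        yield params[offset]
        # Stay on the vararg marker; otherwise step to the next declared parameter.
        if params[offset] != '...' and offset + 1 < len(params):
            offset += 1
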