Example #1
    def _parse(self, inp: str) -> Tuple[str, List[str]]:
        character_stream = Stream(inp)
        lexer = JavaTokenizer(character_stream)

        log_string = ""
        arguments = []
        argument_mode = False
        first_token = True

        # We basically have two modes: parse string concatenation and parse arguments.
        # If we find a String.format we know what to look for. If we don't, we assume
        # that the first occurrence of ',' delimits the concatenated log string from
        # its arguments.
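        # For example (illustrative): parsing '"Found " + count + " rows", id'
        # yields log_string == 'Found {} rows' and arguments == ['count', 'id'].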

        while not lexer.eof():
            current_type, current_token = lexer.peek()
            if current_type == 'str':
                if first_token:
                    argument_mode = True
                log_string += current_token
                lexer.next()
            elif current_type == 'op' and current_token == '+':
                lexer.next()
                current_type, current_token = lexer.peek()
                if current_type == 'str':
                    log_string += current_token
                    lexer.next()
                elif current_type == 'op' and not lexer.is_unary_ops(
                        current_token):
                    raise ValueError(
                        f'Operator {current_token} may not follow a +')
                elif current_type == 'op':
                    lexer.next()
                elif current_type == 'punc' and not current_token == '(':
                    raise ValueError(f'"{current_token}" may not follow a +')
                elif current_type == 'punc' and current_token == '(':
                    hints, _, string_only = self._read_expression(lexer)
                    # NOTE: string-only expressions get no special handling here.
                    if argument_mode:
                        log_string += '{}'
                    arguments.append(hints[0])

                elif current_type == 'var':
                    variable = self._read_var(lexer)
                    if argument_mode:
                        log_string += '{}'
                    arguments.append(variable)
            elif current_type == 'punc' and current_token == ',':
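                # A top-level ',' ends the concatenated log string; what follows
                # are the call's remaining arguments.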
                argument_mode = False
                lexer.next()
            elif current_type == 'op' and lexer.is_unary_ops(current_token):
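                # Unary operators are skipped; their operand is handled on the
                # next iteration.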
                lexer.next()
            elif current_type == 'var':
                _, expression, _ = self._read_expression(lexer)
                if 'String.format' in expression:
                    # Unwrap the outermost String.format(...) and parse its
                    # contents recursively, e.g.
                    # 'String.format("x %s", v)'  ->  '"x %s", v'
                    expression = expression.replace("String.format(", '', 1)
                    expression = expression[:expression.rindex(')')]
                    return self._parse(expression)
                if argument_mode:
                    log_string += '{}'
                else:
                    arguments.append(expression)
            elif current_type == 'num':
                dtype, _ = self._check_number(current_token)
                if argument_mode:
                    log_string += '{}'
                arguments.append('{!Integer}' if dtype == 'int' else '{!Float}')
                lexer.next()
            elif current_type == 'punc' and current_token == '(':
                hints, output, string_only = self._read_expression(lexer)
                if string_only:
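                    # Re-tokenize the expression and keep only the string
                    # literals it contains.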
                    stream = JavaTokenizer(Stream(output))
                    constructed_token = ""
                    while not stream.eof():
                        if (token := stream.next())[0] == 'str':
                            constructed_token += token[1]
                    log_string += constructed_token
                elif argument_mode:
                    log_string += '{}'
                else:
                    arguments.append(hints[0])
            else:
                print(
                    f'Weird behavior for token {current_token}<{current_type}>')
                lexer.next()

            # Only the very first token may switch us into argument mode.
            first_token = False
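
A minimal usage sketch (illustrative only: the enclosing class is not shown, so the LogParser name and the exact hint values are assumptions):

    # Hypothetical driver for the _parse method above.
    parser = LogParser()
    log_string, arguments = parser._parse('"Processed " + count + " rows", userId')
    # Expected shape: log_string == 'Processed {} rows'
    #                 arguments  == ['count', 'userId'] (or similar hint values)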
Example #2
    def _parse_format(self, lexer: JavaTokenizer, params: List[str]):
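        """Parse a format-style call against the expected parameter types in
        ``params`` (entries observed in this code: 'skip', 'str', '...') and
        return the reconstructed message plus the collected variables."""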

        if not params:
            raise ValueError(
                "Trying to parse a format string without arguments. Aborting...")

        param_offset = 0
        param_type = params[param_offset]
        message = ''
        variables = []
        statement_stack = []
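        # statement_stack tracks '(' nesting so we stop at the matching ')'.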
        while not lexer.eof():
            token_type, token = lexer.peek()

            # Advance to the next expected parameter type
            if token_type == 'punc' and token == ',' and param_type != '...':
                param_offset = self._increase_index(param_offset, len(params))
                param_type = params[param_offset]

            # Skip this argument entirely, e.g. when it is the log-level argument.
            elif param_type == 'skip':
                pass

            # New expression
            elif token_type == 'punc' and token == '(':
                statement_stack.append(token)
            # Closing expression
            elif token_type == 'punc' and token == ')':
                statement_stack.pop()
                # No expressions left
                if not statement_stack:
                    break

            # String literal
            elif token_type == 'str' and param_type == '...':
                variables.append(token)
            elif token_type == 'str':
                message += self._parse_format_string(token)

            elif token_type == 'num' and param_type == 'str':
                message += str(token)
            elif token_type == 'num' and param_type == '...':
                variables.append(token)

            # Variable
            elif token_type == 'var' or (token_type == 'op'
                                         and lexer.is_unary_ops(token)):

                # Handle as a normal variable.
                var_type, tokens, var_args = self._read_variable(lexer)
                if var_type == 'simple':
                    variables.append(''.join(tokens))
                    if param_type == 'str':
                        message += '{}'
                elif var_type == 'nested':
                    message += tokens
                    variables.append(var_args)

            # Operator '+' on string
            elif param_type == 'str' and token_type == 'op' and token == '+':
                lexer.next()
                token_type, token = lexer.peek()
                if token_type == 'str':
                    message += token
                elif token_type == 'var':
                    tmp_mode, tmp_tokens, _ = self._read_variable(lexer)

                    if tmp_mode == 'simple' and tmp_tokens:
                        message += '{}'
                        variables.append(''.join(tmp_tokens))
                    elif tmp_mode == 'nested':
                        # Nested variables after a '+' are not handled yet.
                        pass

            lexer.next()
        return message, variables
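
A minimal usage sketch (illustrative only: JavaTokenizer, Stream, and the params layout ('str' for the format string, '...' for varargs) are assumptions inferred from how param_type is consumed):

    # Hypothetical driver for the _parse_format method above.
    lexer = JavaTokenizer(Stream('("Found {} entries", count)'))
    message, variables = parser._parse_format(lexer, ['str', '...'])
    # Expected shape: message like 'Found {} entries', variables like ['count']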