Exemplo n.º 1
0
def const_string_expr(expr):  # strings are embedded ...
    """
    Build the instruction sequence for a constant string expression.

    The result chains a relative jump, whose offset is taken by peeking the push sequence,
    with whatever ``consume_all`` produces from the static binaries and that push sequence.
    NOTE(review): presumably the jump skips over the embedded string data - confirm against
    ``relative_jump`` / ``consume_all``.
    """
    binaries = static_binaries(expr)
    first_binary = peek(binaries)  # there should be at least one char, '\0'
    push_instrs = push(Address(first_binary, loc(expr)), loc(expr))
    jump = relative_jump(Offset(peek(push_instrs, loc(expr)), loc(expr)), loc(expr))
    return chain(jump, consume_all(binaries, push_instrs))
Exemplo n.º 2
0
def composite_specifier(
        tokens,
        symbol_table,
        obj_type=StructType,
        member_parse_func=parse_struct_members,
        terminal=object()
):
    """
    : 'composite type' IDENTIFIER
    | 'composite type' IDENTIFIER  '{' members '}'
    | 'composite type' '{' members '}'

    Parse a composite type specifier and return an ``obj_type`` instance.

    :param tokens: token stream; its first token (the composite keyword) is consumed for its location.
    :param symbol_table: mapping consulted for, and updated with, named composite types.
    :param obj_type: class used to build the type, called as ``obj_type(name, members, location)``.
    :param member_parse_func: called as ``member_parse_func(tokens, symbol_table)`` to parse a braced body.
    :param terminal: private sentinel meaning "not present in symbol_table"; not meant to be supplied.
    :return: a fresh anonymous type, or the named type (new, or the one already in ``symbol_table``).
    :raises ValueError: when the keyword is followed by neither an IDENTIFIER nor a left brace.
    """
    location = loc(consume(tokens))
    if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:  # anonymous composite ...
        return obj_type(None, member_parse_func(tokens, symbol_table), location)

    if isinstance(peek_or_terminal(tokens), IDENTIFIER):
        # Arguments are evaluated left to right: the identifier is peeked for the lookup key first,
        # then consumed while building the default, so it is consumed even when the lookup succeeds.
        obj = symbol_table.get(obj_type.get_name(peek(tokens)), obj_type(consume(tokens), None, location))
        # some composites are a bit tricky such as Struct/Union ...
        # since any of its members may contain itself as a reference, so we'll add the type to
        # the symbol table before adding the members ...
        # TODO: make types immutable, right now they are being shared.
        if symbol_table.get(obj.name, terminal) is terminal:
            symbol_table[name(obj)] = obj
        if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:
            # members are assigned onto the (possibly shared, previously declared) object
            obj.members = member_parse_func(tokens, symbol_table)

        return obj

    raise ValueError('{l} Expected IDENTIFIER or LEFT_BRACE got {got}'.format(
        l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
    ))
Exemplo n.º 3
0
def composite_specifier(tokens,
                        symbol_table,
                        obj_type=StructType,
                        member_parse_func=parse_struct_members,
                        terminal=object()):
    """
    : 'composite type' IDENTIFIER
    | 'composite type' IDENTIFIER  '{' members '}'
    | 'composite type' '{' members '}'

    Parse a composite type specifier and return an ``obj_type`` instance.

    :param tokens: token stream; its first token (the composite keyword) is consumed for its location.
    :param symbol_table: mapping consulted for, and updated with, named composite types.
    :param obj_type: class used to build the type, called as ``obj_type(name, members, location)``.
    :param member_parse_func: called as ``member_parse_func(tokens, symbol_table)`` to parse a braced body.
    :param terminal: private sentinel meaning "not present in symbol_table"; not meant to be supplied.
    :return: a fresh anonymous type, or the named type (new, or the one already in ``symbol_table``).
    :raises ValueError: when the keyword is followed by neither an IDENTIFIER nor a left brace.
    """
    location = loc(consume(tokens))
    if peek_or_terminal(
            tokens) == TOKENS.LEFT_BRACE:  # anonymous composite ...
        return obj_type(None, member_parse_func(tokens, symbol_table),
                        location)

    if isinstance(peek_or_terminal(tokens), IDENTIFIER):
        # Arguments are evaluated left to right: the identifier is peeked for the lookup key first,
        # then consumed while building the default, so it is consumed even when the lookup succeeds.
        obj = symbol_table.get(obj_type.get_name(peek(tokens)),
                               obj_type(consume(tokens), None, location))
        # some composites are a bit tricky such as Struct/Union ...
        # since any of its members may contain itself as a reference, so we'll add the type to
        # the symbol table before adding the members ...
        # TODO: make types immutable, right now they are being shared.
        if symbol_table.get(obj.name, terminal) is terminal:
            symbol_table[name(obj)] = obj
        if peek_or_terminal(tokens) == TOKENS.LEFT_BRACE:
            # members are assigned onto the (possibly shared, previously declared) object
            obj.members = member_parse_func(tokens, symbol_table)

        return obj

    raise ValueError('{l} Expected IDENTIFIER or LEFT_BRACE got {got}'.format(
        l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))
Exemplo n.º 4
0
def additive_expression(tokens, symbol_table, rules=default_additive_expression_rules):
    """
    : multiplicative_expression ('+' multiplicative_expression | '-' multiplicative_expression)*

    Parses the first operand, then, while the next token is a key of ``rules``, wraps the binary
    expression produced by ``get_binary_expression`` (with NumericType) in the matching rule.
    """
    left = multiplicative_expression(tokens, symbol_table)
    while peek(tokens, '') in rules:
        # the rule is looked up from the current token before the token stream is handed on
        wrap = rules[peek(tokens, '')]
        left = wrap(
            get_binary_expression(tokens, symbol_table, left, multiplicative_expression, NumericType)
        )
    return left
Exemplo n.º 5
0
def type_qualifiers(tokens, _, defaults=None):  # : ('const' or volatile or *args)*
    """
    Consume leading type qualifiers from ``tokens`` and report whether const/volatile were seen.

    :param tokens: token stream; tokens accepted by ``rules(type_qualifiers)`` are taken from its front.
    :param _: unused (presumably the symbol table, kept for a uniform parser signature).
    :param defaults: optional ``(const, volatile)`` pair used for whichever qualifier was not seen.
    :return: ``(const, volatile)`` pair.
    :raises ValueError: when no qualifier is present and no (truthy) defaults were supplied.
    """
    values = set(takewhile(rules(type_qualifiers).__contains__, tokens))
    const, volatile = imap(values.__contains__, (TOKENS.CONST, TOKENS.VOLATILE))
    if not values and not defaults:
        raise ValueError('{l} Expected TOKENS.CONST or TOKEN.VOLATILE got {g}'.format(
            l=loc(peek(tokens, EOFLocation)), g=peek(tokens, '')
        ))
    if not defaults:
        # Qualifiers were found but no defaults were given: an absent qualifier is simply False.
        # Previously this path indexed ``None`` (e.g. `const` without `volatile`) and raised TypeError.
        defaults = (False, False)
    return const or defaults[0], volatile or defaults[1]
Exemplo n.º 6
0
 def _values(char_stream):
     """
     Yield the characters of a string body up to (not including) the closing double quote.

     A backslash is replaced by ``escape_characters`` looked up with the following character,
     falling back to that character itself; the closing quote is then required via
     ``error_if_not_value``.
     """
     while peek(char_stream, TOKENS.DOUBLE_QUOTE) != TOKENS.DOUBLE_QUOTE:
         char = consume(char_stream)
         if char == '\\':
             # peek first, then consume: the same character is both lookup key and fallback
             escape_code = peek(char_stream)
             char = escape_characters.get(escape_code, consume(char_stream))
         yield char
     error_if_not_value(char_stream, TOKENS.DOUBLE_QUOTE)
Exemplo n.º 7
0
def get_repositioned_line(char_seq, location):  # get next line ...
    """
    Yield the characters up to the next NewLineStr, each re-wrapped as ``Str(char, location)``.

    A backslash directly followed by a NewLineStr is dropped together with the run of NewLineStr
    characters after it, and the following line is yielded (recursively) with the same location.
    """
    while not isinstance(peek(char_seq), NewLineStr):
        current = consume(char_seq)
        is_continuation = current == '\\' and isinstance(peek(char_seq), NewLineStr)
        if not is_continuation:
            yield Str(current, location)
            continue
        # line continuation: skip the new line characters and keep going on the next line
        exhaust(takewhile(lambda ch: isinstance(ch, NewLineStr), char_seq))
        for repositioned in get_repositioned_line(char_seq, location):
            yield repositioned
Exemplo n.º 8
0
def multiplicative_expression(tokens, symbol_table):
    """
    : cast_expression ('*' cast_expression | '/' cast_expression | '%' cast_expression)*

    The operand parser is fetched from ``symbol_table``; while the next token is a key of
    ``rules(multiplicative_expression)`` the expression is extended via ``get_binary_expression``,
    passing the rule's value for that token as the last argument.
    """
    parse_operand = symbol_table['__ cast_expression __']
    left = parse_operand(tokens, symbol_table)
    while peek(tokens, '') in rules(multiplicative_expression):
        # resolved from the current token before the token stream is handed on
        rule_value = rules(multiplicative_expression)[peek(tokens)]
        left = get_binary_expression(tokens, symbol_table, left, parse_operand, rule_value)
    return left
Exemplo n.º 9
0
def initializer_list(tokens, symbol_table):
    """
    Parse a comma separated list of (designated) initializer expressions.

    Returns an empty tuple when ``peek(tokens, TOKENS.RIGHT_BRACE)`` is a right brace; otherwise a
    chain whose first element is parsed immediately and whose remaining elements are parsed lazily,
    one per iteration.  The predicate consumes a comma as a side effect and stops either when the
    next token is not a comma or when the consumed comma is followed by a right brace
    (i.e. a trailing comma is accepted).
    NOTE(review): the lambda mixes ``i[0]`` and the enclosing ``tokens``; both are the same object here.
    """
    return () if peek(
        tokens, TOKENS.RIGHT_BRACE) == TOKENS.RIGHT_BRACE else chain(
            (designated_expression_or_expression(tokens, symbol_table), ),
            starmap(
                designated_expression_or_expression,
                takewhile(
                    lambda i: peek_or_terminal(i[0]) == TOKENS.COMMA and
                    consume(i[0]) and peek(tokens, TOKENS.RIGHT_BRACE) !=
                    TOKENS.RIGHT_BRACE, repeat((tokens, symbol_table)))))
Exemplo n.º 10
0
def additive_expression(tokens,
                        symbol_table,
                        rules=default_additive_expression_rules):
    """
    : multiplicative_expression ('+' multiplicative_expression | '-' multiplicative_expression)*

    Parses the first operand, then, while the next token is a key of ``rules``, wraps the binary
    expression produced by ``get_binary_expression`` (with NumericType) in the matching rule.
    """
    left = multiplicative_expression(tokens, symbol_table)
    while peek(tokens, '') in rules:
        # the rule is looked up from the current token before the token stream is handed on
        wrap = rules[peek(tokens, '')]
        left = wrap(
            get_binary_expression(tokens, symbol_table, left,
                                  multiplicative_expression, NumericType))
    return left
Exemplo n.º 11
0
def type_qualifiers(tokens,
                    _,
                    defaults=None):  # : ('const' or volatile or *args)*
    """
    Consume leading type qualifiers from ``tokens`` and report whether const/volatile were seen.

    :param tokens: token stream; tokens accepted by ``rules(type_qualifiers)`` are taken from its front.
    :param _: unused (presumably the symbol table, kept for a uniform parser signature).
    :param defaults: optional ``(const, volatile)`` pair used for whichever qualifier was not seen.
    :return: ``(const, volatile)`` pair.
    :raises ValueError: when no qualifier is present and no (truthy) defaults were supplied.
    """
    values = set(takewhile(rules(type_qualifiers).__contains__, tokens))
    const, volatile = imap(values.__contains__,
                           (TOKENS.CONST, TOKENS.VOLATILE))
    if not values and not defaults:
        raise ValueError(
            '{l} Expected TOKENS.CONST or TOKEN.VOLATILE got {g}'.format(
                l=loc(peek(tokens, EOFLocation)), g=peek(tokens, '')))
    if not defaults:
        # Qualifiers were found but no defaults were given: an absent qualifier is simply False.
        # Previously this path indexed ``None`` (e.g. `const` without `volatile`) and raised TypeError.
        defaults = (False, False)
    return const or defaults[0], volatile or defaults[1]
Exemplo n.º 12
0
def get_repositioned_line(char_seq, location):  # get next line ...
    """
    Yield the characters up to the next NewLineStr, each re-wrapped as ``Str(char, location)``.

    A backslash directly followed by a NewLineStr is dropped together with the run of NewLineStr
    characters after it, and the following line is yielded (recursively) with the same location.
    """
    while not isinstance(peek(char_seq), NewLineStr):
        current = consume(char_seq)
        is_continuation = current == '\\' and isinstance(peek(char_seq), NewLineStr)
        if not is_continuation:
            yield Str(current, location)
            continue
        # line continuation: skip the new line characters and keep going on the next line
        exhaust(takewhile(lambda ch: isinstance(ch, NewLineStr), char_seq))
        for repositioned in get_repositioned_line(char_seq, location):
            yield repositioned
Exemplo n.º 13
0
def initializer_list(tokens, symbol_table):
    """
    Parse a comma separated list of (designated) initializer expressions.

    Returns an empty tuple when ``peek(tokens, TOKENS.RIGHT_BRACE)`` is a right brace; otherwise a
    chain whose first element is parsed immediately and whose remaining elements are parsed lazily,
    one per iteration.  The predicate consumes a comma as a side effect and stops either when the
    next token is not a comma or when the consumed comma is followed by a right brace
    (i.e. a trailing comma is accepted).
    NOTE(review): the lambda mixes ``i[0]`` and the enclosing ``tokens``; both are the same object here.
    """
    return () if peek(tokens, TOKENS.RIGHT_BRACE) == TOKENS.RIGHT_BRACE else chain(
        (designated_expression_or_expression(tokens, symbol_table),),
        starmap(
            designated_expression_or_expression,
            takewhile(
                lambda i: peek_or_terminal(i[0]) == TOKENS.COMMA and consume(i[0])
                and peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE,
                repeat((tokens, symbol_table))
            )
        )
    )
Exemplo n.º 14
0
 def dimensions(tokens):
     """
     Yield ``(size, location)`` for every consecutive ``[...]`` group at the front of ``tokens``.

     ``size`` is None for an empty ``[]``; otherwise it is ``exp`` of the parsed constant expression,
     which must have an IntegralType and must not be negative (ValueError).  ``location`` is the
     location of the opening bracket.  ``symbol_table`` comes from the enclosing scope.
     """
     while peek(tokens) == TOKENS.LEFT_BRACKET:
         bracket_location = loc(consume(tokens))
         length = None  # stays None for an empty `[]`
         if not (peek(tokens) == TOKENS.RIGHT_BRACKET):
             size_exp = constant_expression(tokens, symbol_table)
             error_if_not_type(c_type(size_exp), IntegralType)
             if exp(size_exp) < 0:
                 raise ValueError('{l} array size is negative'.format(l=loc(size_exp)))
             length = exp(size_exp)
         error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
         yield length, bracket_location
Exemplo n.º 15
0
 def dimensions(tokens):
     """
     Yield ``(size, location)`` for every consecutive ``[...]`` group at the front of ``tokens``.

     ``size`` is None for an empty ``[]``; otherwise it is ``exp`` of the parsed constant expression,
     which must have an IntegralType and must not be negative (ValueError).  ``location`` is the
     location of the opening bracket.  ``symbol_table`` comes from the enclosing scope.
     """
     while peek(tokens) == TOKENS.LEFT_BRACKET:
         bracket_location = loc(consume(tokens))
         length = None  # stays None for an empty `[]`
         if not (peek(tokens) == TOKENS.RIGHT_BRACKET):
             size_exp = constant_expression(tokens, symbol_table)
             error_if_not_type(c_type(size_exp), IntegralType)
             if exp(size_exp) < 0:
                 raise ValueError(
                     '{l} array size is negative'.format(l=loc(size_exp)))
             length = exp(size_exp)
         error_if_not_value(tokens, TOKENS.RIGHT_BRACKET)
         yield length, bracket_location
Exemplo n.º 16
0
def unary_expression(tokens, symbol_table):
    """
        :   postfix_expression
            | '++' unary_expression
            | '--' unary_expression
            | unary_operator cast_expression
            | 'sizeof' (type_name | unary_expression)

    Dispatches on the next token: when it is a key of ``rules(unary_expression)`` and not a
    CONSTANT instance the matching rule parses it, otherwise the postfix expression parser
    found in ``symbol_table`` does.
    """
    error_if_empty(tokens)

    starts_unary = (
        peek_or_terminal(tokens) in rules(unary_expression)
        and not isinstance(peek(tokens), CONSTANT)
    )
    if starts_unary:
        parse = rules(unary_expression)[peek(tokens)]
    else:
        parse = symbol_table['__ postfix_expression __']
    return parse(tokens, symbol_table)
Exemplo n.º 17
0
def declarations(tokens, symbol_table):
    """
    Generator over the declarations / function definitions of one declaration statement.

    Yields an EmptyDeclaration for a lone ';', a FunctionDefinition when a declarator of
    FunctionType is followed by '{', and the plain declaration otherwise.  Typedef declarators
    are not yielded; their symbol table entry is replaced by their c_type instead.
    """
    # storage_class_specifier? type_name? init_declarator_list (';' or compound_statement) # declaration
    # the three sub-parsers are fetched from the symbol table rather than referenced directly
    storage_class_specifier, specifier_qualifier_list, statement = imap(
        symbol_table.__getitem__,
        ('__ storage_class_specifier __', '__ specifier_qualifier_list __',
         '__ statement __'))
    storage_class = storage_class_specifier(tokens, symbol_table)
    base_type = specifier_qualifier_list(tokens, symbol_table)

    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif peek_or_terminal(tokens) is terminal:
        raise_error(
            '{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'
            .format(l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')))
    else:
        for dec in init_declarator_list(tokens,
                                        symbol_table,
                                        base_type=base_type,
                                        storage_class=storage_class):
            dec.storage_class = storage_class
            if isinstance(
                    storage_class, TypeDef
            ):  # init_declarator_list adds the symbol as a decl to symbol_table
                # `(pop(...) or 1) and x`: the pop runs for its side effect, `or 1` keeps the
                # left side truthy so the whole expression always evaluates to c_type(dec)
                symbol_table[name(dec)] = (symbol_table.pop(
                    name(dec)) or 1) and c_type(dec)  # replace dec by ctype
            # function definition: taken only when error_if_not_type returns a falsy value
            # (presumably it raises when the declarator is not a FunctionType - TODO confirm)
            elif peek_or_terminal(
                    tokens) == TOKENS.LEFT_BRACE and not error_if_not_type(
                        c_type(dec), FunctionType):
                # new scope for the function body, seeded with its parameters and bookkeeping entries
                symbol_table = push(symbol_table)
                symbol_table.update(
                    chain(
                        imap(
                            lambda a: (
                                name(a), a
                            ),  # add non variable list parameters to the symbol table ...
                            ifilterfalse(
                                lambda c: isinstance(c_type(c), VAListType),
                                c_type(dec))),
                        (('__ RETURN_TYPE __', c_type(c_type(dec))),
                         ('__ LABELS __', SymbolTable()))))
                # only the first statement produced by the statement parser is taken as the body
                yield FunctionDefinition(dec,
                                         next(statement(tokens, symbol_table)))
                # leave the function scope; the empty string disables the trailing ';' check below
                expecting_token = (pop(symbol_table) or 1) and ''
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
        # require the terminating token unless the last declarator was a function definition
        _ = expecting_token and error_if_not_value(tokens, expecting_token)
Exemplo n.º 18
0
def unary_expression(tokens, symbol_table):
    """
        :   postfix_expression
            | '++' unary_expression
            | '--' unary_expression
            | unary_operator cast_expression
            | 'sizeof' (type_name | unary_expression)

    Dispatches on the next token: when it is a key of ``rules(unary_expression)`` and not a
    CONSTANT instance the matching rule parses it, otherwise the postfix expression parser
    found in ``symbol_table`` does.
    """
    error_if_empty(tokens)

    starts_unary = (
        peek_or_terminal(tokens) in rules(unary_expression)
        and not isinstance(peek(tokens), CONSTANT)
    )
    if starts_unary:
        parse = rules(unary_expression)[peek(tokens)]
    else:
        parse = symbol_table['__ postfix_expression __']
    return parse(tokens, symbol_table)
Exemplo n.º 19
0
def postfix_expression(tokens, symbol_table):
    """
    : primary_expression
    (       '[' expression ']'
            |   '(' ')'
            |   '(' argument_expression_list ')'
            |   '.' IDENTIFIER
            |   '->' IDENTIFIER
            |   '++'
            |   '--'        )*

    Parses a primary expression and then applies the handlers of ``rules(postfix_expression)``
    for as long as the next token selects one, threading the expression through each handler.
    """
    # All four parsers are resolved up front (a missing entry fails here); only
    # primary_expression is used below.
    # NOTE: an earlier, disabled variant handled a leading '(' at this point in order to parse
    # compound literals `'(' type_name ')' initializer` versus `'(' expression ')'`.
    type_name, expression, initializer, primary_expression = imap(
        symbol_table.__getitem__,
        ('__ type_name __', '__ expression __', '__ initializer __', '__ primary_expression __'),
    )
    result = primary_expression(tokens, symbol_table)
    while peek_or_terminal(tokens) in rules(postfix_expression):
        handler = rules(postfix_expression)[peek(tokens)]
        result = handler(tokens, symbol_table, result)

    return result
Exemplo n.º 20
0
def remove_pass(instrs):
    """
    Collapse the run of Pass instructions at the front of ``instrs``.

    When another instruction follows, the whole run is replaced by it (nothing is yielded);
    when the stream ends, the last Pass replaces the earlier ones and the result is yielded.
    """
    leading_passes = tuple(takewhile(lambda i: isinstance(i, Pass), instrs))
    if peek_or_terminal(instrs) is not terminal:
        replace_instrs(peek(instrs), leading_passes)
        return
    last_pass, earlier_passes = leading_passes[-1], leading_passes[:-1]
    yield replace_instrs(last_pass, earlier_passes)
Exemplo n.º 21
0
def inclusive_or_expression(tokens, symbol_table):
    """
    : exclusive_or_expression ('|' exclusive_or_expression)*

    Extends the first operand via ``get_binary_expression`` (with IntegralType) for as long as
    the next token equals TOKENS.BAR.
    """
    left = exclusive_or_expression(tokens, symbol_table)
    while peek(tokens, '') == TOKENS.BAR:
        left = get_binary_expression(
            tokens, symbol_table, left, exclusive_or_expression, IntegralType
        )
    return left
Exemplo n.º 22
0
def exclusive_or_expression(tokens, symbol_table):
    """
    : and_expression ('^' and_expression)*

    Extends the first operand via ``get_binary_expression`` (with IntegralType) for as long as
    the next token is in ``rules(exclusive_or_expression)``.
    """
    left = and_expression(tokens, symbol_table)
    while peek(tokens, '') in rules(exclusive_or_expression):
        left = get_binary_expression(
            tokens, symbol_table, left, and_expression, IntegralType
        )
    return left
Exemplo n.º 23
0
def compound_statement(tokens, symbol_table):  #: '{' statement*  '}'
    """
    Generator over the statements of a braced block.

    Requires the opening brace, pushes a scope onto ``symbol_table``, yields whatever the
    statement parser (looked up in the pushed table) returns until a right brace is next, then
    requires the closing brace and pops the scope.
    """
    error_if_not_value(tokens, TOKENS.LEFT_BRACE)
    symbol_table = push(symbol_table)
    parse_statement = symbol_table['__ statement __']
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        yield parse_statement(tokens, symbol_table)
    # the scope is popped only when the closing-brace check returns a truthy value
    if error_if_not_value(tokens, TOKENS.RIGHT_BRACE):
        pop(symbol_table)
Exemplo n.º 24
0
def invalid_token(token_seq, _):
    """
    Always raise ValueError reporting an unexpected token where #elif, #else or #endif was expected.

    :param token_seq: token stream; its next token (or '' when none) is reported.
    :param _: unused.
    :raises ValueError: unconditionally.

    The message used to contain an ``{at}`` field for which no value was ever supplied, so
    ``str.format`` raised KeyError instead of the intended ValueError; the field was removed.
    """
    # NOTE(review): other call sites use loc(peek(seq, EOFLocation)); here loc receives the
    # sequence itself plus a default - confirm this is intended.
    raise ValueError(
        '{l} Expected either #elif, #else or #endif, got {g}'.format(
            l=loc(token_seq, EOFLocation),
            g=peek(token_seq, ''),
        ))
Exemplo n.º 25
0
def remove_allocation(instrs):
    """
        optimize 1 or more sequence of allocations ...
        take their sum and if zero replace with the next instruction in case this one is referenced.
        other wise do one allocation and remove rest
        replace allocate 1 with POP, which only requires a single address translation vs 2 (instr, oprn) for allocate.

        Generator: yields the replacement instruction, or the untouched next instruction when the
        stream does not start with Allocate instructions that have integer operands.  Yields nothing
        when a zero-sum run is replaced by the instruction that follows it.

        Fix: merged runs now always get an Allocate carrying the summed operand; previously a run of
        several allocations reused the first instruction, keeping only its own operand.
    """
    alloc_instrs = tuple(
        takewhile(
            lambda i: isinstance(i, Allocate) and isinstance(
                opern(i), (int, long)), instrs))

    if not alloc_instrs:  # Operand must be non-primitive type (Address) ... must wait for its value.
        yield consume(instrs)
    else:
        total = sum(imap(opern, alloc_instrs))

        if total:  # non-zero allocates changes the state of the stack.
            if total in pop_instrs:
                new_instr = next(pop_instrs[total](loc(alloc_instrs[0])))
            else:
                # one allocation for the whole run, whatever its length
                new_instr = Allocate(loc(alloc_instrs[-1]), total)
            yield replace_instrs(new_instr, alloc_instrs)
        else:  # stack remains unchanged, get next instruction for referencing, if one exists ...
            if peek_or_terminal(instrs) is terminal:
                # NOTE(review): every other call here is replace_instrs(new, olds); confirm that
                # replace_instr (singular) exists and accepts the same arguments.
                yield replace_instr(Pass(loc(alloc_instrs[-1])), alloc_instrs)
            else:
                replace_instrs(peek(instrs), alloc_instrs)
Exemplo n.º 26
0
def logical_or_expression(tokens, symbol_table):
    """
    : logical_and_expression ('||' logical_and_expression)*

    Extends the first operand via ``get_binary_expression`` (with NumericType) for as long as
    the next token equals TOKENS.LOGICAL_OR.
    """
    left = logical_and_expression(tokens, symbol_table)
    while peek(tokens, '') == TOKENS.LOGICAL_OR:
        left = get_binary_expression(
            tokens, symbol_table, left, logical_and_expression, NumericType
        )
    return left
Exemplo n.º 27
0
def remove_allocation(instrs):
    """
        optimize 1 or more sequence of allocations ...
        take their sum and if zero replace with the next instruction in case this one is referenced.
        other wise do one allocation and remove rest
        replace allocate 1 with POP, which only requires a single address translation vs 2 (instr, oprn) for allocate.

        Generator: yields the replacement instruction, or the untouched next instruction when the
        stream does not start with Allocate instructions that have integer operands.  Yields nothing
        when a zero-sum run is replaced by the instruction that follows it.

        Fix: merged runs now always get an Allocate carrying the summed operand; previously a run of
        several allocations reused the first instruction, keeping only its own operand.
    """
    alloc_instrs = tuple(takewhile(lambda i: isinstance(i, Allocate) and isinstance(opern(i), (int, long)), instrs))

    if not alloc_instrs:  # Operand must be non-primitive type (Address) ... must wait for its value.
        yield consume(instrs)
    else:
        total = sum(imap(opern, alloc_instrs))

        if total:  # non-zero allocates changes the state of the stack.
            if total in pop_instrs:
                new_instr = next(pop_instrs[total](loc(alloc_instrs[0])))
            else:
                # one allocation for the whole run, whatever its length
                new_instr = Allocate(loc(alloc_instrs[-1]), total)
            yield replace_instrs(new_instr, alloc_instrs)
        else:  # stack remains unchanged, get next instruction for referencing, if one exists ...
            if peek_or_terminal(instrs) is terminal:
                # NOTE(review): every other call here is replace_instrs(new, olds); confirm that
                # replace_instr (singular) exists and accepts the same arguments.
                yield replace_instr(Pass(loc(alloc_instrs[-1])), alloc_instrs)
            else:
                replace_instrs(peek(instrs), alloc_instrs)
Exemplo n.º 28
0
def equality_expression(tokens, symbol_table):
    """
    : relational_expression (('=='|'!=') relational_expression)*

    Extends the first operand via ``get_binary_expression`` (with NumericType) for as long as
    the next token is in ``rules(equality_expression)``.
    """
    left = relational_expression(tokens, symbol_table)
    while peek(tokens, '') in rules(equality_expression):
        left = get_binary_expression(
            tokens, symbol_table, left, relational_expression, NumericType
        )
    return left
Exemplo n.º 29
0
def function_parameter_declarations(tokens, symbol_table):
    """
    Parse a parenthesised parameter list and return a FunctionType.

    The FunctionType is built from ``CType(location)``, the tuple of parsed parameters (empty
    for ``()``) and the location of the opening parenthesis.  If the closing-parenthesis check
    returns a falsy value, that value is returned instead.
    """
    open_location = loc(error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS))
    if peek(tokens, TOKENS.RIGHT_PARENTHESIS) == TOKENS.RIGHT_PARENTHESIS:
        parameters = ()
    else:
        parameters = tuple(parameter_type_list(tokens, symbol_table))
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    if not closing:
        return closing
    return FunctionType(CType(open_location), parameters, open_location)
Exemplo n.º 30
0
def and_expression(tokens, symbol_table):
    """
    : equality_expression ('&' equality_expression)*

    Extends the first operand via ``get_binary_expression`` (with IntegralType) for as long as
    the next token is in ``rules(and_expression)``.
    """
    left = equality_expression(tokens, symbol_table)
    while peek(tokens, '') in rules(and_expression):
        left = get_binary_expression(
            tokens, symbol_table, left, equality_expression, IntegralType
        )
    return left
Exemplo n.º 31
0
def relational_expression(tokens, symbol_table):
    """
    : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*

    Extends the first operand via ``get_binary_expression`` (with NumericType) for as long as
    the next token is in ``rules(relational_expression)``.
    """
    left = shift_expression(tokens, symbol_table)
    while peek(tokens, '') in rules(relational_expression):
        left = get_binary_expression(
            tokens, symbol_table, left, shift_expression, NumericType
        )
    return left
Exemplo n.º 32
0
def logical_and_expression(tokens, symbol_table):
    """
    : inclusive_or_expression ('&&' inclusive_or_expression)*

    Extends the first operand via ``get_binary_expression`` (with NumericType) for as long as
    the next token equals TOKENS.LOGICAL_AND.
    """
    left = inclusive_or_expression(tokens, symbol_table)
    while peek(tokens, '') == TOKENS.LOGICAL_AND:
        left = get_binary_expression(
            tokens, symbol_table, left, inclusive_or_expression, NumericType
        )
    return left
Exemplo n.º 33
0
def iteration_statement(tokens, symbol_table):
    """
        : 'while' '(' expression ')' statement
        | 'do' statement 'while' '(' expression ')' ';'
        | 'for' '(' expression?; expression?; expression? ')' statement

    Dispatches to the handler registered in ``rules(iteration_statement)`` for the next token
    ('' when the stream is empty) and returns its result.
    """
    handler = rules(iteration_statement)[peek(tokens, '')]
    return handler(tokens, symbol_table)
Exemplo n.º 34
0
def char_literal(char_stream, location):
    """
    Parse a single quoted character literal from ``char_stream`` and return a CHAR.

    An immediately closing quote gives ``CHAR('', location)``.  A backslash is replaced by
    ``escape_characters`` looked up with the following character, falling back to that character.
    The closing single quote is required via ``error_if_not_value``.
    """
    opening_quote = consume(char_stream)  # consume initial single quote
    value = opening_quote and consume(char_stream)  # consume char
    if value == TOKENS.SINGLE_QUOTE:  # empty char ...
        return CHAR('', location)
    if value == '\\':  # char is being escaped
        # peek first, then consume: the same character is both lookup key and fallback
        escape_code = peek(char_stream)
        value = escape_characters.get(escape_code, consume(char_stream))
    closing = error_if_not_value(char_stream, TOKENS.SINGLE_QUOTE)
    return closing and CHAR(value, location)
Exemplo n.º 35
0
def iteration_statement(tokens, symbol_table):
    """
        : 'while' '(' expression ')' statement
        | 'do' statement 'while' '(' expression ')' ';'
        | 'for' '(' expression?; expression?; expression? ')' statement

    Dispatches to the handler registered in ``rules(iteration_statement)`` for the next token
    ('' when the stream is empty) and returns its result.
    """
    handler = rules(iteration_statement)[peek(tokens, '')]
    return handler(tokens, symbol_table)
Exemplo n.º 36
0
def get_line(values):
    """
    Return an iterator over the tokens that share the line number of the next token.

    The preprocessor works on a line-by-line basis; an empty iterator is returned when
    ``values`` has nothing left.
    """
    if peek_or_terminal(values) is terminal:
        return iter(())
    first_line = line_number(peek(values))  # fixed once, before any token is taken
    return takewhile(lambda token: first_line == line_number(token), values)
Exemplo n.º 37
0
def specifier_qualifier_list(tokens, symbol_table):
    """
    Parse ``qualifiers? type_specifier qualifiers?`` and return the resulting type.

    Qualifiers found before the specifier serve as defaults for those found after it; the final
    pair is stored on the type's ``const`` / ``volatile`` attributes.  The type specifier is
    given an IntegerType (located at the next token) as its third argument.
    """
    leading_const, leading_volatile = type_qualifiers(tokens, symbol_table, (False, False))
    fallback_type = IntegerType(loc(peek(tokens, EOFLocation)))
    ctype = type_specifier(tokens, symbol_table, fallback_type)
    qualifiers = type_qualifiers(tokens, symbol_table, (leading_const, leading_volatile))
    ctype.const, ctype.volatile = qualifiers
    return ctype
Exemplo n.º 38
0
def _func_macro_arguments(line):
    """
    Generator over the formal arguments of a function-like macro definition.

    Reads from ``line`` until a right parenthesis is next (the parenthesis itself is not consumed).
    Yields FunctionMacroArgument objects, or a FunctionMacroVariadicArgument for ``...`` /
    ``name...``, after which iteration stops.
    """
    # local table only used to register each argument (see the duplicate check below)
    symbol_table = SymbolTable()
    while peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS:
        if peek(line) == TOKENS.ELLIPSIS:
            # bare `...` is exposed under the name __VA_ARGS__
            arg = FunctionMacroVariadicArgument(IDENTIFIER('__VA_ARGS__', loc(consume(line))))
        else:
            arg = FunctionMacroArgument(error_if_not_type(consume(line, EOFLocation), (IDENTIFIER, KEYWORD)))
            if peek_or_terminal(line) == TOKENS.ELLIPSIS:
                # named variadic argument: `name...`
                arg = FunctionMacroVariadicArgument(IDENTIFIER(arg, loc(consume(line))))
        # presumably SymbolTable rejects re-assignment of an existing key - TODO confirm
        symbol_table[arg] = arg     # check for duplicate argument name
        yield arg       # if ok add to the rest ...
        if isinstance(arg, FunctionMacroVariadicArgument):  # if variadic argument break ...
            break
        # consume expected comma if we don't see a right parenthesis ...
        _ = peek(line, TOKENS.RIGHT_PARENTHESIS) != TOKENS.RIGHT_PARENTHESIS \
            and error_if_not_value(line, TOKENS.COMMA, loc(arg))
Exemplo n.º 39
0
def postfix_expression(tokens, symbol_table):
    """
    : primary_expression
    (       '[' expression ']'
            |   '(' ')'
            |   '(' argument_expression_list ')'
            |   '.' IDENTIFIER
            |   '->' IDENTIFIER
            |   '++'
            |   '--'        )*

    Parses a primary expression and then applies the handlers of ``rules(postfix_expression)``
    for as long as the next token selects one, threading the expression through each handler.
    """
    # All four parsers are resolved up front (a missing entry fails here); only
    # primary_expression is used below.
    # NOTE: an earlier, disabled variant handled a leading '(' at this point in order to parse
    # compound literals `'(' type_name ')' initializer` versus `'(' expression ')'`.
    type_name, expression, initializer, primary_expression = imap(
        symbol_table.__getitem__,
        ('__ type_name __', '__ expression __', '__ initializer __',
         '__ primary_expression __'),
    )
    result = primary_expression(tokens, symbol_table)
    while peek_or_terminal(tokens) in rules(postfix_expression):
        handler = rules(postfix_expression)[peek(tokens)]
        result = handler(tokens, symbol_table, result)

    return result
Exemplo n.º 40
0
def string_literal(tokens):
    """
    Join the adjacent STRING tokens at the front of ``tokens`` into one constant expression.

    A terminating '\\0' is appended to the joined text; the result is a ConstantExpression over
    the lazily converted characters, typed ``StringType(len(text), location)`` and located at
    the first token.
    """
    start = loc(peek(tokens))
    text = ''.join(takewhile(lambda tok: type(tok) is STRING, tokens)) + '\0'
    chars = imap(char_literal, imap(iter, text))
    return ConstantExpression(chars, StringType(len(text), start), start)
Exemplo n.º 41
0
def parse_struct_members(tokens, symbol_table):
    """
    Parse a braced member list and return an OrderedDict mapping member name to declarator.

    The first token (the opening brace) is consumed unchecked.  Each member group is
    ``specifier_qualifier_list declarator (',' declarator)* ';'``.

    :raises ValueError: on a duplicate member name.
    """
    parse_declarator = symbol_table['__ declarator __']
    location = loc(consume(tokens))  # opening brace; the location itself is not used further
    fields = OrderedDict()
    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        member_type = specifier_qualifier_list(tokens, symbol_table)
        while peek(tokens, TOKENS.SEMICOLON) != TOKENS.SEMICOLON:
            member = parse_declarator(tokens, symbol_table)
            set_core_type(member, member_type)
            if name(member) in fields:
                raise ValueError('{l} Duplicate struct member {name} previous at {at}'.format(
                    l=loc(member), name=name(member), at=loc(fields[name(member)])
                ))
            fields[name(member)] = member
            # declarators of one group are comma separated
            if peek_or_terminal(tokens) != TOKENS.SEMICOLON:
                error_if_not_value(tokens, TOKENS.COMMA)
        error_if_not_value(tokens, TOKENS.SEMICOLON)
    error_if_not_value(tokens, TOKENS.RIGHT_BRACE)
    return fields
Exemplo n.º 42
0
def labeled_statement(tokens, symbol_table):
    """
        : IDENTIFIER ':' statement
        | 'case' constant_expression ':' statement
        | 'default' ':' statement

    An IDENTIFIER goes to ``label``.  Anything else requires the '__ SWITCH STATEMENT __' entry
    in ``symbol_table`` (ValueError otherwise) and is dispatched through
    ``rules(labeled_statement)``.
    """
    if isinstance(peek(tokens), IDENTIFIER):
        return label(tokens, symbol_table)

    try:
        symbol_table['__ SWITCH STATEMENT __']  # lookup only, to detect an enclosing switch
    except KeyError:
        raise ValueError('{l} {g} statement outside of switch'.format(
            l=loc(peek(tokens)), g=peek(tokens)))

    handler = rules(labeled_statement)[peek(tokens)]
    return handler(tokens, symbol_table)
Exemplo n.º 43
0
def remove_pass(instrs):
    """
    Collapse the run of Pass instructions at the front of ``instrs``.

    When another instruction follows, the whole run is replaced by it (nothing is yielded);
    when the stream ends, the last Pass replaces the earlier ones and the result is yielded.
    """
    leading_passes = tuple(takewhile(lambda i: isinstance(i, Pass), instrs))
    if peek_or_terminal(instrs) is not terminal:
        replace_instrs(peek(instrs), leading_passes)
        return
    last_pass, earlier_passes = leading_passes[-1], leading_passes[:-1]
    yield replace_instrs(last_pass, earlier_passes)
Exemplo n.º 44
0
def shift_expression(tokens, symbol_table):
    """
    : additive_expression (('<<'|'>>') additive_expression)*

    Extends the first operand via ``get_binary_expression`` (with IntegralType) for as long as
    the next token is in ``rules(shift_expression)``.
    """
    left = additive_expression(tokens, symbol_table)
    while peek(tokens, '') in rules(shift_expression):
        left = get_binary_expression(
            tokens, symbol_table, left, additive_expression, IntegralType
        )
    return left
Exemplo n.º 45
0
def argument(
    token_seq,  # a non empty argument terminates with either a comma or right parenthesis ...
    takewhile=lambda token_seq: peek(token_seq, TOKENS.COMMA) not in
    {TOKENS.COMMA, TOKENS.RIGHT_PARENTHESIS}):
    """
    Generator over the tokens of a single macro call argument.

    :param token_seq: token stream positioned at the start of the argument.
    :param takewhile: predicate called with ``token_seq``; tokens are yielded while it is true.
        The default stops at a comma, a right parenthesis or the end of the stream (``peek``
        defaults to a comma).  Note that it shadows any module level ``takewhile`` in this body.

    A left parenthesis is yielded together with everything up to its matching right parenthesis
    (which is required and yielded as well), so commas inside nested parentheses do not end
    the argument.
    """
    while takewhile(token_seq):
        if peek_or_terminal(
                token_seq) == TOKENS.LEFT_PARENTHESIS:  # nested parenthesis
            yield consume(token_seq)
            for token in argument(
                    token_seq,  # recursively call argument chaining all the nested parenthesis, until last right is hit
                    takewhile=lambda token_seq: peek(
                        token_seq, TOKENS.RIGHT_PARENTHESIS) != TOKENS.
                    RIGHT_PARENTHESIS):
                yield token
            yield error_if_not_value(token_seq, TOKENS.RIGHT_PARENTHESIS)
        else:
            yield consume(token_seq)
Exemplo n.º 46
0
def function_parameter_declarations(tokens, symbol_table):
    """
    Parse a parenthesised parameter list and return a FunctionType.

    The FunctionType is built from ``CType(location)``, the tuple of parsed parameters (empty
    for ``()``) and the location of the opening parenthesis.  If the closing-parenthesis check
    returns a falsy value, that value is returned instead.
    """
    open_location = loc(error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS))
    if peek(tokens, TOKENS.RIGHT_PARENTHESIS) == TOKENS.RIGHT_PARENTHESIS:
        parameters = ()
    else:
        parameters = tuple(parameter_type_list(tokens, symbol_table))
    closing = error_if_not_value(tokens, TOKENS.RIGHT_PARENTHESIS)
    if not closing:
        return closing
    return FunctionType(CType(open_location), parameters, open_location)
Exemplo n.º 47
0
def merge_lines(char_seq):
    """
    Generator that splices lines ending in a backslash onto the following line.

    Characters are passed through unchanged.  A backslash directly followed by a NewLineStr is
    dropped together with the run of NewLineStr after it, and the continuation line is yielded
    through ``get_repositioned_line`` using the location of that backslash.
    The loop has no exit of its own; it ends when ``consume`` fails on an exhausted sequence.
    """
    while True:
        current = consume(char_seq)
        is_continuation = current == '\\' and isinstance(peek(char_seq), NewLineStr)
        if not is_continuation:
            yield current
            continue
        exhaust(takewhile(lambda ch: isinstance(ch, NewLineStr), char_seq))
        for repositioned in get_repositioned_line(char_seq, loc(current)):
            yield repositioned
Exemplo n.º 48
0
def parameter_type_list(tokens, symbol_table):  # : parameter_declaration (',' parameter_declaration)*
    """
    Return an iterator over the parameter declarations of a parameter list.

    The first declaration is parsed immediately; each further one is parsed lazily, only after
    a comma has been seen and consumed.
    """
    first = parameter_declaration(tokens, symbol_table)
    after_comma = takewhile(
        lambda stream: peek(stream) == TOKENS.COMMA and consume(stream),
        repeat(tokens),
    )
    remaining = imap(parameter_declaration, after_comma, repeat(symbol_table))
    return chain((first,), remaining)
Exemplo n.º 49
0
def multiplicative_expression(tokens, symbol_table):
    """
        : cast_expression ('*' cast_expression | '/' cast_expression | '%' cast_expression)*

    Parses a left-associative chain of multiplicative operators, folding each new
    operand into the expression built so far via get_binary_expression.
    """
    cast_expression = symbol_table['__ cast_expression __']
    left_operand = cast_expression(tokens, symbol_table)

    while True:
        operator_rules = rules(multiplicative_expression)
        if peek(tokens, '') not in operator_rules:  # '' stands in when no token is left
            return left_operand
        left_operand = get_binary_expression(
            tokens, symbol_table, left_operand, cast_expression, operator_rules[peek(tokens)]
        )
Exemplo n.º 50
0
def argument_expression_list(tokens, symbol_table):
    """
        : assignment_expression (',' assignment_expression)*

    Returns an iterator over the parsed argument expressions.
    The first argument is parsed immediately, every further one only when the returned
    iterator is advanced.
    """
    assignment_expression = symbol_table['__ assignment_expression __']
    first_argument = assignment_expression(tokens, symbol_table)

    def remaining_arguments():
        # a comma (consumed here) announces another argument
        while peek(tokens) == TOKENS.COMMA and consume(tokens):
            yield assignment_expression(tokens, symbol_table)

    return chain((first_argument,), remaining_arguments())
Exemplo n.º 51
0
def __calc_if(expr, token_seq, macros):
    """
    Generator emitting the tokens of the selected branch of a conditional directive.

    expr: the already evaluated condition, only its truthiness is used.
    token_seq: the token stream positioned at the start of the block.
    macros: forwarded to the rule handling the next directive when expr is false.
    """
    # get a single block, it ends at the next elif/else/endif directive
    block = get_block(token_seq, terminating_with={TOKENS.PELIF, TOKENS.PELSE, TOKENS.PENDIF})
    if not expr:
        # the condition is false: discard this block and let the rule registered for the
        # following directive (a true elif, else or endif) supply the tokens instead
        exhaust(block)
        next_directive_rule = rules(__calc_if)[peek(token_seq)]
        block = next_directive_rule(token_seq, macros)

    # emit tokens which will be pre-processed; stop once consume signals exhaustion
    while True:
        try:
            token = consume(block)
        except StopIteration:
            break
        yield token

    exhaust_remaining_blocks(token_seq)
Exemplo n.º 52
0
def jump_statement(tokens, symbol_table):
    """
        : 'goto' IDENTIFIER ';'
        | 'continue' ';'
        | 'break' ';'
        | 'return' ';'
        | 'return' expression ';'

    Generator yielding the single statement built by the rule registered for the
    current token; the terminating ';' is checked before the statement is emitted.
    """
    parse_rule = rules(jump_statement)[peek(tokens)]
    parsed = parse_rule(tokens, symbol_table)
    error_if_not_value(tokens, TOKENS.SEMICOLON)
    yield parsed
Exemplo n.º 53
0
def init_declarator_list(tokens, symbol_table, base_type=CType(''), storage_class=None):
    """
        : init_declarator (',' init_declarator)*

    Returns an iterator over the parsed declarators, all sharing base_type and
    storage_class. The first declarator is parsed immediately, the others lazily while
    the returned iterator is consumed.

    Note that the default base_type is a single CType('') created when the function is
    defined, so it is shared by every call relying on the default.
    """
    first_declarator = init_declarator(tokens, symbol_table, base_type=base_type, storage_class=storage_class)

    def remaining_declarators():
        # every further declarator has to be preceded by a comma, which is consumed here
        while peek(tokens) == TOKENS.COMMA and consume(tokens):
            yield init_declarator(tokens, symbol_table, base_type, storage_class)

    return chain((first_declarator,), remaining_declarators())
Exemplo n.º 54
0
def type_name_or_postfix_expression(tokens, symbol_table):
    """
    Parse either a type name or a postfix expression.

    The primary expression rule is run inside a pushed symbol table frame in which
    '__ compound_literal __' is bound to type_name_or_compound_literal. If the result is a
    CType it is returned as is; otherwise postfix rules are applied for as long as the
    current token has one.
    """
    symbol_table = push(symbol_table)
    symbol_table['__ compound_literal __'] = type_name_or_compound_literal
    parse_primary = symbol_table['__ primary_expression __']
    result = parse_primary(tokens, symbol_table)
    pop(symbol_table)  # drop the frame holding the temporary compound literal rule

    # looked up unconditionally, even when the result turns out to be a type name
    postfix_rules = rules(symbol_table['__ postfix_expression __'])
    if isinstance(result, CType):  # it was a type_name, nothing to extend
        return result

    while peek_or_terminal(tokens) in postfix_rules:
        apply_postfix = postfix_rules[peek(tokens)]
        result = apply_postfix(tokens, symbol_table, result)
    return result
Exemplo n.º 55
0
def type_specifier(tokens, symbol_table, default=no_default):
    """
        : 'void'
        | ['signed' | 'unsigned'] 'char' | ['signed' | 'unsigned'] 'short'
        | ['signed' | 'unsigned'] 'int' | ['signed' | 'unsigned'] 'long'
        | 'float' | 'double'
        | struct_specifier
        | union_specifier
        | enum_specifier
        | TYPE_NAME

    Resolution order: a rule registered for the current token, then a symbol table entry
    that is a CType (a TYPE_NAME), then the supplied default.
    Raises ValueError when none of them applies.
    """
    token = peek_or_terminal(tokens)

    specifier_rules = rules(type_specifier)
    if token in specifier_rules:
        return specifier_rules[token](tokens, symbol_table)

    # the token doubles as fallback, so an unknown token simply fails the isinstance check
    if isinstance(symbol_table.get(token, token), CType):
        type_name = symbol_table[token]
        return type_name(loc(consume(tokens)))  # call the type with the location of its use

    if default is not no_default:
        return default

    raise ValueError('{l} Expected type_specifier or TYPE_NAME got {got}'.format(
        l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
    ))
Exemplo n.º 56
0
def static_definition(stmnt, symbol_table):
    """
    Emit the binaries of a static definition, preceded by a jump over them.

    stmnt is annotated with _initial_data (first element of its binaries), end_of_data
    (a Pass marker emitted after the data) and a bound load_address function, and is then
    registered in symbol_table under its declaration name.
    Returns a chain of: relative jump to end_of_data, the data itself, end_of_data.
    """
    def load_address(self, location):
        address = Address(self._initial_data, location)
        return push(address, location)

    pass_instr_seq = (Pass(loc(stmnt)),)
    data = static_def_binaries(stmnt, pass_instr_seq)

    stmnt._initial_data = peek(data)
    stmnt.end_of_data = Pass(loc(stmnt))
    stmnt.load_address = bind_load_address_func(load_address, stmnt)
    symbol_table[declarations.name(stmnt)] = stmnt

    # jump over embedded data ...
    jump_over_data = relative_jump(Offset(stmnt.end_of_data, loc(stmnt)), loc(stmnt))
    return chain(jump_over_data, consume_all(data), (stmnt.end_of_data,))
Exemplo n.º 57
0
def statement(tokens, symbol_table):
    """
        : declaration
        | labeled_statement
        | compound_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        | expression_statement
        | expression ';'
        | ;

    Dispatch order: a rule registered for the current token, a declaration, an identifier
    (label or start of an expression), any other token (expression).
    Raises ValueError when no token is left to build a statement from.
    """
    def expression_statement(token_seq):
        # expression ';' wrapped in a one element iterator
        expr = symbol_table['__ expression __'](token_seq, symbol_table)
        error_if_not_value(token_seq, TOKENS.SEMICOLON)
        return repeat(expr, 1)

    # if current token has a rule use that one first
    if peek_or_terminal(tokens) in rules(statement):
        return rules(statement)[peek(tokens)](tokens, symbol_table)

    # checking for type_name is a bit expensive, hence only after the direct rules
    if is_declaration(tokens, symbol_table):
        return declaration(tokens, symbol_table)

    # both expressions and labels may start with an identifier, so one token of look-ahead
    # past the identifier is needed to tell them apart
    if isinstance(peek_or_terminal(tokens), IDENTIFIER):
        identifier = consume(tokens)
        if peek_or_terminal(tokens) == TOKENS.COLON:
            labeled_statement = symbol_table['__ labeled_statement __']
            return labeled_statement(chain((identifier,), consume_all(tokens)), symbol_table)
        # it must be an expression: put the consumed identifier back in front of the stream
        # TODO: figure out a way without using dangerous chain!
        return expression_statement(chain((identifier,), consume_all(tokens)))

    if peek_or_terminal(tokens) is terminal:
        raise ValueError('{l} No rule could be found to create statement, got {got}'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))

    return expression_statement(tokens)
Exemplo n.º 58
0
def declarations(tokens, symbol_table):
    """
    Generator parsing one declaration and yielding the objects it introduces.

    Yields an EmptyDeclaration when the specifiers are directly followed by ';',
    otherwise one item per declarator: the declarator itself, or a FunctionDefinition
    when the declarator is followed by '{'.
    An error is raised through raise_error when no token follows the specifiers.
    """
    # storage_class_specifier? type_name? init_declarator_list (';' or compound_statement) # declaration
    # fetch the three parsing rules from the symbol table in one go
    storage_class_specifier, specifier_qualifier_list, statement = imap(
        symbol_table.__getitem__,
        ('__ storage_class_specifier __', '__ specifier_qualifier_list __', '__ statement __')
    )
    storage_class = storage_class_specifier(tokens, symbol_table)
    base_type = specifier_qualifier_list(tokens, symbol_table)

    # token that must close the declaration; set to '' after a function body (nothing expected)
    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif peek_or_terminal(tokens) is terminal:
        raise_error('{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))
    else:
        for dec in init_declarator_list(tokens, symbol_table, base_type=base_type, storage_class=storage_class):
            dec.storage_class = storage_class
            if isinstance(storage_class, TypeDef):  # init_declarator_list adds the symbol as a decl to symbol_table
                # pop the existing entry, then store the declared type under the same name;
                # `or 1` makes the `and` evaluate to c_type(dec) whatever pop returned
                symbol_table[name(dec)] = (symbol_table.pop(name(dec)) or 1) and c_type(dec)  # replace dec by ctype
            # NOTE(review): this branch is only taken if error_if_not_type returns a falsy value
            # on success - confirm against the helper's definition
            elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE and not error_if_not_type(c_type(dec), FunctionType):
                # function definition: the body is parsed in a fresh symbol table frame
                symbol_table = push(symbol_table)
                symbol_table.update(chain(
                    imap(
                        lambda a: (name(a), a),  # add non variable list parameters to the symbol table ...
                        ifilterfalse(lambda c: isinstance(c_type(c), VAListType), c_type(dec))
                    ),
                    # bookkeeping entries made available while parsing the body
                    (('__ RETURN_TYPE __', c_type(c_type(dec))), ('__ LABELS __', SymbolTable()))
                ))
                # the statement rule returns an iterator; its first item is used as the body
                yield FunctionDefinition(dec, next(statement(tokens, symbol_table)))
                # leave the function frame; '' means no terminating token is required afterwards
                expecting_token = (pop(symbol_table) or 1) and ''
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
        # checked once, after all declarators; skipped when expecting_token is ''
        _ = expecting_token and error_if_not_value(tokens, expecting_token)
Exemplo n.º 59
0
def parse_enum_members(tokens, symbol_table):
    """
        : '{' (IDENTIFIER ('=' constant_expression)? ','?)* '}'

    Parse the enumerators of an enum body.

    Each enumerator is registered in symbol_table under its identifier and collected,
    in source order, in the returned OrderedDict (identifier -> Definition).
    An enumerator without an explicit value gets the previous enumerator's value plus one,
    the first one defaults to 0.

    Errors are raised by error_if_not_type/error_if_not_value when an enumerator is not
    an IDENTIFIER, an explicit value is not of IntegerType or does not evaluate to an
    integer, or the closing '}' is missing.
    """
    constant_expression = symbol_table['__ constant_expression __']
    # the first consumed token (the opening brace) supplies the location used for all members
    location, members, current_value = loc(consume(tokens)), OrderedDict(), 0

    while peek(tokens, TOKENS.RIGHT_BRACE) != TOKENS.RIGHT_BRACE:
        ident = error_if_not_type(consume(tokens, ''), IDENTIFIER)
        value = ConstantExpression(current_value, IntegerType(location), location)
        if peek_or_terminal(tokens) == TOKENS.EQUAL and consume(tokens):
            value = constant_expression(tokens, symbol_table)
            _ = error_if_not_type(c_type(value), IntegerType)
        # the next implicit enumerator continues from this one: previous value + 1
        # (previously the counter was left at the current value, giving every implicit
        # enumerator the same value as its predecessor)
        current_value = error_if_not_type(exp(value), (int, long)) + 1

        symbol_table[ident] = value  # Add value to symbol_table
        members[ident] = Definition(ident, c_type(value), value, location)

        # the separating comma is optional here, so a trailing comma is accepted
        _ = peek_or_terminal(tokens) == TOKENS.COMMA and consume(tokens)
    _ = error_if_not_value(tokens, TOKENS.RIGHT_BRACE)

    return members
Exemplo n.º 60
0
def conditional_expression(tokens, symbol_table):
    """
        : logical_or_expression ('?' assignment_expression ':' conditional_expression)?

    Returns the plain logical-or expression when no ternary operator follows, otherwise a
    TernaryExpression whose type is whichever branch type the other one can safely be
    coerced with (the true branch's type is tried first).

    Raises ValueError when neither branch type can be safely coerced to the other; the
    condition must be of NumericType (checked by error_if_not_type).
    """
    # logical_or_expression ('?' expression ':' conditional_expression)?
    exp = logical_or_expression(tokens, symbol_table)
    if peek(tokens, '') in rules(conditional_expression):
        location = loc(error_if_not_value(tokens, TOKENS.QUESTION))
        _ = error_if_not_type(c_type(exp), NumericType)
        if_exp_is_true = assignment_expression(tokens, symbol_table)
        _ = error_if_not_value(tokens, TOKENS.COLON)
        if_exp_is_false = conditional_expression(tokens, symbol_table)  # right associative

        ctype_1, ctype_2 = imap(c_type, (if_exp_is_true, if_exp_is_false))
        if safe_type_coercion(ctype_1, ctype_2):
            ctype = ctype_1(location)
        elif safe_type_coercion(ctype_2, ctype_1):
            ctype = ctype_2(location)
        else:
            # the message references {l}; it has to be supplied or format() raises KeyError
            # and hides the actual error
            raise ValueError('{l} Could not determine type for ternary-expr, giving the types {t1} and {t2}'.format(
                l=location, t1=ctype_1, t2=ctype_2
            ))
        return TernaryExpression(exp, if_exp_is_true, if_exp_is_false, ctype, location)
    return exp