Пример #1
0
def test_rule_yaml_descriptions():
    """A rule whose features and nested and/or nodes carry descriptions still matches.

    The rule mixes inline ``value = description`` annotations with separate
    ``description:`` keys; the feature set below satisfies it (via the
    64-bit offsets branch) and evaluation must succeed.
    """
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - string: '/SELECT.*FROM.*WHERE/i'
                      description: SQL WHERE Clause
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
                    - or:
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS.OptionalHeader.SizeOfImage
                            - offset: 0x34 = IMAGE_NT_HEADERS.OptionalHeader.ImageBase
                          description: 32-bits
                        - and:
                            - offset: 0x50 = IMAGE_NT_HEADERS64.OptionalHeader.SizeOfImage
                            - offset: 0x30 = IMAGE_NT_HEADERS64.OptionalHeader.ImageBase
                          description: 64-bits
                      description: PE headers offsets
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # feature -> set of locations where the feature was seen
    features = {
        Number(1): {1},
        Number(2): {2, 3},
        String("This program cannot be run in DOS mode."): {4},
        String("SELECT password FROM hidden_table WHERE user == admin"): {5},
        Offset(0x50): {6},
        Offset(0x30): {7},
    }
    outcome = parsed.evaluate(features)
    assert (outcome == True)
Пример #2
0
def test_rule_yaml():
    """A rule requiring numbers 1 and 2 matches only once both are present.

    The feature set grows one number at a time (0, 1, 2, 3) and the rule is
    re-evaluated after every addition.
    """
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
                author: [email protected]
                scope: function
                examples:
                    - foo1234
                    - bar5678
            features:
                - and:
                    - number: 1
                    - number: 2
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # (number added to the feature set, expected match result afterwards)
    steps = ((0, False), (1, False), (2, True), (3, True))
    seen = {}
    for value, expected in steps:
        seen[Number(value)] = {1}
        # hand over a fresh dict each time, as the literal-based original did
        assert parsed.evaluate(dict(seen)) == expected
Пример #3
0
def extract_insn_number_features(f, bb, insn):
    """ yield number features for the immediate operands of an instruction

        args:
            f (IDA func_t)
            bb (IDA BasicBlock)
            insn (IDA insn_t)

        yields:
            (Number, address) pairs: one plain Number and one tagged with the
            architecture derived from f.ctx, both at insn.ea

        example:
            push    3136B0h         ; dwControlCode
    """
    helpers = capa.features.extractors.ida.helpers

    # nothing to report for instructions such as:
    #   .text:0042250E retn 8
    #   .text:00401145 add esp, 0Ch
    if idaapi.is_ret_insn(insn) or helpers.is_sp_modified(insn):
        return

    immediates = helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, ))
    for operand in immediates:
        value = helpers.mask_op_val(operand)
        if idaapi.is_mapped(value):
            # the value is a mapped address, so it is not reported as a number
            continue

        yield Number(value), insn.ea
        yield Number(value, arch=get_arch(f.ctx)), insn.ea
Пример #4
0
def extract_insn_number_features(f, bb, insn):
    """Yield number features for the immediate operands of an instruction.

    Each reported constant is yielded twice: once as a plain ``Number`` and
    once tagged with the architecture derived from ``f.vw``, both at
    ``insn.va``.
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    disasm = envi.archs.i386.disasm

    for operand in insn.opers:
        # these operand classes are used for both x32 and x64
        if isinstance(operand, disasm.i386ImmOper):
            value = operand.getOperValue(operand)
        elif isinstance(operand, disasm.i386ImmMemOper):
            value = operand.getOperAddr(operand)
        else:
            continue

        if f.vw.probeMemory(value, 1, envi.memory.MM_READ):
            # readable memory: treat the value as an address,
            # and assume it is not also a constant.
            continue

        if insn.mnem == "add":
            destination = insn.opers[0]
            if destination.isReg() and destination.reg == disasm.REG_ESP:
                # stack adjustment, stop processing this instruction:
                #
                #    .text:00401140                 call    sub_407E2B
                #    .text:00401145                 add     esp, 0Ch
                return

        yield Number(value), insn.va
        yield Number(value, arch=get_arch(f.vw)), insn.va
Пример #5
0
def extract_insn_number_features(f, bb, insn):
    """yield number features for the immediate and memory operands of an instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Number, address) pairs: one plain Number and one tagged with the
        architecture derived from f.ctx, both at insn.ea

    example:
        push    3136B0h         ; dwControlCode
    """
    helpers = capa.features.extractors.ida.helpers

    # nothing to report for instructions such as:
    #   .text:0042250E retn 8
    #   .text:00401145 add esp, 0Ch
    if idaapi.is_ret_insn(insn) or helpers.is_sp_modified(insn):
        return

    wanted = (idaapi.o_imm, idaapi.o_mem)
    for operand in helpers.get_insn_ops(insn, target_ops=wanted):
        if helpers.is_op_offset(insn, operand):
            # operand is an offset, not a number, e.g.:
            #   .text:00401100 shr eax, offset loc_C
            continue

        # immediates are passed through mask_op_val; memory operands contribute their address
        value = helpers.mask_op_val(operand) if operand.type == idaapi.o_imm else operand.addr

        yield Number(value), insn.ea
        yield Number(value, arch=get_arch(f.ctx)), insn.ea
Пример #6
0
def extract_insn_number_features(f, bb, insn):
    """yield number features for the constants found in an instruction's operands

    args:
        f: function object; its `unit.syntax` is consulted here
        bb: basic block (unused)
        insn: instruction exposing `mne`, `oprs`, `oprs_tp` and `ea`

    yields:
        (Number, address) pairs: one plain Number and one tagged with the
        architecture derived from f, both at insn.ea

    example:
        push    3136B0h         ; dwControlCode
    """
    # read from the cached function data (AttrDict will not add new attribute to json)
    unit: DataUnit = f.unit
    syntax: Assembly = unit.syntax

    if len(insn.oprs) == 0:
        return

    # skip an ADD whose first operand mentions one of the 'ptr' category registers
    first_operand = insn.oprs[0].lower()
    if 'ADD' in insn.mne:
        pointer_registers = syntax.registers_cat['ptr'].keys()
        if any(reg in first_operand for reg in pointer_registers):
            return

    # skip operations flagged as jumps in the syntax table
    operations = syntax.operations
    if insn.mne in operations and operations[insn.mne].jmp:
        return

    for value in get_opr_constant(insn.oprs, insn.oprs_tp, True):
        yield Number(value), insn.ea
        yield Number(value, arch=get_arch(f)), insn.ea
Пример #7
0
def test_rule_yaml_count():
    """`count(number(100)): 1` matches only when the number is seen at exactly one location."""
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - count(number(100)): 1
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # (feature set, expected match result): zero, one and two locations
    scenarios = (
        ({Number(100): {}}, False),
        ({Number(100): {1}}, True),
        ({Number(100): {1, 2}}, False),
    )
    for features, expected in scenarios:
        assert parsed.evaluate(features) == expected
Пример #8
0
def test_number_arch():
    """A `number/x32` feature matches only numbers tagged with the x32 architecture."""
    yaml_src = textwrap.dedent("""
            rule:
                meta:
                    name: test rule
                features:
                    - number/x32: 2
            """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # same value, tagged with the matching architecture
    tagged_x32 = {Number(2, arch=ARCH_X32): {1}}
    assert parsed.evaluate(tagged_x32) == True

    # same value, but untagged or tagged with another architecture
    untagged = {Number(2): {1}}
    assert parsed.evaluate(untagged) == False
    tagged_x64 = {Number(2, arch=ARCH_X64): {1}}
    assert parsed.evaluate(tagged_x64) == False
Пример #9
0
def test_rule_yaml_not():
    """`not` inverts its child: the rule needs number 1 and the absence of number 2."""
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1
                    - not:
                        - number: 2
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    without_two = {Number(1): {1}}
    with_two = {Number(1): {1}, Number(2): {1}}
    assert parsed.evaluate(without_two) == True
    assert parsed.evaluate(with_two) == False
Пример #10
0
def extract_insn_number_features(f, bb, insn):
    """Yield number (and possible offset) features for an instruction's operands.

    Operands are taken from the comma-separated ``insn.operands`` text. Every
    operand that parses as hexadecimal yields ``Number`` and ``OperandNumber``
    at ``insn.offset``; for ``add`` with a small positive value, ``Offset``
    and ``OperandOffset`` are yielded as well.
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    operand_texts = list(map(str.strip, insn.operands.split(",")))
    is_add = insn.mnemonic == "add"

    if is_add and operand_texts[0] in ("esp", "rsp"):
        # stack adjustment, nothing to report:
        #
        #    .text:00401140                 call    sub_407E2B
        #    .text:00401145                 add     esp, 0Ch
        return

    for index, text in enumerate(operand_texts):
        try:
            # Python ints behave like two's complement with infinitely many
            # sign bits, so masking truncates the value to the binary's bitness
            value = int(text, 16) & ((1 << f.smda_report.bitness) - 1)
        except ValueError:
            # not a hexadecimal literal (register, memory expression, ...)
            continue

        yield Number(value), insn.offset
        yield OperandNumber(index, value), insn.offset

        if is_add and 0 < value < MAX_STRUCTURE_SIZE:
            # for pattern like:
            #
            #     add eax, 0x10
            #
            # assume 0x10 is also an offset (imagine eax is a pointer).
            yield Offset(value), insn.offset
            yield OperandOffset(index, value), insn.offset
Пример #11
0
def test_count_number_symbol():
    """`count(number(N = symbol))` counts locations of N; the symbol text does not change matching."""
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - count(number(2 = symbol name)): 1
                    - count(number(0x100 = symbol name)): 2 or more
                    - count(number(0x11 = (FLAG_A | FLAG_B))): 2 or more
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # (feature set, expected match result)
    scenarios = (
        # number 2 must be seen exactly once
        ({Number(2): {}}, False),
        ({Number(2): {1}}, True),
        ({Number(2): {1, 2}}, False),
        # number 0x100 must be seen at least twice
        ({Number(0x100, "symbol name"): {1}}, False),
        ({Number(0x100, "symbol name"): {1, 2, 3}}, True),
    )
    for features, expected in scenarios:
        assert parsed.evaluate(features) == expected
Пример #12
0
def test_rule_yaml_complex():
    """Nested and/or/`N or more` statements: two of {4, 5, 6} suffice, one does not."""
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - and:
                        - number: 1
                        - number: 2
                    - or:
                        - number: 3
                        - 2 or more:
                            - number: 4
                            - number: 5
                            - number: 6
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # 5 and 6 satisfy "2 or more" of {4, 5, 6}; 7 and 8 are irrelevant
    two_of_three = {
        Number(5): {1},
        Number(6): {1},
        Number(7): {1},
        Number(8): {1},
    }
    assert parsed.evaluate(two_of_three) == True

    # only 6 is present from {4, 5, 6}
    one_of_three = {
        Number(6): {1},
        Number(7): {1},
        Number(8): {1},
    }
    assert parsed.evaluate(one_of_three) == False
Пример #13
0
def extract_insn_number_features(f, bb, insn):
    """Yield number features for the hexadecimal operands of an instruction.

    Operands are taken from the comma-separated ``insn.operands`` text. Each
    operand that parses as a hexadecimal integer is yielded twice: once as a
    plain ``Number`` and once tagged with the architecture derived from
    ``f.smda_report``, both at ``insn.offset``. Operands that are not
    hexadecimal literals are skipped.
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    operands = [o.strip() for o in insn.operands.split(",")]
    if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
        # skip things like:
        #
        #    .text:00401140                 call    sub_407E2B
        #    .text:00401145                 add     esp, 0Ch
        return
    for operand in operands:
        # Only the parse is guarded, and only against ValueError. A bare
        # `except:` wrapped around the yields would also swallow
        # GeneratorExit/KeyboardInterrupt, which breaks generator.close().
        try:
            value = int(operand, 16)
        except ValueError:
            # not a hexadecimal literal (register, memory expression, ...)
            continue

        yield Number(value), insn.offset
        yield Number(value, arch=get_arch(f.smda_report)), insn.offset
Пример #14
0
def test_number_arch_symbol():
    """A `number/x32` feature with an inline description matches the equally described x32 number."""
    yaml_src = textwrap.dedent("""
            rule:
                meta:
                    name: test rule
                features:
                    - number/x32: 2 = some constant
            """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    described = Number(2, arch=ARCH_X32, description="some constant")
    assert parsed.evaluate({described: {1}}) == True
Пример #15
0
def test_rule_yaml_descriptions():
    """A rule whose features carry descriptions still matches the plain features.

    The rule mixes inline ``value = description`` annotations with separate
    ``description:`` keys; the feature set below satisfies every child of the
    ``and`` and evaluation must succeed.
    """
    yaml_src = textwrap.dedent("""
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1 = This is the number 1
                    - string: This program cannot be run in DOS mode.
                      description: MS-DOS stub message
                    - string: '/SELECT.*FROM.*WHERE/i'
                      description: SQL WHERE Clause
                    - count(number(2 = AF_INET/SOCK_DGRAM)): 2
        """)
    parsed = capa.rules.Rule.from_yaml(yaml_src)

    # feature -> set of locations where the feature was seen
    features = {
        Number(1): {1},
        Number(2): {2, 3},
        String("This program cannot be run in DOS mode."): {4},
        String("SELECT password FROM hidden_table WHERE user == admin"): {5},
    }
    outcome = parsed.evaluate(features)
    assert (outcome == True)
Пример #16
0
def test_number_symbol():
    """Number features parse `value = description`, trimming the spaces around the first `=`."""
    yaml_src = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - and:
                    - number: 1
                    - number: 0xFFFFFFFF
                    - number: 2 = symbol name
                    - number: 3  =  symbol name
                    - number: 4  =  symbol name = another name
                    - number: 0x100 = symbol name
                    - number: 0x11 = (FLAG_A | FLAG_B)
        """
    )
    parsed = capa.rules.Rule.from_yaml(yaml_src)
    children = list(parsed.statement.get_children())

    # each of these must appear among the children of the rule's statement
    expected_children = (
        Number(1),
        Number(0xFFFFFFFF),
        Number(2, description="symbol name"),
        Number(3, description="symbol name"),
        # only the first "=" separates the value from the description
        Number(4, description="symbol name = another name"),
        Number(0x100, description="symbol name"),
    )
    for feature in expected_children:
        assert (feature in children) == True
Пример #17
0
Файл: insn.py Проект: H1d3r/capa
def extract_insn_number_features(f, bb, insn):
    """Yield number features for the hexadecimal operands of an instruction.

    Operands are taken from the comma-separated ``insn.operands`` text. Each
    operand that parses as a hexadecimal integer is masked to
    ``f.smda_report.bitness`` bits and yielded twice: once as a plain
    ``Number`` and once tagged with the bitness derived from
    ``f.smda_report``, both at ``insn.offset``. Operands that are not
    hexadecimal literals are skipped.
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    operands = [o.strip() for o in insn.operands.split(",")]
    if insn.mnemonic == "add" and operands[0] in ["esp", "rsp"]:
        # skip things like:
        #
        #    .text:00401140                 call    sub_407E2B
        #    .text:00401145                 add     esp, 0Ch
        return
    for operand in operands:
        # Only the parse is guarded, and only against ValueError. A bare
        # `except:` wrapped around the yields would also swallow
        # GeneratorExit/KeyboardInterrupt, which breaks generator.close().
        try:
            parsed = int(operand, 16)
        except ValueError:
            # not a hexadecimal literal (register, memory expression, ...)
            continue

        # The result of bitwise operations is calculated as though carried out
        # in two's complement with an infinite number of sign bits
        value = parsed & ((1 << f.smda_report.bitness) - 1)

        yield Number(value), insn.offset
        yield Number(value,
                     bitness=get_bitness(f.smda_report)), insn.offset
Пример #18
0
def extract_insn_offset_features(f, bb, insn):
    """yield structure offset features for the phrase/displacement operands of an instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Offset, address) and (OperandOffset, address) pairs at insn.ea, plus
        (Number, address) and (OperandNumber, address) for a `lea` whose
        second operand is a displacement without SIB

    example:
        .text:0040112F cmp [esi+4], ebx
    """
    ida_helpers = capa.features.extractors.ida.helpers

    for index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # first void operand ends the scan
            break
        if operand.type not in (idaapi.o_phrase, idaapi.o_displ):
            continue
        if ida_helpers.is_op_stack_var(insn.ea, operand.n):
            continue

        raw_offset = ida_helpers.get_op_phrase_info(operand).get("offset", 0)
        if idaapi.is_mapped(raw_offset):
            # the "offset" is really a mapped address, ignore:
            #   mov esi, dword_1005B148[esi]
            continue

        # IDA is believed to encode all offsets as two's complement in a u32;
        # 64-bit displacements do not exist, see:
        # https://stackoverflow.com/questions/31853189/x86-64-assembly-why-displacement-not-64-bits
        offset = capa.features.extractors.helpers.twos_complement(raw_offset, 32)

        yield Offset(offset), insn.ea
        yield OperandOffset(index, offset), insn.ea

        # o_displ covers both [eax+1] and [eax+ebx+2]; only the latter has a
        # SIB byte, and that form is not wanted here
        is_simple_lea = (
            insn.itype == idaapi.NN_lea
            and index == 1
            and operand.type == idaapi.o_displ
            and not ida_helpers.has_sib(operand)
        )
        if is_simple_lea:
            # for pattern like:
            #
            #     lea eax, [ebx + 1]
            #
            # also report 1 as a number (imagine ebx is a zero register).
            yield Number(offset), insn.ea
            yield OperandNumber(index, offset), insn.ea
Пример #19
0
def extract_op_offset_features(f, bb, insn, i, oper):
    """Yield structure offset features for a single operand.

    For a register+displacement or SIB memory operand, the displacement is
    yielded as ``Offset`` and ``OperandOffset`` at ``insn.va``. For ``lea``
    with a register+displacement second operand whose displacement is not
    readable memory, ``Number`` and ``OperandNumber`` are yielded as well.
    Other operand kinds yield nothing.
    """
    # example:
    #
    #     .text:0040112F    cmp     [esi+4], ebx
    disasm = envi.archs.i386.disasm

    # used for both x32 and x64, shaped like [esi + 4]:
    #                                         reg   disp
    if isinstance(oper, disasm.i386RegMemOper):
        # accesses relative to the stack/frame registers are not reported
        # TODO: do x64 support for real.
        skipped_registers = (
            envi.archs.i386.regs.REG_ESP,
            envi.archs.i386.regs.REG_EBP,
            envi.archs.amd64.regs.REG_RBP,
        )
        if oper.reg in skipped_registers:
            return

        # viv already decodes offsets as signed
        displacement = oper.disp

        yield Offset(displacement), insn.va
        yield OperandOffset(i, displacement), insn.va

        if insn.mnem != "lea" or i != 1:
            return
        if f.vw.probeMemory(displacement, 1, envi.memory.MM_READ):
            return

        # for pattern like:
        #
        #     lea eax, [ebx + 1]
        #
        # also report 1 as a number (imagine ebx is a zero register).
        yield Number(displacement), insn.va
        yield OperandNumber(i, displacement), insn.va
        return

    # shaped like [esi + ecx + 16384]:
    #              reg   index disp
    if isinstance(oper, disasm.i386SibOper):
        # viv already decodes offsets as signed
        displacement = oper.disp

        yield Offset(displacement), insn.va
        yield OperandOffset(i, displacement), insn.va
Пример #20
0
def extract_insn_number_features(f, bb, insn):
    """yield number (and possible offset) features for the operands of an instruction

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    yields:
        (Number, address) and (OperandNumber, address) pairs at insn.ea, plus
        (Offset, address) and (OperandOffset, address) for an `add` with a
        small positive immediate

    example:
        push    3136B0h         ; dwControlCode
    """
    helpers = capa.features.extractors.ida.helpers

    # nothing to report for instructions such as:
    #   .text:0042250E retn 8
    #   .text:00401145 add esp, 0Ch
    if idaapi.is_ret_insn(insn) or helpers.is_sp_modified(insn):
        return

    for index, operand in enumerate(insn.ops):
        if operand.type == idaapi.o_void:
            # first void operand ends the scan
            break
        if operand.type not in (idaapi.o_imm, idaapi.o_mem):
            continue
        if helpers.is_op_offset(insn, operand):
            # operand is an offset, not a number, e.g.:
            #   .text:00401100 shr eax, offset loc_C
            continue

        # immediates are passed through mask_op_val; memory operands contribute their address
        value = helpers.mask_op_val(operand) if operand.type == idaapi.o_imm else operand.addr

        yield Number(value), insn.ea
        yield OperandNumber(index, value), insn.ea

        looks_like_field_access = (
            insn.itype == idaapi.NN_add
            and 0 < value < MAX_STRUCTURE_SIZE
            and operand.type == idaapi.o_imm
        )
        if looks_like_field_access:
            # for pattern like:
            #
            #     add eax, 0x10
            #
            # assume 0x10 is also an offset (imagine eax is a pointer).
            yield Offset(value), insn.ea
            yield OperandOffset(index, value), insn.ea
Пример #21
0
def extract_op_number_features(f, bb, insn, i, oper):
    """Yield number (and possible offset) features for a single operand.

    Only immediate and immediate-memory operands are considered. The value is
    yielded as ``Number`` and ``OperandNumber`` at ``insn.va`` unless it is
    readable memory or the instruction is ``add esp, ...``. For ``add`` with a
    small positive immediate, ``Offset`` and ``OperandOffset`` are yielded too.
    """
    # example:
    #
    #     push    3136B0h         ; dwControlCode
    disasm = envi.archs.i386.disasm

    # these operand classes are used for both x32 and x64
    is_immediate = isinstance(oper, disasm.i386ImmOper)
    if is_immediate:
        value = oper.getOperValue(oper)
    elif isinstance(oper, disasm.i386ImmMemOper):
        value = oper.getOperAddr(oper)
    else:
        return

    if f.vw.probeMemory(value, 1, envi.memory.MM_READ):
        # readable memory: treat the value as an address,
        # and assume it is not also a constant.
        return

    is_add = insn.mnem == "add"
    if is_add:
        destination = insn.opers[0]
        if destination.isReg() and destination.reg == envi.archs.i386.regs.REG_ESP:
            # stack adjustment, nothing to report:
            #
            #    .text:00401140                 call    sub_407E2B
            #    .text:00401145                 add     esp, 0Ch
            return

    yield Number(value), insn.va
    yield OperandNumber(i, value), insn.va

    if is_add and 0 < value < MAX_STRUCTURE_SIZE and is_immediate:
        # for pattern like:
        #
        #     add eax, 0x10
        #
        # assume 0x10 is also an offset (imagine eax is a pointer).
        yield Offset(value), insn.va
        yield OperandOffset(i, value), insn.va
Пример #22
0
def extract_insn_offset_features(f, bb, insn):
    """Yield structure offset features parsed from an instruction's operand text.

    Operands mentioning esp/ebp/rbp are ignored. The first number found in an
    operand (hexadecimal pattern first, then plain integer, default 0) is
    yielded as ``Offset``/``OperandOffset`` when the operand contains "ptr".
    Without "ptr", a ``lea`` second operand with exactly one +/- and no
    scaling yields ``Number``/``OperandNumber`` instead.
    """
    # examples:
    #
    #     mov eax, [esi + 4]
    #     mov eax, [esi + ecx + 16384]
    operand_texts = [text.strip() for text in insn.operands.split(",")]

    for index, text in enumerate(operand_texts):
        if any(register in text for register in ("esp", "ebp", "rbp")):
            continue

        hex_match = re.search(PATTERN_HEXNUM, text)
        int_match = re.search(PATTERN_SINGLENUM, text)

        # prefer the hexadecimal match; fall back to the plain integer one
        number = 0
        for match, base in ((hex_match, 16), (int_match, 10)):
            if match:
                magnitude = int(match.group("num"), base)
                number = -magnitude if match.group().startswith("-") else magnitude
                break

        if "ptr" in text:
            yield Offset(number), insn.offset
            yield OperandOffset(index, number), insn.offset
            continue

        is_simple_lea = (
            insn.mnemonic == "lea"
            and index == 1
            and (text.count("+") + text.count("-")) == 1
            and text.count("*") == 0
        )
        if is_simple_lea:
            # for pattern like:
            #
            #     lea eax, [ebx + 1]
            #
            # also report 1 as a number (imagine ebx is a zero register).
            yield Number(number), insn.offset
            yield OperandNumber(index, number), insn.offset
Пример #23
0
def test_rule_ctor():
    """A rule built directly through the constructor matches only its own feature."""
    statement = Number(1)
    built = capa.rules.Rule("test rule", capa.rules.FUNCTION_SCOPE, statement, {})

    other_number = {Number(0): {1}}
    same_number = {Number(1): {1}}
    assert built.evaluate(other_number) == False
    assert built.evaluate(same_number) == True
Пример #24
0
def extract_insn_number_features(
        f: CilMethodBody, bb: CilMethodBody,
        insn: Instruction) -> Iterator[Tuple[Number, int]]:
    """Yield a number feature for an instruction that ``is_ldc()`` reports as an ldc.

    The value comes from ``insn.get_ldc()`` and is reported at ``insn.offset``;
    any other instruction yields nothing.
    """
    if not insn.is_ldc():
        return

    yield Number(insn.get_ldc()), insn.offset