Exemplo n.º 1
0
    def dbg_trace(self, tid, ea):
        """
        Handle a debugger trace event callback.

        If ``get_vmr().extract_param`` is set and the disassembly at ``ea``
        contains ``call``, the recorded trace is walked backwards and the
        operands of the directly preceding ``push``/``mov`` lines are added to
        ``self.func_args`` under the text following ``call``. Afterwards the
        register context is refreshed from ``cpu`` and one ``Traceline`` for
        ``ea`` is appended to ``self.trace``.

        :param tid: thread id passed through to the recorded trace line
        :param ea: address of the traced instruction
        :return: None; any error is printed instead of being raised
        """
        vmr = get_vmr()
        try:
            disasm_text = GetDisasm(ea)
            if vmr.extract_param and 'call' in disasm_text:
                # Argument harvesting is best effort: a failure here must not
                # prevent the current instruction from being recorded below.
                try:
                    key = disasm_text.split('call')[1].strip()
                    depth = 1
                    while True:
                        # traverse trace backwards and get sequential push and mov params
                        try:
                            line = self.trace[-depth]
                        except IndexError:
                            # reached the beginning of the recorded trace
                            break
                        if line.is_push and line.disasm_len == 2:
                            operand = line.disasm[1]
                        elif line.is_mov:
                            operand = line.disasm[2]
                        else:
                            break
                        try:
                            # prefer the register value stored in the line's context
                            self.func_args[key].add(
                                line.ctx[get_reg(operand, self.arch)])
                        except Exception:
                            # operand could not be resolved via the context: keep it verbatim
                            self.func_args[key].add(operand)
                        depth += 1
                except Exception as e:
                    print(e)
            # TODO mmx xmmx ymmx
            # compute next ctx
            flags = ('cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df')
            if self.arch == 32:
                regs = ('eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip',
                        'edi', 'esi')
            elif self.arch == 64:
                # Each value is read from the register it is stored under.
                # The previous code stored the destination index register
                # under the key 'edi' (so ctx['rdi'] was always the default
                # '0') and read most values through the 32-bit names.
                # NOTE(review): assumes the debugger exposes the 64-bit
                # register names via `cpu` - confirm against the IDA version in use.
                regs = ('rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip',
                        'rdi', 'rsi', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
                        'r14', 'r15')
            else:
                # unknown architecture: keep the previous context untouched
                regs = None

            if regs is not None:
                self.ctx = defaultdict(
                    lambda: '0',
                    [(name, self.convert(getattr(cpu, name)))
                     for name in regs + flags])

            self.trace.append(
                Traceline(thread_id=tid,
                          addr=ea,
                          disasm=self.disconv(disasm_text),
                          ctx=deepcopy(self.ctx)))
        except Exception as e:
            print(e)
Exemplo n.º 2
0
def _ida_line_address(location, functions):
    """Resolve the address column of an IDA text trace line, given as the column split on ':'."""
    if len(location) == 3:
        # the hex address follows a four character prefix (presumably 'loc_')
        return int(location[2][4:], 16)
    if len(location) != 2:
        return BADADDR

    segment, target = location
    try:  # .segment:addr
        return int(target, 16)
    except ValueError:
        pass
    if '+' in target:  # .segment:func_name+offset
        name, offset = target.split('+')[:2]
        try:
            return functions[segment][name] + int(offset, 16)
        except (KeyError, ValueError):
            pass
    if '-' in target:  # .segment:func_name-offset
        # The offset is taken from the location itself; splitting the whole
        # raw line on '-' could pick up a '-' from the instruction operands.
        name, offset = target.split('-')[:2]
        try:
            return functions[segment][name] - int(offset, 16)
        except (KeyError, ValueError):
            pass
    if not target.startswith('loc_'):  # .segment:func_name
        return functions[segment][target]
    return int(target[4:], 16)  # .segment:jmp_location


def _split_debugger_disasm(text):
    """Split an OllyDbg/Immunity instruction string into [mnemonic, operand, ...]."""
    parts = text.lower().rstrip('\r\n').split(' ', 1)
    if len(parts) > 1 and ',' in parts[1]:
        operands = parts.pop(1)
        parts.extend(op.lstrip('0') for op in operands.split(','))
    parts = [p.split('dword ptr ')[1] if 'dword ptr ' in p else p
             for p in parts]
    # a single memory operand containing two arithmetic operators is rewritten by ida_offset
    if len(parts) == 2 and re.search(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                     parts[1]):
        parts[1] = ida_offset(parts[1])
    return parts


def _merge_context(context, assignments, junk):
    """Store every 'REG=VALUE' item of assignments in context; other items are ignored."""
    for item in assignments:
        pieces = item.split('=')
        if len(pieces) != 2:
            continue
        reg, value = pieces
        if len(value) > 1:
            # drop leading zeros and the given junk characters
            value = ''.join(ch for ch in value.lstrip('0') if ch not in junk)
        context[reg.lower()] = value or '0'


def load():
    """
    Load a trace from file. Supported are IDAs txt trace files and VMAttacks json files. Further OllyDBG and ImmunityDBG traces are supported but have slightly limited analysis capabilities.

    The file is chosen through a file dialog. If the dialog fails, the first
    ``*txt`` entry of the current working directory is used, and if there is
    none the user is asked for a path.

    :return: trace object, or None if no file was chosen or the file is
             neither a ``.txt`` nor a ``.json`` file
    :raises Exception: if the file content cannot be parsed into a trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        if fd.exec_():
            path = fd.selectedFiles()[0]
        else:
            path = None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!')
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                break  # take the first match, as the message above states
        if path == '':
            path = asktext(40, '', 'Please provide the full path to the trace file: ')

    if path is None:
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None
    trace = Trace()

    # segment name -> {function name -> start address}, used to resolve symbolic locations
    functions = {
        SegName(addr): {
            GetFunctionName(ea): ea
            for ea in Functions(SegStart(addr), SegEnd(addr))
        }
        for addr in Segments()
    }

    try:
        # One running register context; every trace line gets a snapshot (deepcopy) of it.
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0], addr=line[1], disasm=line[2],
                              ctx=line[3], comment=line[4])
                t.grade = line[5]
                trace.append(t)
        else:
            # Guarded header access so short/empty files end in the explicit
            # "no trace lines" error below instead of an opaque IndexError.
            first_line = lines[0] if lines else ''
            second_line = lines[1] if len(lines) > 1 else ''

            # ida trace via Win32Dbg
            if first_line.startswith('Thread '):
                for raw in lines[3:]:
                    if raw.startswith('Thread'):
                        break
                    values = raw.split('\t')
                    # thread id
                    thread_id = int(values[0], 16)
                    # addr
                    addr = _ida_line_address(
                        values[1].strip(' ').split(':'), functions)

                    # disasm
                    disasm = [part.lstrip() for part in
                              values[2].strip(' ').lower().split('  ')]
                    disasm = [part for part in disasm if part]
                    if len(disasm) > 1 and ', ' in disasm[1]:
                        operands = disasm.pop(1)
                        disasm.extend(
                            op.lstrip().lstrip('0').rstrip('h')
                            for op in operands.split(', '))

                    # remove [ebp+0]; str.replace returns a new string, so the
                    # result has to be stored back
                    disasm = [part.replace('[ebp+0]', '[ebp]')
                              for part in disasm]

                    # context
                    _merge_context(context,
                                   values[3].strip(' ').split(' '), '\r\n')

                    trace.append(Traceline(thread_id=thread_id, addr=addr,
                                           disasm=disasm,
                                           ctx=deepcopy(context)))

            # immunity trace
            elif first_line.startswith('Address\t'):
                for raw in lines[1:]:
                    if 'Run trace closed' in raw or 'Process terminated' in raw:
                        break
                    values = raw.split('\t')
                    try:
                        # thread_id; immunity uses names, e.g. main
                        thread_id = sum(ord(c) for c in values[1])
                        # addr
                        try:
                            addr = int(values[0], 16)
                        except ValueError:
                            addr = BADADDR
                        # disasm
                        disasm = _split_debugger_disasm(values[2])
                        # context
                        if len(values) > 3:
                            _merge_context(
                                context,
                                values[3].lstrip(' ').rstrip('\r\n').split(','),
                                '\n\r\t')
                        trace.append(Traceline(thread_id=thread_id, addr=addr,
                                               disasm=disasm,
                                               ctx=deepcopy(context)))
                    except Exception:
                        # Deliberate best effort: lines that are not
                        # instructions (e.g. 'terminated' / 'entry point'
                        # notices) are skipped.
                        continue

            # olly trace
            elif second_line.startswith('main\t'):
                for raw in lines[1:]:
                    if 'Logging stopped' in raw:
                        break
                    values = raw.split('\t')
                    # thread_id; olly uses names, e.g. main
                    thread_id = sum(ord(c) for c in values[0])
                    # addr
                    try:
                        addr = int(values[1], 16)
                    except (ValueError, IndexError):
                        addr = BADADDR
                    # disasm
                    disasm = _split_debugger_disasm(values[2])
                    # context
                    if len(values) > 3:
                        _merge_context(
                            context,
                            values[3].lstrip(' ').rstrip('\r\n').split(','),
                            '\n\r\t')
                    trace.append(Traceline(thread_id=thread_id, addr=addr,
                                           disasm=disasm,
                                           ctx=deepcopy(context)))

        try:
            last_ctx = trace[-1].ctx
        except IndexError:
            raise Exception('no trace lines could be parsed from %s' % path)
        # register width is derived from the register names in the last context
        if 'rax' in last_ctx:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        raise Exception('[*] Exception occured: \n%s\n' % e)
Exemplo n.º 3
0
def _ida_line_address(location, functions):
    """Resolve the address column of an IDA text trace line, given as the column split on ':'."""
    if len(location) == 3:
        # the hex address follows a four character prefix (presumably 'loc_')
        return int(location[2][4:], 16)
    if len(location) != 2:
        return BADADDR

    segment, target = location
    try:  # .segment:addr
        return int(target, 16)
    except ValueError:
        pass
    if '+' in target:  # .segment:func_name+offset
        name, offset = target.split('+')[:2]
        try:
            return functions[segment][name] + int(offset, 16)
        except (KeyError, ValueError):
            pass
    if '-' in target:  # .segment:func_name-offset
        # The offset is taken from the location itself; splitting the whole
        # raw line on '-' could pick up a '-' from the instruction operands.
        name, offset = target.split('-')[:2]
        try:
            return functions[segment][name] - int(offset, 16)
        except (KeyError, ValueError):
            pass
    if not target.startswith('loc_'):  # .segment:func_name
        return functions[segment][target]
    return int(target[4:], 16)  # .segment:jmp_location


def _split_debugger_disasm(text):
    """Split an OllyDbg/Immunity instruction string into [mnemonic, operand, ...]."""
    parts = text.lower().rstrip('\r\n').split(' ', 1)
    if len(parts) > 1 and ',' in parts[1]:
        operands = parts.pop(1)
        parts.extend(op.lstrip('0') for op in operands.split(','))
    parts = [p.split('dword ptr ')[1] if 'dword ptr ' in p else p
             for p in parts]
    # a single memory operand containing two arithmetic operators is rewritten by ida_offset
    if len(parts) == 2 and re.search(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                     parts[1]):
        parts[1] = ida_offset(parts[1])
    return parts


def _merge_context(context, assignments, junk):
    """Store every 'REG=VALUE' item of assignments in context; other items are ignored."""
    for item in assignments:
        pieces = item.split('=')
        if len(pieces) != 2:
            continue
        reg, value = pieces
        if len(value) > 1:
            # drop leading zeros and the given junk characters
            value = ''.join(ch for ch in value.lstrip('0') if ch not in junk)
        context[reg.lower()] = value or '0'


def load():
    """
    Load a trace from file. Supported are IDAs txt trace files and VMAttacks json files. Further OllyDBG and ImmunityDBG traces are supported but have slightly limited analysis capabilities.

    The file is chosen through a file dialog. If the dialog fails, the first
    ``*txt`` entry of the current working directory is used, and if there is
    none the user is asked for a path.

    :return: trace object, or None if no file was chosen or the file is
             neither a ``.txt`` nor a ``.json`` file
    :raises Exception: if the file content cannot be parsed into a trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        if fd.exec_():
            path = fd.selectedFiles()[0]
        else:
            path = None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!'
            )
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                break  # take the first match, as the message above states
        if path == '':
            path = asktext(40, '',
                           'Please provide the full path to the trace file: ')

    if path is None:
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None
    trace = Trace()

    # segment name -> {function name -> start address}, used to resolve symbolic locations
    functions = {
        SegName(addr): {
            GetFunctionName(ea): ea
            for ea in Functions(SegStart(addr), SegEnd(addr))
        }
        for addr in Segments()
    }

    try:
        # One running register context; every trace line gets a snapshot (deepcopy) of it.
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0],
                              addr=line[1],
                              disasm=line[2],
                              ctx=line[3],
                              comment=line[4])
                t.grade = line[5]
                trace.append(t)
        else:
            # Guarded header access so short/empty files end in the explicit
            # "no trace lines" error below instead of an opaque IndexError.
            first_line = lines[0] if lines else ''
            second_line = lines[1] if len(lines) > 1 else ''

            # ida trace via Win32Dbg
            if first_line.startswith('Thread '):
                for raw in lines[3:]:
                    if raw.startswith('Thread'):
                        break
                    values = raw.split('\t')
                    # thread id
                    thread_id = int(values[0], 16)
                    # addr
                    addr = _ida_line_address(
                        values[1].strip(' ').split(':'), functions)

                    # disasm
                    disasm = [
                        part.lstrip()
                        for part in values[2].strip(' ').lower().split('  ')
                    ]
                    disasm = [part for part in disasm if part]
                    if len(disasm) > 1 and ', ' in disasm[1]:
                        operands = disasm.pop(1)
                        disasm.extend(
                            op.lstrip().lstrip('0').rstrip('h')
                            for op in operands.split(', '))

                    # remove [ebp+0]; str.replace returns a new string, so the
                    # result has to be stored back
                    disasm = [
                        part.replace('[ebp+0]', '[ebp]') for part in disasm
                    ]

                    # context
                    _merge_context(context,
                                   values[3].strip(' ').split(' '), '\r\n')

                    trace.append(
                        Traceline(thread_id=thread_id,
                                  addr=addr,
                                  disasm=disasm,
                                  ctx=deepcopy(context)))

            # immunity trace
            elif first_line.startswith('Address\t'):
                for raw in lines[1:]:
                    if 'Run trace closed' in raw or 'Process terminated' in raw:
                        break
                    values = raw.split('\t')
                    try:
                        # thread_id; immunity uses names, e.g. main
                        thread_id = sum(ord(c) for c in values[1])
                        # addr
                        try:
                            addr = int(values[0], 16)
                        except ValueError:
                            addr = BADADDR
                        # disasm
                        disasm = _split_debugger_disasm(values[2])
                        # context
                        if len(values) > 3:
                            _merge_context(
                                context,
                                values[3].lstrip(' ').rstrip('\r\n').split(
                                    ','), '\n\r\t')
                        trace.append(
                            Traceline(thread_id=thread_id,
                                      addr=addr,
                                      disasm=disasm,
                                      ctx=deepcopy(context)))
                    except Exception:
                        # Deliberate best effort: lines that are not
                        # instructions (e.g. 'terminated' / 'entry point'
                        # notices) are skipped.
                        continue

            # olly trace
            elif second_line.startswith('main\t'):
                for raw in lines[1:]:
                    if 'Logging stopped' in raw:
                        break
                    values = raw.split('\t')
                    # thread_id; olly uses names, e.g. main
                    thread_id = sum(ord(c) for c in values[0])
                    # addr
                    try:
                        addr = int(values[1], 16)
                    except (ValueError, IndexError):
                        addr = BADADDR
                    # disasm
                    disasm = _split_debugger_disasm(values[2])
                    # context
                    if len(values) > 3:
                        _merge_context(
                            context,
                            values[3].lstrip(' ').rstrip('\r\n').split(','),
                            '\n\r\t')
                    trace.append(
                        Traceline(thread_id=thread_id,
                                  addr=addr,
                                  disasm=disasm,
                                  ctx=deepcopy(context)))

        try:
            last_ctx = trace[-1].ctx
        except IndexError:
            raise Exception('no trace lines could be parsed from %s' % path)
        # register width is derived from the register names in the last context
        if 'rax' in last_ctx:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        raise Exception('[*] Exception occured: \n%s\n' % e)
Exemplo n.º 4
0
def create_bb_diff(bb, ctx_reg_size, prev_line_ctx):
    """
    Collapse a basic block into one summarising Traceline.

    Addr and thread id are taken from the last line of the block. ctx is shown
    as ``before -> after`` for every value that differs between the first and
    the last line of the block. disasm (and comment) is chosen by heuristic.

    :param bb: non-empty sequence of trace lines forming the basic block
    :param ctx_reg_size: register size passed on to ``get_reg``
    :param prev_line_ctx: context of the line preceding the block; it supplies
                          the expected key set and the initial lookup context
    :return: Traceline whose disasm is the list of kept instructions and whose
             comment is the list of collected comments
    :raises Exception: if the context of the last line does not cover the keys
                       of ``prev_line_ctx``
    """
    first = bb[0]
    last = bb[-1]
    # Compare the key *sets*: comparing key lists is order-sensitive and can
    # reject two contexts that hold exactly the same keys.
    keys_before = set(prev_line_ctx.keys())
    keys_after = set(last.ctx.keys())
    if keys_after != keys_before and len(keys_after) <= len(keys_before):
        # Keys got lost inside the block. Should not be possible by normal execution.
        raise Exception(
            '[*] Keys at the end of basic block %s-%s were LESS than at the beginning!'
            % (first.addr, last.addr))

    context = {}
    # NOTE(review): the key set comes from prev_line_ctx, but the 'before'
    # value is read from the first line of the block - kept as in the original.
    for key in keys_before:
        if first.ctx[key] != last.ctx[key]:
            context[key] = first.ctx[key] + ' -> ' + last.ctx[key]
        else:
            context[key] = last.ctx[key]
    # keys that only appeared during the block are copied over unchanged
    for key in keys_after - keys_before:
        context[key] = last.ctx[key]

    disasm = []
    comment = []
    last_ctx = prev_line_ctx
    for idx, line in enumerate(bb):
        if line.comment is not None:
            comment.append(line.comment)
        mnemonic = line.disasm[0]
        operands = line.disasm[1:]

        if mnemonic.startswith('mov'):
            try:
                # Skip a mov that is directly followed by another mov to the
                # same register class.
                nxt = bb[idx + 1]
                if nxt.disasm[0].startswith('mov') and get_reg_class(
                        nxt.disasm[1]) == get_reg_class(line.disasm[1]):
                    continue
            except Exception:
                # last line of the block or operands that cannot be inspected:
                # no lookahead possible, fall through to the checks below
                pass
            # Operand counts are checked so that mov variants without explicit
            # operands do not raise IndexError.
            if operands and operands[0].startswith(
                    '[') and operands[0].endswith(']'):
                if len(operands) > 1:
                    comment.append(operands[0] + '=' + operands[1])
            elif operands and get_reg_class(operands[0]) is not None:
                continue
        elif mnemonic.startswith('j'):
            continue
        elif line.comment is not None and len(line.disasm) == 3 and (
                operands[0].startswith('[') or operands[1].startswith('[')):
            # Extend the comment appended above with the mnemonic and the
            # operand opposite the bracketed one (the first operand is checked
            # first); a register is shown by its value in the last kept context.
            other = operands[1] if operands[0].startswith('[') else operands[0]
            if get_reg_class(other) is not None:
                shown = last_ctx[get_reg(other, ctx_reg_size)]
            else:
                shown = other
            comment[-1] = comment[-1] + ' ' + mnemonic + ' ' + shown
        disasm.append(line.disasm)
        # only updated for lines that were kept (skipped lines `continue` above)
        last_ctx = line.ctx

    result = Traceline(addr=last.addr,
                       thread_id=last.thread_id,
                       ctx=context,
                       disasm=disasm,
                       comment=comment)
    return result