def x86_imul(ctx, i):
    if len(i.operands) == 1:
        # single operand form
        ctx.emit(unkn_())
    elif len(i.operands) == 2:
        # double operand form
        a = operand.get(ctx, i, 0)
        b = operand.get(ctx, i, 1)

        result = ctx.tmp(a.size * 2)

        ctx.emit(mul_(a, b, result))

        operand.set(ctx, i, 0, result)
        _imul_set_flags(ctx, result)
    else:
        # triple operand form
        a = operand.get(ctx, i, 1)
        b = operand.get(ctx, i, 2)

        if b.size < a.size:
            prev_b = b
            b = ctx.tmp(a.size)
            ctx.emit(sex_(prev_b, b))

        result = ctx.tmp(a.size * 2)

        ctx.emit(mul_(a, b, result))

        operand.set(ctx, i, 0, result)
        _imul_set_flags(ctx, result)

def x86_movq(ctx, i):
    dst = operand.get(ctx, i, 0)
    value = operand.get(ctx, i, 1)

    operand.set(ctx, i, 0, value, clear=True, sign_extend=False)

def x86_shrd(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    if len(i.operands) == 2:
        c = ctx.counter
    else:
        c = operand.get(ctx, i, 2)

    size = a.size
    max_shift = size - 1

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    result = ctx.tmp(size)

    # the shift amount is truncated at word_size - 1
    ctx.emit(and_(c, imm(max_shift, size), tmp0))

    # make a register double the size of the operands containing b:a
    # (b is shifted into the high half; the shift amount is in bits)
    ctx.emit(str_(b, tmp1))
    ctx.emit(lshl_(tmp1, imm(size, 8), tmp1))
    ctx.emit(or_(tmp1, a, tmp1))

    # now shift right by the desired amount
    ctx.emit(lshr_(tmp1, tmp0, tmp1))

    # and truncate into result
    ctx.emit(str_(tmp1, result))

    # TODO: flags properly
    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)

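# Illustrative sketch only: a pure-Python model of the double-width idiom used
# by x86_shrd above (concatenate b:a, shift right, truncate). The helper name
# _shrd_reference is hypothetical and is not part of the translator.
def _shrd_reference(a, b, count, size):
    """Shift a right by count bits, filling the vacated high bits from b."""
    count &= size - 1                # shift amount is masked, as above
    wide = (b << size) | a           # b in the high half, a in the low half
    return (wide >> count) & ((1 << size) - 1)

# e.g. _shrd_reference(0xF0, 0x0F, 4, 8) == 0xFF
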
def x86_arpl(ctx, i):
    dest_seg = operand.get(ctx, i, 0)
    src_seg = operand.get(ctx, i, 1)

    dest_rpl = ctx.tmp(16)
    src_rpl = ctx.tmp(16)
    tmp0 = ctx.tmp(32)
    tmp1 = ctx.tmp(8)
    result_seg = ctx.tmp(16)
    tmp2 = ctx.tmp(16)

    # extract the RPL fields of both selectors
    ctx.emit(lshr_(dest_seg, imm(14, 8), dest_rpl))
    ctx.emit(lshr_(src_seg, imm(14, 8), src_rpl))

    # the check passes if dest RPL >= src RPL
    ctx.emit(sub_(dest_rpl, src_rpl, tmp0))
    ctx.emit(and_(tmp0, imm(sign_bit(32), 32), tmp0))
    ctx.emit(bisz_(tmp0, tmp1))
    ctx.emit(jcc_(tmp1, 'check_passed'))

    # dest RPL < src RPL: set zf and copy the RPL field from src into dest
    ctx.emit(str_(imm(1, 8), r('zf', 8)))
    ctx.emit(and_(dest_seg, imm(0b0011111111111111, 16), result_seg))
    ctx.emit(and_(src_seg, imm(0b1100000000000000, 16), tmp2))
    ctx.emit(or_(result_seg, tmp2, result_seg))
    operand.set(ctx, i, 0, result_seg)
    ctx.emit(jcc_(imm(1, 8), 'done'))

    ctx.emit('check_passed')
    ctx.emit(str_(imm(0, 8), r('zf', 8)))

    ctx.emit('done')
    ctx.emit(nop_())

def x86_pminu(ctx, i, size):
    a_id, b_id, dst_id = vex_opnds(i)

    a = operand.get(ctx, i, a_id)
    b = operand.get(ctx, i, b_id)

    a_parts = unpack(ctx, a, size)
    b_parts = unpack(ctx, b, size)

    tmp0 = ctx.tmp(size * 2)
    tmp1 = ctx.tmp(size * 2)

    dst_parts = []
    for a_part, b_part in zip(a_parts, b_parts):
        dst_part = ctx.tmp(size)

        ctx.emit(sub_(a_part, b_part, tmp0))
        ctx.emit(and_(tmp0, imm(sign_bit(size * 2), size * 2), tmp0))
        ctx.emit(bisz_(tmp0, tmp1))
        ctx.emit(mul_(b_part, tmp1, tmp0))
        ctx.emit(xor_(tmp1, imm(1, size * 2), tmp1))
        ctx.emit(mul_(a_part, tmp1, tmp1))
        ctx.emit(add_(tmp0, tmp1, tmp0))
        ctx.emit(str_(tmp0, dst_part))

        dst_parts.append(dst_part)

    value = pack(ctx, dst_parts)

    operand.set(ctx, i, dst_id, value)

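# Illustrative sketch only: the loop above selects min(a, b) per lane without a
# branch, by multiplying each input with a 0/1 predicate. A pure-Python model of
# that idiom; _pminu_reference is a hypothetical name, not part of the translator.
def _pminu_reference(a, b, size):
    """Unsigned minimum of two size-bit values using the multiply-select trick."""
    diff = (a - b) & ((1 << (2 * size)) - 1)              # double-width subtraction
    a_ge_b = 0 if diff & (1 << (2 * size - 1)) else 1     # sign bit clear -> a >= b
    return b * a_ge_b + a * (1 - a_ge_b)

# e.g. _pminu_reference(200, 13, 8) == 13
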
def x86_pshufd(ctx, i):
    src = operand.get(ctx, i, 1)
    order = operand.get(ctx, i, 2)

    value = imm(0, 128)

    for j in range(0, 4):
        prev_order = order
        order = ctx.tmp(8)
        prev_value = value
        value = ctx.tmp(128)

        tmp0 = ctx.tmp(128)
        tmp1 = ctx.tmp(8)
        tmp2 = ctx.tmp(32)
        tmp3 = ctx.tmp(128)
        tmp4 = ctx.tmp(32)
        tmp5 = ctx.tmp(128)
        tmp6 = ctx.tmp(128)

        ctx.emit(lshr_(prev_order, imm(2, 8), order))
        ctx.emit(and_(prev_order, imm(0b00000011, 8), tmp1))
        ctx.emit(mul_(tmp1, imm(32, 32), tmp2))
        ctx.emit(lshr_(src, tmp2, tmp3))
        ctx.emit(str_(tmp3, tmp4))
        ctx.emit(str_(tmp4, tmp5))
        ctx.emit(lshl_(tmp5, imm(j * 32, 8), tmp6))
        ctx.emit(add_(tmp6, prev_value, value))

    operand.set(ctx, i, 0, value)

def x86_bextr(ctx, i):
    a = operand.get(ctx, i, 1)
    b = operand.get(ctx, i, 2)

    start = ctx.tmp(8)
    length = ctx.tmp(8)
    bit_mask = ctx.tmp(a.size)
    tmp0 = ctx.tmp(8)
    result = ctx.tmp(a.size)

    ctx.emit(str_(b, start))
    ctx.emit(lshr_(b, imm(8, 8), length))

    # we are masking off [11111[start + length, start]111111]
    ctx.emit(sub_(imm(a.size, a.size), length, tmp0))
    ctx.emit(lshr_(imm(mask(a.size), a.size), tmp0, bit_mask))

    # [[start + length, start]111111]
    ctx.emit(add_(tmp0, start, tmp0))
    ctx.emit(lshl_(bit_mask, tmp0, bit_mask))

    # [000000000000[start + length, start]]
    ctx.emit(lshr_(bit_mask, start, bit_mask))

    # we have our mask [00000[start + length, start]000000]
    ctx.emit(and_(a, bit_mask, result))
    ctx.emit(lshr_(result, start, result))

    set_zf(ctx, result)
    ctx.emit(str_(imm(0, 8), r('cf', 8)))
    ctx.emit(undef_(r('af', 8)))
    ctx.emit(undef_(r('sf', 8)))
    ctx.emit(undef_(r('pf', 8)))

    operand.set(ctx, i, 0, result)

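# Illustrative sketch only: the architectural effect x86_bextr above is aiming
# for, as a pure-Python model (extract `length` bits of the source starting at
# bit `start`, zero-extended). _bextr_reference is a hypothetical name.
def _bextr_reference(src, start, length, size):
    all_ones = (1 << size) - 1
    field_mask = all_ones >> (size - length) if length < size else all_ones
    return (src >> start) & field_mask

# e.g. _bextr_reference(0b10110100, 2, 3, 8) == 0b101
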
def x86_cmpxchg(ctx, i):
    a = ctx.accumulator
    b = operand.get(ctx, i, 0)
    c = operand.get(ctx, i, 1)

    if b.size != a.size:
        prev_a = a
        a = ctx.tmp(b.size)
        ctx.emit(str_(prev_a, a))

    tmp0 = ctx.tmp(8)

    ctx.emit(equ_(a, b, tmp0))
    ctx.emit(jcc_(tmp0, 'equal'))

    ctx.emit('not-equal')
    # the accumulator receives the destination operand when the comparison fails
    ctx.emit(str_(b, ctx.accumulator))
    ctx.emit(str_(imm(0, 8), r('zf', 8)))
    ctx.emit(jcc_(imm(1, 8), 'done'))

    ctx.emit('equal')
    operand.set(ctx, i, 0, c)
    ctx.emit(str_(imm(1, 8), r('zf', 8)))

    ctx.emit('done')
    ctx.emit(nop_())

def x86_rol(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    max_shift = a.size - 1
    size = a.size

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(8)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    tmp6 = ctx.tmp(size * 2)
    tmp7 = ctx.tmp(size)
    tmp8 = ctx.tmp(size)
    result = ctx.tmp(size)

    # the rotate amount is truncated at word_size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero rotate doesn't affect flags
    ctx.emit(bisz_(tmp0, tmp1))
    ctx.emit(jcc_(tmp1, 'zero_rotate'))

    # zero extend
    ctx.emit(str_(a, tmp2))

    # left shift by the correct amount
    ctx.emit(lshl_(tmp2, tmp0, tmp3))

    # truncate to get first half of result
    ctx.emit(str_(tmp3, tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(lshr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, tmp6))

    # or both halves of the result
    ctx.emit(or_(tmp4, tmp6, result))

    # compute carry flag (last bit that was shifted across)
    ctx.emit(and_(result, imm(1, size), tmp7))
    ctx.emit(bisnz_(tmp7, r('cf', 8)))

    if isinstance(b, reil.ImmediateOperand) and b.value == 1:
        # overflow flag is msb of input ^ msb output
        tmp9 = ctx.tmp(size)
        ctx.emit(and_(a, imm(sign_bit(size), size), tmp8))
        ctx.emit(xor_(tmp8, tmp7, tmp8))
        ctx.emit(bisnz_(tmp8, r('of', 8)))
    else:
        ctx.emit(undef_(r('of', 8)))

    operand.set(ctx, i, 0, result)

    ctx.emit('zero_rotate')
    ctx.emit(nop_())

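# Illustrative sketch only: a pure-Python model of the double-width rotate used
# by x86_rol above (shift left into a register twice as wide, then OR the two
# halves back together). _rol_reference is a hypothetical name.
def _rol_reference(a, count, size):
    count &= size - 1
    wide = a << count                  # occupies at most 2 * size bits
    low = wide & ((1 << size) - 1)     # truncated low half
    high = wide >> size                # bits rotated out of the top
    return low | high

# e.g. _rol_reference(0b10000001, 1, 8) == 0b00000011
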
def x86_pcmpgt(ctx, i, size):
    a_id, b_id, dst_id = vex_opnds(i)

    a = operand.get(ctx, i, a_id)
    b = operand.get(ctx, i, b_id)

    a_parts = unpack(ctx, a, size)
    b_parts = unpack(ctx, b, size)

    a_sign = ctx.tmp(size)
    a_abs = ctx.tmp(size)
    b_sign = ctx.tmp(size)
    b_abs = ctx.tmp(size)
    tmp0 = ctx.tmp(size * 2)
    a_abs_lt_b_abs = ctx.tmp(8)
    tmp1 = ctx.tmp(size)
    a_b_same_sign = ctx.tmp(8)
    a_neg = ctx.tmp(8)
    b_nonneg = ctx.tmp(8)
    a_neg_and_b_nonneg = ctx.tmp(8)
    cond = ctx.tmp(8)

    dst_parts = []
    for a_part, b_part in zip(a_parts, b_parts):
        dst_part = ctx.tmp(size)

        ctx.emit(and_(a_part, imm(sign_bit(size), size), a_sign))
        ctx.emit(and_(a_part, imm(~sign_bit(size), size), a_abs))
        ctx.emit(and_(b_part, imm(sign_bit(size), size), b_sign))
        ctx.emit(and_(b_part, imm(~sign_bit(size), size), b_abs))

        # a < b <==> (|a| < |b| and sign(a) == sign(b)) or (a < 0 and b >= 0)

        # |a| < |b|
        ctx.emit(sub_(a_abs, b_abs, tmp0))
        ctx.emit(and_(tmp0, imm(sign_bit(size * 2), size * 2), tmp0))
        ctx.emit(bisz_(tmp0, a_abs_lt_b_abs))

        # sign(a) == sign(b)
        ctx.emit(xor_(a_sign, b_sign, tmp1))
        ctx.emit(bisz_(tmp1, a_b_same_sign))

        # a < 0 and b >= 0
        ctx.emit(bisnz_(a_sign, a_neg))
        ctx.emit(bisz_(b_sign, b_nonneg))
        ctx.emit(and_(a_neg, b_nonneg, a_neg_and_b_nonneg))

        ctx.emit(and_(a_abs_lt_b_abs, a_b_same_sign, cond))
        ctx.emit(or_(cond, a_neg_and_b_nonneg, cond))

        ctx.emit(mul_(cond, imm(mask(size), size), dst_part))

        dst_parts.append(dst_part)

    value = pack(ctx, dst_parts)

    operand.set(ctx, i, dst_id, value)

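# Illustrative sketch only: a pure-Python statement of the identity quoted in
# the comment above, reading |x| as the value with its sign bit cleared. The
# helper name _signed_lt_reference is hypothetical, not part of the translator.
def _signed_lt_reference(a, b, size):
    """a < b for size-bit two's-complement values, using only sign and low bits."""
    sign = 1 << (size - 1)
    a_neg, b_neg = bool(a & sign), bool(b & sign)
    a_low, b_low = a & (sign - 1), b & (sign - 1)
    return (a_low < b_low and a_neg == b_neg) or (a_neg and not b_neg)

# Exhaustive check against Python's own signed compare for 4-bit values:
# all(_signed_lt_reference(a, b, 4) ==
#     ((a - 16 if a > 7 else a) < (b - 16 if b > 7 else b))
#     for a in range(16) for b in range(16))
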
def x86_movlpd(ctx, i):
    a = operand.get(ctx, i, 0)
    value = operand.get(ctx, i, 1)

    tmp0 = ctx.tmp(a.size)
    tmp1 = ctx.tmp(a.size)

    ctx.emit(and_(a, imm(0xffffffffffffffff0000000000000000, 128), tmp0))
    ctx.emit(or_(tmp0, value, tmp0))

    operand.set(ctx, i, 0, tmp0)

def x86_bt(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    bitmask = ctx.tmp(a.size)
    bit = ctx.tmp(a.size)

    ctx.emit(lshl_(imm(1, a.size), b, bitmask))
    ctx.emit(and_(a, bitmask, bit))
    ctx.emit(bisnz_(bit, r('cf', 8)))

def x86_shl(ctx, i):
    a = operand.get(ctx, i, 0)

    if len(i.operands) == 1:
        if i.mnemonic.endswith('1'):
            b = imm(1, a.size)
        else:
            b = ctx.counter
    else:
        b = operand.get(ctx, i, 1)

    max_shift = a.size - 1
    size = a.size

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(8)
    tmp5 = ctx.tmp(size)
    tmp6 = ctx.tmp(8)
    result = ctx.tmp(size)

    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero extend
    ctx.emit(str_(a, tmp1))

    # left shift by the correct amount
    ctx.emit(lshl_(tmp1, tmp0, tmp2))

    # truncate to get result
    ctx.emit(str_(tmp2, result))

    # compute carry flag
    ctx.emit(and_(tmp2, imm(carry_bit(size), size * 2), tmp3))
    ctx.emit(bisnz_(tmp3, r('cf', 8)))

    ctx.emit(equ_(tmp0, imm(1, size), tmp4))
    ctx.emit(bisz_(tmp4, tmp4))
    ctx.emit(jcc_(tmp4, 'no_overflow_flag'))

    # compute overflow flag
    ctx.emit(and_(result, imm(sign_bit(size), size), tmp5))
    ctx.emit(bisz_(tmp5, tmp6))
    ctx.emit(equ_(r('cf', 8), tmp6, r('of', 8)))
    ctx.emit(jcc_(imm(1, 8), 'overflow_flag_done'))

    ctx.emit('no_overflow_flag')
    ctx.emit(str_(imm(0, 8), r('of', 8)))

    ctx.emit('overflow_flag_done')

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)

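# Illustrative sketch only: a pure-Python model of the flag handling above,
# assuming carry_bit(size) denotes bit `size` of the widened result. The carry
# is the bit shifted past the top; for a 1-bit shift the overflow flag is
# MSB(result) XOR CF. _shl_reference is a hypothetical name.
def _shl_reference(a, count, size):
    count &= size - 1
    wide = a << count
    result = wide & ((1 << size) - 1)
    cf = (wide >> size) & 1
    of = ((result >> (size - 1)) & 1) ^ cf if count == 1 else None  # undefined otherwise
    return result, cf, of

# e.g. _shl_reference(0b11000000, 1, 8) == (0b10000000, 1, 0)
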
def x86_test(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1, a.size)

    size = min(a.size, b.size)
    result = ctx.tmp(size)

    ctx.emit(and_(a, b, result))

    _logic_set_flags(ctx, result)

def x86_pxor(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    size = min(a.size, b.size)
    value = ctx.tmp(size)

    ctx.emit(xor_(a, b, value))

    operand.set(ctx, i, 0, value)

def x86_xchg(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    tmp0 = ctx.tmp(a.size)

    ctx.emit(str_(a, tmp0))

    operand.set(ctx, i, 0, b)
    operand.set(ctx, i, 1, tmp0)

def x86_pslldq(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    result = ctx.tmp(a.size)
    shift = min(b.value, 16)

    # left shift by the correct amount
    ctx.emit(lshl_(a, imm(shift * 8, 8), result))

    operand.set(ctx, i, 0, result)

def x86_cmp(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)
    b = _sign_extend(ctx, a, b)

    result = ctx.tmp(a.size * 2)

    ctx.emit(sub_(a, b, result))

    _sub_set_flags(ctx, a, b, result)

def x86_psrldq(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    result = ctx.tmp(a.size)
    shift = min(b.value, 16)

    # right shift by the correct amount
    ctx.emit(lshr_(a, imm(shift * 8, 8), result))

    operand.set(ctx, i, 0, result)

def x86_xor(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1, a.size)

    size = min(a.size, b.size)
    result = ctx.tmp(size)

    ctx.emit(xor_(a, b, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result, clear=True)

def x86_movhpd(ctx, i):
    a = operand.get(ctx, i, 0)
    value = operand.get(ctx, i, 1)

    tmp0 = ctx.tmp(a.size)
    tmp1 = ctx.tmp(a.size)

    ctx.emit(and_(a, imm(0x0000000000000000ffffffffffffffff, 128), tmp0))
    ctx.emit(str_(value, tmp1))
    ctx.emit(lshl_(tmp1, imm(64, 8), tmp1))
    ctx.emit(or_(tmp0, tmp1, tmp0))

    operand.set(ctx, i, 0, tmp0)

def x86_movzx(ctx, i):
    dst = operand.get(ctx, i, 0)

    if len(i.operands) == 1:
        # source is the accumulator
        value = ctx.accumulator
    else:
        value = operand.get(ctx, i, 1)

    operand.set(ctx, i, 0, value, clear=True, sign_extend=False)

def _write_bit(ctx, i, base_index, offset_index, bit):
    if operand.is_memory(ctx, i, base_index):
        # nasty case, indexing into in-memory bitstring; offset can be
        # > word_size
        base = operand.get_address(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)

        offset_sign = ctx.tmp(8)
        byte_offset = ctx.tmp(base.size)
        tmp0 = ctx.tmp(offset.size)
        byte = ctx.tmp(8)
        bitmask = ctx.tmp(8)

        ctx.emit(and_(offset, imm(sign_bit(offset.size), offset.size), tmp0))
        ctx.emit(bisnz_(tmp0, offset_sign))
        ctx.emit(and_(offset, imm(~sign_bit(offset.size), offset.size), offset))

        ctx.emit(div_(offset, imm(8, offset.size), byte_offset))
        ctx.emit(mod_(offset, imm(8, offset.size), offset))

        ctx.emit(jcc_(offset_sign, 'negative_offset'))
        ctx.emit(add_(base, byte_offset, base))
        ctx.emit(jcc_(imm(1, 8), 'base_calculated'))

        ctx.emit('negative_offset')
        ctx.emit(sub_(base, byte_offset, base))

        ctx.emit('base_calculated')
        ctx.emit(ldm_(base, byte))

        # clear the target bit, then or in the new bit shifted into place
        ctx.emit(lshl_(imm(1, 8), offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(8), 8), bitmask))
        ctx.emit(and_(byte, bitmask, byte))
        ctx.emit(lshl_(bit, offset, bitmask))
        ctx.emit(or_(byte, bitmask, byte))
        ctx.emit(stm_(byte, base))
    else:
        # simple case, it's a register
        a = operand.get(ctx, i, base_index)
        offset = operand.get(ctx, i, offset_index)

        bitmask = ctx.tmp(a.size)
        tmp0 = ctx.tmp(a.size)
        tmp1 = ctx.tmp(a.size)

        ctx.emit(lshl_(imm(1, a.size), offset, bitmask))
        ctx.emit(xor_(bitmask, imm(mask(a.size), a.size), bitmask))
        ctx.emit(and_(a, bitmask, tmp0))
        ctx.emit(str_(bit, tmp1))
        ctx.emit(lshl_(tmp1, offset, tmp1))
        ctx.emit(or_(tmp0, tmp1, tmp1))

        operand.set(ctx, i, base_index, tmp1)

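# Illustrative sketch only: the read-modify-write that both branches above
# perform, as a pure-Python model on a single byte (clear the target bit, then
# OR in the new bit shifted into place). _write_bit_reference is a hypothetical name.
def _write_bit_reference(byte, bit_offset, bit):
    cleared = byte & ~(1 << bit_offset) & 0xff
    return cleared | ((bit & 1) << bit_offset)

# e.g. _write_bit_reference(0b11110000, 1, 1) == 0b11110010
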
def x86_sub(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)
    b = _sign_extend(ctx, a, b)

    result = ctx.tmp(a.size * 2)

    ctx.emit(sub_(a, b, result))

    _sub_set_flags(ctx, a, b, result)

    operand.set(ctx, i, 0, result)

def x86_add(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)
    b = _sign_extend(ctx, a, b)

    result = ctx.tmp(a.size * 2)

    ctx.emit(add_(a, b, result))

    _add_set_flags(ctx, a, b, result)

    operand.set(ctx, i, 0, result)

def x86_andn(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1, a.size)

    size = min(a.size, b.size)
    result = ctx.tmp(size)

    ctx.emit(xor_(a, imm(mask(size), size), result))
    ctx.emit(and_(result, b, result))

    _logic_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)

def x86_pxor(ctx, i):
    a_id, b_id, dst_id = vex_opnds(i)

    a = operand.get(ctx, i, a_id)
    b = operand.get(ctx, i, b_id)

    value = ctx.tmp(a.size)

    ctx.emit(xor_(a, b, value))

    # TODO: this will clear all the remaining bits of the destination register,
    # which is incorrect for the legacy sse version. When ymmX register support
    # is added, this will be broken.
    operand.set(ctx, i, dst_id, value)

def x86_sar(ctx, i):
    a = operand.get(ctx, i, 0)

    if len(i.operands) == 1:
        if i.mnemonic.endswith('1'):
            b = imm(1, a.size)
        else:
            b = ctx.counter
    else:
        b = operand.get(ctx, i, 1)

    max_shift = a.size - 1
    size = a.size

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    result = ctx.tmp(a.size)

    # the shift amount is truncated at word_size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero extend
    ctx.emit(str_(a, tmp1))

    # left shift all the way
    ctx.emit(lshl_(tmp1, imm(max_shift + 1, size * 2), tmp2))

    # right shift by the correct amount
    ctx.emit(ashr_(tmp2, tmp0, tmp3))

    # save off the first bit that is going to be lost
    ctx.emit(and_(tmp3, imm(sign_bit(size), size * 2), tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(ashr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, result))

    # set carry flag (the last bit shifted out)
    ctx.emit(bisnz_(tmp4, r('cf', 8)))

    # overflow flag is always 0
    ctx.emit(str_(imm(0, 8), r('of', 8)))

    _shift_set_flags(ctx, result)

    operand.set(ctx, i, 0, result)

def x86_sbb(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)
    b = _sign_extend(ctx, a, b)

    result = ctx.tmp(a.size * 2)

    ctx.emit(sub_(a, b, result))
    ctx.emit(sub_(result, r('cf', 8), result))

    _sub_set_flags(ctx, a, b, result)

    operand.set(ctx, i, 0, result, clear=True)

def x86_adc(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)
    b = _sign_extend(ctx, a, b)

    result = ctx.tmp(a.size * 2)

    ctx.emit(add_(a, b, result))
    ctx.emit(add_(result, r('cf', 8), result))

    _add_set_flags(ctx, a, b, result)

    operand.set(ctx, i, 0, result)

def x86_call(ctx, i):
    """call procedure"""
    dst = operand.get(ctx, i, 0)

    _push(ctx, i, imm(i.address + i.size, ctx.word_size))

    ctx.emit(jcc_(imm(1, 8), dst))

def x86_idiv(ctx, i):
    divisor = operand.get(ctx, i, 0)
    dividend = ctx.tmp(divisor.size * 2)

    if divisor.size == 8:
        # dividend is ax
        ctx.emit(str_(ctx.accumulator, dividend))
    else:
        # dividend is dx:ax, edx:eax, rdx:rax
        dividend_lo = ctx.tmp(divisor.size)
        dividend_hi = ctx.tmp(divisor.size)

        ctx.emit(str_(ctx.accumulator, dividend_lo))
        ctx.emit(str_(ctx.data, dividend_hi))
        ctx.emit(lshl_(dividend_hi, imm(divisor.size, 8), dividend))
        ctx.emit(or_(dividend, dividend_lo, dividend))

    quotient = ctx.tmp(divisor.size)
    remainder = ctx.tmp(divisor.size)

    # TODO: implement checking for overflow
    # TODO: also, is a signed divide/modulus different to unsigned, or is it
    # just a question of the error cases being different? testcases so far
    # suggest that it is the same, but that is just from program traces, not
    # exhaustive proof.

    ctx.emit(sdiv_(dividend, divisor, quotient))
    ctx.emit(mod_(dividend, divisor, remainder))

    # compute sign of remainder
    tmp = ctx.tmp(dividend.size)
    ctx.emit(and_(dividend, imm(sign_bit(dividend.size), dividend.size), tmp))
    ctx.emit(bisz_(tmp, tmp))
    ctx.emit(jcc_(tmp, 'positive'))

    # remainder is negative

    # remainder is positive, nothing to do
    ctx.emit('positive')

    if divisor.size == 8:
        # result goes in ax
        result = r(ctx.accumulator.name, 16)
        ctx.emit(str_(remainder, result))
        ctx.emit(lshl_(result, imm(divisor.size, 8), result))
        ctx.emit(or_(quotient, result, result))
    else:
        # quotient goes in *ax, remainder goes in *dx
        ctx.emit(str_(quotient, ctx.accumulator))
        ctx.emit(str_(remainder, ctx.data))

    ctx.emit(undef_(r('cf', 8)))
    ctx.emit(undef_(r('of', 8)))
    ctx.emit(undef_(r('sf', 8)))
    ctx.emit(undef_(r('zf', 8)))
    ctx.emit(undef_(r('af', 8)))
    ctx.emit(undef_(r('pf', 8)))

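# Illustrative sketch only: how the 8-bit case above packs the results back into
# AX (AH = remainder, AL = quotient). _pack_ax_reference is a hypothetical name,
# and Python's divmod (which rounds toward negative infinity, unlike IDIV) is
# used purely as a stand-in for the REIL sdiv_/mod_ pair on non-negative inputs.
def _pack_ax_reference(dividend, divisor):
    quotient, remainder = divmod(dividend, divisor)
    return ((remainder & 0xff) << 8) | (quotient & 0xff)

# e.g. _pack_ax_reference(100, 7) == (2 << 8) | 14
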
def x86_loop(ctx, i):
    c = ctx.tmp(8)
    dst = operand.get(ctx, i, 0)

    # decrement the counter, then branch if it has not reached zero
    ctx.emit(sub_(ctx.counter, imm(1, ctx.counter.size), ctx.counter))
    ctx.emit(bisnz_(ctx.counter, c))
    ctx.emit(jcc_(c, dst))

def x86_ror(ctx, i):
    a = operand.get(ctx, i, 0)
    b = operand.get(ctx, i, 1)

    max_shift = a.size - 1
    size = a.size

    tmp0 = ctx.tmp(size)
    tmp1 = ctx.tmp(size * 2)
    tmp2 = ctx.tmp(size * 2)
    tmp3 = ctx.tmp(size * 2)
    tmp4 = ctx.tmp(size)
    tmp5 = ctx.tmp(size * 2)
    tmp6 = ctx.tmp(size)
    result = ctx.tmp(size)

    # the rotate amount is truncated at word_size - 1
    ctx.emit(and_(b, imm(max_shift, size), tmp0))

    # zero extend
    ctx.emit(str_(a, tmp1))

    # left shift all the way
    ctx.emit(lshl_(tmp1, imm(max_shift + 1, size * 2), tmp2))

    # right shift by the correct amount
    ctx.emit(lshr_(tmp2, tmp0, tmp3))

    # truncate to get first half of result
    ctx.emit(str_(tmp3, tmp4))

    # shift out then truncate to get second half of result
    ctx.emit(lshr_(tmp3, imm(max_shift + 1, size * 2), tmp5))
    ctx.emit(str_(tmp5, tmp6))

    # or both halves of the result
    ctx.emit(or_(tmp4, tmp6, result))

    # TODO: compute carry flag

    if isinstance(b, reil.ImmediateOperand) and b.value == 1:
        # TODO: compute overflow flag
        pass
    else:
        ctx.emit(undef_(r('of', 8)))

    operand.set(ctx, i, 0, result)
