def test_vselect_icmpimm(self):
    # type: () -> None
    """Check concrete types after cleanup of an iconst/icmp/vselect Rtl.

    Builds the pattern ``zeroes = iconst; y = icmp eq x, zeroes;
    v = vselect y, z, w``, copies it, and computes the substitution ``s``
    from the original onto the copy. The variables that ``s`` associates
    with ``zeroes`` and ``z`` are pinned to i32x4 and f32x4 respectively,
    ``cleanup_concrete_rtl()`` is run on the copy, and the singleton type
    of every mapped variable is then asserted.
    """
    x = Var('x')
    y = Var('y')
    z = Var('z')
    w = Var('w')
    v = Var('v')
    zeroes = Var('zeroes')
    imm0 = Var("imm0")

    r = Rtl(
        zeroes << iconst(imm0),
        y << icmp(intcc.eq, x, zeroes),
        v << vselect(y, z, w),
    )
    r1 = r.copy({})

    s = r.substitution(r1, {})
    # Check the substitution before indexing it: placed after the lookups
    # below, this assertion could never fire (a None result would already
    # have raised on the first subscript).
    assert s is not None

    s[zeroes].set_typevar(TypeVar.singleton(i32.by(4)))
    s[z].set_typevar(TypeVar.singleton(f32.by(4)))

    r1.cleanup_concrete_rtl()

    # Integer side: the constant and the compared value are both i32x4,
    # and the comparison result is the matching boolean vector.
    assert s[zeroes].get_typevar().singleton_type() == i32.by(4)
    assert s[x].get_typevar().singleton_type() == i32.by(4)
    assert s[y].get_typevar().singleton_type() == b32.by(4)
    # Selected side: both vselect inputs and its result are f32x4.
    assert s[z].get_typevar().singleton_type() == f32.by(4)
    assert s[w].get_typevar().singleton_type() == f32.by(4)
    assert s[v].get_typevar().singleton_type() == f32.by(4)
def test_enumeration_with_constraints(self):
    # type: () -> None
    """Enumerate concrete typings of an iconst/icmp/vselect XForm.

    The XForm uses the same three-instruction pattern as both source and
    destination. The test requires that type inference produced at least
    one constraint, that every enumerated concrete typing satisfies the
    expected relations between v0..v5 and passes
    ``check_concrete_typing_xform``, and that exactly 80 typings exist.
    """
    xform = XForm(
        Rtl(
            self.v0 << iconst(self.imm0),
            self.v1 << icmp(intcc.eq, self.v2, self.v0),
            self.v5 << vselect(self.v1, self.v3, self.v4)
        ),
        Rtl(
            self.v0 << iconst(self.imm0),
            self.v1 << icmp(intcc.eq, self.v2, self.v0),
            self.v5 << vselect(self.v1, self.v3, self.v4)
        ))

    # Check all var assigns are correct
    assert len(xform.ti.constraints) > 0
    typings = list(xform.ti.concrete_typings())

    # Look up the XForm's own entries for the test variables by name.
    v0, v1, v2, v3, v4, v5 = (
        xform.symtab[str(var)]
        for var in (self.v0, self.v1, self.v2, self.v3, self.v4, self.v5))

    for typing in typings:
        assert typing[v0] == typing[v2]
        assert typing[v3] == typing[v4]
        assert typing[v5] == typing[v3]
        assert typing[v1] == typing[v2].as_bool()
        assert typing[v1].get_typeset() == \
            typing[v3].as_bool().get_typeset()
        check_concrete_typing_xform(typing, xform)

    # The number of possible typings here is:
    # 8 cases for v0 = i8xN times 2 options for v3 - i8, b8 = 16
    # 8 cases for v0 = i16xN times 2 options for v3 - i16, b16 = 16
    # 8 cases for v0 = i32xN times 3 options for v3 - i32, b32, f32 = 24
    # 8 cases for v0 = i64xN times 3 options for v3 - i64, b64, f64 = 24
    #
    # (Note we have 8 cases for lanes since vselect prevents scalars)
    # Total: 2*16 + 2*24 = 80
    assert len(typings) == 80
def test_vselect_imm(self):
    # type: () -> None
    """Check the expected type checks for an iconst/icmp/vselect XForm.

    The XForm uses one Rtl as both source and destination. It is handed
    to ``check_yo_check`` together with the expected sequence: a typeset
    check on v3, followed by an equivalence check between the ``as_bool``
    expressions built from the type variable names of v2 and v3.
    """
    expected_ts = TypeSet(
        lanes=(2, 256), ints=True, floats=True, bools=(8, 64))
    pattern = Rtl(
        self.v0 << iconst(self.imm0),
        self.v1 << icmp(intcc.eq, self.v2, self.v0),
        self.v5 << vselect(self.v1, self.v3, self.v4),
    )
    xform = XForm(pattern, pattern)

    # Type variable names are read only after the XForm has been built.
    as_bool_fmt = 'Some({}).map(|t: Type| -> t.as_bool())'
    v2_bool = as_bool_fmt.format(self.v2.get_typevar().name)
    v3_bool = as_bool_fmt.format(self.v3.get_typevar().name)

    expected = sequence(
        typeset_check(self.v3, expected_ts),
        equiv_check(v2_bool, v3_bool))
    self.check_yo_check(xform, expected)
def test_vselect_imm(self):
    # type: () -> None
    """Check the expected type checks for an iconst/icmp/vselect XForm.

    The XForm uses one Rtl as both source and destination. It is handed
    to ``check_yo_check`` together with the expected sequence: a typeset
    check on v3, followed by an equivalence check between the ``as_bool``
    expressions built from the type variable names of v2 and v3.
    """
    expected_ts = TypeSet(
        lanes=(2, 256), ints=True, floats=True, bools=(8, 64))
    pattern = Rtl(
        self.v0 << iconst(self.imm0),
        self.v1 << icmp(intcc.eq, self.v2, self.v0),
        self.v5 << vselect(self.v1, self.v3, self.v4),
    )
    xform = XForm(pattern, pattern)

    # Type variable names are read only after the XForm has been built.
    as_bool_fmt = 'Some({}).map(|t: crate::ir::Type| t.as_bool())'
    v2_bool = as_bool_fmt.format(self.v2.get_typevar().name)
    v3_bool = as_bool_fmt.format(self.v3.get_typevar().name)

    expected = sequence(
        typeset_check(self.v3, expected_ts),
        equiv_check(v2_bool, v3_bool))
    self.check_yo_check(xform, expected)
def test_vselect_icmpimm(self):
    # type: () -> None
    """Check the typing inferred by ``ti_rtl`` for iconst/icmp/vselect.

    Expected result, as passed to ``check_typing``: v0 and v2 share a
    fresh copy of ``IxN_nonscalar``, v1 is that variable's ``as_bool()``,
    v3/v4/v5 share a fresh copy of ``TxN``, and a single ``TypesEqual``
    constraint relates the ``as_bool()`` forms of the two variables.
    """
    pattern = Rtl(
        self.v0 << iconst(self.imm0),
        self.v1 << icmp(intcc.eq, self.v2, self.v0),
        self.v5 << vselect(self.v1, self.v3, self.v4),
    )
    inferred = ti_rtl(pattern, TypeEnv())

    # Fresh copies are taken after inference, in this order.
    int_tv = self.IxN_nonscalar.get_fresh_copy("IxN1")
    sel_tv = self.TxN.get_fresh_copy("TxN1")

    check_typing(
        inferred,
        (
            {
                self.v0: int_tv,
                self.v1: int_tv.as_bool(),
                self.v2: int_tv,
                self.v3: sel_tv,
                self.v4: sel_tv,
                self.v5: sel_tv,
            },
            [TypesEqual(int_tv.as_bool(), sel_tv.as_bool())],
        ))
chain=shared.expand) a = Var('a') dead = Var('dead') x = Var('x') xhi = Var('xhi') y = Var('y') a1 = Var('a1') a2 = Var('a2') # # Division and remainder. # intel_expand.legalize( a << insts.udiv(x, y), Rtl(xhi << insts.iconst(imm64(0)), (a, dead) << x86.udivmodx(x, xhi, y))) intel_expand.legalize( a << insts.urem(x, y), Rtl(xhi << insts.iconst(imm64(0)), (dead, a) << x86.udivmodx(x, xhi, y))) for ty in [i32, i64]: intel_expand.legalize( a << insts.sdiv.bind(ty)(x, y), Rtl(xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)), (a, dead) << x86.sdivmodx(x, xhi, y))) # The srem expansion requires custom code because srem INT_MIN, -1 is not # allowed to trap. intel_expand.custom_legalize(insts.srem, 'expand_srem')
a = Var('a') dead = Var('dead') x = Var('x') xhi = Var('xhi') y = Var('y') a1 = Var('a1') a2 = Var('a2') # # Division and remainder. # intel_expand.legalize( a << insts.udiv(x, y), Rtl( xhi << insts.iconst(imm64(0)), (a, dead) << x86.udivmodx(x, xhi, y) )) intel_expand.legalize( a << insts.urem(x, y), Rtl( xhi << insts.iconst(imm64(0)), (dead, a) << x86.udivmodx(x, xhi, y) )) for ty in [i32, i64]: intel_expand.legalize( a << insts.sdiv.bind(ty)(x, y), Rtl( xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
def test_subst_imm(self):
    """Substituting one iconst Rtl onto another maps result and operand.

    With ``a << iconst(x)`` as the pattern and ``c << iconst(y)`` as the
    target, the substitution is expected to equal ``{a: c, x: y}``.
    """
    pattern = Rtl(a << iconst(x))
    target = Rtl(c << iconst(y))

    mapping = pattern.substitution(target, {})

    assert mapping == {a: c, x: y}
def test_macro_pattern(self):
    """Construct an XForm from ``iadd_imm`` to ``iconst`` plus ``iadd``.

    There are no explicit assertions: the test only requires that
    building the XForm completes without raising.
    """
    XForm(
        # Source: a single iadd_imm.
        Rtl(a << iadd_imm(x, y)),
        # Destination: bind y to c with iconst, then add x and c.
        Rtl(
            c << iconst(y),
            a << iadd(x, c),
        ))
x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')

# Count leading and trailing zeroes, for baseline x86_64
(c_minus_one, c_thirty_one, c_thirty_two, c_sixty_three, c_sixty_four,
 index1, r2flags, index2) = map(
    Var,
    ('c_minus_one', 'c_thirty_one', 'c_thirty_two', 'c_sixty_three',
     'c_sixty_four', 'index1', 'r2flags', 'index2'))

# clz.i64: x86.bsr yields an index and flags; selectif (intcc.eq on the
# flags) picks -1 or that index, and the result is 63 minus the selection.
# NOTE(review): presumably the flags signal a zero input to bsr - confirm
# against the x86.bsr definition.
x86_expand.legalize(
    a << insts.clz.i64(x),
    Rtl(
        c_minus_one << insts.iconst(imm64(-1)),
        c_sixty_three << insts.iconst(imm64(63)),
        (index1, r2flags) << x86.bsr(x),
        index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
        a << insts.isub(c_sixty_three, index2),
    ))

# clz.i32: same shape as the 64-bit pattern, subtracting from 31 instead.
x86_expand.legalize(
    a << insts.clz.i32(x),
    Rtl(
        c_minus_one << insts.iconst(imm64(-1)),
        c_thirty_one << insts.iconst(imm64(31)),
        (index1, r2flags) << x86.bsr(x),
        index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
        a << insts.isub(c_thirty_one, index2),
    ))