def numeric_emulate(self, vy, vx):
    """Numerically emulate atan2(vy, vx) using sollya.

    The quadrant is selected from the signs of vx and vy, then the
    arc-tangent of the ratio vy / vx is shifted by -pi, 0 or +pi.

    Args:
        vy: ordinate of the point (numerator of the ratio vy / vx).
        vx: abscissa of the point (denominator of the ratio vy / vx).

    Returns:
        The angle of the point (vx, vy): atan(vy / vx) for vx > 0,
        atan(vy / vx) +/- pi for vx < 0, +/- pi/2 on the vertical axis
        and 0 at the origin.
    """
    if vx == 0:
        # vy / vx is undefined on the vertical axis: the angle only depends
        # on the sign of vy (and is 0 by convention at the origin)
        if vy > 0:
            return sollya.pi / 2
        if vy < 0:
            return -sollya.pi / 2
        return sollya.SollyaObject(0)
    if vx > 0:
        # right half-plane: no offset required
        return sollya.atan(vy / vx)
    if vy < 0:
        # third quadrant: vx < 0, vy < 0, vy / vx > 0
        return -sollya.pi + sollya.atan(vy / vx)
    # second quadrant or negative x axis: vx < 0, vy >= 0, vy / vx <= 0
    return sollya.pi + sollya.atan(vy / vx)
def atan2_emulate(vy, vx):
    """Numerically emulate atan2(vy, vx) using sollya.

    The quadrant is selected from the signs of vx and vy, then the
    arc-tangent of the ratio vy / vx is shifted by -pi, 0 or +pi.

    Args:
        vy: ordinate of the point (numerator of the ratio vy / vx).
        vx: abscissa of the point (denominator of the ratio vy / vx).

    Returns:
        The angle of the point (vx, vy): atan(vy / vx) for vx > 0,
        atan(vy / vx) +/- pi for vx < 0, +/- pi/2 on the vertical axis
        and 0 at the origin.
    """
    if vx == 0:
        # vy / vx is undefined on the vertical axis: the angle only depends
        # on the sign of vy (and is 0 by convention at the origin)
        if vy > 0:
            return sollya.pi / 2
        if vy < 0:
            return -sollya.pi / 2
        return sollya.SollyaObject(0)
    if vx > 0:
        # right half-plane: no offset required
        return sollya.atan(vy / vx)
    if vy < 0:
        # third quadrant: vx < 0, vy < 0, vy / vx > 0
        return -sollya.pi + sollya.atan(vy / vx)
    # second quadrant or negative x axis: vx < 0, vy >= 0, vy / vx <= 0
    return sollya.pi + sollya.atan(vy / vx)
# sollya constant 2, used as the base of power-of-two expressions
S2 = sollya.SollyaObject(2)

# dict of (str) -> tuple(ctor, dict, callable)
# - the key is the function name
# - the first item of the value tuple is the meta-function constructor
# - the second item is a dict of extra arguments attached to that constructor
#   (e.g. the logarithm basis); presumably forwarded as constructor keyword
#   arguments -- confirm against the code consuming this map
# - the third item is a callable evaluating the function numerically
#   (sollya / bigfloat based)
FUNCTION_MAP = {
    "exp": (metalibm_functions.ml_exp.ML_Exponential, {}, sollya.exp),
    "tanh": (metalibm_functions.ml_tanh.ML_HyperbolicTangent, {}, sollya.tanh),
    "sqrt": (metalibm_functions.ml_sqrt.MetalibmSqrt, {}, sollya.sqrt),
    "log": (metalibm_functions.generic_log.ML_GenericLog, {"basis": sollya.exp(1)}, sollya.log),
    "log2": (metalibm_functions.generic_log.ML_GenericLog, {"basis": 2}, sollya.log2),
    "log10": (metalibm_functions.generic_log.ML_GenericLog, {"basis": 10}, sollya.log10),
    "exp2": (metalibm_functions.ml_exp2.ML_Exp2, {}, (lambda x: S2**x)),
    "div": (metalibm_functions.ml_div.ML_Division, {}, (lambda x, y: x / y)),
    "cbrt": (metalibm_functions.ml_cbrt.ML_Cbrt, {}, cbrt),
    "cosh": (metalibm_functions.ml_cosh.ML_HyperbolicCosine, {}, sollya.cosh),
    "sinh": (metalibm_functions.ml_sinh.ML_HyperbolicSine, {}, sollya.sinh),
    "cos": (metalibm_functions.ml_sincos.ML_SinCos, {"sin_output": False}, sollya.cos),
    "sin": (metalibm_functions.ml_sincos.ML_SinCos, {"sin_output": True}, sollya.sin),
    "atan": (metalibm_functions.ml_atan.MetaAtan, {}, sollya.atan),
    # atan(y / x) alone loses the quadrant (wrong whenever x < 0): use the
    # quadrant-aware emulation instead
    "atan2": (metalibm_functions.ml_atan.MetaAtan2, {}, atan2_emulate),
    "erf": (metalibm_functions.erf.ML_Erf, {}, sollya.erf),
    "fmod": (metalibm_functions.fmod.MetaFMOD, {}, bigfloat.fmod),
}
def numeric_emulate(self, input_value):
    """Return the arc-tangent of input_value, evaluated with sollya."""
    emulated_value = sollya.atan(input_value)
    return emulated_value
def generic_atan2_generate(self, _vx, vy=None):
    """ Build the evaluation scheme for atan or atan2.

        if vy is None, compute atan(_vx), else compute atan2(vy, _vx)

        The approximation strategy is selected by self.method:
        - "piecewise": piecewise polynomial approximation of atan over
          [0, 1], evaluated on the reduced argument red_vx
        - "single": a single odd polynomial fitted over [0, 1]

        Args:
            _vx: input node (the only argument of atan, or the second
                argument of atan2)
            vy: optional first argument of atan2; None selects plain atan

        Returns:
            a Statement wrapping Return(result)

        Raises:
            NotImplementedError: if self.method is neither "piecewise"
                nor "single"
    """

    if vy is None:
        # approximation
        # if abs_vx <= 1.0 then atan(abs_vx) is directly approximated
        # if abs_vx > 1.0 then atan(abs_vx) = pi/2 - atan(1 / abs_vx)
        #
        # for vx >= 0, atan(vx) = atan(abs_vx)
        #
        # for vx < 0, atan(vx) = -atan(abs_vx)
        #                      = -pi/2 + atan(1 / abs_vx) when abs_vx > 1
        vx = _vx
        sign_cond = vx < 0
        abs_vx = Select(vx < 0, -vx, vx, tag="abs_vx", debug=debug_multi)
        # bound_cond is True when the argument must be inverted to fall
        # back into [0, 1]
        bound_cond = abs_vx > 1
        inv_abs_vx = 1 / abs_vx

        # condition to select subtraction
        # (True when the approximation must be negated: -1 <= vx < 0, or vx > 1)
        cond = LogicalOr(
            LogicalAnd(vx < 0, LogicalNot(bound_cond)),
            vx > 1,
            tag="cond", debug=debug_multi)

        # reduced argument
        red_vx = Select(bound_cond, inv_abs_vx, abs_vx, tag="red_vx", debug=debug_multi)

        # plain atan needs no quadrant correction
        offset = None
    else:
        # bound_cond is True iff Abs(vy / _vx) > 1.0
        bound_cond = Abs(vy) > Abs(_vx)
        bound_cond.set_attributes(tag="bound_cond", debug=debug_multi)
        # sign_cond is True when _vx and vy are of opposite signs
        # (reference definition: sign_cond = (_vx * vy) < 0)
        # using cast to int(signed) and bitwise xor
        # to determine if _vx and vy are of opposite sign rapidly
        fast_sign_cond = BitLogicXor(
            TypeCast(_vx, precision=self.precision.get_integer_format()),
            TypeCast(vy, precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format()) < 0
        # the xor-based test replaces the slower (_vx * vy) < 0
        sign_cond = fast_sign_cond
        sign_cond.set_attributes(tag="sign_cond", debug=debug_multi)

        # condition to select subtraction
        # TODO: could be accelerated if LogicalXor existed
        slow_cond = LogicalOr(
            LogicalAnd(sign_cond, LogicalNot(bound_cond)), # -1 <= (vy / _vx) < 0
            LogicalAnd(bound_cond, LogicalNot(sign_cond)), # (vy / _vx) > 1
            tag="cond", debug=debug_multi)
        cond = slow_cond

        # the operand with the smallest magnitude goes to the numerator so
        # that the reduced argument stays within [0, 1]
        numerator = Select(bound_cond, _vx, vy, tag="numerator", debug=debug_multi)
        denominator = Select(bound_cond, vy, _vx, tag="denominator", debug=debug_multi)
        # reduced argument
        red_vx = Abs(numerator) / Abs(denominator)
        red_vx.set_attributes(tag="red_vx", debug=debug_multi)

        # quadrant correction added to the final result:
        # 0 when _vx > 0, else +pi or -pi depending on sign_cond
        offset = Select(
            _vx > 0,
            Constant(0, precision=self.precision),
            # vx < 0
            Select(
                sign_cond,
                # vy > 0
                Constant(sollya.pi, precision=self.precision),
                Constant(-sollya.pi, precision=self.precision),
                precision=self.precision),
            precision=self.precision,
            tag="offset")

    # function to be approximated
    approx_fct = sollya.atan(sollya.x)

    if self.method == "piecewise":
        # sign applied to the approximation of atan(red_vx)
        sign_vx = Select(cond, -1, 1, precision=self.precision, tag="sign_vx", debug=debug_multi)

        # constant term: +/- pi/2 when the argument was inverted, else 0
        cst_sign = Select(sign_cond, -1, 1, precision=self.precision, tag="cst_sign", debug=debug_multi)
        cst = cst_sign * Select(
            bound_cond, sollya.pi / 2, 0, precision=self.precision)
        cst.set_attributes(tag="cst", debug=debug_multi)

        # the reduced argument lies in [0, 1]
        bound_low = 0.0
        bound_high = 1.0
        num_intervals = self.num_sub_intervals
        error_threshold = S2**-(self.precision.get_mantissa_size() + 8)

        approx, eval_error = piecewise_approximation(
            approx_fct, red_vx, self.precision,
            bound_low=bound_low, bound_high=bound_high,
            max_degree=None, num_intervals=num_intervals,
            error_threshold=error_threshold, odd=True)

        result = cst + sign_vx * approx
        result.set_attributes(tag="result", precision=self.precision, debug=debug_multi)

    elif self.method == "single":
        # NOTE(review): this branch reads abs_vx and vx, which are only bound
        # in the `vy is None` branch above; it looks unusable for atan2
        # (vy is not None) -- confirm
        # NOTE(review): the polynomial is fitted over [0, 1] but evaluated at
        # abs_vx rather than at the reduced argument red_vx -- confirm this is
        # intended
        approx_interval = Interval(0, 1.0)
        # determining the degree of the polynomial approximation
        poly_degree_range = sollya.guessdegree(
            approx_fct / sollya.x, approx_interval,
            S2**-(self.precision.get_field_size() + 2))
        poly_degree = int(sollya.sup(poly_degree_range)) + 4
        Log.report(Log.Info, "poly_degree={}".format(poly_degree))

        # arctan is an odd function, so only odd coefficient must be non-zero
        poly_degree_list = list(range(1, poly_degree + 1, 2))
        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            approx_fct, poly_degree_list,
            [1] + [self.precision.get_sollya_object()] * (len(poly_degree_list) - 1),
            approx_interval)

        # split the monomials (all of odd degree) into two sub-polynomials:
        # odd_predicate keeps indexes 3, 7, 11, ...
        # even_predicate keeps indexes 5, 9, 13, ... (index 1 is excluded)
        odd_predicate = lambda index, _: ((index - 1) % 4 != 0)
        even_predicate = lambda index, _: (index != 1 and (index - 1) % 4 == 0)

        poly_odd_object = poly_object.sub_poly_cond(odd_predicate, offset=1)
        poly_even_object = poly_object.sub_poly_cond(even_predicate, offset=1)

        # side effect: switches sollya's global display mode to hexadecimal
        sollya.settings.display = sollya.hexadecimal
        Log.report(Log.Info, "poly_error: {}".format(poly_error))
        Log.report(Log.Info, "poly_odd: {}".format(poly_odd_object))
        Log.report(Log.Info, "poly_even: {}".format(poly_even_object))

        poly_odd = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_odd_object, abs_vx)
        poly_odd.set_attributes(tag="poly_odd", debug=debug_multi)
        poly_even = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_even_object, abs_vx)
        poly_even.set_attributes(tag="poly_even", debug=debug_multi)

        exact_sum = poly_odd + poly_even
        exact_sum.set_attributes(tag="exact_sum", debug=debug_multi)

        # poly_even should be (1 + poly_even)
        # the index-1 monomial excluded above is re-introduced by the
        # leading vx term
        result = vx + vx * exact_sum
        result.set_attributes(tag="result", precision=self.precision, debug=debug_multi)

    else:
        raise NotImplementedError

    # atan2 only: apply the quadrant correction
    if not offset is None:
        result = result + offset

    std_scheme = Statement(Return(result))
    scheme = std_scheme

    return scheme
def generate_scheme(self):
    """Build the evaluation scheme of atan(x).

    The absolute value of the input is range-reduced against the anchors
    0.5, 1.0, 1.5 and +infinity (thresholds 7/16, 11/16, 19/16, 39/16), a
    polynomial is evaluated on the reduced argument, and the result is
    recombined with a (hi, lo) table entry holding atan(anchor).
    Inputs whose magnitude is at least 2**24 (binary32) or 2**53
    (otherwise) directly return +/- pi/2. NaN and infinite inputs are
    handled by a dedicated branch.

    Returns:
        the ConditionBlock selecting between the special-value branch and
        the standard evaluation scheme.
    """
    def compute_reciprocal(value):
        """Approximate 1 / value: reciprocal seed refined by four
        Newton-Raphson iterations."""
        estimate = ReciprocalSeed(value, precision=self.precision, tag="inv_seed", debug=debug_multi)
        for _ in range(4):
            estimate = 2*estimate - value*estimate*estimate
        return estimate

    vx = self.implementation.add_input_variable("x", self.get_input_precision())

    sollya_precision = self.precision.get_sollya_object()
    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64,
    }[self.precision]

    # precision of the high part of the tabulated constants
    hi_precision = self.precision.get_field_size() - 12

    half_pi = round(pi/2, sollya_precision, sollya.RN)
    half_pi_cst = Constant(half_pi, precision=self.precision)

    test_sign = Comparison(vx, 0, specifier=Comparison.Less, precision=ML_Bool, debug=debug_multi, tag="Is_Negative")
    neg_vx = -vx

    # local variables of the generated scheme
    sign = Variable("sign", precision=self.precision, var_type=Variable.Local)
    abs_vx_std = Variable("abs_vx", precision=self.precision, var_type=Variable.Local)
    red_vx_std = Variable("red_vx", precision=self.precision, var_type=Variable.Local)
    const_index_std = Variable("const_index", precision=int_precision, var_type=Variable.Local)

    # abs_vx <- |x| and sign <- -1 / +1
    negative_case = Statement(ReferenceAssign(abs_vx_std, neg_vx), ReferenceAssign(sign, -1))
    positive_case = Statement(ReferenceAssign(abs_vx_std, vx), ReferenceAssign(sign, 1))
    set_sign = Statement(ConditionBlock(test_sign, negative_case, positive_case))

    bound = 24 if self.precision is ML_Binary32 else 53

    def at_least(threshold):
        """Build the test abs_vx >= threshold."""
        return Comparison(abs_vx_std, threshold, specifier=Comparison.GreaterOrEqual, precision=ML_Bool)

    test_bound = at_least(S2**bound)
    test_bound1 = at_least(39.0/16.0)
    test_bound2 = at_least(19.0/16.0)
    test_bound3 = at_least(11.0/16.0)
    test_bound4 = at_least(7.0/16.0)

    def select_reduction(reduced_arg, table_row):
        """Assign the reduced argument and the matching constant-table row."""
        return Statement(
            ReferenceAssign(red_vx_std, reduced_arg),
            ReferenceAssign(const_index_std, table_row))

    # very large input: the result saturates to +/- pi/2
    set_bound = Return(sign*half_pi_cst)
    # anchor +infinity: atan(x) = pi/2 + atan(-1/x)
    set_bound1 = select_reduction(-compute_reciprocal(abs_vx_std), 3)
    # anchor 1.5
    set_bound2 = select_reduction((abs_vx_std - 1.5)*compute_reciprocal(1 + 1.5*abs_vx_std), 2)
    # anchor 1.0
    set_bound3 = select_reduction((abs_vx_std - 1.0)*compute_reciprocal(abs_vx_std + 1.0), 1)
    # anchor 0.5
    set_bound4 = select_reduction((abs_vx_std - 0.5)*compute_reciprocal(1 + abs_vx_std*0.5), 0)
    # small input: no reduction, null constant (row 4)
    set_bound5 = select_reduction(abs_vx_std, 4)

    cons_table = ML_NewTable(dimensions=[5, 2], storage_precision=self.precision, tag=self.uniquify_name("cons_table"))
    coeff_table = ML_NewTable(dimensions=[11], storage_precision=self.precision, tag=self.uniquify_name("coeff_table"))

    # rows 0..3: (hi, lo) split of atan(0.5), atan(1.0), atan(1.5) and pi/2
    anchor_values = (atan(0.5), atan(1.0), atan(1.5), pi/2)
    for row, anchor_value in enumerate(anchor_values):
        anchor_hi = round(anchor_value, hi_precision, sollya.RN)
        cons_table[row][0] = anchor_hi
        cons_table[row][1] = round(anchor_value - anchor_hi, sollya_precision, sollya.RN)
    # row 4: null constant used when no reduction is applied
    cons_table[4][0] = 0.0
    cons_table[4][1] = 0.0

    # polynomial coefficients (NOTE(review): they look like the fdlibm atan
    # coefficients -- confirm the origin)
    raw_coefficients = (
        3.33333333333329318027e-01,
        -1.99999999998764832476e-01,
        1.42857142725034663711e-01,
        -1.11111104054623557880e-01,
        9.09088713343650656196e-02,
        -7.69187620504482999495e-02,
        6.66107313738753120669e-02,
        -5.83357013379057348645e-02,
        4.97687799461593236017e-02,
        -3.65315727442169155270e-02,
        1.62858201153657823623e-02,
    )
    for index, raw_coefficient in enumerate(raw_coefficients):
        coeff_table[index] = round(raw_coefficient, sollya_precision, sollya.RN)

    red_vx2 = red_vx_std*red_vx_std
    red_vx4 = red_vx2*red_vx2

    coeffs = [
        TableLoad(coeff_table, index, precision=self.precision)
        for index in range(11)
    ]

    def horner_in_red_vx4(terms):
        """Horner evaluation in red_vx4, terms given by increasing degree."""
        accumulator = terms[-1]
        for term in reversed(terms[:-1]):
            accumulator = term + red_vx4*accumulator
        return accumulator

    # even-indexed and odd-indexed coefficients are evaluated separately
    poly_even = red_vx2*horner_in_red_vx4(coeffs[0::2])
    poly_odd = red_vx4*horner_in_red_vx4(coeffs[1::2])

    poly_even.set_attributes(tag="poly_even", debug=debug_multi)
    poly_odd.set_attributes(tag="poly_odd", debug=debug_multi)

    const_load_hi = TableLoad(cons_table, const_index_std, 0, tag="const_load_hi", debug=debug_multi)
    const_load_lo = TableLoad(cons_table, const_index_std, 1, tag="const_load_lo", debug=debug_multi)

    # special-value tests
    test_NaN_or_inf = Test(vx, specifier=Test.IsInfOrNaN, tag="nan_or_inf", likely=False)
    test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi, tag="is_nan_test", likely=False)
    test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=debug_multi, tag="inf_sign", likely=False)

    # the operation order below is significant: do not re-associate
    result = const_load_hi - ((red_vx_std*(poly_even + poly_odd) - const_load_lo) - red_vx_std)
    result.set_attributes(tag="result", debug=debug_multi)

    # cascade of range tests, built from the innermost alternative outwards
    cascade = (
        (test_bound, set_bound),
        (test_bound1, set_bound1),
        (test_bound2, set_bound2),
        (test_bound3, set_bound3),
        (test_bound4, set_bound4),
    )
    reduction = set_bound5
    for range_test, range_action in reversed(cascade):
        reduction = ConditionBlock(range_test, range_action, reduction)

    std_scheme = Statement(
        sign,
        abs_vx_std,
        red_vx_std,
        const_index_std,
        set_sign,
        reduction,
        Return(sign*result)
    )

    # atan(+inf) = pi/2, atan(-inf) = -pi/2, atan(NaN) = NaN
    infty_return = ConditionBlock(test_positive, Return(half_pi_cst), Return(-half_pi_cst))
    non_std_return = ConditionBlock(test_nan, Return(FP_QNaN(self.precision)), infty_return)
    scheme = ConditionBlock(test_NaN_or_inf, Statement(ClearException(), non_std_return), std_scheme)

    return scheme