def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name if self.libm_compliant: return RaiseReturn(*args, precision=self.precision, **kwords) else: return Return(kwords["return_value"], precision=self.precision) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=debug_multi, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=debug_multi, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) # return in case of infinity input infty_return = Statement( ConditionBlock( test_positive, Return(FP_PlusInfty(self.precision), precision=self.precision), Return(FP_PlusZero(self.precision), precision=self.precision))) # return in case of specific value input (NaN or inf) specific_return = ConditionBlock( test_nan, ConditionBlock( test_signaling_nan, return_snan, Return(FP_QNaN(self.precision), precision=self.precision)), infty_return) # return in case of standard (non-special) input # exclusion of early overflow and underflow cases precision_emax = self.precision.get_emax() precision_max_value = S2 * S2**precision_emax exp_overflow_bound = sollya.ceil(log(precision_max_value)) early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False, specifier=Comparison.Greater) early_overflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2**precision_emin exp_underflow_bound = floor(log(precision_min_value)) early_underflow_test = Comparison(vx, exp_underflow_bound, likely=False, specifier=Comparison.Less) early_underflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value=FP_PlusZero(self.precision))) # constant computation invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN) interval_vx = Interval(exp_underflow_bound, exp_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), sollya.ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - ( sollya.ceil(log2(sup(abs(interval_k)))) + 2) Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision) invlog2_cst = Constant(invlog2, precision=self.precision) log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = self.precision.round_sollya_object( log(2) - log2_hi, sollya.RN) # argument reduction unround_k = vx * invlog2 unround_k.set_attributes(tag="unround_k", debug=debug_multi) k = NearestInteger(unround_k, precision=self.precision, debug=debug_multi) ik = NearestInteger(unround_k, precision=self.precision.get_integer_format(), debug=debug_multi, tag="ik") ik.set_tag("ik") k.set_tag("k") exact_pre_mul = (k * log2_hi) exact_pre_mul.set_attributes(exact=True) exact_hi_part = vx - exact_pre_mul exact_hi_part.set_attributes(exact=True, tag="exact_hi", debug=debug_multi, prevent_optimization=True) exact_lo_part = -k * log2_lo exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi, prevent_optimization=True) r = exact_hi_part + exact_lo_part r.set_tag("r") r.set_attributes(debug=debug_multi) approx_interval = Interval(-log(2) / 2, log(2) / 2) approx_interval_half = approx_interval / 2 approx_interval_split = [ Interval(-log(2) / 2, inf(approx_interval_half)), approx_interval_half, Interval(sup(approx_interval_half), log(2) / 2) ] # TODO: should be computed automatically exact_hi_interval = approx_interval exact_lo_interval = -interval_k * log2_lo opt_r = self.optimise_scheme(r, copy={}) tag_map = {} self.opt_engine.register_nodes_by_tag(opt_r, tag_map) cg_eval_error_copy_map = { vx: Variable("x", precision=self.precision, interval=interval_vx), tag_map["k"]: Variable("k", interval=interval_k, precision=self.precision) } #try: if is_gappa_installed(): eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_r, cg_eval_error_copy_map, gappa_filename="red_arg.g") else: eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "eval error: %s" % eval_error) local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision)) # FIXME refactor error_goal from accuracy Log.report(Log.Info, "accuracy: %s" % self.accuracy) if isinstance(self.accuracy, ML_Faithful): error_goal = local_ulp elif isinstance(self.accuracy, ML_CorrectlyRounded): error_goal = S2**-1 * local_ulp elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute): error_goal = self.accuracy.goal elif isinstance(self.accuracy, ML_DegradedAccuracyRelative): error_goal = self.accuracy.goal else: Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy) # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1) error_goal_approx = S2**-1 * error_goal Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n") poly_degree = max( sup( guessdegree( expm1(sollya.x) / sollya.x, approx_interval, error_goal_approx)) - 1, 2) init_poly_degree = poly_degree error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme while 1: Log.report(Log.Info, "attempting poly degree: %d" % poly_degree) precision_list = [1] + [self.precision] * (poly_degree) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error( expm1(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function=error_function) Log.report(Log.Info, "polynomial: %s " % poly_object) sub_poly = poly_object.sub_poly(start_index=2) Log.report(Log.Info, "polynomial: %s " % sub_poly) Log.report(Log.Info, "poly approx error: %s" % poly_approx_error) Log.report( Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m") pre_poly = polynomial_scheme_builder( poly_object, r, unified_precision=self.precision) pre_poly.set_attributes(tag="pre_poly", debug=debug_multi) pre_sub_poly = polynomial_scheme_builder( sub_poly, r, unified_precision=self.precision) pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi) poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly)) poly.set_tag("poly") # optimizing poly before evaluation error computation #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma) #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma) opt_poly = self.optimise_scheme(poly) opt_sub_poly = self.optimise_scheme(pre_sub_poly) # evaluating error of the polynomial approximation r_gappa_var = Variable("r", precision=self.precision, interval=approx_interval) exact_hi_gappa_var = Variable("exact_hi", precision=self.precision, interval=exact_hi_interval) exact_lo_gappa_var = Variable("exact_lo", precision=self.precision, interval=exact_lo_interval) vx_gappa_var = Variable("x", precision=self.precision, interval=interval_vx) k_gappa_var = Variable("k", interval=interval_k, precision=self.precision) #print "exact_hi interval: ", exact_hi_interval sub_poly_error_copy_map = { #r.get_handle().get_node(): r_gappa_var, #vx.get_handle().get_node(): vx_gappa_var, exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, #k.get_handle().get_node(): k_gappa_var, } poly_error_copy_map = { exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, } if is_gappa_installed(): sub_poly_eval_error = -1.0 sub_poly_eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_sub_poly, sub_poly_error_copy_map, gappa_filename="%s_gappa_sub_poly.g" % self.function_name) dichotomy_map = [ { exact_hi_part.get_handle().get_node(): approx_interval_split[0], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[1], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[2], }, ] poly_eval_error_dico = self.gappa_engine.get_eval_error_v3( self.opt_engine, opt_poly, poly_error_copy_map, gappa_filename="gappa_poly.g", dichotomy=dichotomy_map) poly_eval_error = max( [sup(abs(err)) for err in poly_eval_error_dico]) else: poly_eval_error = 0.0 sub_poly_eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "stopping autonomous degree research") # incrementing polynomial degree to counteract initial decrementation effect poly_degree += 1 break Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error) Log.report(Log.Info, "sub poly evaluation error: %s" % sub_poly_eval_error) global_poly_error = None global_rel_poly_error = None for case_index in range(3): poly_error = poly_approx_error + poly_eval_error_dico[ case_index] rel_poly_error = sup( abs(poly_error / sollya.exp(approx_interval_split[case_index]))) if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error: global_rel_poly_error = rel_poly_error global_poly_error = poly_error flag = error_goal > global_rel_poly_error if flag: break else: poly_degree += 1 late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier=Comparison.Greater, likely=False, debug=debug_multi, tag="late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = Subtraction( ik, Constant(overflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), debug=debug_multi, tag="diff_k", ) late_overflow_result = (ExponentInsertion( diff_k, precision=self.precision) * poly) * ExponentInsertion( overflow_exp_offset, precision=self.precision) late_overflow_result.set_attributes(silent=False, tag="late_overflow_result", debug=debug_multi, precision=self.precision) late_overflow_return = ConditionBlock( Test(late_overflow_result, specifier=Test.IsInfty, likely=False), ExpRaiseReturn(ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision)), Return(late_overflow_result, precision=self.precision)) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier=Comparison.LessOrEqual, likely=False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_exp = Addition( ik, Constant(underflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), tag="corrected_exp") late_underflow_result = ( ExponentInsertion(corrected_exp, precision=self.precision) * poly) * ExponentInsertion(-underflow_exp_offset, precision=self.precision) late_underflow_result.set_attributes(debug=debug_multi, tag="late_underflow_result", silent=False) test_subnormal = Test(late_underflow_result, specifier=Test.IsSubnormal) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value=late_underflow_result)), Return(late_underflow_result, precision=self.precision)) twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi, precision=self.precision) #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly) std_result = twok * poly std_result.set_attributes(tag="std_result", debug=debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result, precision=self.precision))) std_return = ConditionBlock( early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme)) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = ConditionBlock( test_nan_or_inf, Statement(ClearException() if self.libm_compliant else Statement(), specific_return), std_return) return scheme
def generate_scalar_scheme(self, vx): Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") index_size = 5 comp_lo = (vx < 0) comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool) sign = Select(comp_lo, -1, 1, precision = self.precision) # as sinh is an odd function, we can simplify the input to its absolute # value once the sign has been extracted vx = Abs(vx) int_precision = self.precision.get_integer_format() # argument reduction arg_reg_value = log(2)/2**index_size inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN) inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2") # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact # by limiting the number of non-zero bits in log2_hi_value_cst # cosh(x) ~ exp(abs(x))/2 for a big enough x # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024) # k = inv_log2_value * x # -1 for guard max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024) max_k_bitsize = int(ceil(log2(max_k_approx))) Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize) log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1 log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN) log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN) log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision) log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision) k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision) k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True) r_hi = vx - k_log2 r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True) r_lo = -k * log2_lo_value_cst # reduced argument r = r_hi + r_lo r.set_attributes(tag = "r", debug = debug_multi) if is_gappa_installed(): r_eval_error = self.get_eval_error(r_hi, variable_copy_map = { vx: Variable("vx", interval = Interval(0, 715), precision = self.precision), k: Variable("k", interval = Interval(0, 1024), precision = self.precision) }) Log.report(Log.Verbose, "r_eval_error: ", r_eval_error) approx_interval = Interval(-arg_reg_value, arg_reg_value) error_goal_approx = 2**-(self.precision.get_precision()) poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3 precision_list = [1] + [self.precision] * (poly_degree) k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi) k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi) k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi) pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi) exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table")) for i in range(2 * 2**index_size): input_value = i - 2**index_size if i >= 2**index_size else i reduced_hi_prec = int(self.precision.get_mantissa_size() - 8) # using SollyaObject wrapper to force evaluation by sollya # with higher precision exp_value = sollya.SollyaObject(2)**((input_value)* 2**-index_size) mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size) pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN) pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN) neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN) neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN) exp_table[i][0] = neg_value_hi exp_table[i][1] = neg_value_lo exp_table[i][2] = pos_value_hi exp_table[i][3] = pos_value_lo # log2_value = log(2) / 2^index_size # sinh(x) = 1/2 * (exp(x) - exp(-x)) # exp(x) = exp(x - k * log2_value + k * log2_value) # # r = x - k * log2_value # exp(x) = exp(r) * 2 ^ (k / 2^index_size) # # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size) # # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size) # S=2^(h-1), T = 2^(-h-1) # exp(r) = 1 + poly_pos(r) # exp(-r) = 1 + poly_neg(r) # 2^(l / 2^index_size) = pos_value_hi + pos_value_lo # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo # error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function) Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error)))) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision) poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi) poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision) poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi) table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi) neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi) neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi) pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi) pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi) k_plus = Max( Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) k_neg = Max( Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) # 2^(h-1) pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi) # 2^(-h-1) pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi) hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg) hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi) pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi) neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi) result = Addition( Subtraction( pos_exp, neg_exp, precision=self.precision, ), hi_terms, precision=self.precision, tag="result", debug=debug_multi ) # ov_value ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD) ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater) # main scheme scheme = Statement( Return( Select( ov_flag, sign*FP_PlusInfty(self.precision), sign*result ))) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) C_m1 = Constant(-1, precision = self.precision) test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool) test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool) test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False); # Infnty input infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1))) # non-std input (inf/nan) specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return) # Over/Underflow Tests precision_emax = self.precision.get_emax() precision_max_value = S2**(precision_emax + 1) expm1_overflow_bound = ceil(log(precision_max_value + 1)) overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool) overflow_return = Statement(Return(FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2** precision_emin expm1_underflow_bound = floor(log(precision_min_value) + 1) underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool) underflow_return = Statement(Return(C_m1)) sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision] int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision] # Constants log_2 = round(log(2), sollya_precision, sollya.RN) invlog2 = round(1/log(2), sollya_precision, sollya.RN) log_2_cst = Constant(log_2, precision = self.precision) interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - 6 log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN) # Reduction unround_k = vx * invlog2 ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik") k = Conversion(ik, precision = self.precision, tag = "k") red_coeff1 = Multiplication(k, log2_hi, precision = self.precision) red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision) pre_sub_mul = Subtraction(vx, red_coeff1, precision = self.precision) s = Addition(pre_sub_mul, red_coeff2, precision = self.precision) z = Subtraction(s, pre_sub_mul, precision = self.precision) t = Subtraction(red_coeff2, z, precision = self.precision) r = Addition(s, t, precision = self.precision) r.set_attributes(tag = "r", debug = debug_multi) r_interval = Interval(-log_2/S2, log_2/S2) local_ulp = sup(ulp(exp(r_interval), self.precision)) print("ulp: ", local_ulp) error_goal = S2**-1*local_ulp print("error goal: ", error_goal) # Polynomial Approx error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n") poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_degree_list = range(0, poly_degree) precision_list = [self.precision] *(len(poly_degree_list) + 1) poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function) sub_poly = poly_object.sub_poly(start_index = 2) Log.report(Log.Info, "Poly : %s" % sub_poly) Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error)))) pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision) poly = r + pre_sub_poly poly.set_attributes(tag = "poly", debug = debug_multi) exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision) exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision) diff = 1 - exp_mk diff.set_attributes(tag = "diff", debug = debug_multi) # Late Tests late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = ik - overflow_exp_offset exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi) exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi) late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0 late_overflow_return = ConditionBlock( Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result) ) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_coeff = ik + underflow_exp_offset exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision) exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision) late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0 test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result) ) # Reconstruction std_result = exp_k * ( poly + diff ) std_result.set_attributes(tag = "result", debug = debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock( late_underflow_test, late_underflow_return, Return(std_result) ) ) std_return = ConditionBlock( overflow_test, overflow_return, ConditionBlock( underflow_test, underflow_return, result_scheme) ) scheme = ConditionBlock( test_NaN_or_inf, Statement(specific_return), std_return ) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) index_size = 3 approx_interval = Interval(0.0, 2**-index_size) error_goal_approx = 2**-(self.precision.get_precision()) int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] vx_int = Floor(vx * 2**index_size, precision=self.precision, tag="vx_int", debug=debug_multi) vx_frac = vx - (vx_int * 2**-index_size) vx_frac.set_attributes(tag="vx_frac", debug=debug_multi, unbreakable=True) poly_degree = sup( guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1 precision_list = [1] + [self.precision] * (poly_degree) vx_integer = Conversion(vx_int, precision=int_precision, tag="vx_integer", debug=debug_multi) vx_int_hi = BitLogicRightShift(vx_integer, Constant(index_size), tag="vx_int_hi", debug=debug_multi) vx_int_lo = Modulo(vx_integer, 2**index_size, tag="vx_int_lo", debug=debug_multi) pow_exp = ExponentInsertion(Conversion(vx_int_hi, precision=int_precision), precision=self.precision, tag="pow_exp", debug=debug_multi) exp2_table = ML_Table(dimensions=[2 * 2**index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("exp2_table")) for i in range(2 * 2**index_size): input_value = i - 2**index_size if i >= 2**index_size else i exp2_value = SollyaObject(2)**((input_value) * 2**-index_size) hi_value = round(exp2_value, self.precision.get_sollya_object(), RN) lo_value = round(exp2_value - hi_value, self.precision.get_sollya_object(), RN) exp2_table[i][0] = lo_value exp2_table[i][1] = hi_value error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error( 2**(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function=error_function) print "poly_approx_error: ", poly_approx_error, float( log2(poly_approx_error)) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1), vx_frac, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) table_index = Addition(vx_int_lo, Constant(2**index_size, precision=int_precision), precision=int_precision, tag="table_index", debug=debug_multi) lo_value_load = TableLoad(exp2_table, table_index, 0, tag="lo_value_load", debug=debug_multi) hi_value_load = TableLoad(exp2_table, table_index, 1, tag="hi_value_load", debug=debug_multi) result = (hi_value_load + (hi_value_load * poly + (lo_value_load + lo_value_load * poly))) * pow_exp ov_flag = Comparison(vx_int_hi, Constant(self.precision.get_emax(), precision=self.precision), specifier=Comparison.Greater) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = Statement( Return(Select(ov_flag, FP_PlusInfty(self.precision), result))) return scheme
def generate_scalar_scheme(self, vx, inline_select=False): Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # r_interval = Interval(0, 1.0) index_size = 3 r_interval = Interval(-2**(-index_size), 2**-index_size) local_ulp = sup(ulp(2**r_interval, self.precision)) Log.report(Log.Info, "ulp: ", local_ulp) error_goal = S2**-1 * local_ulp Log.report(Log.Info, "error goal: ", error_goal) sollya_precision = { ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64 }[self.precision] int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] # Argument Reduction # r = x - floor(x), r >= 0 vx_floor = Floor(vx, precision=self.precision, tag='vx_floor', debug=debug_multi) vx_int = Conversion(vx_floor, precision=int_precision, tag="vx_int", debug=debug_multi) vx_intf = vx_floor # Conversion(vx_int, precision = self.precision) vx_r = vx - vx_intf r_hi = NearestInteger(vx_r * 2**index_size, precision=self.precision, tag="r_hi", debug=debug_multi) # clamping r_hi_int within table-size to make sure # it does not exceeds hi_part_table when used to index it r_hi_int = Max( Min( Conversion(r_hi, precision=int_precision, tag="r_hi_int", debug=debug_multi), 2**index_size + 1), 0) r_lo = vx_r - r_hi * 2**-index_size r_lo.set_attributes(tag="r_lo", debug=debug_multi) vx_r.set_attributes(tag="vx_r", debug=debug_multi) degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2 precision_list = [1] + [self.precision] * degree exp_X = ExponentInsertion(vx_int, tag="exp_X", debug=debug_multi, precision=self.precision) #Polynomial Approx polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_object, poly_error = Polynomial.build_from_approximation_with_error( 2**(sollya.x) - 1, degree, precision_list, r_interval, sollya.absolute) Log.report(Log.Info, "Poly : %s" % poly_object) Log.report(Log.Info, "poly_error : ", poly_error) poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1), r_lo, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) hi_part_table = ML_NewTable(dimensions=[2**index_size + 1], storage_precision=self.precision, tag=self.uniquify_name("exp2_table"), const=True) for i in range(2**index_size + 1): input_value = i * 2**-index_size tab_value = self.precision.round_sollya_object( sollya.SollyaObject(2)**(input_value)) hi_part_table[i] = tab_value hi_part_value = TableLoad(hi_part_table, r_hi_int, precision=self.precision, tag="hi_part_value", debug=debug_multi) #Handling special cases oflow_bound = Constant(self.precision.get_emax() + 1, precision=self.precision) subnormal_bound = self.precision.get_emin_subnormal() uflow_bound = self.precision.get_emin_normal() Log.report(Log.Info, "oflow : ", oflow_bound) #print "uflow : ", uflow_bound #print "sub : ", subnormal_bound test_overflow = Comparison(vx, oflow_bound, specifier=Comparison.GreaterOrEqual) test_overflow.set_attributes(tag="oflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less) test_underflow.set_attributes(tag="uflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_subnormal = Comparison(vx, subnormal_bound, specifier=Comparison.Greater) test_subnormal.set_attributes(tag="sub_test", debug=debug_multi, likely=False, precision=ML_Bool) subnormal_offset = -(uflow_bound - vx_int) subnormal_offset.set_attributes(tag="offset", debug=debug_multi) exp_offset = ExponentInsertion(subnormal_offset, precision=self.precision, debug=debug_multi, tag="exp_offset") exp_min = ExponentInsertion(uflow_bound, precision=self.precision, debug=debug_multi, tag="exp_min") subnormal_result = hi_part_value * exp_offset * exp_min * poly + hi_part_value * exp_offset * exp_min test_std = LogicalOr(test_overflow, test_underflow, precision=ML_Bool, tag="std_test", likely=False, debug=debug_multi) #Reconstruction result = hi_part_value * exp_X * poly + hi_part_value * exp_X result.set_attributes(tag="result", debug=debug_multi) C0 = Constant(0, precision=self.precision) if inline_select: scheme = Select( test_std, Select(test_overflow, FP_PlusInfty(self.precision), Select( test_subnormal, subnormal_result, C0, )), result, ) return scheme else: return_inf = Return(FP_PlusInfty(self.precision)) return_C0 = Return(C0) return_sub = Return(subnormal_result) return_std = Return(result) non_std_statement = Statement( ConditionBlock( test_overflow, return_inf, ConditionBlock(test_subnormal, return_sub, return_C0))) scheme = Statement( ConditionBlock(test_std, non_std_statement, return_std)) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) r_interval = Interval(-0.5, 0.5) local_ulp = sup(ulp(2**r_interval, self.precision)) Log.report(Log.Info, "ulp: ", local_ulp) error_goal = S2**-1 * local_ulp Log.report(Log.Info, "error goal: ", error_goal) sollya_precision = { ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64 }[self.precision] int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] #Argument Reduction vx_int = NearestInteger(vx, precision=int_precision, tag='vx_int', debug=debug_multi) vx_intf = Conversion(vx_int, precision=self.precision) vx_r = vx - vx_intf vx_r.set_attributes(tag="vx_r", debug=debug_multi) degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2 precision_list = [1] + [self.precision] * degree exp_X = ExponentInsertion(vx_int, tag="exp_X", debug=debug_multi, precision=self.precision) #Polynomial Approx polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_object, poly_error = Polynomial.build_from_approximation_with_error( 2**(sollya.x) - 1, degree, precision_list, r_interval, sollya.absolute) Log.report(Log.Info, "Poly : %s" % poly_object) Log.report(Log.Info, "poly_error : ", poly_error) poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1), vx_r, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) #Handling special cases oflow_bound = Constant(self.precision.get_emax() + 1, precision=self.precision) subnormal_bound = self.precision.get_emin_subnormal() uflow_bound = self.precision.get_emin_normal() Log.report(Log.Info, "oflow : ", oflow_bound) #print "uflow : ", uflow_bound #print "sub : ", subnormal_bound test_overflow = Comparison(vx, oflow_bound, specifier=Comparison.GreaterOrEqual) test_overflow.set_attributes(tag="oflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less) test_underflow.set_attributes(tag="uflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_subnormal = Comparison(vx, subnormal_bound, specifier=Comparison.Greater) test_subnormal.set_attributes(tag="sub_test", debug=debug_multi, likely=False, precision=ML_Bool) subnormal_offset = -(uflow_bound - vx_int) subnormal_offset.set_attributes(tag="offset", debug=debug_multi) exp_offset = ExponentInsertion(subnormal_offset, precision=self.precision, debug=debug_multi, tag="exp_offset") exp_min = ExponentInsertion(uflow_bound, precision=self.precision, debug=debug_multi, tag="exp_min") subnormal_result = exp_offset * exp_min * poly + exp_offset * exp_min test_std = LogicalOr(test_overflow, test_underflow, precision=ML_Bool, tag="std_test", likely=False) #Reconstruction result = exp_X * poly + exp_X result.set_attributes(tag="result", debug=debug_multi) C0 = Constant(0, precision=self.precision) return_inf = Return(FP_PlusInfty(self.precision)) return_C0 = Return(C0) return_sub = Return(subnormal_result) return_std = Return(result) non_std_statement = Statement( ConditionBlock( test_overflow, return_inf, ConditionBlock(test_subnormal, return_sub, return_C0))) scheme = Statement( ConditionBlock(test_std, non_std_statement, return_std)) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) vx.set_attributes(precision=self.precision, tag="vx", debug=debug_multi) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m Generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def SqrtRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) C0 = Constant(0, precision=self.precision) C0_plus = Constant(FP_PlusZero(self.precision)) C0_minus = Constant(FP_MinusZero(self.precision)) test_NaN = Test(vx, specifier=Test.IsNaN, likely=False, debug=debug_multi, tag="is_NaN", precision=ML_Bool) test_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="is_Inf", precision=ML_Bool) test_negative = Comparison(vx, C0, specifier=Comparison.Less, debug=debug_multi, tag="is_Negative", precision=ML_Bool, likely=False) test_NaN_or_Inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="is_Inf_Or_Nan", precision=ML_Bool) test_NaN_or_Neg = LogicalOr(test_NaN, test_negative, precision=ML_Bool) test_std = LogicalNot(LogicalOr(test_NaN_or_Inf, test_negative, precision=ML_Bool, likely=False), precision=ML_Bool, likely=True) test_zero = Comparison(vx, C0, specifier=Comparison.Equal, likely=False, debug=debug_multi, tag="Is_Zero", precision=ML_Bool) return_NaN_or_neg = Statement(Return(FP_QNaN(self.precision))) return_inf = Statement(Return(FP_PlusInfty(self.precision))) return_PosZero = Return(C0_plus) return_NegZero = Return(C0_minus) NR_init = InverseSquareRootSeed(vx, precision=self.precision, tag="sqrt_seed", debug=debug_multi) result = compute_sqrt(vx, NR_init, int(self.num_iter), precision=self.precision) return_non_std = ConditionBlock( test_NaN_or_Neg, return_NaN_or_neg, ConditionBlock( test_inf, return_inf, ConditionBlock(test_zero, return_PosZero, return_NegZero))) return_std = Return(result) scheme = ConditionBlock(test_std, return_std, return_non_std) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) vx.set_attributes(precision=self.precision, tag="vx", debug=debug_multi) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m Generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") C0 = Constant(0, precision=self.precision) C0_plus = Constant(FP_PlusZero(self.precision)) C0_minus = Constant(FP_MinusZero(self.precision)) def local_test(specifier, tag): """ Local wrapper to generate Test operations """ return Test(vx, specifier=specifier, likely=False, debug=debug_multi, tag="is_%s" % tag, precision=ML_Bool) test_NaN = local_test(Test.IsNaN, "is_NaN") test_inf = local_test(Test.IsInfty, "is_Inf") test_NaN_or_Inf = local_test(Test.IsInfOrNaN, "is_Inf_Or_Nan") test_negative = Comparison(vx, C0, specifier=Comparison.Less, debug=debug_multi, tag="is_Negative", precision=ML_Bool, likely=False) test_NaN_or_Neg = LogicalOr(test_NaN, test_negative, precision=ML_Bool) test_std = LogicalNot(LogicalOr(test_NaN_or_Inf, test_negative, precision=ML_Bool, likely=False), precision=ML_Bool, likely=True) test_zero = Comparison(vx, C0, specifier=Comparison.Equal, likely=False, debug=debug_multi, tag="Is_Zero", precision=ML_Bool) return_NaN_or_neg = Statement(Return(FP_QNaN(self.precision))) return_inf = Statement(Return(FP_PlusInfty(self.precision))) return_PosZero = Return(C0_plus) return_NegZero = Return(C0_minus) NR_init = ReciprocalSquareRootSeed(vx, precision=self.precision, tag="sqrt_seed", debug=debug_multi) result = compute_sqrt(vx, NR_init, int(self.num_iter), precision=self.precision) return_non_std = ConditionBlock( test_NaN_or_Neg, return_NaN_or_neg, ConditionBlock( test_inf, return_inf, ConditionBlock(test_zero, return_PosZero, return_NegZero))) return_std = Return(result) scheme = ConditionBlock(test_std, return_std, return_non_std) return scheme
def generate_scheme(self): # declaring main input variable vx = self.implementation.add_input_variable("x", self.precision) # declaring approximation parameters index_size = 6 num_iteration = 8 Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) def cbrt_newton_iteration(current_approx, input_value, input_inverse): # Cubic root of A is approximated by a Newton-Raphson iteration # on f(x) = 1 - A / x^3 # x_n+1 = 4/3 * x_n - x_n^4 / (3 * A) # x_n+1 = 1/3 * (x_n * (1 - x_n^3/A) + x_n) approx_triple = Multiplication( current_approx, Multiplication(current_approx, current_approx)) diff = FMSN(approx_triple, input_inverse, Constant(1, precision=self.precision)) injection = FMA( Multiplication( current_approx, Constant(1 / 3.0, precision=self.precision), ), diff, current_approx) new_approx = injection return new_approx reduced_vx = MantissaExtraction(vx, precision=self.precision) int_precision = self.precision.get_integer_format() cbrt_approx_table = ML_NewTable( dimensions=[2**index_size, 1], storage_precision=self.precision, tag=self.uniquify_name("cbrt_approx_table")) for i in range(2**index_size): input_value = 1 + i / SollyaObject(2**index_size) cbrt_approx = cbrt(input_value) cbrt_approx_table[i][0] = round(cbrt_approx, self.precision.get_sollya_object(), RN) # Modulo operations will returns a reduced exponent within [-3, 2] # so we approximate cbrt on this interval (with index offset by -3) cbrt_mod_table = ML_NewTable(dimensions=[6, 1], storage_precision=self.precision, tag=self.uniquify_name("cbrt_mod_table")) for i in range(6): input_value = SollyaObject(2)**(i - 3) cbrt_mod_table[i][0] = round(cbrt(input_value), self.precision.get_sollya_object(), RN) vx_int = TypeCast(reduced_vx, precision=int_precision) mask = BitLogicRightShift(vx_int, self.precision.get_precision() - index_size, precision=int_precision) mask = BitLogicAnd(mask, Constant(2**index_size - 1, precision=int_precision), precision=int_precision, tag="table_index", debug=debug_multi) table_index = mask int_precision = self.precision.get_integer_format() exp_vx = ExponentExtraction(vx, precision=int_precision, tag="exp_vx") exp_vx_third = Division(exp_vx, Constant(3, precision=int_precision), precision=int_precision, tag="exp_vx_third") exp_vx_mod = Modulo(exp_vx, Constant(3, precision=int_precision), precision=int_precision, tag="exp_vx_mod", debug=debug_multi) # offset on modulo to make sure table index is positive exp_vx_mod = exp_vx_mod + 3 cbrt_mod = TableLoad(cbrt_mod_table, exp_vx_mod, Constant(0), tag="cbrt_mod") init_approx = Multiplication( Multiplication( # approx cbrt(mantissa) TableLoad(cbrt_approx_table, table_index, Constant(0, precision=ML_Int32), tag="seed", debug=debug_multi), # approx cbrt(2^(e%3)) cbrt_mod, tag="init_mult", debug=debug_multi, precision=self.precision), # 2^(e/3) ExponentInsertion(exp_vx_third, precision=self.precision, tag="exp_vx_third", debug=debug_multi), tag="init_approx", debug=debug_multi, precision=self.precision) inverse_red_vx = Division(Constant(1, precision=self.precision), reduced_vx) inverse_vx = Division(Constant(1, precision=self.precision), vx) current_approx = init_approx for i in range(num_iteration): #current_approx = cbrt_newton_iteration(current_approx, reduced_vx, inverse_red_vx) current_approx = cbrt_newton_iteration(current_approx, vx, inverse_vx) current_approx.set_attributes(tag="approx_%d" % i, debug=debug_multi) result = current_approx result.set_attributes(tag="result", debug=debug_multi) # last iteration ext_precision = ML_DoubleDouble xn_2 = Multiplication(current_approx, current_approx, precision=ext_precision) xn_3 = Multiplication(current_approx, xn_2, precision=ext_precision) FourThird = Constant(4 / SollyaObject(3), precision=ext_precision) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = Statement(Return(result)) return scheme
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) index_size = 3 vx = Abs(vx) int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] # argument reduction arg_reg_value = log(2) / 2**index_size inv_log2_value = round(1 / arg_reg_value, self.precision.get_sollya_object(), RN) inv_log2_cst = Constant(inv_log2_value, precision=self.precision, tag="inv_log2") # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact # by limiting the number of non-zero bits in log2_hi_value_cst # cosh(x) ~ exp(abs(x))/2 for a big enough x # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^21024) # k = inv_log2_value * x # -1 for guard max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024) max_k_bitsize = int(ceil(log2(max_k_approx))) Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize) log2_hi_value_precision = self.precision.get_precision( ) - max_k_bitsize - 1 log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN) log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), RN) log2_hi_value_cst = Constant(log2_hi_value, tag="log2_hi_value", precision=self.precision) log2_lo_value_cst = Constant(log2_lo_value, tag="log2_lo_value", precision=self.precision) k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision) k_log2 = Multiplication(k, log2_hi_value_cst, precision=self.precision, exact=True, tag="k_log2", unbreakable=True) r_hi = vx - k_log2 r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True) r_lo = -k * log2_lo_value_cst # reduced argument r = r_hi + r_lo r.set_attributes(tag="r", debug=debug_multi) r_eval_error = self.get_eval_error( r_hi, variable_copy_map={ vx: Variable("vx", interval=Interval(0, 715), precision=self.precision), k: Variable("k", interval=Interval(0, 1024), precision=int_precision) }) print "r_eval_error: ", r_eval_error approx_interval = Interval(-arg_reg_value, arg_reg_value) error_goal_approx = 2**-(self.precision.get_precision()) poly_degree = sup( guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) precision_list = [1] + [self.precision] * (poly_degree) k_integer = Conversion(k, precision=int_precision, tag="k_integer", debug=debug_multi) k_hi = BitLogicRightShift(k_integer, Constant(index_size), tag="k_int_hi", precision=int_precision, debug=debug_multi) k_lo = Modulo(k_integer, 2**index_size, tag="k_int_lo", precision=int_precision, debug=debug_multi) pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision), precision=self.precision, tag="pow_exp", debug=debug_multi) exp_table = ML_Table(dimensions=[2 * 2**index_size, 4], storage_precision=self.precision, tag=self.uniquify_name("exp2_table")) for i in range(2 * 2**index_size): input_value = i - 2**index_size if i >= 2**index_size else i # using SollyaObject wrapper to force evaluation by sollya # with higher precision exp_value = sollya.SollyaObject(2)**((input_value) * 2**-index_size) mexp_value = sollya.SollyaObject(2)**((-input_value) * 2**-index_size) pos_value_hi = round(exp_value, self.precision.get_sollya_object(), RN) pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), RN) neg_value_hi = round(mexp_value, self.precision.get_sollya_object(), RN) neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), RN) exp_table[i][0] = neg_value_hi exp_table[i][1] = neg_value_lo exp_table[i][2] = pos_value_hi exp_table[i][3] = pos_value_lo # log2_value = log(2) / 2^index_size # cosh(x) = 1/2 * (exp(x) + exp(-x)) # exp(x) = exp(x - k * log2_value + k * log2_value # # r = x - k * log2_value # exp(x) = exp(r) * 2 ^ (k / 2^index_size) # # k / 2^index_size = h + l * 2^-index_size # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size) # # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size) # error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error( exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function=error_function) print "poly_approx_error: ", poly_approx_error, float( log2(poly_approx_error)) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_pos = polynomial_scheme_builder( poly_object.sub_poly(start_index=1), r, unified_precision=self.precision) poly_pos.set_attributes(tag="poly_pos", debug=debug_multi) poly_neg = polynomial_scheme_builder( poly_object.sub_poly(start_index=1), -r, unified_precision=self.precision) poly_neg.set_attributes(tag="poly_neg", debug=debug_multi) table_index = Addition(k_lo, Constant(2**index_size, precision=int_precision), precision=int_precision, tag="table_index", debug=debug_multi) neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag="neg_value_load_hi", debug=debug_multi) neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag="neg_value_load_lo", debug=debug_multi) pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag="pos_value_load_hi", debug=debug_multi) pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag="pos_value_load_lo", debug=debug_multi) k_plus = Max( Subtraction(k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_plus", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision=int_precision)) k_neg = Max( Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision=int_precision)) pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision) pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision) pos_exp = ( pos_value_load_hi + (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos pos_exp.set_attributes(tag="pos_exp", debug=debug_multi) neg_exp = ( neg_value_load_hi + (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg neg_exp.set_attributes(tag="neg_exp", debug=debug_multi) result = Addition(pos_exp, neg_exp, precision=self.precision, tag="result", debug=debug_multi) # ov_value ov_value = round(acosh(self.precision.get_max_value()), self.precision.get_sollya_object(), RD) ov_flag = Comparison(Abs(vx), Constant(ov_value, precision=self.precision), specifier=Comparison.Greater) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = Statement( Return(Select(ov_flag, FP_PlusInfty(self.precision), result))) return scheme