示例#1
0
        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real z in [{},{}];".format(x_low, x_high),
                "Definitions", "  retval rnd64= {};".format(poly_expr),
                "Expressions", "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.cos(sollya.x), x, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.cos(sollya.x), x, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
示例#2
0
 def numeric_emulate(self, input_value):
     if self.sin_output:
         return sin(input_value)
     else:
         return cos(input_value)
示例#3
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def SincosRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 8
        cw_hi_precision = self.precision.get_field_size() - 4

        ext_precision = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_Binary64
        }[self.precision]

        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        if self.precision is ML_Binary32:
            ph_bound = S2**10
        else:
            ph_bound = S2**33

        test_ph_bound = Comparison(vx,
                                   ph_bound,
                                   specifier=Comparison.GreaterOrEqual,
                                   precision=ML_Bool,
                                   likely=False)

        # argument reduction
        # m
        frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

        C0 = Constant(0, precision=int_precision)
        C1 = Constant(1, precision=int_precision)
        C_offset = Constant(3 * S2**(frac_pi_index - 1),
                            precision=int_precision)

        # 2^m / pi
        frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
        frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                           sollya.RN)
        # pi / 2^m, high part
        inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
        # pi / 2^m, low part
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)

        # computing k
        vx.set_attributes(tag="vx", debug=debug_multi)

        vx_pi = Addition(Multiplication(vx,
                                        Constant(frac_pi,
                                                 precision=self.precision),
                                        precision=self.precision),
                         Multiplication(vx,
                                        Constant(frac_pi_lo,
                                                 precision=self.precision),
                                        precision=self.precision),
                         precision=self.precision,
                         tag="vx_pi",
                         debug=debug_multi)

        k = NearestInteger(vx_pi,
                           precision=int_precision,
                           tag="k",
                           debug=debug_multi)
        # k in floating-point precision
        fk = Conversion(k,
                        precision=self.precision,
                        tag="fk",
                        debug=debug_multi)

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision,
                                   debug=debug_multi)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision,
                                      debug=debug_multi)

        # Cody-Waite reduction
        red_coeff1 = Multiplication(fk,
                                    inv_frac_pi_cst,
                                    precision=self.precision,
                                    exact=True)
        red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                    inv_frac_pi_lo_cst,
                                    precision=self.precision,
                                    exact=True)

        # Should be exact / Sterbenz' Lemma
        pre_sub_mul = Subtraction(vx,
                                  red_coeff1,
                                  precision=self.precision,
                                  exact=True)

        # Fast2Sum
        s = Addition(pre_sub_mul,
                     red_coeff2,
                     precision=self.precision,
                     unbreakable=True,
                     tag="s",
                     debug=debug_multi)
        z = Subtraction(s,
                        pre_sub_mul,
                        precision=self.precision,
                        unbreakable=True,
                        tag="z",
                        debug=debug_multi)
        t = Subtraction(red_coeff2,
                        z,
                        precision=self.precision,
                        unbreakable=True,
                        tag="t",
                        debug=debug_multi)

        red_vx_std = Addition(s, t, precision=self.precision)
        red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

        # To compute sine we offset x by 3pi/2
        # which means add 3  * S2^(frac_pi_index-1) to k
        if self.sin_output:
            Log.report(Log.Info, "Computing Sin")
            offset_k = Addition(k,
                                C_offset,
                                precision=int_precision,
                                tag="offset_k")
        else:
            Log.report(Log.Info, "Computing Cos")
            offset_k = k

        modk = Variable("modk",
                        precision=int_precision,
                        var_type=Variable.Local)
        red_vx = Variable("red_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        # Faster modulo using bitwise logic
        modk_std = BitLogicAnd(offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="modk",
                               debug=debug_multi)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        red_vx.set_interval(approx_interval)

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        Log.report(Log.Info,
                   "building tabulated approximation for sin and cos")

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        table_index_size = frac_pi_index + 1
        cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("cos_table"))

        for i in range(2**(frac_pi_index + 1)):
            local_x = i * pi / S2**frac_pi_index
            cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                              sollya.RN)
            cos_table[i][0] = cos_local

        sin_index = Modulo(modk + 2**(frac_pi_index - 1),
                           2**(frac_pi_index + 1),
                           precision=int_precision,
                           tag="sin_index")  #, debug = debug_multi)
        tabulated_cos = TableLoad(cos_table,
                                  modk,
                                  C0,
                                  precision=self.precision,
                                  tag="tab_cos",
                                  debug=debug_multi)
        tabulated_sin = -TableLoad(cos_table,
                                   sin_index,
                                   C0,
                                   precision=self.precision,
                                   tag="tab_sin",
                                   debug=debug_multi)

        poly_degree_cos = sup(
            guessdegree(cos(sollya.x), approx_interval, S2**
                        -self.precision.get_precision()) + 2)
        poly_degree_sin = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -self.precision.get_precision()) + 2)

        poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
        poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

        # cosine polynomial: limiting first and second coefficient precision to 1-bit
        poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
        # sine polynomial: limiting first coefficient precision to 1-bit
        poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
        Log.report(Log.Info,
                   "building mathematical polynomials for sin and cos")
        # Polynomial approximations
        Log.report(Log.Info, "cos")
        poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
            cos(sollya.x),
            poly_degree_cos_list,
            poly_cos_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "sin")
        poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
            sin(sollya.x),
            poly_degree_sin_list,
            poly_sin_prec_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        Log.report(
            Log.Info, "poly error cos: {} / {:d}".format(
                poly_error_cos, int(sollya.log2(poly_error_cos))))
        Log.report(
            Log.Info, "poly error sin: {0} / {1:d}".format(
                poly_error_sin, int(sollya.log2(poly_error_sin))))
        Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
        Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

        # Polynomial evaluation scheme
        poly_cos = polynomial_scheme_builder(
            poly_object_cos.sub_poly(start_index=1),
            red_vx,
            unified_precision=self.precision)
        poly_sin = polynomial_scheme_builder(
            poly_object_sin.sub_poly(start_index=2),
            red_vx,
            unified_precision=self.precision)
        poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
        poly_sin.set_attributes(tag="poly_sin",
                                debug=debug_multi,
                                unbreakable=True)

        # TwoProductFMA
        mul_cos_x = tabulated_cos * poly_cos
        mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                     poly_cos,
                                     -mul_cos_x,
                                     precision=self.precision)

        mul_sin_x = tabulated_sin * poly_sin
        mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                     poly_sin,
                                     -mul_sin_x,
                                     precision=self.precision)

        mul_coeff_sin_hi = tabulated_sin * red_vx
        mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                            -mul_coeff_sin_hi)

        mul_cos = Addition(mul_cos_x,
                           mul_cos_y,
                           precision=self.precision,
                           tag="mul_cos")  #, debug = debug_multi)
        mul_sin = Negation(Addition(mul_sin_x,
                                    mul_sin_y,
                                    precision=self.precision),
                           precision=self.precision,
                           tag="mul_sin")  #, debug = debug_multi)
        mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                          mul_coeff_sin_lo,
                                          precision=self.precision),
                                 precision=self.precision,
                                 tag="mul_coeff_sin")  #, debug = debug_multi)

        mul_cos_x.set_attributes(
            tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
        mul_cos_y.set_attributes(
            tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
        mul_sin_x.set_attributes(
            tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
        mul_sin_y.set_attributes(
            tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

        cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

        cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                    precision=self.precision,
                                    debug=debug_multi)

        result_1 = Statement(Return(cos_eval_d_1))

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################
        # payne and hanek argument reduction for large arguments
        ph_k = frac_pi_index
        ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
        ph_inv_frac_pi = pi / S2**ph_k

        ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                                ph_frac_pi,
                                                                self.precision,
                                                                n=100,
                                                                k=ph_k)

        # assigning Large Argument Reduction reduced variable
        lar_vx = Variable("lar_vx",
                          precision=self.precision,
                          var_type=Variable.Local)

        lar_red_vx = Addition(Multiplication(lar_vx,
                                             inv_frac_pi,
                                             precision=self.precision),
                              Multiplication(lar_vx,
                                             inv_frac_pi_lo,
                                             precision=self.precision),
                              precision=self.precision,
                              tag="lar_red_vx",
                              debug=debug_multi)

        C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
        ph_acc_int_red = Select(ph_acc_int < C0,
                                C32 + ph_acc_int,
                                ph_acc_int,
                                precision=int_precision,
                                tag="ph_acc_int_red")
        if self.sin_output:
            lar_offset_k = Addition(ph_acc_int_red,
                                    C_offset,
                                    precision=int_precision,
                                    tag="lar_offset_k")
        else:
            lar_offset_k = ph_acc_int_red

        ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
        lar_modk = BitLogicAnd(lar_offset_k,
                               2**(frac_pi_index + 1) - 1,
                               precision=int_precision,
                               tag="lar_modk",
                               debug=debug_multi)

        lar_statement = Statement(ph_statement,
                                  ReferenceAssign(lar_vx,
                                                  ph_acc,
                                                  debug=debug_multi),
                                  ReferenceAssign(red_vx,
                                                  lar_red_vx,
                                                  debug=debug_multi),
                                  ReferenceAssign(modk, lar_modk),
                                  prevent_optimization=True)

        test_NaN_or_Inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               tag="NaN_or_Inf",
                               debug=debug_multi)
        return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

        scheme = ConditionBlock(
            test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
            Statement(
                modk, red_vx,
                ConditionBlock(
                    test_ph_bound, lar_statement,
                    Statement(
                        ReferenceAssign(modk, modk_std),
                        ReferenceAssign(red_vx, red_vx_std),
                    )), result_1))

        return scheme
示例#4
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.io_precisions[0])

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
示例#5
0
  def generate_scheme(self): 
    # declaring CodeFunction and retrieving input variable
    vx = Abs(self.implementation.add_input_variable("x", self.precision), tag = "vx") 


    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag: 
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    debug_precision = {ML_Binary32: debug_ftox, ML_Binary64: debug_lftolx}[self.precision]


    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
    test_nan        = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive   = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
    return_snan        = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    hi_precision = self.precision.get_field_size() - 3


    

    # argument reduction
    frac_pi_index = 3
    frac_pi     = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision = self.precision)
    k = NearestInteger(vx_pi, precision = ML_Int32, tag = "k", debug = True)
    fk = Conversion(k, precision = self.precision, tag = "fk")

    inv_frac_pi_cst    = Constant(inv_frac_pi, tag = "inv_frac_pi", precision = self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag = "inv_frac_pi_lo", precision = self.precision)

    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag = "red_vx_hi", debug = debug_precision, precision = self.precision)
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag = "red_vx_lo_sub", debug = debug_precision, unbreakable = True, precision = self.precision)
    vx_d = Conversion(vx, precision = ML_Binary64, tag = "vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag = "pre_red_vx_d_hi", precision = ML_Binary64, debug = debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag = "pre_red_vx_d", debug = debug_lftolx, precision = ML_Binary64)


    modk = Modulo(k, 2**(frac_pi_index+1), precision = ML_Int32, tag = "switch_value", debug = True)

    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index-1)), 2**(frac_pi_index-1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag = "red_vx", debug = debug_precision, precision = self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag = "red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    approx_interval = Interval(-pi/(S2**(frac_pi_index+1)), pi / S2**(frac_pi_index+1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()


    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index+1)



    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    index_relative = []

    poly_object_vector = [None] * 2**(frac_pi_index+1)
    for i in range(2**(frac_pi_index+1)):
      sub_func = cos(sollya.x+i*pi/S2**frac_pi_index)
      degree = int(sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

      degree_list = range(degree+1)
      a_interval = approx_interval
      if i == 0:
        # ad-hoc, TODO: to be cleaned
        degree = 6
        degree_list = range(0, degree+1, 2)
      elif i % 2**(frac_pi_index) == 2**(frac_pi_index-1):
        # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x) 
        # must be generated
        degree_list = range(1, degree+1, 2)

      if i == 3 or i == 5 or i == 7 or i == 9: 
        precision_list =  [sollya.binary64] + [sollya.binary32] *(degree)
      else:
        precision_list = [sollya.binary32] * (degree+1)

      poly_degree_vector[i] = degree 

      constraint = sollya.absolute
      delta = (2**(frac_pi_index - 3))
      centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index-1)
      if centered_i < delta and centered_i > -delta and centered_i != 0:
        constraint = sollya.relative
        index_relative.append(i)
      Log.report(Log.Info, "generating approximation for %d/%d" % (i, 2**(frac_pi_index+1)))
      poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(sub_func, degree_list, precision_list, a_interval, constraint, error_function = error_function) 


    # unified power map for red_sx^n
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index+1))

    for i in range(2**(frac_pi_index+1)):
      poly_object = poly_object_vector[i]
      poly_precision = self.precision
      if i == 3 or i == 5 or i == 7 or i == 9: 
          poly_precision = ML_Binary64
          c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = ML_Binary64)
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
          poly_hi = (c0 + c1 * red_vx)
          poly_hi.set_precision(ML_Binary64)
          red_vx_d_2 = red_vx_d * red_vx_d
          poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(poly_object.sub_poly(start_index = 2, offset = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_attributes(unbreakable = True)
      elif i == 4:
          c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = ML_Binary64)
          poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
          poly_scheme.set_precision(ML_Binary64)
      else:
          poly_scheme = polynomial_scheme_builder(poly_object, red_vx, unified_precision = poly_precision, power_map_ = upm)
      #if i == 3:
      #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
      #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
      #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

      poly_scheme.set_attributes(tag = "poly_cos%dpi%d" % (i, 2**(frac_pi_index)), debug = debug_precision)
      poly_scheme_vector[i] = poly_scheme



      #try:
      if is_gappa_installed() and i == 3:
          opt_scheme = self.opt_engine.optimization_process(poly_scheme, self.precision, copy = True, fuse_fma = self.fuse_fma)

          tag_map = {}
          self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

          gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

          cg_eval_error_copy_map = {
              tag_map["red_vx"]:    gappa_vx, 
              tag_map["red_vx_d"]:  gappa_vx,
          }
示例#6
0
          tag_map = {}
          self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

          gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

          cg_eval_error_copy_map = {
              tag_map["red_vx"]:    gappa_vx, 
              tag_map["red_vx_d"]:  gappa_vx,
          }

          print "opt_scheme"
          print opt_scheme.get_str(depth = None, display_precision = True, memoization_map = {})

          eval_error = self.gappa_engine.get_eval_error_v2(self.opt_engine, opt_scheme, cg_eval_error_copy_map, gappa_filename = "red_arg_%d.g" % i)
          poly_range = cos(approx_interval+i*pi/S2**frac_pi_index)
          rel_error_list.append(eval_error / poly_range)


    #for rel_error in rel_error_list:
    #  print sup(abs(rel_error))

    #return 

    # case 17
    #poly17 = poly_object_vector[17]
    #c0 = Constant(coeff(poly17.get_sollya_object(), 0), precision = self.precision)
    #c1 = Constant(coeff(poly17.get_sollya_object(), 1), precision = self.precision)
    #poly_scheme_vector[17] = FusedMultiplyAdd(c1, red_vx, c0, specifier = FusedMultiplyAdd.Standard) + polynomial_scheme_builder(poly17.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

    half = 2**frac_pi_index
示例#7
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        frac_pi_index = 3
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        for i in range(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = range(degree + 1)
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                degree = 6
                degree_list = range(0, degree + 1, 2)
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = range(1, degree + 1, 2)

            if i == 3 or i == 5 or i == 7 or i == 9:
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        for i in range(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i == 3 or i == 5 or i == 7 or i == 9:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)
            #if i == 3:
            #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
            #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
            #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            #try:
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                print "opt_scheme"
                print opt_scheme.get_str(depth=None,
                                         display_precision=True,
                                         memoization_map={})

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        #for rel_error in rel_error_list:
        #  print sup(abs(rel_error))

        #return

        # case 17
        #poly17 = poly_object_vector[17]
        #c0 = Constant(coeff(poly17.get_sollya_object(), 0), precision = self.precision)
        #c1 = Constant(coeff(poly17.get_sollya_object(), 1), precision = self.precision)
        #poly_scheme_vector[17] = FusedMultiplyAdd(c1, red_vx, c0, specifier = FusedMultiplyAdd.Standard) + polynomial_scheme_builder(poly17.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        switch_map = {}
        if 0:
            for i in range(2**(frac_pi_index + 1)):
                switch_map[i] = Return(poly_scheme_vector[i])
        else:
            for i in range(2**(frac_pi_index - 1)):
                switch_case = (i, half - i)
                #switch_map[i]      = Return(poly_scheme_vector[i])
                #switch_map[half-i] = Return(-poly_scheme_vector[i])
                if i != 0:
                    switch_case = switch_case + (half + i, 2 * half - i)
                    #switch_map[half+i] = Return(-poly_scheme_vector[i])
                    #switch_map[2*half-i] = Return(poly_scheme_vector[i])
                if poly_scheme_vector[i].get_precision() != self.precision:
                    poly_result = Conversion(poly_scheme_vector[i],
                                             precision=self.precision)
                else:
                    poly_result = poly_scheme_vector[i]
                switch_map[switch_case] = Return(factor * poly_result)
            #switch_map[sub_half] = Return(-poly_scheme_vector[sub_half])
            #switch_map[half + sub_half] = Return(poly_scheme_vector[sub_half])
            switch_map[(sub_half, half + sub_half)] = Return(
                factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        #red_func_name = "payne_hanek_cosfp32" # "payne_hanek_fp32_asm"
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        #large_arg_red = FunctionCall(payne_hanek_func, vx)
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
        #lar_modk = NearestInteger(large_arg_red, precision = ML_Int64)
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        for i in range(2**(lar_k + 1)):
            frac_pi = pi / S2**lar_k
            func = cos(frac_pi * i + frac_pi * sollya.x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases
                func = -sin(frac_pi * x) if i == 2**(lar_k -
                                                     1) else sin(frac_pi * x)
                degree_list = range(0, degree + 1, 2)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / x, degree_list, precision_list, approx_interval,
                    error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = range(degree + 1)
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i == 3 or i == 5 or i == 7 or i == 9 or i == 11 or i == 13:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            # poly_scheme = polynomial_scheme_builder(poly_object, lar_red_vx, unified_precision = self.precision, power_map_ = lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        #Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        # scheme = Statement(ConditionBlock(cond, lar_result, result))

        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
示例#8
0
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        n_hpi = self.precision.round_sollya_object(sollya.pi / 2, sollya.RN)

        if not self.skip_reduction:
            abs_x = Abs(x, tag="abs_x")

            n_invhpi = self.precision.round_sollya_object(
                2 / sollya.pi, sollya.RN)
            invhpi = Constant(n_invhpi, tag="invhpi")

            unround_k = Multiplication(abs_x, invhpi, tag="unround_k")

            k = Floor(unround_k, precision=self.precision, tag="k")

            hpi = Constant(n_hpi, tag="hpi")

            whole = Multiplication(k, hpi, tag="whole")

            r = Subtraction(abs_x, whole, tag="r")

            ik = Conversion(k,
                            precision=ML_Binary32.get_integer_format(),
                            tag="ik")

            part = Modulo(ik,
                          4,
                          precision=ML_Binary32.get_integer_format(),
                          tag="part")

            pre_part = Modulo(part,
                              2,
                              precision=ML_Binary32.get_integer_format(),
                              tag="pre_part")

            flip = Subtraction(hpi, r, tag="flip")

            do_flip = Equal(pre_part, 0, tag="do_flip")

            z = Select(do_flip, r, flip)

        else:
            z = x

        approx_interval = sollya.Interval(-2**-10, n_hpi + 2**-10)
        approx_func = sollya.cos(sollya.x)
        builder = Polynomial.build_from_approximation
        sollya.settings.prec = 2**10
        poly_object = builder(approx_func, range(0, self.poly_degree + 1, 2),
                              [self.precision] * (self.poly_degree + 1),
                              approx_interval, sollya.relative)
        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, z)
        self.poly = poly

        if not self.skip_reduction:
            post_bool = LogicalOr(Equal(part, 1, tag="part_eq_1"),
                                  Equal(part, 2, tag="part_eq_2"))

            flipped_poly = Select(post_bool, -poly, poly)

            retval = flipped_poly

        else:
            retval = poly

        scheme = Return(retval, precision=self.precision)

        return scheme
示例#9
0
        def generate_reduction_fptaylor(x):
            # get sign and abs_x, must be the same at endpoints
            if sollya.sup(x) <= 0:
                abs_x_expr = "-x"
                abs_x = -x
            elif sollya.inf(x) >= 0:
                abs_x_expr = "x"
                abs_x = x
            else:
                assert False, "Interval must not straddle 0"

            # get k, must be the same at endpoints
            unround_k = abs_x * n_invhpi
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if k_low != k_high:
                assert False, "Interval must not straddle multples of pi/2"
            k = int(k_low)
            part = k % 4
            pre_part = part % 2

            r_expr = "abs_x - whole"
            r = abs_x - k * n_hpi

            if pre_part == 0:
                z_expr = "r"
                z = r
            else:
                z_expr = "{} - r".format(n_hpi)
                z = n_hpi - r

            if part >= 1:
                flipped_poly_expr = "-poly"
            else:
                flipped_poly_expr = "poly"

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  abs_x rnd64= {};".format(abs_x_expr),
                "  whole rnd64= {} * {};".format(k, n_hpi),
                "  r rnd64= abs_x - whole;", "  z rnd64= {};".format(z_expr),
                "  poly rnd64= {};".format(poly_expr),
                "  flipped_poly rnd64= {};".format(flipped_poly_expr),
                "  retval rnd64= flipped_poly;", "Expressions", "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.cos(sollya.x), z, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.cos(sollya.x), z, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
示例#10
0
import sollya
import random
import itertools

period = 2 + 10.0 * random.random()
print("period={}".format(float(2 * sollya.pi(100) / period)))

f = lambda x: sollya.cos(period * x)


def diff(func, x0, u=0.00001):
    return (func(x0 + u) - func(x0)) / u


def search_equality(func, x0, xstart, epsilon=0.001, step=0.0001):
    f_x0 = func(x0)
    h = func(xstart) - f_x0
    while abs(h) > epsilon:
        dxs = diff(func, xstart)
        if dxs > 0 and h > 0:
            xstart -= step
        elif dxs > 0 and h < 0:
            xstart += step
        elif dxs < 0 and h > 0:
            xstart += step
        else:
            xstart -= step
        h = func(xstart) - f_x0
    return x0, xstart

示例#11
0
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN)

        if not self.skip_reduction:
            abs_x = Abs(x, tag="abs_x")

            n_invpi = self.precision.round_sollya_object(
                1 / sollya.pi, sollya.RN)
            invpi = Constant(n_invpi, tag="invpi")

            unround_k = Multiplication(abs_x, invpi, tag="unround_k")

            k = Floor(unround_k, precision=self.precision, tag="k")

            pi = Constant(n_pi, tag="pi")

            whole = Multiplication(k, pi, tag="whole")

            r = Subtraction(abs_x, whole, tag="r")

            ik = Conversion(k,
                            precision=ML_Binary32.get_integer_format(),
                            tag="ik")

            part = Modulo(ik,
                          2,
                          precision=ML_Binary32.get_integer_format(),
                          tag="part")

            z = r

        else:
            z = x

        approx_interval = sollya.Interval(-2**-7, n_pi + 2**-7)
        approx_func = sollya.cos(sollya.x)
        builder = Polynomial.build_from_approximation
        for p in range(10, 20):
            try:
                sollya.settings.prec = 2**p
                poly_object = builder(approx_func,
                                      range(0, self.poly_degree + 1,
                                            2), [self.precision] *
                                      (self.poly_degree + 1), approx_interval,
                                      sollya.relative)
            except SollyaError:
                continue

            if str(poly_object.get_sollya_object()) == "0":
                continue

            break

        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, z)
        print("LSKDFKLJK", poly_object.get_sollya_object())
        self.poly = poly

        if not self.skip_reduction:
            post_bool = Equal(part, 1, tag="part_eq_1")

            flipped_poly = Select(post_bool, -poly, poly)

            retval = flipped_poly

        else:
            retval = poly

        scheme = Return(retval, precision=self.precision)

        return scheme