Пример #1
0
    def generate_scheme(self):
        """Build the operation graph of the exponential implementation.

        The scheme handles, in order:
          * special inputs (NaN / infinity),
          * early overflow and underflow, detected directly on the input,
          * the generic path: argument reduction ``r = x - k * log(2)``
            (``log(2)`` being split into a high and a low part), polynomial
            approximation on the reduced argument and reconstruction through
            a multiplication by ``2^k``, with dedicated late overflow and
            late underflow handling.

        The polynomial degree is searched iteratively: it is incremented
        until the combined approximation and evaluation errors meet the
        error goal derived from ``self.accuracy``. When gappa is not
        installed no evaluation error can be computed and the search stops
        after the first attempt.

        Returns:
            The root ``ConditionBlock`` of the generated scheme.
        """
        # declaring the function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation: exceptions are only
        # raised when libm compliance is requested, otherwise a plain Return
        # of the requested value is generated
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input: +inf for a positive input,
        # +0 otherwise
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        # range of k = x / log(2) once early overflow / underflow inputs
        # have been excluded
        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log(2) is split into a high part with a reduced number of bits
        # (field size minus the bit size of k and 2 extra bits) and a low
        # part holding the remainder
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is the rounded quotient in floating-point format, ik the same
        # value in integer format
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # the approximation interval is split in three sub-intervals used
        # for the dichotomy of the polynomial evaluation error
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        # evaluation error of the argument reduction
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # half of the error budget is allocated to the approximation error
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # polynomial degree search: the degree is incremented until the
        # overall relative error is below the error goal
        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            # sub_poly only keeps the monomials of degree 2 and higher
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # 1 + r + sub_poly(r), with r expanded as its hi and lo parts
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            # both error computations substitute bounded variables for the
            # hi and lo parts of the reduced argument
            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one dichotomy case per sub-interval of the reduced argument
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # keeping the worst relative error over the three sub-intervals
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error

            if error_goal > global_rel_poly_error:
                break
            poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division keeps the offset integral: it is used as an
        # integer-format constant and as an inserted exponent (true division
        # would yield a fractional value for an odd field size)
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        # the scaling by 2^k is performed in two steps,
        # 2^(k - offset) then 2^offset
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        # same two-step scaling for small results:
        # 2^(k + offset) then 2^(-offset)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        # standard result: 2^k * poly
        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Пример #2
0
    def generate_scalar_scheme(self, vx):
        """Build the operation graph of a scalar sinh(vx) implementation.

        The sign of the input is extracted first and the computation is
        carried out on ``abs(vx)``. ``exp(|x|)`` and ``exp(-|x|)`` are both
        rebuilt from a shared argument reduction
        ``r = |x| - k * log(2) / 2^index_size``, a table of
        ``2^(+/- l / 2^index_size)`` values stored as hi/lo pairs, and a
        polynomial approximation of ``exp`` on the reduced argument.

        Args:
            vx: input node of the function.

        Returns:
            A ``Statement`` returning the signed result, or a signed
            infinity when ``abs(vx)`` exceeds the overflow threshold.
        """
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # log2 of the number of table entries per octave
        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # sinh(x) ~ exp(abs(x))/2    for a big enough x
        # exp(x)/2 > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
            r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                {
                    vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                    k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                })
            # the value is formatted explicitly so that it actually appears
            # in the reported message
            Log.report(Log.Verbose, "r_eval_error: {}".format(r_eval_error))

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        # k is split into k_hi (k >> index_size) and
        # k_lo (k mod 2^index_size)
        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        # each table row stores [neg_hi, neg_lo, pos_hi, pos_lo] such that
        # neg_hi + neg_lo ~ 2^(-l / 2^index_size) and
        # pos_hi + pos_lo ~ 2^(+l / 2^index_size)
        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        # the high parts are rounded on a reduced number of bits
        # (loop invariant, computed once)
        reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        # the constant coefficient is dropped (sub_poly from index 1): the
        # same polynomial is evaluated on r and on -r
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        # the index is offset by 2^index_size, selecting the second half of
        # the table
        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        # both exponents are clamped from below to the minimal normal exponent
        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        # dominant terms: difference of the scaled table high parts
        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)

        # remaining terms of (hi + lo) * (1 + poly), the hi * 1 product
        # being accounted for in hi_terms
        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # overflow threshold: asinh of the format maximal value, rounded down
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
Пример #3
0
  def generate_scheme(self):
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)
    
    Log.set_dump_stdout(True)
    
    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag: 
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")
    
    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)
    
    C_m1 = Constant(-1, precision = self.precision)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False);
    
    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)
    
    # Over/Underflow Tests
    
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))
    
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))
    
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]
    
    # Constants
    
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)
    
    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))
    
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")
    
    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)
    
    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)
    
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)
    
    r = Addition(s, t, precision = self.precision)
    
    r.set_attributes(tag = "r", debug = debug_multi)
    
    r_interval = Interval(-log_2/S2, log_2/S2)
    
    local_ulp = sup(ulp(exp(r_interval), self.precision))
    
    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)
    
    
    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")
    
    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)
    
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)
    
    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)
    
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)
    
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi) 
    
    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")
    
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset 
    
    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)
    
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0
    
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False), 
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), 
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
    
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset
    
    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)
    
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0
    
    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)
    
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal, 
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), 
            Return(late_underflow_result)
            )
    
    # Reconstruction
    
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)
    
    result_scheme = ConditionBlock(
        late_overflow_test, 
        late_overflow_return, 
        ConditionBlock(
            late_underflow_test, 
            late_underflow_return, 
            Return(std_result)
            )
        )
        
    std_return = ConditionBlock(
        overflow_test, 
        overflow_return, 
        ConditionBlock(
            underflow_test, 
            underflow_return, 
            result_scheme)
        )
        
    scheme = ConditionBlock(
        test_NaN_or_inf, 
        Statement(specific_return), 
        std_return
        )

    return scheme
Пример #4
0
    def generate_scheme(self):
        """Build the operation graph of a table-driven 2^x approximation.

        The input is scaled by 2**index_size and floored.  The upper bits of
        the resulting integer are inserted as the exponent of the result
        (``pow_exp``), the lower ``index_size`` bits select a (lo, hi) pair
        in ``exp2_table`` and the leftover fraction ``vx_frac`` is fed to a
        Horner-evaluated polynomial fitted on [0, 2**-index_size].

        Returns:
            A Statement returning +infinity when ``ov_flag`` is set and the
            reconstructed value otherwise.

        Raises:
            KeyError: if ``self.precision`` is neither ML_Binary32 nor
                ML_Binary64 (integer format lookup below).
        """
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # number of fractional bits of the scaled input used as table index
        index_size = 3

        approx_interval = Interval(0.0, 2**-index_size)
        error_goal_approx = 2**-(self.precision.get_precision())
        # integer format paired with the floating-point format
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # vx_int = floor(x * 2**index_size), kept in floating-point format
        vx_int = Floor(vx * 2**index_size,
                       precision=self.precision,
                       tag="vx_int",
                       debug=debug_multi)
        # remaining fraction, argument of the polynomial
        vx_frac = vx - (vx_int * 2**-index_size)
        vx_frac.set_attributes(tag="vx_frac",
                               debug=debug_multi,
                               unbreakable=True)
        poly_degree = sup(
            guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1
        precision_list = [1] + [self.precision] * (poly_degree)

        # integer view of vx_int, split into exponent part (hi) and
        # table index part (lo)
        vx_integer = Conversion(vx_int,
                                precision=int_precision,
                                tag="vx_integer",
                                debug=debug_multi)
        vx_int_hi = BitLogicRightShift(vx_integer,
                                       Constant(index_size),
                                       tag="vx_int_hi",
                                       debug=debug_multi)
        vx_int_lo = Modulo(vx_integer,
                           2**index_size,
                           tag="vx_int_lo",
                           debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(vx_int_hi,
                                               precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # each row stores the rounding error (column 0) and the rounded
        # value (column 1) of 2^(input_value * 2**-index_size)
        # NOTE(review): both halves of the table receive the same
        # input_value range [0, 2**index_size) -- confirm this is intended
        exp2_table = ML_Table(dimensions=[2 * 2**index_size, 2],
                              storage_precision=self.precision,
                              tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
            hi_value = round(exp2_value, self.precision.get_sollya_object(),
                             RN)
            lo_value = round(exp2_value - hi_value,
                             self.precision.get_sollya_object(), RN)
            exp2_table[i][0] = lo_value
            exp2_table[i][1] = hi_value

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # message is formatted beforehand and sent through Log like the
        # other reports of this method (the former print statement was
        # Python 2 only syntax)
        Log.report(
            Log.Info,
            "poly_approx_error: {} {}".format(
                poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        # the constant coefficient is dropped (start_index=1): the table
        # value is added back during the reconstruction below
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_frac,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # offset the index by 2**index_size to address the upper table half
        table_index = Addition(vx_int_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        lo_value_load = TableLoad(exp2_table,
                                  table_index,
                                  0,
                                  tag="lo_value_load",
                                  debug=debug_multi)
        hi_value_load = TableLoad(exp2_table,
                                  table_index,
                                  1,
                                  tag="hi_value_load",
                                  debug=debug_multi)

        # reconstruction: (hi + lo) * (1 + poly) * 2^vx_int_hi, with the
        # small terms summed first
        result = (hi_value_load +
                  (hi_value_load * poly +
                   (lo_value_load + lo_value_load * poly))) * pow_exp
        # NOTE(review): the bound is declared with self.precision while
        # vx_int_hi derives from an integer conversion -- confirm the
        # intended format of this comparison
        ov_flag = Comparison(vx_int_hi,
                             Constant(self.precision.get_emax(),
                                      precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme
Пример #5
0
    def generate_scalar_scheme(self, vx, inline_select=False):
        """Build the scalar operation graph of a table-assisted 2^x.

        The input is split as ``vx = floor(vx) + vx_r``.  ``vx_r`` is further
        split into ``r_hi`` (nearest multiple of 2**-index_size, used to
        index ``hi_part_table``) and a remainder ``r_lo`` fed to a
        Horner-evaluated polynomial fitted to 2^x - 1.  The result is rebuilt
        as ``hi_part_value * 2^floor(vx) * (1 + poly)``.

        Special cases, as wired below:
          * ``vx >= emax + 1`` yields +infinity;
          * ``vx < emin_normal`` yields ``subnormal_result`` when
            ``vx > emin_subnormal`` and zero otherwise.

        Args:
            vx: input operand node.
            inline_select: when true, the scheme is returned as nested
                Select expressions; otherwise as a Statement built from
                ConditionBlock and Return nodes.

        Returns:
            The Select expression or the Statement described above.

        Raises:
            KeyError: if ``self.precision`` is neither ML_Binary32 nor
                ML_Binary64 (integer format lookup below).
        """
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # number of fractional bits handled by the table
        index_size = 3
        r_interval = Interval(-2**(-index_size), 2**-index_size)

        local_ulp = sup(ulp(2**r_interval, self.precision))
        # NOTE(review): the value is handed to Log.report as an extra
        # positional argument -- confirm Log.report actually displays it
        Log.report(Log.Info, "ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        Log.report(Log.Info, "error goal: ", error_goal)

        # integer format paired with the floating-point format
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # Argument Reduction
        # r = x - floor(x), r >= 0
        vx_floor = Floor(vx,
                         precision=self.precision,
                         tag='vx_floor',
                         debug=debug_multi)
        vx_int = Conversion(vx_floor,
                            precision=int_precision,
                            tag="vx_int",
                            debug=debug_multi)
        vx_r = vx - vx_floor
        r_hi = NearestInteger(vx_r * 2**index_size,
                              precision=self.precision,
                              tag="r_hi",
                              debug=debug_multi)
        # clamping r_hi_int to [0, 2**index_size]: hi_part_table holds
        # 2**index_size + 1 entries, so 2**index_size is its last valid index
        r_hi_int = Max(
            Min(
                Conversion(r_hi,
                           precision=int_precision,
                           tag="r_hi_int",
                           debug=debug_multi), 2**index_size), 0)
        r_lo = vx_r - r_hi * 2**-index_size
        r_lo.set_attributes(tag="r_lo", debug=debug_multi)
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        precision_list = [1] + [self.precision] * degree

        # 2^floor(vx)
        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        # Polynomial Approx
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        Log.report(Log.Info, "poly_error : ", poly_error)
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         r_lo,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # hi_part_table[i] = 2^(i * 2**-index_size), rounded to self.precision
        hi_part_table = ML_NewTable(dimensions=[2**index_size + 1],
                                    storage_precision=self.precision,
                                    tag=self.uniquify_name("exp2_table"),
                                    const=True)
        for i in range(2**index_size + 1):
            input_value = i * 2**-index_size
            tab_value = self.precision.round_sollya_object(
                sollya.SollyaObject(2)**(input_value))
            hi_part_table[i] = tab_value

        hi_part_value = TableLoad(hi_part_table,
                                  r_hi_int,
                                  precision=self.precision,
                                  tag="hi_part_value",
                                  debug=debug_multi)

        # Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        Log.report(Log.Info, "oflow : ", oflow_bound)
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        # below emin_normal the scaling is split in two factors:
        # 2^(vx_int - uflow_bound) and 2^uflow_bound
        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = hi_part_value * exp_offset * exp_min * poly + hi_part_value * exp_offset * exp_min

        # true when the input needs one of the special-case results
        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False,
                             debug=debug_multi)

        # Reconstruction
        result = hi_part_value * exp_X * poly + hi_part_value * exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        if inline_select:
            scheme = Select(
                test_std,
                Select(test_overflow, FP_PlusInfty(self.precision),
                       Select(
                           test_subnormal,
                           subnormal_result,
                           C0,
                       )),
                result,
            )
            return scheme

        else:
            return_inf = Return(FP_PlusInfty(self.precision))
            return_C0 = Return(C0)
            return_sub = Return(subnormal_result)
            return_std = Return(result)

            non_std_statement = Statement(
                ConditionBlock(
                    test_overflow, return_inf,
                    ConditionBlock(test_subnormal, return_sub, return_C0)))

            scheme = Statement(
                ConditionBlock(test_std, non_std_statement, return_std))

            return scheme
Пример #6
0
    def generate_scheme(self):
        """Assemble the operation graph of a polynomial-only 2^x scheme.

        The input is split into its nearest integer and a remainder; the
        remainder goes through a Horner-evaluated polynomial fitted to
        2^x - 1 on [-0.5, 0.5] and the integer part is inserted as the
        exponent of the result.

        Special cases, as wired below:
          * input >= emax + 1 returns +infinity;
          * input < emin_normal returns the rescaled subnormal result when
            input > emin_subnormal, and zero otherwise.

        Returns:
            A Statement holding the nested ConditionBlock/Return scheme.
        """
        x_in = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local RaiseReturn wrapper bound to this function's input and name
        # (defined for parity with sibling schemes; not called below)
        def ExpRaiseReturn(*args, **kwargs):
            kwargs["arg_value"] = x_in
            kwargs["function_name"] = self.function_name
            return RaiseReturn(*args, **kwargs)

        def as_rare_flag(node, tag):
            """Tag `node` as an unlikely boolean predicate and return it."""
            node.set_attributes(tag=tag,
                                debug=debug_multi,
                                likely=False,
                                precision=ML_Bool)
            return node

        reduced_range = Interval(-0.5, 0.5)

        ulp_bound = sup(ulp(2**reduced_range, self.precision))
        # NOTE(review): values are handed to Log.report as an extra
        # positional argument -- confirm Log.report actually displays them
        Log.report(Log.Info, "ulp: ", ulp_bound)
        target_error = S2**-1 * ulp_bound
        Log.report(Log.Info, "error goal: ", target_error)

        # formats paired with the floating-point precision; only binary32
        # and binary64 are mapped
        sollya_format = {
            ML_Binary32: sollya.binary32,
            ML_Binary64: sollya.binary64,
        }[self.precision]
        int_format = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64,
        }[self.precision]

        # --- argument reduction: x = x_rounded + x_frac ---
        x_rounded = NearestInteger(
            x_in, precision=int_format, tag='vx_int', debug=debug_multi)
        x_rounded_fp = Conversion(x_rounded, precision=self.precision)
        x_frac = x_in - x_rounded_fp
        x_frac.set_attributes(tag="vx_r", debug=debug_multi)

        poly_degree = sup(
            guessdegree(2**(sollya.x), reduced_range, target_error)) + 2
        coeff_formats = [1] + [self.precision] * poly_degree

        # 2^x_rounded
        two_pow_int = ExponentInsertion(
            x_rounded, tag="exp_X", debug=debug_multi,
            precision=self.precision)

        # --- polynomial approximation of 2^x - 1 ---
        build_horner = PolynomialSchemeEvaluator.generate_horner_scheme

        approx_poly, approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, poly_degree, coeff_formats, reduced_range,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % approx_poly)
        Log.report(Log.Info, "poly_error : ", approx_error)
        poly_node = build_horner(
            approx_poly.sub_poly(start_index=1),
            x_frac,
            unified_precision=self.precision)
        poly_node.set_attributes(tag="poly", debug=debug_multi)

        # --- special-case bounds and predicates ---
        overflow_limit = Constant(
            self.precision.get_emax() + 1, precision=self.precision)
        subnormal_limit = self.precision.get_emin_subnormal()
        underflow_limit = self.precision.get_emin_normal()
        Log.report(Log.Info, "oflow : ", overflow_limit)

        is_overflow = as_rare_flag(
            Comparison(x_in, overflow_limit,
                       specifier=Comparison.GreaterOrEqual),
            "oflow_test")
        is_underflow = as_rare_flag(
            Comparison(x_in, underflow_limit, specifier=Comparison.Less),
            "uflow_test")
        is_above_subnormal = as_rare_flag(
            Comparison(x_in, subnormal_limit, specifier=Comparison.Greater),
            "sub_test")

        # below emin_normal the scaling is split in two factors:
        # 2^(x_rounded - underflow_limit) and 2^underflow_limit
        sub_shift = -(underflow_limit - x_rounded)
        sub_shift.set_attributes(tag="offset", debug=debug_multi)
        two_pow_shift = ExponentInsertion(
            sub_shift, precision=self.precision, debug=debug_multi,
            tag="exp_offset")
        two_pow_emin = ExponentInsertion(
            underflow_limit, precision=self.precision, debug=debug_multi,
            tag="exp_min")
        sub_result = (two_pow_shift * two_pow_emin * poly_node
                      + two_pow_shift * two_pow_emin)

        # true when the input needs one of the special-case results
        is_special = LogicalOr(
            is_overflow, is_underflow,
            precision=ML_Bool, tag="std_test", likely=False)

        # --- reconstruction: 2^x_rounded * (1 + poly) ---
        std_result = two_pow_int * poly_node + two_pow_int
        std_result.set_attributes(tag="result", debug=debug_multi)

        zero = Constant(0, precision=self.precision)

        ret_inf = Return(FP_PlusInfty(self.precision))
        ret_zero = Return(zero)
        ret_sub = Return(sub_result)
        ret_std = Return(std_result)

        underflow_branch = ConditionBlock(is_above_subnormal, ret_sub,
                                          ret_zero)
        special_branch = Statement(
            ConditionBlock(is_overflow, ret_inf, underflow_branch))

        return Statement(ConditionBlock(is_special, special_branch, ret_std))
Пример #7
0
    def generate_scheme(self):
        """Assemble the operation graph of a seed-plus-iterations square root.

        The regular path feeds the input and an InverseSquareRootSeed to
        ``compute_sqrt`` for ``self.num_iter`` iterations.  Inputs flagged as
        NaN, infinite or negative are routed to the special-case branch:
        NaN or negative returns a quiet NaN, infinity returns +infinity.

        Returns:
            The top-level ConditionBlock selecting between the regular and
            the special-case results.
        """
        operand = self.implementation.add_input_variable("x", self.precision)
        operand.set_attributes(precision=self.precision,
                               tag="vx",
                               debug=debug_multi)
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m Generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local RaiseReturn wrapper bound to this function's input and name
        # (not called below)
        def SqrtRaiseReturn(*args, **kwargs):
            kwargs["arg_value"] = operand
            kwargs["function_name"] = self.function_name
            return RaiseReturn(*args, **kwargs)

        zero = Constant(0, precision=self.precision)
        plus_zero = Constant(FP_PlusZero(self.precision))
        minus_zero = Constant(FP_MinusZero(self.precision))

        # attributes shared by every unlikely boolean predicate below
        rare_flag = dict(likely=False, debug=debug_multi, precision=ML_Bool)

        is_nan = Test(operand, specifier=Test.IsNaN, tag="is_NaN",
                      **rare_flag)
        is_inf = Test(operand, specifier=Test.IsInfty, tag="is_Inf",
                      **rare_flag)
        is_negative = Comparison(operand, zero, specifier=Comparison.Less,
                                 tag="is_Negative", **rare_flag)
        is_inf_or_nan = Test(operand, specifier=Test.IsInfOrNaN,
                             tag="is_Inf_Or_Nan", **rare_flag)
        is_nan_or_negative = LogicalOr(is_nan, is_negative,
                                       precision=ML_Bool)

        # regular path: neither NaN, nor infinite, nor negative
        is_special = LogicalOr(is_inf_or_nan, is_negative,
                               precision=ML_Bool, likely=False)
        is_regular = LogicalNot(is_special, precision=ML_Bool, likely=True)

        is_zero = Comparison(operand, zero, specifier=Comparison.Equal,
                             tag="Is_Zero", **rare_flag)

        nan_return = Statement(Return(FP_QNaN(self.precision)))
        inf_return = Statement(Return(FP_PlusInfty(self.precision)))

        plus_zero_return = Return(plus_zero)
        minus_zero_return = Return(minus_zero)

        seed = InverseSquareRootSeed(operand,
                                     precision=self.precision,
                                     tag="sqrt_seed",
                                     debug=debug_multi)

        root = compute_sqrt(operand,
                            seed,
                            int(self.num_iter),
                            precision=self.precision)

        # NOTE(review): is_regular only excludes NaN, infinite and negative
        # inputs, so a zero input appears to take the regular path and this
        # zero branch looks unreachable -- confirm
        zero_branch = ConditionBlock(is_zero, plus_zero_return,
                                     minus_zero_return)
        inf_branch = ConditionBlock(is_inf, inf_return, zero_branch)
        special_branch = ConditionBlock(is_nan_or_negative, nan_return,
                                        inf_branch)
        regular_return = Return(root)

        return ConditionBlock(is_regular, regular_return, special_branch)
Пример #8
0
    def generate_scheme(self):
        """Build the operation graph of a seed-plus-iterations square root.

        The regular path feeds the input and a ReciprocalSquareRootSeed to
        ``compute_sqrt`` for ``self.num_iter`` iterations.  Inputs flagged as
        NaN, infinite or negative are routed to the special-case branch:
        NaN or negative returns a quiet NaN, infinity returns +infinity.

        Returns:
            The top-level ConditionBlock selecting between the regular and
            the special-case results.
        """
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)
        vx.set_attributes(precision=self.precision,
                          tag="vx",
                          debug=debug_multi)
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m Generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        C0 = Constant(0, precision=self.precision)

        C0_plus = Constant(FP_PlusZero(self.precision))
        C0_minus = Constant(FP_MinusZero(self.precision))

        def local_test(specifier, tag):
            """ Local wrapper to generate Test operations.

            `tag` is the suffix only: the "is_" prefix is added here. """
            return Test(vx,
                        specifier=specifier,
                        likely=False,
                        debug=debug_multi,
                        tag="is_%s" % tag,
                        precision=ML_Bool)

        # suffixes are passed without "is_" so that the resulting tags read
        # "is_NaN", "is_Inf", "is_Inf_Or_Nan" rather than "is_is_..."
        test_NaN = local_test(Test.IsNaN, "NaN")
        test_inf = local_test(Test.IsInfty, "Inf")
        test_NaN_or_Inf = local_test(Test.IsInfOrNaN, "Inf_Or_Nan")

        test_negative = Comparison(vx,
                                   C0,
                                   specifier=Comparison.Less,
                                   debug=debug_multi,
                                   tag="is_Negative",
                                   precision=ML_Bool,
                                   likely=False)
        test_NaN_or_Neg = LogicalOr(test_NaN, test_negative, precision=ML_Bool)

        # regular path: neither NaN, nor infinite, nor negative
        test_std = LogicalNot(LogicalOr(test_NaN_or_Inf,
                                        test_negative,
                                        precision=ML_Bool,
                                        likely=False),
                              precision=ML_Bool,
                              likely=True)

        test_zero = Comparison(vx,
                               C0,
                               specifier=Comparison.Equal,
                               likely=False,
                               debug=debug_multi,
                               tag="Is_Zero",
                               precision=ML_Bool)

        return_NaN_or_neg = Statement(Return(FP_QNaN(self.precision)))
        return_inf = Statement(Return(FP_PlusInfty(self.precision)))

        return_PosZero = Return(C0_plus)
        return_NegZero = Return(C0_minus)

        NR_init = ReciprocalSquareRootSeed(vx,
                                           precision=self.precision,
                                           tag="sqrt_seed",
                                           debug=debug_multi)

        result = compute_sqrt(vx,
                              NR_init,
                              int(self.num_iter),
                              precision=self.precision)

        # NOTE(review): test_std only excludes NaN, infinite and negative
        # inputs, so a zero input appears to take the regular path and the
        # innermost zero branch looks unreachable -- confirm
        return_non_std = ConditionBlock(
            test_NaN_or_Neg, return_NaN_or_neg,
            ConditionBlock(
                test_inf, return_inf,
                ConditionBlock(test_zero, return_PosZero, return_NegZero)))
        return_std = Return(result)

        scheme = ConditionBlock(test_std, return_std, return_non_std)
        return scheme
Пример #9
0
    def generate_scheme(self):
        # declaring main input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        # declaring approximation parameters
        index_size = 6
        num_iteration = 8

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        def cbrt_newton_iteration(current_approx, input_value, input_inverse):
            # Cubic root of A is approximated by a Newton-Raphson iteration
            # on f(x) = 1 - A / x^3
            # x_n+1 = 4/3 * x_n - x_n^4 / (3 * A)
            # x_n+1 = 1/3 * (x_n * (1 - x_n^3/A) + x_n)

            approx_triple = Multiplication(
                current_approx, Multiplication(current_approx, current_approx))

            diff = FMSN(approx_triple, input_inverse,
                        Constant(1, precision=self.precision))
            injection = FMA(
                Multiplication(
                    current_approx,
                    Constant(1 / 3.0, precision=self.precision),
                ), diff, current_approx)

            new_approx = injection

            return new_approx

        reduced_vx = MantissaExtraction(vx, precision=self.precision)

        int_precision = self.precision.get_integer_format()

        cbrt_approx_table = ML_NewTable(
            dimensions=[2**index_size, 1],
            storage_precision=self.precision,
            tag=self.uniquify_name("cbrt_approx_table"))
        for i in range(2**index_size):
            input_value = 1 + i / SollyaObject(2**index_size)

            cbrt_approx = cbrt(input_value)
            cbrt_approx_table[i][0] = round(cbrt_approx,
                                            self.precision.get_sollya_object(),
                                            RN)

        # Modulo operations will returns a reduced exponent within [-3, 2]
        # so we approximate cbrt on this interval (with index offset by -3)
        cbrt_mod_table = ML_NewTable(dimensions=[6, 1],
                                     storage_precision=self.precision,
                                     tag=self.uniquify_name("cbrt_mod_table"))
        for i in range(6):
            input_value = SollyaObject(2)**(i - 3)
            cbrt_mod_table[i][0] = round(cbrt(input_value),
                                         self.precision.get_sollya_object(),
                                         RN)

        vx_int = TypeCast(reduced_vx, precision=int_precision)
        mask = BitLogicRightShift(vx_int,
                                  self.precision.get_precision() - index_size,
                                  precision=int_precision)
        mask = BitLogicAnd(mask,
                           Constant(2**index_size - 1,
                                    precision=int_precision),
                           precision=int_precision,
                           tag="table_index",
                           debug=debug_multi)
        table_index = mask

        int_precision = self.precision.get_integer_format()

        exp_vx = ExponentExtraction(vx, precision=int_precision, tag="exp_vx")
        exp_vx_third = Division(exp_vx,
                                Constant(3, precision=int_precision),
                                precision=int_precision,
                                tag="exp_vx_third")
        exp_vx_mod = Modulo(exp_vx,
                            Constant(3, precision=int_precision),
                            precision=int_precision,
                            tag="exp_vx_mod",
                            debug=debug_multi)

        # offset on modulo to make sure table index is positive
        exp_vx_mod = exp_vx_mod + 3

        cbrt_mod = TableLoad(cbrt_mod_table,
                             exp_vx_mod,
                             Constant(0),
                             tag="cbrt_mod")

        init_approx = Multiplication(
            Multiplication(
                # approx cbrt(mantissa)
                TableLoad(cbrt_approx_table,
                          table_index,
                          Constant(0, precision=ML_Int32),
                          tag="seed",
                          debug=debug_multi),
                # approx cbrt(2^(e%3))
                cbrt_mod,
                tag="init_mult",
                debug=debug_multi,
                precision=self.precision),
            # 2^(e/3)
            ExponentInsertion(exp_vx_third,
                              precision=self.precision,
                              tag="exp_vx_third",
                              debug=debug_multi),
            tag="init_approx",
            debug=debug_multi,
            precision=self.precision)

        inverse_red_vx = Division(Constant(1, precision=self.precision),
                                  reduced_vx)
        inverse_vx = Division(Constant(1, precision=self.precision), vx)

        current_approx = init_approx

        for i in range(num_iteration):
            #current_approx = cbrt_newton_iteration(current_approx, reduced_vx, inverse_red_vx)
            current_approx = cbrt_newton_iteration(current_approx, vx,
                                                   inverse_vx)
            current_approx.set_attributes(tag="approx_%d" % i,
                                          debug=debug_multi)

        result = current_approx
        result.set_attributes(tag="result", debug=debug_multi)

        # last iteration
        ext_precision = ML_DoubleDouble
        xn_2 = Multiplication(current_approx,
                              current_approx,
                              precision=ext_precision)
        xn_3 = Multiplication(current_approx, xn_2, precision=ext_precision)

        FourThird = Constant(4 / SollyaObject(3), precision=ext_precision)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(Return(result))

        return scheme
Пример #10
0
    def generate_scheme(self):
        """Build the operation graph implementing cosh(x).

        The input is replaced by its absolute value, reduced as
        r = |x| - k * log(2) / 2**index_size, and cosh is rebuilt from
        cosh(x) = 1/2 * (exp(x) + exp(-x)), where each exponential is the
        product of a table value, a polynomial in the reduced argument and
        a power of two.

        Only ML_Binary32 and ML_Binary64 are handled: any other
        self.precision raises KeyError when the integer format is looked up.

        Returns:
            A Statement returning +infinity when the overflow test is true
            and the cosh approximation otherwise.
        """
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        # NOTE(review): this helper is never called in this function.
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # the exponential table holds 2 * 2**index_size rows
        index_size = 3

        # from here on vx designates |x|
        vx = Abs(vx)
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        # NOTE(review): the 2**1024 bound is hard-coded whatever
        # self.precision is -- presumably sized for binary64, confirm.
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        # split log(2) / 2**index_size into a short high part and a low
        # correction term
        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        # evaluation error of r_hi, with vx and k replaced by
        # interval-annotated variables
        # NOTE(review): the [0, 715] and [0, 1024] bounds are hard-coded.
        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=int_precision)
            })
        # reported through Log (as the other diagnostics of this function)
        # rather than a Python-2-only print statement
        Log.report(Log.Info, "r_eval_error: %s" % (r_eval_error,))

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        # k = k_hi * 2**index_size + k_lo
        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        # NOTE(review): pow_exp is not used in the rest of this function.
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # row i stores hi/lo parts of 2^(-v / 2^index_size) in columns 0-1
        # and of 2^(+v / 2^index_size) in columns 2-3, where v = i for the
        # first 2^index_size rows and v = i - 2^index_size for the others
        exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                                 RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value,
                                 self.precision.get_sollya_object(), RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        #
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # reported through Log rather than a Python-2-only print statement
        Log.report(
            Log.Info, "poly_approx_error: %s %s" %
            (poly_approx_error, float(log2(poly_approx_error))))

        # the degree-0 coefficient is dropped from both polynomials
        # (sub_poly(start_index=1)); the table value is added back on its
        # own when pos_exp / neg_exp are assembled below
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        # offset k_lo by 2^index_size to address the second half of the table
        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        # exponents h - 1 and -h - 1 (the -1 carries the 1/2 factor of cosh),
        # each bounded below by the format's get_emin_normal() value
        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        # (hi + lo) * (1 + poly) * 2^k_plus
        pos_exp = (
            pos_value_load_hi +
            (pos_value_load_hi * poly_pos +
             (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        # (hi + lo) * (1 + poly) * 2^k_neg
        neg_exp = (
            neg_value_load_hi +
            (neg_value_load_hi * poly_neg +
             (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(pos_exp,
                          neg_exp,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value
        # threshold is acosh(max value) rounded down; vx already holds
        # Abs(x) here, so the extra Abs below is redundant
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme