def generate_test_wrapper(self, tensor_descriptors, input_tables,
                              output_tables):
        """ Build the "test_wrapper" function wrapping the tensor test loop

            The generated function runs the loop returned by
            self.get_tensor_test_wrapper, then prints a success message
            and returns 0 (ML_Int32).

            @param tensor_descriptors forwarded to self.get_tensor_test_wrapper
            @param input_tables forwarded to self.get_tensor_test_wrapper
            @param output_tables forwarded to self.get_tensor_test_wrapper

            @return FunctionGroup containing the single wrapper CodeFunction
        """
        wrapper = CodeFunction("test_wrapper", output_format=ML_Int32)

        function_under_test = self.implementation.get_function_object()
        success_message = (
            "\"test successful %s\\n\"" % self.implementation.get_name())

        # failure reporting function object (built but not referenced
        # by the scheme assembled below)
        report_failure = FunctionObject(
            "report_failure", [], ML_Void,
            FunctionOperator("report_failure"))

        # printf call emitting the success message
        print_success = FunctionObject(
            "printf", [], ML_Void,
            FunctionOperator("printf",
                             arg_map={0: success_message},
                             void_function=True,
                             require_header=["stdio.h"]))

        # local accumulator for the number of elements
        element_count = Variable("acc_num",
                                 precision=ML_Int64,
                                 var_type=Variable.Local)

        check_loop = self.get_tensor_test_wrapper(
            function_under_test,
            tensor_descriptors,
            input_tables,
            output_tables,
            element_count,
            self.generate_tensor_check_loop)

        # test scheme shared by scalar and vector functions:
        # run the checks, report success, return 0
        wrapper.set_scheme(
            Statement(check_loop,
                      print_success(),
                      Return(Constant(0, precision=ML_Int32))))
        return FunctionGroup([wrapper])
  def externalize_call(self, optree, arg_list, tag = "foo", result_format = None):
    """ Wrap the sub-graph @p optree into a newly declared CodeFunction

        @param optree operation graph used as the function body
        @param arg_list nodes of @p optree which become the function inputs
        @param tag seed given to self.name_factory to generate the function name
        @param result_format return format of the function; when None,
               optree.get_precision() is used instead

        @return CodeFunction implementing @p optree (its function object is
                also declared in self.name_factory)

        An AssertionError is raised when no return format can be determined
    """
    # determining return format
    return_format = optree.get_precision() if result_format is None else result_format
    # the message has to be the second operand of assert: and-ing it into
    # the condition (as done previously) silently discarded it
    assert return_format is not None, "external call result format must be defined"
    function_name = self.name_factory.declare_free_function_name(tag)

    ext_function = CodeFunction(function_name, output_format = return_format)

    # creating argument copy: each argument node is mapped to a new
    # input variable of the external function
    arg_map = {}
    for arg_index, arg in enumerate(arg_list):
      arg_tag = arg.get_tag(default = "arg_%d" % arg_index)
      arg_map[arg] = ext_function.add_input_variable(arg_tag, arg.get_precision())

    # copying optree while swapping argument for variables
    optree_copy = optree.copy(copy_map = arg_map)
    # instantiating external function scheme: an arithmetic operation is
    # wrapped into a Return, any other node is used directly as statement
    if isinstance(optree, ML_ArithmeticOperation):
      function_optree = Statement(Return(optree_copy))
    else:
      function_optree = Statement(optree_copy)
    ext_function.set_scheme(function_optree)
    self.name_factory.declare_function(function_name, ext_function.get_function_object())

    return ext_function
# Example #3
# 0
def generate_function_from_optree(name_factory,
                                  optree,
                                  arg_list,
                                  tag="foo",
                                  result_format=None):
    """ Function which transforms a sub-graph @p optree whose inputs are
        @p arg_list into a meta function

        @param name_factory engine to generate unique function name and to
            register function
        @param optree operation graph to be incorporated as function body
        @param arg_list list of @p optree's parameters to be used as function
            arguments
        @param tag string to be used as seed to generate function name
        @param result_format hint to indicate function's return format (if
            optree is not an arithmetic operation, e.g. it already contains a
            Return node, then @p result_format must be used to specify the
            function return format)

        @return CodeFunction object containing the function implementation
            (plus the function would have been declared into name_factory)

        An AssertionError is raised when no return format can be determined
        """
    # determining return format
    if result_format is None:
        return_format = optree.get_precision()
    else:
        return_format = result_format
    # the message has to be the second operand of assert: and-ing it into
    # the condition (as done previously) silently discarded it
    assert return_format is not None, \
        "external call result format must be defined"
    function_name = name_factory.declare_free_function_name(tag)

    ext_function = CodeFunction(function_name, output_format=return_format)

    # creating argument copy: each argument node is mapped to a new
    # input variable of the external function
    arg_map = {}
    for arg_index, arg in enumerate(arg_list):
        arg_tag = arg.get_tag(default="arg_%d" % arg_index)
        arg_map[arg] = ext_function.add_input_variable(arg_tag,
                                                       arg.get_precision())

    # extracting const tables to make sure they are not duplicated:
    # mapping a table to itself makes the copy below reuse it
    table_set = extract_tables(optree)
    arg_map.update({table: table for table in table_set if table.const})

    # copying optree while swapping argument for variables
    optree_copy = optree.copy(copy_map=arg_map)
    # instantiating external function scheme: an arithmetic operation is
    # wrapped into a Return, any other node is used directly as statement
    if isinstance(optree, ML_ArithmeticOperation):
        function_optree = Statement(Return(optree_copy))
    else:
        function_optree = Statement(optree_copy)
    ext_function.set_scheme(function_optree)
    name_factory.declare_function(function_name,
                                  ext_function.get_function_object())

    return ext_function
# Example #4
# 0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log1pf.c",
                 function_name="log1pf"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # debug utilities
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%\"PRIx64\"", )
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)
        debugld = ML_Debug(display_format="%ld")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        log2_hi_value = round(
            log(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(
            log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in xrange(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1
            value_high = round(
                log(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        # case close to 0: ctz
        ctz_exp_limit = -7
        ctz_cond = vx_exp < ctz_exp_limit
        ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

        ctz_poly_degree = sup(
            guessdegree(
                log1p(sollya.x) / sollya.x, ctz_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        ctz_poly_object = Polynomial.build_from_approximation(
            log1p(sollya.x) / sollya.x, ctz_poly_degree,
            [self.precision] * (ctz_poly_degree + 1), ctz_interval,
            sollya.absolute)

        print "generating polynomial evaluation scheme"
        ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            ctz_poly_object, vx, unified_precision=self.precision)
        ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx)

        ctz_result = vx * ctz_poly

        neg_input = Comparison(vx,
                               -1,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")

        log_function_code = CodeFunction(
            "new_log", [Variable("x", precision=ML_Binary64)],
            output_format=ML_Binary64)
        log_call_generator = FunctionOperator(
            log_function_code.get_name(),
            arity=1,
            output_precision=ML_Binary64,
            declare_prototype=log_function_code)
        newlog_function = FunctionObject(log_function_code.get_name(),
                                         (ML_Binary64, ), ML_Binary64,
                                         log_call_generator)

        # case away from 0.0
        pre_vxp1 = vx + 1.0
        pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx)
        pre_vxp1_exp = ExponentExtraction(pre_vxp1,
                                          tag="pre_vxp1_exp",
                                          debug=debugd)
        cm500 = Constant(-500, precision=ML_Int32)
        c0 = Constant(0, precision=ML_Int32)
        cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() -
                                          2)
        scaling_factor_exp = Select(cond_scaling, cm500, c0)
        scaling_factor = ExponentInsertion(scaling_factor_exp,
                                           precision=self.precision,
                                           tag="scaling_factor")

        vxp1 = pre_vxp1 * scaling_factor
        vxp1.set_attributes(tag="vxp1", debug=debug_lftolx)
        vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd)

        vxp1_inv = DivisionSeed(vxp1,
                                precision=self.precision,
                                tag="vxp1_inv",
                                debug=debug_lftolx,
                                silent=True)

        vxp1_dirty_inv = ExponentInsertion(-vxp1_exp,
                                           precision=self.precision,
                                           tag="vxp1_dirty_inv",
                                           debug=debug_lftolx)

        table_index = BitLogicAnd(BitLogicRightShift(
            TypeCast(vxp1, precision=int_precision, debug=debuglx),
            self.precision.get_field_size() - 7,
            debug=debuglx),
                                  0x7f,
                                  tag="table_index",
                                  debug=debuglx)

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv,
                                                          precision=ML_UInt64),
                                                 Constant(-2,
                                                          precision=ML_UInt64),
                                                 precision=ML_UInt64),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_lftolx)
        arg_red_index = Select(Equal(table_index, 0),
                               vxp1_dirty_inv,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_lftolx)

        red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0,
                          (arg_red_index * vx - 1.0) + arg_red_index)
        #red_vxp1 = arg_red_index * vxp1 - 1.0
        red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx)

        log_inv_lo = TableLoad(log_table,
                               table_index,
                               1,
                               tag="log_inv_lo",
                               debug=debug_lftolx)
        log_inv_hi = TableLoad(log_table,
                               table_index,
                               0,
                               tag="log_inv_hi",
                               debug=debug_lftolx)

        inv_err = S2**-6  # TODO: link to target DivisionSeed precision

        print "building mathematical polynomial"
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        print "generating polynomial evaluation scheme"
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vxp1, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print global_poly_object.get_sollya_object()

        vxp1_inv_exp = ExponentExtraction(vxp1_inv,
                                          tag="vxp1_inv_exp",
                                          debug=debugd)
        corr_exp = -vxp1_exp + scaling_factor_exp  # vxp1_inv_exp

        #poly = (red_vxp1) * (1 +  _poly)
        #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

        pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly +
                                    (-corr_exp * log2_lo - log_inv_lo))
        pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
        exact_log2_hi_exp = -corr_exp * log2_hi
        exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp",
                                         debug=debug_lftolx,
                                         prevent_optimization=True)
        #std_result =  exact_log2_hi_exp + pre_result

        exact_log2_lo_exp = -corr_exp * log2_lo
        exact_log2_lo_exp.set_attributes(
            tag="exact_log2_lo_exp",
            debug=debug_lftolx)  #, prevent_optimization = True)

        init = exact_log2_lo_exp - log_inv_lo
        init.set_attributes(tag="init",
                            debug=debug_lftolx,
                            prevent_optimization=True)
        fma0 = (red_vxp1 * _poly + init)  # - log_inv_lo)
        fma0.set_attributes(tag="fma0", debug=debug_lftolx)
        step0 = fma0
        step0.set_attributes(
            tag="step0", debug=debug_lftolx)  #, prevent_optimization = True)
        step1 = step0 + red_vxp1
        step1.set_attributes(tag="step1",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        step2 = -log_inv_hi + step1
        step2.set_attributes(tag="step2",
                             debug=debug_lftolx,
                             prevent_optimization=True)
        std_result = exact_log2_hi_exp + step2
        std_result.set_attributes(tag="std_result",
                                  debug=debug_lftolx,
                                  prevent_optimization=True)

        # main scheme
        print "MDL scheme"
        pre_scheme = ConditionBlock(
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal, Return(vx),
                    ConditionBlock(ctz_cond, Statement(Return(ctz_result), ),
                                   Statement(Return(std_result))))))
        scheme = pre_scheme

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print "MDL fusing FMA"
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        #print self.result.get(cg)
        output_stream = open("%s.c" % func_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
# Example #5
# 0
    def __init__(self,
                 precision=ML_Binary64,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log_fixed.c",
                 function_name="log_fixed"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # debug utilities
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%\"PRIx64\"", )
        debugd = ML_Debug(display_format="%d",
                          pre_process=lambda v: "(int) %s" % v)
        debugld = ML_Debug(display_format="%ld")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        vx_exp = RawSignExpExtraction(vx,
                                      tag="vx_exp",
                                      precision=ML_Int32,
                                      debug=debugd)
        vx_exp_u = Conversion(vx_exp, precision=ML_UInt32)
        vx_exp_u.set_precision(ML_UInt32)
        tt = CountLeadingZeros(vx_exp_u)
        tt_u = Conversion(tt, precision=ML_UInt32)
        t = tt_u + vx_exp_u
        scheme = Statement(Return(t))

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        if fuse_fma:
            print "MDL fusing FMA"
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        #print self.result.get(cg)
        output_stream = open("%s.c" % func_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
# Example #6
# 0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="sinf.c",
                 function_name="sinf"):
        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        self.precision = precision
        self.processor = target
        func_implementation = CodeFunction(self.function_name,
                                           output_format=self.precision)
        vx = func_implementation.add_input_variable("x", self.precision)

        sollya_precision = self.precision.sollya_object

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(ML_Binary32)))

        int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32

        inv_pi_value = 1 / pi

        # argument reduction
        mod_pi_x = NearestInteger(vx * inv_pi_value)
        red_vx = vx - mod_pi_x * pi

        approx_interval = Interval(0, pi / 2)

        poly_degree = sup(
            guessdegree(
                sin(sollya.x) / sollya.x, approx_interval, S2**
                -(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            sin(sollya.x) / sollya.x, poly_degree,
            [self.precision] * (poly_degree + 1), approx_interval,
            sollya.absolute)
        poly_object = global_poly_object  #.sub_poly(start_index = 1)

        print "generating polynomial evaluation scheme"
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, red_vx, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        print global_poly_object.get_sollya_object()

        pre_result = vx * _poly

        result = pre_result
        result.set_attributes(tag="result", debug=debug_lftolx)

        # main scheme
        print "MDL scheme"
        scheme = Statement(Return(result))

        #print scheme.get_str(depth = None, display_precision = True)

        opt_eng = OptimizationEngine(self.processor)

        # fusing FMA
        print "MDL fusing FMA"
        scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        #print scheme.get_str(depth = None, display_precision = True)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        print "silencing operation"
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        func_implementation.set_scheme(scheme)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        opt_eng.factorize_fast_path(scheme)
        #print scheme.get_str(depth = None, display_precision = True)

        cg = CCodeGenerator(self.processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = func_implementation.get_definition(cg,
                                                         C_Code,
                                                         static_cst=True)
        #print self.result.get(cg)
        output_stream = open("%s.c" % func_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
# Example #7
# 0
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        """Build the ``bench_wrapper`` function timing the tested function.

        The generated function reads a timestamp, runs the array test loop
        ``loop_num`` times, reads the timestamp again, prints the number of
        processed elements, the timestamp difference and their ratio, and
        returns that ratio.

        Args:
            test_num: number of randomly sized arrays; also the number of
                rows of the (size, offset) table handed to the test loop.
            loop_num: number of repetitions of the whole test loop.
            test_ranges: one interval per input array, from which the random
                input values are drawn. The default list is never mutated
                by this method.
            debug: not referenced by this method.

        Returns:
            A FunctionGroup holding the single ``bench_wrapper`` function.
        """
        # interval where the array length is chosen from (randomly)
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        # NOTE(review): the report_failure / printf-success helpers below are
        # built but never referenced in this method; they are kept because
        # their constructors are not visible from here.
        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True)
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        test_total = test_num

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals to 1 as to 0-th input is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # concatenating standard test array at the beginning of randomly
        # generated array
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for _ in range(test_num)
        ]
        # OFFSET_VALUES[i] is the sum of the sizes of the i first arrays,
        # accumulated in a single pass rather than re-summing each prefix
        OFFSET_VALUES = []
        running_offset = 0
        for row_id in range(test_total):
            OFFSET_VALUES.append(running_offset)
            running_offset += TABLE_SIZE_VALUES[row_id]

        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [
            self.get_input_precision(INPUT_INDEX_OFFSET).get_data_precision()
        ]
        rng_map = [
            get_precision_rng(precision, inf(test_range), sup(test_range))
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        def make_value_gen(precision, rng):
            # precision and rng are bound here, once per table, so the
            # returned callable does not depend on the value the
            # comprehension variable holds when it is eventually called
            return lambda _: precision.round_sollya_object(
                rng.get_new_value(), sollya.RN)

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                self.get_input_precision(INPUT_INDEX_OFFSET +
                                         table_id).get_data_precision(),
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=make_value_gen(input_precisions[table_id],
                                         rng_map[table_id]))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array (declared empty and non-constant)
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            output_precision,
            self.uniquify_name("output_array"),
            #value_gen=(lambda _: FP_QNaN(self.precision))
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            # benchmarking only: nothing is checked after each call
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        # printf(<format>, element count, timer value, ratio)
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
                % function_name,
                1:
                FO_Arg(0),
                2:
                FO_Arg(1),
                3:
                FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
        loop_increment = 1

        # bench measure: timer value divided by the number of elements
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
            self.processor.get_init_timestamp(),
            # timer first holds the start timestamp ...
            ReferenceAssign(timer, self.processor.get_current_timestamp()),
            ReferenceAssign(acc_num, 0),
            Loop(
                ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
                vj < loop_num_cst,
                Statement(test_loop, ReferenceAssign(vj,
                                                     vj + loop_increment))),
            # ... and is then overwritten with (end timestamp - start)
            ReferenceAssign(
                timer,
                Subtraction(self.processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            printf_timing_function(
                Conversion(acc_num, precision=ML_Int64),
                timer,
                cpe_measure,
            ),
            Return(cpe_measure),
            # Return(Constant(0, precision = ML_Int32))
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
示例#8
0
    def __init__(self,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 num_iter=3,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="__divsf3.c",
                 function_name="__divsf3"):
        # Builds the division function `function_name(x, y)`: a division seed
        # is refined by `num_iter` Newton-Raphson iterations on the
        # reciprocal, then multiplied by the dividend and corrected. The C
        # code is written to `output_file`, and a Gappa script bounding the
        # distance between the refined reciprocal and 1/y is generated and
        # run.
        #
        # Parameters:
        #   precision: format of both inputs and of the result
        #   abs_accuracy: not referenced in this constructor
        #   libm_compliant: forwarded to CCodeGenerator
        #   debug_flag: when False, debug is disabled in the C generator
        #   fuse_fma: when True, the FMA fusing pass is applied to the scheme
        #   num_iter: number of Newton-Raphson iterations
        #   fast_path_extract: not referenced in this constructor
        #   target: processor used for support checks and code generation
        #   output_file: path the generated C source is written to
        #   function_name: name of the generated function
        #
        # NOTE(review): `target=GenericProcessor()` is evaluated once, when
        # the method is defined, so every call relying on the default shares
        # the same processor object.

        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name,
                                          output_format=precision)
        vx = exp_implementation.add_input_variable("x", precision)
        vy = exp_implementation.add_input_variable("y", precision)
        processor = target

        # One Newton-Raphson step on an approximation of 1/divisor. Without
        # force_fma:
        #   error      = 1 - divisor * approx
        #   new_approx = approx + error * approx
        # With force_fma the two expressions are built from explicit
        # FusedMultiplyAdd nodes (SubtractNegate, then Standard specifier).
        class NR_Iteration(object):
            def __init__(self, approx, divisor, force_fma=False):
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    self.error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    self.new_approx = FusedMultiplyAdd(
                        self.error,
                        self.approx,
                        self.approx,
                        specifier=FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                return self.new_approx

            # Registers three hints on `gappa_code` through `gcg.add_hint`:
            # (rule0, rule1), (rule2, rule3) and (subrule, rule4). The
            # expressions are rebuilt on the nodes currently attached to the
            # handles of divisor, approx and new_approx, with `exact` as the
            # reference value, while ML_Exact is the default precision.
            def get_hint_rules(self, gcg, gappa_code, exact):
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                Attributes.set_default_precision(ML_Exact)

                if self.force_fma:
                    rule0 = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                else:
                    rule0 = 1.0 - divisor * approx
                rule1 = 1.0 - divisor * (approx - exact) - 1.0

                rule2 = new_approx - exact
                subrule = approx * (2 - divisor * approx)
                rule3 = (new_approx - subrule
                         ) - (approx - exact) * (approx - exact) * divisor

                if self.force_fma:
                    new_error = FusedMultiplyAdd(
                        divisor,
                        approx,
                        1.0,
                        specifier=FusedMultiplyAdd.SubtractNegate)
                    rule4 = FusedMultiplyAdd(new_error, approx, approx)
                else:
                    rule4 = approx + (1 - divisor * approx) * approx

                Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        # debug display descriptors attached to intermediate nodes below
        # (debuglf, debugx and debug_dd are not used in this constructor)
        debugf = ML_Debug(display_format="%f")
        debuglf = ML_Debug(display_format="%lf")
        debugx = ML_Debug(display_format="%x")
        debuglx = ML_Debug(display_format="%lx")
        debugd = ML_Debug(display_format="%d")
        #debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_lftolx = ML_Debug(
            display_format="%\"PRIx64\" ev=%x",
            pre_process=lambda v:
            "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v)
        debug_ddtolx = ML_Debug(
            display_format="%\"PRIx64\" %\"PRIx64\"",
            pre_process=lambda v:
            "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" %
            (v, v))
        debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}",
                            pre_process=lambda v: "%s.hi, %s.lo" % (v, v))

        # exponents of x and y, clamped to [-1020, 1020]
        ex = Max(Min(ExponentExtraction(vx), 1020),
                 -1020,
                 tag="ex",
                 debug=debugd)
        ey = Max(Min(ExponentExtraction(vy), 1020),
                 -1020,
                 tag="ey",
                 debug=debugd)

        # unclamped exponents (not referenced further in this constructor)
        exact_ex = ExponentExtraction(vx, tag="exact_ex")
        exact_ey = ExponentExtraction(vy, tag="exact_ey")

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        # initial approximation (division seed); stays None when no
        # supported seed operation is found below
        init_approx = None

        # each operand is multiplied by the exponent insertion of its negated
        # clamped exponent
        scaling_factor_x = ExponentInsertion(-ex, tag="sfx_ei")
        scaling_factor_y = ExponentInsertion(-ey, tag="sfy_ei")

        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug=debug_lftolx, tag="scaled_vx")
        scaled_vy.set_attributes(debug=debug_lftolx, tag="scaled_vy")

        # NOTE(review): the scaled operands are forced to ML_Binary64
        # whatever `precision` is — confirm this is intended for binary32
        scaled_vx.set_precision(ML_Binary64)
        scaled_vy.set_precision(ML_Binary64)

        # forcing vx precision to make processor support test
        init_approx_precision = DivisionSeed(scaled_vx,
                                             scaled_vy,
                                             precision=self.precision,
                                             tag="seed",
                                             debug=debug_lftolx)
        if not processor.is_supported_operation(init_approx_precision):
            # no seed in the working precision: try a binary32 seed on
            # converted operands, converted back to the working precision
            if self.precision != ML_Binary32:
                # NOTE(review): the trailing `if ... else vx` / `else vy` is
                # redundant, the enclosing test already guarantees
                # self.precision != ML_Binary32
                px = Conversion(
                    scaled_vx, precision=ML_Binary32, tag="px",
                    debug=debugf) if self.precision != ML_Binary32 else vx
                py = Conversion(
                    scaled_vy, precision=ML_Binary32, tag="py",
                    debug=debugf) if self.precision != ML_Binary32 else vy

                init_approx_fp32 = Conversion(DivisionSeed(
                    px, py, precision=ML_Binary32, tag="seed", debug=debugf),
                                              precision=self.precision,
                                              tag="seed_ext",
                                              debug=debug_lftolx)
                # NOTE(review): the messages below mention an "inverse square
                # root seed" although the operation tested is a DivisionSeed
                if not processor.is_supported_operation(init_approx_fp32):
                    Log.report(
                        Log.Error,
                        "The target %s does not implement inverse square root seed"
                        % processor)
                else:
                    init_approx = init_approx_fp32
            else:
                Log.report(
                    Log.Error,
                    "The target %s does not implement inverse square root seed"
                    % processor)
        else:
            init_approx = init_approx_precision

        # NOTE(review): init_approx is still None here if no seed was
        # supported and Log.report(Log.Error, ...) returned — confirm that
        # it aborts
        current_approx_std = init_approx
        # correctly-rounded inverse computation
        num_iteration = num_iter

        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

        # Builds the division graph from a seed `_init_approx`:
        #   - num_iteration NR_Iteration steps with _vy as divisor (only the
        #     last one forces FMA nodes),
        #   - a quotient estimate _vx * approximation, corrected by FMSN/FMA
        #     steps,
        #   - when scale_result is given, a rescaling of the result split
        #     into three exponent insertions, the first two being built from
        #     values clamped to [-950, 950],
        #   - a second result going through a Subnormalize operation, always
        #     rescaled by `ex - ey` taken from the enclosing scope.
        # Returns (result, subnormal_result, final approximation of the
        # reciprocal, list of the NR_Iteration objects).
        def compute_div(_init_approx, _vx=None, _vy=None, scale_result=None):
            inv_iteration_list = []
            Attributes.set_default_rounding_mode(ML_RoundToNearest)
            Attributes.set_default_silent(True)
            _current_approx = _init_approx
            for i in range(num_iteration):
                new_iteration = NR_Iteration(
                    _current_approx,
                    _vy,
                    force_fma=False if (i != num_iteration - 1) else True)
                inv_iteration_list.append(new_iteration)
                _current_approx = new_iteration.get_new_approx()
                _current_approx.set_attributes(tag="iter_%d" % i,
                                               debug=debug_lftolx)

            # one correction step of the quotient estimate; the `force_fma`
            # parameter is accepted but not used
            def dividend_mult(div_approx,
                              inv_approx,
                              dividend,
                              divisor,
                              index,
                              force_fma=False):
                #yerr = dividend - div_approx * divisor
                yerr = FMSN(div_approx, divisor, dividend)
                yerr.set_attributes(tag="yerr%d" % index, debug=debug_lftolx)
                #new_div = div_approx + yerr * inv_approx
                new_div = FMA(yerr, inv_approx, div_approx)
                new_div.set_attributes(tag="new_div%d" % index,
                                       debug=debug_lftolx)
                return new_div

            # multiplication correction iteration
            # to get correctly rounded full division
            _current_approx.set_attributes(tag="final_approx",
                                           debug=debug_lftolx)
            current_div_approx = _vx * _current_approx
            num_dividend_mult_iteration = 1
            for i in range(num_dividend_mult_iteration):
                current_div_approx = dividend_mult(current_div_approx,
                                                   _current_approx, _vx, _vy,
                                                   i)

            # last iteration
            yerr_last = FMSN(current_div_approx, _vy,
                             _vx)  #, clearprevious = True)
            # the default rounding mode / silent attributes are dropped
            # before building the final FMA, which uses ML_GlobalRoundMode
            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()
            last_div_approx = FMA(yerr_last,
                                  _current_approx,
                                  current_div_approx,
                                  rounding_mode=ML_GlobalRoundMode)

            yerr_last.set_attributes(tag="yerr_last", debug=debug_lftolx)

            pre_result = last_div_approx
            pre_result.set_attributes(tag="unscaled_div_result",
                                      debug=debug_lftolx)
            if scale_result != None:
                # scale_result is split into three terms whose sum is
                # scale_result; the first two are clamped to [-950, 950]
                #result = pre_result * ExponentInsertion(ex) * ExponentInsertion(-ey)
                scale_factor_0 = Max(Min(scale_result, 950),
                                     -950,
                                     tag="scale_factor_0",
                                     debug=debugd)
                scale_factor_1 = Max(Min(scale_result - scale_factor_0, 950),
                                     -950,
                                     tag="scale_factor_1",
                                     debug=debugd)
                scale_factor_2 = scale_result - (scale_factor_1 +
                                                 scale_factor_0)
                scale_factor_2.set_attributes(debug=debugd,
                                              tag="scale_factor_2")

                result = ((pre_result * ExponentInsertion(scale_factor_0)) *
                          ExponentInsertion(scale_factor_1)
                          ) * ExponentInsertion(scale_factor_2)
            else:
                result = pre_result
            result.set_attributes(tag="result", debug=debug_lftolx)

            # same final FMA evaluated in ML_DoubleDouble, input of the
            # Subnormalize operation
            ext_pre_result = FMA(yerr_last,
                                 _current_approx,
                                 current_div_approx,
                                 precision=ML_DoubleDouble,
                                 tag="ext_pre_result",
                                 debug=debug_ddtolx)
            subnormal_pre_result = SpecificOperation(
                ext_pre_result,
                ex - ey,
                precision=self.precision,
                specifier=SpecificOperation.Subnormalize,
                tag="subnormal_pre_result",
                debug=debug_lftolx)
            # same three-term split as above, applied to ex - ey
            sub_scale_factor = ex - ey
            sub_scale_factor_0 = Max(Min(sub_scale_factor, 950),
                                     -950,
                                     tag="sub_scale_factor_0",
                                     debug=debugd)
            sub_scale_factor_1 = Max(Min(sub_scale_factor - sub_scale_factor_0,
                                         950),
                                     -950,
                                     tag="sub_scale_factor_1",
                                     debug=debugd)
            sub_scale_factor_2 = sub_scale_factor - (sub_scale_factor_1 +
                                                     sub_scale_factor_0)
            sub_scale_factor_2.set_attributes(debug=debugd,
                                              tag="sub_scale_factor_2")
            #subnormal_result = (subnormal_pre_result * ExponentInsertion(ex, tag ="sr_ex_ei")) * ExponentInsertion(-ey, tag = "sr_ey_ei")
            subnormal_result = (
                subnormal_pre_result *
                ExponentInsertion(sub_scale_factor_0)) * ExponentInsertion(
                    sub_scale_factor_1,
                    tag="sr_ey_ei") * ExponentInsertion(sub_scale_factor_2)
            subnormal_result.set_attributes(debug=debug_lftolx,
                                            tag="subnormal_result")
            return result, subnormal_result, _current_approx, inv_iteration_list

        # node testing that bit `bit_id` of fp_optree, cast to ML_Int64
        # through TypeCast, is non-zero
        def bit_match(fp_optree, bit_id, likely=False, **kwords):
            return NotEqual(BitLogicAnd(
                TypeCast(fp_optree, precision=ML_Int64), 1 << bit_id),
                            0,
                            likely=likely,
                            **kwords)

        # ORs the top bit of sign_source (position taken from
        # self.precision.bit_size, not from fp_precision) into sign_dest and
        # casts the result to fp_precision. `**kwords` is accepted but not
        # used, and this helper is only referenced from a commented-out line
        # in this constructor.
        def extract_and_inject_sign(sign_source,
                                    sign_dest,
                                    int_precision=ML_Int64,
                                    fp_precision=self.precision,
                                    **kwords):
            int_sign_dest = sign_dest if isinstance(
                sign_dest.get_precision(), ML_Fixed_Format) else TypeCast(
                    sign_dest, precision=int_precision)
            return TypeCast(BitLogicOr(
                BitLogicAnd(TypeCast(sign_source, precision=int_precision),
                            1 << (self.precision.bit_size - 1)),
                int_sign_dest),
                            precision=fp_precision)

        # special-value predicates on the raw inputs
        x_zero = Test(vx, specifier=Test.IsZero, likely=False)
        y_zero = Test(vy, specifier=Test.IsZero, likely=False)

        comp_sign = Test(vx,
                         vy,
                         specifier=Test.CompSign,
                         tag="comp_sign",
                         debug=debuglx)

        y_nan = Test(vy, specifier=Test.IsNaN, likely=False)

        x_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False)
        y_snan = Test(vy, specifier=Test.IsSignalingNaN, likely=False)

        x_inf = Test(vx, specifier=Test.IsInfty, likely=False, tag="x_inf")
        y_inf = Test(vy,
                     specifier=Test.IsInfty,
                     likely=False,
                     tag="y_inf",
                     debug=debugd)

        # values filled by one of the two generation paths below and reused
        # by the Gappa script generation at the end
        scheme = None
        gappa_vx, gappa_vy = None, None
        gappa_init_approx = None
        gappa_current_approx = None

        if isinstance(processor, K1B_Processor):
            print "K1B specific generation"

            gappa_vx = vx
            gappa_vy = vy

            # fast path seeds on the raw inputs, slow path on the scaled ones
            fast_init_approx = DivisionSeed(vx,
                                            vy,
                                            precision=self.precision,
                                            tag="fast_init_approx",
                                            debug=debug_lftolx)
            slow_init_approx = DivisionSeed(scaled_vx,
                                            scaled_vy,
                                            precision=self.precision,
                                            tag="slow_init_approx",
                                            debug=debug_lftolx)

            gappa_init_approx = fast_init_approx

            # bits 0 to 10 of the fast seed result are read as individual
            # flags, named by the tags below
            specific_case = bit_match(fast_init_approx,
                                      0,
                                      tag="b0_specific_case_bit",
                                      debug=debugd)
            y_subnormal_or_zero = bit_match(fast_init_approx,
                                            1,
                                            tag="b1_y_sub_or_zero",
                                            debug=debugd)
            x_subnormal_or_zero = bit_match(fast_init_approx,
                                            2,
                                            tag="b2_x_sub_or_zero",
                                            debug=debugd)
            y_inf_or_nan = bit_match(fast_init_approx,
                                     3,
                                     tag="b3_y_inf_or_nan",
                                     debug=debugd)
            inv_underflow = bit_match(fast_init_approx,
                                      4,
                                      tag="b4_inv_underflow",
                                      debug=debugd)
            x_inf_or_nan = bit_match(fast_init_approx,
                                     5,
                                     tag="b5_x_inf_or_nan",
                                     debug=debugd)
            mult_error_underflow = bit_match(fast_init_approx,
                                             6,
                                             tag="b6_mult_error_underflow",
                                             debug=debugd)
            mult_dividend_underflow = bit_match(
                fast_init_approx,
                7,
                tag="b7_mult_dividend_underflow",
                debug=debugd)
            mult_dividend_overflow = bit_match(fast_init_approx,
                                               8,
                                               tag="b8_mult_dividend_overflow",
                                               debug=debugd)
            direct_result_flag = bit_match(fast_init_approx,
                                           9,
                                           tag="b9_direct_result_flag",
                                           debug=debugd)
            div_overflow = bit_match(fast_init_approx,
                                     10,
                                     tag="b10_div_overflow",
                                     debug=debugd)

            # bit11/eb large = bit_match(fast_init_approx, 11)
            # bit12 = bit_match(fast_init_approx, 11)

            #slow_result, slow_result_subnormal, _, _ = compute_div(slow_init_approx, scaled_vx, scaled_vy, scale_result = (ExponentInsertion(ex, tag = "eiy_sr"), ExponentInsertion(-ey, tag ="eiy_sr")))
            slow_result, slow_result_subnormal, _, _ = compute_div(
                slow_init_approx, scaled_vx, scaled_vy, scale_result=ex - ey)
            fast_result, fast_result_subnormal, fast_current_approx, inv_iteration_list = compute_div(
                fast_init_approx, vx, vy, scale_result=None)
            gappa_current_approx = fast_current_approx

            # decision tree selecting between the fast result, the raw seed
            # value, the slow (scaled) results and special values according
            # to the flags above
            pre_scheme = ConditionBlock(
                NotEqual(specific_case,
                         0,
                         tag="specific_case",
                         likely=True,
                         debug=debugd),
                Return(fast_result),
                ConditionBlock(
                    Equal(direct_result_flag, 0, tag="direct_result_case"),
                    Return(fast_init_approx),
                    ConditionBlock(
                        x_subnormal_or_zero | y_subnormal_or_zero
                        | inv_underflow | mult_error_underflow
                        | mult_dividend_overflow | mult_dividend_underflow,
                        ConditionBlock(
                            x_zero | y_zero,
                            Return(fast_init_approx),
                            ConditionBlock(
                                Test(slow_result, specifier=Test.IsSubnormal),
                                Return(slow_result_subnormal),
                                Return(slow_result)),
                        ),
                        ConditionBlock(
                            x_inf_or_nan,
                            Return(fast_init_approx),
                            ConditionBlock(
                                y_inf_or_nan,
                                Return(fast_init_approx),
                                ConditionBlock(
                                    NotEqual(div_overflow,
                                             0,
                                             tag="div_overflow_case"),
                                    Return(
                                        RoundedSignedOverflow(
                                            fast_init_approx,
                                            tag="signed_inf")),
                                    #Return(extract_and_inject_sign(fast_init_approx, FP_PlusInfty(self.precision) , tag = "signed_inf")),
                                    Return(FP_SNaN(self.precision))))))))

            scheme = Statement(fast_result, pre_scheme)

        else:
            print "generic generation"

            x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
            y_inf_or_nan = Test(vy,
                                specifier=Test.IsInfOrNaN,
                                likely=False,
                                tag="y_inf_or_nan",
                                debug=debugd)

            # NOTE(review): scale_result is passed as a tuple here, while
            # compute_div applies Min(...) and subtraction to it and the K1B
            # path passes `ex - ey` — confirm the tuple form is still
            # supported
            result, subnormal_result, gappa_current_approx, inv_iteration_list = compute_div(
                current_approx_std,
                scaled_vx,
                scaled_vy,
                scale_result=(ExponentInsertion(ex), ExponentInsertion(-ey)))
            gappa_vx = scaled_vx
            gappa_vy = scaled_vy
            gappa_init_approx = init_approx

            # NOTE(review): `yerr_last` (used in pre_scheme below) and
            # `pre_result` (used in the final Statement) are only assigned
            # inside compute_div in the visible code; unless they are also
            # defined at module level, this branch raises NameError

            # x inf and y inf
            pre_scheme = ConditionBlock(
                x_inf_or_nan,
                ConditionBlock(
                    x_inf,
                    ConditionBlock(
                        y_inf_or_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision)),
                        ),
                        ConditionBlock(comp_sign,
                                       Return(FP_MinusInfty(self.precision)),
                                       Return(FP_PlusInfty(self.precision)))),
                    Statement(ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    x_zero,
                    ConditionBlock(
                        y_zero | y_nan,
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))), Return(vx)),
                    ConditionBlock(
                        y_inf_or_nan,
                        ConditionBlock(
                            y_inf,
                            Return(
                                Select(comp_sign, FP_MinusZero(self.precision),
                                       FP_PlusZero(self.precision))),
                            Statement(
                                ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                                Return(FP_QNaN(self.precision)))),
                        ConditionBlock(
                            y_zero,
                            Statement(
                                Raise(ML_FPE_DivideByZero),
                                ConditionBlock(
                                    comp_sign,
                                    Return(FP_MinusInfty(self.precision)),
                                    Return(FP_PlusInfty(self.precision)))),
                            ConditionBlock(
                                Test(result,
                                     specifier=Test.IsSubnormal,
                                     likely=False),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Statement(
                                            Raise(ML_FPE_Inexact,
                                                  ML_FPE_Underflow))),
                                    Return(subnormal_result),
                                ),
                                Statement(
                                    ConditionBlock(
                                        Comparison(
                                            yerr_last,
                                            0,
                                            specifier=Comparison.NotEqual,
                                            likely=True),
                                        Raise(ML_FPE_Inexact)),
                                    Return(result)))))))
            # the current rounding mode is saved, round-to-nearest is set
            # around yerr_last, and the saved mode is restored before
            # pre_result
            rnd_mode = GetRndMode()
            scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest),
                               yerr_last, SetRndMode(rnd_mode), pre_result,
                               ClearException(), result, pre_scheme)

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print "MDL fusing FMA"
            scheme = opt_eng.fuse_multiply_add(scheme, silence=True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)

        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision=self.precision)

        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        #print "silencing operation"
        #opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # check processor support
        print "checking processor support"
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        #opt_eng.factorize_fast_path(scheme)

        # NOTE(review): this message is printed before the C code
        # generation; the Gappa script itself is built further below
        print "Gappa script generation"

        # C code generation, written to output_file
        cg = CCodeGenerator(processor,
                            declare_cst=False,
                            disable_debug=not debug_flag,
                            libm_compliant=libm_compliant)
        self.result = exp_implementation.get_definition(cg,
                                                        C_Code,
                                                        static_cst=True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        # NOTE(review): the stream is not closed if write() raises
        output_stream = open(output_file, "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()

        # Gappa script: the seed, x and y nodes are replaced by variables
        # constrained to the intervals below
        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=Interval(0.5, 1))
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
        }
        # goal: refined reciprocal approximation minus the exact 1 / y
        G1 = Constant(1, precision=ML_Exact)
        exact = G1 / gappa_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        gappa_goal = gappa_current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = gappacg.get_interval_code(gappa_goal,
                                               cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        # hints contributed by each Newton-Raphson iteration
        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        # hypothesis: the seed is within 2^-7 of the exact reciprocal
        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-S2**-7, S2**-7))

        # NOTE(review): the bare `except` below reports any exception raised
        # in the try body (not only Gappa failures) with the same message
        try:
            eval_error = execute_gappa_script_extract(
                gappa_code.get(gappacg))["goal"]
            print "eval_error: ", eval_error
        except:
            print "error during gappa run"
示例#9
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 num_iter = 3,
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "__divsf3.c", 
                 function_name = "__divsf3"):
        # declaring CodeFunction and retrieving input variable
        self.precision = precision
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = precision)
        vx = exp_implementation.add_input_variable("x", precision) 
        vy = exp_implementation.add_input_variable("y", precision) 

        class NR_Iteration(object):
            def __init__(self, approx, divisor, force_fma = False):
                self.approx = approx
                self.divisor = divisor
                self.force_fma = force_fma
                if force_fma:
                    self.error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    self.new_approx = FusedMultiplyAdd(self.error, self.approx, self.approx, specifier = FusedMultiplyAdd.Standard)
                else:
                    self.error = 1 - divisor * approx
                    self.new_approx = self.approx + self.error * self.approx

            def get_new_approx(self):
                return self.new_approx

            def get_hint_rules(self, gcg, gappa_code, exact):
                divisor = self.divisor.get_handle().get_node()
                approx = self.approx.get_handle().get_node()
                new_approx = self.new_approx.get_handle().get_node()

                Attributes.set_default_precision(ML_Exact)


                if self.force_fma:
                    rule0 = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                else:
                    rule0 = 1.0 - divisor * approx
                rule1 = 1.0 - divisor * (approx - exact) - 1.0
                
                rule2 = new_approx - exact
                subrule = approx * (2 - divisor * approx)
                rule3 = (new_approx - subrule) - (approx - exact) * (approx - exact) * divisor

                if self.force_fma:
                    new_error = FusedMultiplyAdd(divisor, approx, 1.0, specifier = FusedMultiplyAdd.SubtractNegate)
                    rule4 = FusedMultiplyAdd(new_error, approx, approx)
                else:
                    rule4 = approx + (1 - divisor * approx) * approx

                Attributes.unset_default_precision()

                # registering hints
                gcg.add_hint(gappa_code, rule0, rule1)
                gcg.add_hint(gappa_code, rule2, rule3)
                gcg.add_hint(gappa_code, subrule, rule4)

        debugf        = ML_Debug(display_format = "%f")
        debuglf       = ML_Debug(display_format = "%lf")
        debugx        = ML_Debug(display_format = "%x")
        debuglx       = ML_Debug(display_format = "%lx")
        debugd        = ML_Debug(display_format = "%d")
        debug_lftolx  = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v)
        debug_ddtolx  = ML_Debug(display_format = "%\"PRIx64\" %\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" % (v, v))
        debug_dd      = ML_Debug(display_format = "{.hi=%lf, .lo=%lf}", pre_process = lambda v: "%s.hi, %s.lo" % (v, v))

        ex = Min(ExponentExtraction(vx, tag = "ex", debug = debugd), 1020)
        ey = Min(ExponentExtraction(vy, tag = "ey", debug = debugd), 1020)

        scaling_factor_x = ExponentInsertion(-ex) #ConditionalAllocation(Abs(ex) > 100, -ex, 0)
        scaling_factor_y = ExponentInsertion(-ey) #ConditionalAllocation(Abs(ey) > 100, -ey, 0)

        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y

        scaled_vx.set_attributes(debug = debug_lftolx, tag = "scaled_vx")
        scaled_vy.set_attributes(debug = debug_lftolx, tag = "scaled_vy")

        px = Conversion(scaled_vx, precision = ML_Binary32, tag = "px", debug=debugf) if self.precision != ML_Binary32 else vx
        py = Conversion(scaled_vy, precision = ML_Binary32, tag = "py", debug=debugf) if self.precision != ML_Binary32 else vy

        pre_init_approx = DivisionSeed(px, py, precision = ML_Binary32, tag = "seed", debug = debugf)  
        init_approx = Conversion(pre_init_approx, precision = self.precision, tag = "seedd", debug = debug_lftolx) if self.precision != ML_Binary32 else pre_init_approx

        current_approx = init_approx 
        # correctly-rounded inverse computation
        num_iteration = num_iter
        inv_iteration_list = []

        Attributes.set_default_rounding_mode(ML_RoundToNearest)
        Attributes.set_default_silent(True)

        for i in range(num_iteration):
            new_iteration = NR_Iteration(current_approx, scaled_vy, force_fma = False if (i != num_iteration - 1) else True)
            inv_iteration_list.append(new_iteration)
            current_approx = new_iteration.get_new_approx()
            current_approx.set_attributes(tag = "iter_%d" % i, debug = debug_lftolx)


        def dividend_mult(div_approx, inv_approx, dividend, divisor, index, force_fma = False):
            yerr = dividend - div_approx * divisor
            #yerr = FMSN(div_approx, divisor, dividend)
            yerr.set_attributes(tag = "yerr%d" % index, debug = debug_lftolx)
            new_div = div_approx + yerr * inv_approx
            #new_div = FMA(yerr, inv_approx, div_approx)
            new_div.set_attributes(tag = "new_div%d" % index, debug = debug_lftolx)
            return new_div

        # multiplication correction iteration
        # to get correctly rounded full division
        current_approx.set_attributes(tag = "final_approx", debug = debug_lftolx)
        current_div_approx = scaled_vx * current_approx
        num_dividend_mult_iteration = 1
        for i in range(num_dividend_mult_iteration):
            current_div_approx = dividend_mult(current_div_approx, current_approx, scaled_vx, scaled_vy, i)


        # last iteration
        yerr_last = FMSN(current_div_approx, scaled_vy, scaled_vx) #, clearprevious = True)
        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()
        last_div_approx = FMA(yerr_last, current_approx, current_div_approx)

        yerr_last.set_attributes(tag = "yerr_last", debug = debug_lftolx)

        pre_result = last_div_approx
        pre_result.set_attributes(tag = "unscaled_div_result", debug = debug_lftolx)
        result = pre_result * ExponentInsertion(ex) * ExponentInsertion(-ey)
        result.set_attributes(tag = "result", debug = debug_lftolx)


        x_inf_or_nan = Test(vx, specifier = Test.IsInfOrNaN, likely = False)
        y_inf_or_nan = Test(vy, specifier = Test.IsInfOrNaN, likely = False, tag = "y_inf_or_nan", debug = debugd)
        comp_sign = Test(vx, vy, specifier = Test.CompSign, tag = "comp_sign", debug = debuglx )
        x_zero = Test(vx, specifier = Test.IsZero, likely = False)
        y_zero = Test(vy, specifier = Test.IsZero, likely = False)

        y_nan = Test(vy, specifier = Test.IsNaN, likely = False)

        x_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False)
        y_snan = Test(vy, specifier = Test.IsSignalingNaN, likely = False)

        x_inf = Test(vx, specifier = Test.IsInfty, likely = False, tag = "x_inf")
        y_inf = Test(vy, specifier = Test.IsInfty, likely = False, tag = "y_inf", debug = debugd)

        # determining an extended precision 
        ext_precision_map = {
            ML_Binary32: ML_Binary64,
            ML_Binary64: ML_DoubleDouble,
        }
        ext_precision = ext_precision_map[self.precision]

        ext_pre_result = FMA(yerr_last, current_approx, current_div_approx, precision = ext_precision, tag = "ext_pre_result", debug = debug_ddtolx)
        subnormal_result = None
        if isinstance(ext_precision, ML_Compound_FP_Format):
            subnormal_pre_result = SpecificOperation(ext_pre_result, ex - ey, precision = self.precision, specifier = SpecificOperation.Subnormalize, tag = "subnormal_pre_result", debug = debug_lftolx)
            subnormal_result = (subnormal_pre_result * ExponentInsertion(ex)) * ExponentInsertion(-ey)
        else:
            subnormal_result = Conversion(ext_pre_result * ExponentInsertion(ex - ey, tag = "final_scaling_factor", precision = ext_precision), precision = self.precision)


        # x inf and y inf 
        pre_scheme = ConditionBlock(x_inf_or_nan, 
            ConditionBlock(x_inf,
                ConditionBlock(y_inf_or_nan, 
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision)),
                    ),
                    ConditionBlock(comp_sign, Return(FP_MinusInfty(self.precision)), Return(FP_PlusInfty(self.precision)))
                ),
                Statement(
                    ConditionBlock(x_snan, Raise(ML_FPE_Invalid)),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(x_zero,
                ConditionBlock(y_zero | y_nan,
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision))
                    ),
                    Return(vx)
                ),
                ConditionBlock(y_inf_or_nan,
                    ConditionBlock(y_inf,
                        Return(Select(comp_sign, FP_MinusZero(self.precision), FP_PlusZero(self.precision))),
                        Statement(
                            ConditionBlock(y_snan, Raise(ML_FPE_Invalid)),
                            Return(FP_QNaN(self.precision))
                        )
                    ),
                    ConditionBlock(y_zero,
                        Statement(
                            Raise(ML_FPE_DivideByZero),
                            ConditionBlock(comp_sign, 
                                Return(FP_MinusInfty(self.precision)),
                                Return(FP_PlusInfty(self.precision))
                            )
                        ),
                        ConditionBlock(Test(result, specifier = Test.IsSubnormal, likely = False),
                            Statement(
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow))
                                ),
                                Return(subnormal_result),
                            ),
                            Statement(
                                ConditionBlock(Comparison(yerr_last, 0, specifier = Comparison.NotEqual, likely = True),
                                    Raise(ML_FPE_Inexact)
                                ),
                                Return(result)
                            )
                        )
                    )
                )
            )
        )

        rnd_mode = GetRndMode()
        scheme = Statement(rnd_mode, SetRndMode(ML_RoundToNearest), yerr_last, SetRndMode(rnd_mode), pre_result, ClearException(), result, pre_scheme)


        processor = target

        opt_eng = OptimizationEngine(processor)

        # fusing FMA
        if fuse_fma:
            print "MDL fusing FMA"
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        print "MDL abstract scheme"
        opt_eng.instantiate_abstract_precision(scheme, None)


        print "MDL instantiated scheme"
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        print "subexpression sharing"
        opt_eng.subexpression_sharing(scheme)

        #print "silencing operation"
        #opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        #print scheme.get_str(depth = None, display_precision = True)

        # check processor support
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        #opt_eng.factorize_fast_path(scheme)
        
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        self.result.add_header("math.h")
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
        self.result.add_header("support_lib/ml_special_values.h")

        output_stream = open(output_file, "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
        seed_var = Variable("seed", precision = self.precision, interval = Interval(0.5, 1))
        cg_eval_error_copy_map = {
            init_approx.get_handle().get_node(): seed_var,
            scaled_vx.get_handle().get_node(): Variable("x", precision = self.precision, interval = Interval(1, 2)),
            scaled_vy.get_handle().get_node(): Variable("y", precision = self.precision, interval = Interval(1, 2)),
        }
        G1 = Constant(1, precision = ML_Exact)
        exact = G1 / scaled_vy
        exact.set_precision(ML_Exact)
        exact.set_tag("div_exact")
        gappa_goal = current_approx.get_handle().get_node() - exact
        gappa_goal.set_precision(ML_Exact)
        gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
        gappa_code = gappacg.get_interval_code(gappa_goal, cg_eval_error_copy_map)

        new_exact_node = exact.get_handle().get_node()

        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_node)

        seed_wrt_exact = seed_var - new_exact_node
        seed_wrt_exact.set_precision(ML_Exact)
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact, Interval(-S2**-7, S2**-7))

        eval_error = execute_gappa_script_extract(gappa_code.get(gappacg))["goal"]
        print "eval_error: ", eval_error
示例#10
0
  def generate_scheme(self):
    """Build and return the operation scheme of the implemented function.

    Judging from the expressions below (a polynomial fitted to
    log1p(x)/x near zero, and a table-driven logarithm of x + 1
    elsewhere) this appears to implement log(1 + x) -- TODO confirm
    against the enclosing class.

    The returned scheme dispatches between:
      * x < -1           -> invalid exception, quiet NaN
      * x infinite       -> +infinity
      * x NaN            -> quiet NaN (invalid raised for signaling NaN)
      * x subnormal      -> x itself
      * exponent(x) < -7 -> x * polynomial(x)
      * otherwise        -> table + polynomial reconstruction
    """
    vx = self.implementation.add_input_variable("x", self.precision) 
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    # NOTE(review): this helper is not called anywhere in this method
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)


    # log(2) split into a high part rounded to a reduced number of bits
    # (field_size - (exponent_size + 1)) and a low part holding the residual
    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision = self.precision)
    log2_lo = Constant(log2_lo_value, precision = self.precision)

    # NOTE(review): vx_exp is rebuilt identically after the table creation
    # below, so this first node is overwritten before being used
    vx_exp  = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    # (a dummy ReciprocalSeed node is only used as a lookup key)
    dummy_var = Variable("dummy", precision = self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    # table creation
    # 2**7 rows of (hi, lo) parts of log(inv_approx_table[i]), split the
    # same way as log(2) above; row 0 is forced to zero
    table_index_size = 7
    log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low


    vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

    # case close to 0: ctz
    # selected when the exponent of x is below ctz_exp_limit; the
    # polynomial is fitted on [-2^ctz_exp_limit, 2^ctz_exp_limit]
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    # degree: one more than the upper bound reported by guessdegree
    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
    ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

    # the polynomial approximates log1p(x)/x, hence the product with x
    ctz_result = vx * ctz_poly

    # special-case predicates on the input
    neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
    vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
    vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
    vx_inf  = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
    vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")
    
    # NOTE(review): the three objects below describe an external "new_log"
    # function but are not referenced by the scheme returned from this method
    log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64) 
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)


    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
    cm500 = Constant(-500, precision = ML_Int32)
    c0 = Constant(0, precision = ML_Int32)
    # when the exponent of x + 1 exceeds 2**(exponent_size - 2), x + 1 is
    # multiplied by 2^-500; otherwise the scaling factor is 2^0
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

    # 2^-exponent(vxp1), used in place of the seed when table_index is 0
    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

    # 7-bit index: encoding of vxp1 shifted right by (field_size - 7) and
    # masked with 0x7f (same width as table_index_size above)
    table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx) 

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    # the seed encoding is AND-ed with -2, i.e. its lowest bit is cleared
    # NOTE(review): ML_UInt64 is hard-coded here while int_precision is
    # derived from self.precision above -- confirm for non-64-bit formats
    pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

    # reduced argument r * (x + 1) - 1; the unscaled branch is written as
    # (r * x - 1) + r, which is the same quantity expanded
    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

    # column 0 of log_table holds the high part, column 1 the low part
    log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx) 
    log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

    inv_err = S2**-6 # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    # only the terms from index 1 upward are evaluated; red_vxp1 itself is
    # added separately in the reconstruction below
    poly_object = global_poly_object.sub_poly(start_index = 1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
    _poly.set_attributes(tag = "poly", debug = debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())


    # NOTE(review): vxp1_inv_exp is not used by any expression below
    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)# vxp1_inv_exp

    #poly = (red_vxp1) * (1 +  _poly)
    #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

    # NOTE(review): pre_result is tagged but is not part of the returned
    # scheme; std_result below is built from the step* nodes instead
    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)
    exact_log2_hi_exp = - corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)
    #std_result =  exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = - corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)
    
    # reconstruction, accumulated step by step:
    #   std_result = exact_log2_hi_exp
    #                + (-log_inv_hi + ((red_vxp1 * _poly + init) + red_vxp1))
    init = exact_log2_lo_exp  - log_inv_lo
    init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
    fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
    fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
    # step0 is the same object as fma0, so the call below applies the
    # "step0" tag to that very node
    step0 = fma0 
    step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)


    # main scheme
    Log.report(Log.Info, "MDL scheme")
    # branch order: x < -1, then inf/NaN, then subnormal, then the
    # close-to-zero polynomial, and finally the standard path
    pre_scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme