Example #1
    def test_ref_assign(self):
        """ test behavior of StaticVectorizer on predicated ReferenceAssign """
        va = Variable("a")
        vb = Variable("b")
        vc = Variable("c")
        scheme = Statement(
            ReferenceAssign(va, Constant(3)),
            ConditionBlock(
                (va > vb).modify_attributes(likely=True),
                Statement(ReferenceAssign(vb, va),
                          ReferenceAssign(va, Constant(11)), Return(va)),
            ), ReferenceAssign(va, Constant(7)), Return(vb))
        vectorized_path = StaticVectorizer().extract_vectorizable_path(
            scheme, fallback_policy)

        linearized_most_likely_path = instanciate_variable(
            vectorized_path.linearized_optree,
            vectorized_path.variable_mapping)
        test_result = (isinstance(linearized_most_likely_path, Constant)
                       and linearized_most_likely_path.get_value() == 11)
        if not test_result:
            print("test UT_StaticVectorizer failure")
            print("scheme: {}".format(scheme.get_str()))
            print("linearized_most_likely_path: {}".format(
                linearized_most_likely_path))
        self.assertTrue(test_result)
Example #2
    def generate_scheme(self):
        size_format = ML_Int32

        # Matrix storage
        in_storage = self.implementation.add_input_variable(
            "buffer_in", ML_Pointer_Format(self.precision))
        kernel_storage = self.implementation.add_input_variable(
            "buffer_kernel", ML_Pointer_Format(self.precision))
        out_storage = self.implementation.add_input_variable(
            "buffer_out", ML_Pointer_Format(self.precision))

        # Matrix sizes
        w = self.implementation.add_input_variable("w", size_format)
        h = self.implementation.add_input_variable("h", size_format)

        # input image is an (h x w) matrix in row-major order
        tIn = Tensor(in_storage,
                     TensorDescriptor([w, h], [1, w], self.precision))
        # convolution kernel tensor of shape self.kernel_size, row-major
        kernel_strides = [1]
        for previous_dim in self.kernel_size[:-1]:
            kernel_strides.append(previous_dim * kernel_strides[-1])
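        # e.g. for a 3x3 kernel, kernel_strides builds up as [1, 3]:
        # unit stride on the innermost dimension, then each outer stride is
        # the previous dimension times the previous stride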
        print("kernel_strides: {}".format(kernel_strides))
        tKernel = Tensor(
            kernel_storage,
            TensorDescriptor(self.kernel_size, kernel_strides, self.precision))
        # output image is an (h x w) matrix in row-major order
        tOut = Tensor(out_storage,
                      TensorDescriptor([w, h], [1, w], self.precision))

        index_format = ML_Int32

        # main NDRange description
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k_w = Variable("k_w", precision=index_format, var_type=Variable.Local)
        k_h = Variable("k_h", precision=index_format, var_type=Variable.Local)
        result = NDRange([IterRange(i, 0, w - 1),
                          IterRange(j, 0, h - 1)],
                         WriteAccessor(
                             tOut, [i, j],
                             Sum(Sum(Multiplication(
                                 ReadAccessor(tIn, [i + k_w, j - k_h],
                                              self.precision),
                                 ReadAccessor(tKernel, [k_w, k_h],
                                              self.precision)),
                                     IterRange(k_w,
                                               -(self.kernel_size[0] - 1) // 2,
                                               (self.kernel_size[0] - 1) // 2),
                                     precision=self.precision),
                                 IterRange(k_h,
                                           -(self.kernel_size[1] - 1) // 2,
                                           (self.kernel_size[1] - 1) // 2),
                                 precision=self.precision)))

        mdl_scheme = expand_ndrange(result)
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None)))
        return Statement(mdl_scheme, Return())
Example #3
    def generate_scheme(self):
        size_format = ML_Int32

        # Matrix storage
        A_storage = self.implementation.add_input_variable("buffer_a", ML_Pointer_Format(self.precision))
        B_storage = self.implementation.add_input_variable("buffer_b", ML_Pointer_Format(self.precision))
        C_storage = self.implementation.add_input_variable("buffer_c", ML_Pointer_Format(self.precision))

        # Matrix sizes
        n = self.implementation.add_input_variable("n", size_format)
        m = self.implementation.add_input_variable("m", size_format)
        p = self.implementation.add_input_variable("p", size_format)


        # A is a (n x p) matrix in row-major
        tA = Tensor(A_storage, TensorDescriptor([p, n], [1, p], self.precision))
        # B is a (p x m) matrix in row-major
        tB = Tensor(B_storage, TensorDescriptor([m, p], [1, m], self.precision))
        # C is a (n x m) matrix in row-major
        tC = Tensor(C_storage, TensorDescriptor([m, n], [1, m], self.precision))

        index_format = ML_Int32

        #
        i = Variable("i", precision=index_format, var_type=Variable.Local)
        j = Variable("j", precision=index_format, var_type=Variable.Local)
        k = Variable("k", precision=index_format, var_type=Variable.Local)
        result = NDRange(
            [IterRange(j, 0, m - 1), IterRange(i, 0, n - 1)],
            WriteAccessor(
                tC, [j, i],
                Sum(
                    Multiplication(
                        ReadAccessor(tA, [k, i], self.precision),
                        ReadAccessor(tB, [j, k], self.precision),
                        precision=self.precision),
                    IterRange(k, 0, p - 1),
                    precision=self.precision)))

        if self.vectorize:
            mdl_scheme = expand_ndrange(vectorize_ndrange(result, j, 4))
        else:
            mdl_scheme = expand_ndrange(exchange_loop_order(tile_ndrange(result, {j: 2, i: 2}), [1, 0]))
        print("mdl_scheme:\n{}".format(mdl_scheme.get_str(depth=None, display_precision=True)))
        return Statement(
            mdl_scheme,
            Return()
        )
Example #4
 def __init__(self,
              register_id,
              register_format,
              reg_tag,
              var_tag=None,
              **kw):
     """ register tag is stored as inner Variable's name
         and original variable's name is stored in self.var_tag """
     #reg_tag = "unamed-reg" if reg_tag is None else reg_tag
     # indirection toward register's tag (if _reg_tag's value is None, then
     # register tag is undefined)
     self._reg_tag = reg_tag
     Variable.__init__(self, self.reg_tag, precision=register_format, **kw)
     self.var_tag = var_tag
     self.register_id = register_id
Example #5
    def generate_test_wrapper(self, tensor_descriptors, input_tables,
                              output_tables):
        auto_test = CodeFunction("test_wrapper", output_format=ML_Int32)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True,
            require_header=["stdio.h"])
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        test_loop = self.get_tensor_test_wrapper(
            tested_function, tensor_descriptors, input_tables, output_tables,
            acc_num, self.generate_tensor_check_loop)

        # common test scheme between scalar and vector functions
        test_scheme = Statement(test_loop, printf_success_function(),
                                Return(Constant(0, precision=ML_Int32)))
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
Example #6
def tile_ndrange(ndrange, tile, index_format=ML_Int32):
    """ inplace transform ndrange such that it iterate over a sub-tile of
        size tile rather than a single element
        tile is a dict(var_index -> tile_dim) """
    # The transformation is performed by replacing each range
    # implicating one of the variable from tile, by a range whose step is the tile's dimension
    # and then adding a sub-iterange using a sub-alias for the tile's variable whose range
    # is [0; tile's dimension - 1]
    new_var_range_list = []
    var_transform_map = {}
    kernel_var_range_list = []
    # transform var_range_list
    for iter_range in ndrange.var_range_list:
        var_index = iter_range.var_index
        if var_index in tile:
            tile_dim = tile[var_index]
            new_iter_range = IterRange(var_index,
                                       iter_range.first_index,
                                       iter_range.last_index,
                                       index_step=tile_dim)
            new_var_range_list.append(new_iter_range)
            sub_var = Variable("sub_%s" % var_index.get_tag(),
                               precision=index_format,
                               var_type=Variable.Local)
            sub_var_range = IterRange(sub_var, var_index,
                                      var_index + tile_dim - 1)
            kernel_var_range_list.append(sub_var_range)
            var_transform_map[iter_range.var_index] = sub_var_range
        else:
            new_var_range_list.append(iter_range)
    # tile kernel
    new_kernel = substitute_var(ndrange.kernel, var_transform_map)
    sub_ndrange = NDRange(kernel_var_range_list, new_kernel)
    return NDRange(new_var_range_list, sub_ndrange)
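# A minimal usage sketch (mirroring Example #3 above): tile a 2-D NDRange
# into 2x2 blocks, then exchange the two outer loops:
#
#   tiled = tile_ndrange(result, {j: 2, i: 2})
#   mdl_scheme = expand_ndrange(exchange_loop_order(tiled, [1, 0]))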
Example #7
    def generate_inline_scheme(self, vx):
        """ generate a pair <variable, scheme>
            scheme is the operation graph to compute self function on vx
            and variable is the result variable """
        result_var = Variable("r",
                              precision=self.get_precision(),
                              var_type=Variable.Local)
        scalar_scheme = self.generate_scalar_scheme(vx)
        result_scheme = inline_function(scalar_scheme, result_var, {vx: vx})

        return result_var, result_scheme
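    # A usage sketch (hypothetical caller; `meta_fct` and `out` are assumed
    # names): splice the returned scheme into the caller's own Statement and
    # read the result through the returned variable:
    #
    #   res_var, res_scheme = meta_fct.generate_inline_scheme(vx)
    #   body = Statement(res_scheme, ReferenceAssign(out, res_var))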
Example #8
    def generate_tensor_check_loop(self, tensor_descriptors, input_tables,
                                   output_tables):
        # unpack tensor descriptors tuple
        (input_tensor_descriptor_list,
         output_tensor_descriptor_list) = tensor_descriptors
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_error_detail_function = self.get_printf_error_detail_fct(
            output_tensor_descriptor_list[0])

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_tables = self.generate_expected_table(tensor_descriptors,
                                                       input_tables)

        # global statement to list all checks
        check_statement = Statement()

        # implement check for each output tensor
        for out_id, out_td in enumerate(output_tensor_descriptor_list):
            # expected values for the (vj)-th entry of the sub-array
            expected_values = [
                TableLoad(expected_tables[out_id], vj, i)
                for i in range(self.accuracy.get_num_output_value())
            ]
            # local result for the (vj)-th entry of the sub-array
            local_result = TableLoad(output_tables[out_id], vj)

            array_len = out_td.get_bounding_size()

            if self.break_error:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, expected_values))
            else:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, expected_values),
                    Return(Constant(1, precision=ML_Int32)))
            check_array_loop = Loop(
                ReferenceAssign(vj, 0), vj < array_len,
                Statement(
                    ConditionBlock(
                        self.accuracy.get_output_check_test(
                            local_result, expected_values),
                        return_statement_break),
                    ReferenceAssign(vj, vj + 1),
                ))
            check_statement.add(check_array_loop)
        return check_statement
Example #9
 def rec_bb_processing(bb):
     """ perform variable renaming in the basic block @p bb
         and recursively in bb's children in the dominator tree """
     Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "processing bb {}", bb)
     # because a node can be duplicated between
     # its declaration and its use in a subsequent operation in the same
     # basic block, we must make sure it is processed only once
     # by update_used_var for a given <var>. Thus for each
     # <var> we store a memoization_map of processed nodes
     updated_used_var_memoization_map = {}
     def get_mem_map(var):
         """ return the updated_used_var memoization_map associated
             to @p var """
          if var not in updated_used_var_memoization_map:
             updated_used_var_memoization_map[var] = {}
         return updated_used_var_memoization_map[var]
     for op in bb.get_inputs():
         if op in memoization_map:
             continue
         else:
             memoization_map[op] = None
         Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "processing op {}", op)
         if not isinstance(op, PhiNode):
             for var in get_var_used_by_non_phi(op):
                 Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "processing var {} used by non-phi node", var)
                 updating_reaching_def(bbg, reaching_def, var, op)
                 Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "updating var from {} to {} used by non-phi node", var, reaching_def[var])
                 local_mem_map = get_mem_map(var)
                 update_used_var(op, var, reaching_def[var], memoization_map=local_mem_map)
                 # to avoid multiple update we add the output memoization_table
                 # to the table of the destination variable
                 # so the last time the destination variable is considered for update
                 # it will discard all update made during this BB processing
                 get_mem_map(reaching_def[var]).update(local_mem_map)
         for var in get_var_def_by_op(op):
             updating_reaching_def(bbg, reaching_def, var, op)
             vp = Variable("%s_%d" % (var.get_tag(), new_var_index(var)), precision=var.get_precision()) # TODO: tag
             update_def_var(op, var, vp)
             reaching_def[vp] = reaching_def[var]
             reaching_def[var] = vp
             bbg.variable_defs[vp] = op
     Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "processing phi in successor")
     for phi in get_phi_list_in_bb_successor(bb):
         for index, var, var_bb in get_indexed_var_used_by_phi(phi):
             Log.report(LOG_LEVEL_GEN_BB_VERBOSE, "processing operand #{} of phi: {}, var_bb is {}", index, var, var_bb)
             if not isinstance(var_bb, EmptyOperand):
                 continue
             # updating_reaching_def(bbg, reaching_def, var, phi)
             update_indexed_used_var_in_phi(phi, index, var, reaching_def[var], bb)
             break
     # finally traverse sub-tree
     if bb in bbg.dominator_tree:
         for child in bbg.dominator_tree[bb]:
             rec_bb_processing(child)
Example #10
def simplify_inverse(optree, processor):
    dummy_var = Variable("dummy_var_seed", precision=optree.get_precision())
    dummy_div_seed = DivisionSeed(dummy_var, precision=optree.get_precision())
    inv_approx_table = processor.get_recursive_implementation(
        dummy_div_seed, language=None,
        table_getter=lambda self: self.approx_table_map)

    seed_input = optree.inputs[0]
    c0 = Constant(0, precision=ML_Int32)

    if optree.get_precision() == inv_approx_table.get_storage_precision():
        return TableLoad(inv_approx_table,
                         inv_approx_table.get_index_function()(seed_input),
                         c0, precision=optree.get_precision())
    else:
        return Conversion(
            TableLoad(inv_approx_table,
                      inv_approx_table.get_index_function()(seed_input),
                      c0, precision=inv_approx_table.get_storage_precision()),
            precision=optree.get_precision())
Example #11
File: fmod.py Project: metalibm/metalibm
    def generate_scalar_scheme(self, vx, vy):
        div = Division(vx, vy, precision=self.precision)
        div_if = Trunc(div, precision=self.precision)
        rem = Variable("rem",
                       var_type=Variable.Local,
                       precision=self.precision)
        qi = Variable("qi", var_type=Variable.Local, precision=self.precision)
        qi_bound = Constant(S2**self.precision.get_mantissa_size())
        init_rem = FusedMultiplyAdd(-div_if, vy, vx)

        # factorizing 1 / vy to save time
        # NOTES: it makes rem / vy approximate
        # shared_rcp = Division(1, vy, precision=self.precision)

        iterative_fmod = Loop(
            Statement(
                ReferenceAssign(rem, init_rem),
                ReferenceAssign(qi, div_if),
            ),
            Abs(qi) > qi_bound,
            Statement(
                ReferenceAssign(
                    qi,
                    #Trunc(shared_rcp * rem, precision=self.precision)
                    Trunc(rem / vy, precision=self.precision)),
                ReferenceAssign(rem, FMA(-qi, vy, rem))))
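        # the loop above computes fmod by iterative folding: while the
        # quotient estimate qi is too wide to be exactly representable
        # (|qi| > 2^p, p the mantissa size), it is folded back into the
        # remainder via rem <- rem - qi * vy; FMA(-qi, vy, rem) evaluates
        # that update with a single rounding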
        scheme = Statement(
            rem,
            # shared_rcp,
            iterative_fmod,
            ConditionBlock(
                # if rem's sign and vx sign mismatch
                (rem * vx < 0.0).modify_attributes(tag="update_cond",
                                                   debug=debug_multi),
                Return(rem + vy),
                Return(rem),
            ))
        return scheme
Example #12
def instanciate_fct_call(node, precision):
    """ replace FunctionCall node by the actual function
        scheme """
    vx_list = [
        node.get_input(i) for i in range(node.get_function_object().arity)
    ]
    func_name = node.get_function_object().name
    fct_ctor, fct_args, fct_range_function = FUNCTION_MAP[func_name]
    var_result = Variable("local_result",
                          precision=precision,
                          var_type=Variable.Local)
    local_args = {"precision": precision, "libm_compliant": False}
    local_args.update(fct_args)
    fct_scheme = generate_inline_fct_scheme(fct_ctor, var_result, vx_list,
                                            local_args)
    return var_result, fct_scheme
Example #13
    def get_tensor_test_wrapper(self,
                                tested_function,
                                tensor_descriptors,
                                input_tables,
                                output_tables,
                                acc_num,
                                post_statement_generator,
                                NUM_INPUT_ARRAY=1):
        """ generate a test loop for multi-array tests
             @param test_num number of elementary array tests to be executed
             @param tested_function FunctionObject to be tested
             @param table_size_offset_array ML_NewTable object containing
                    (table-size, offset) pairs for multi-array testing
             @param input_table ML_NewTable containing multi-array test inputs
             @param output_table ML_NewTable containing multi-array test outputs
             @param post_statement_generator is generator used to generate
                    a statement executed at the end of the test of one of the
                    arrays of the multi-test. It expects 6 arguments:
                    (input_tables, output_array, table_size_offset_array,
                     array_offset, array_len, test_id)
             @param printf_function FunctionObject to print error case
        """
        array_len = Variable("len",
                             precision=ML_UInt32,
                             var_type=Variable.Local)

        def pointer_add(table_addr, offset):
            pointer_format = table_addr.get_precision_as_pointer_format()
            return Addition(table_addr, offset, precision=pointer_format)

        array_inputs = tuple(input_tables[in_id]
                             for in_id in range(NUM_INPUT_ARRAY))
        function_call = tested_function(*(self.get_ordered_arg_tuple(
            tensor_descriptors, input_tables, output_tables)))

        post_statement = post_statement_generator(tensor_descriptors,
                                                  input_tables, output_tables)

        test_statement = Statement(
            function_call,
            post_statement,
        )

        return test_statement
Example #14
def expand_kernel_expr(kernel, iterator_format=ML_Int32):
    """ Expand a kernel expression into the corresponding MDL graph """
    if isinstance(kernel, NDRange):
        return expand_ndrange(kernel)
    elif isinstance(kernel, Sum):
        var_iter = kernel.index_iter_range.var_index
        # TODO/FIXME to be uniquified
        acc = Variable("acc",
                       var_type=Variable.Local,
                       precision=kernel.precision)
        # TODO/FIXME implement proper acc init
        if kernel.precision.is_vector_format():
            C0 = Constant([0] * kernel.precision.get_vector_size(),
                          precision=kernel.precision)
        else:
            C0 = Constant(0, precision=kernel.precision)
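        # the Loop below materializes the scalar reduction pattern
        #   acc = 0
        #   for var_iter in [first_index .. last_index] step index_step:
        #       acc += expand(elt_operation)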
        scheme = Loop(
            Statement(
                ReferenceAssign(var_iter, kernel.index_iter_range.first_index),
                ReferenceAssign(acc, C0)),
            var_iter <= kernel.index_iter_range.last_index,
            Statement(
                ReferenceAssign(
                    acc,
                    Addition(acc,
                             expand_kernel_expr(kernel.elt_operation),
                             precision=kernel.precision)),
                # loop iterator increment
                ReferenceAssign(var_iter, var_iter +
                                kernel.index_iter_range.index_step)))
        return PlaceHolder(acc, scheme)
    elif isinstance(kernel, (ReadAccessor, WriteAccessor)):
        return expand_accessor(kernel)
    elif is_leaf_node(kernel):
        return kernel
    else:
        # vanilla metalibm ops are left unmodified (except
        # recursive expansion)
        for index, op in enumerate(kernel.inputs):
            new_op = expand_kernel_expr(op)
            kernel.set_input(index, new_op)
        return kernel
Example #15
def vectorize_function_scheme(vectorizer,
                              name_factory,
                              scalar_scheme,
                              scalar_output_format,
                              scalar_arg_list,
                              vector_size,
                              sub_vector_size=None):
    """ Use a vectorization engine @p vectorizer to vectorize the sub-graph @p
        scalar_scheme, that is transforming and inputs and outputs from scalar
        to vectors and performing required internal path duplication """

    sub_vector_size = vector_size if sub_vector_size is None else sub_vector_size

    vec_arg_list, vector_scheme, vector_mask = \
        vectorizer.vectorize_scheme(scalar_scheme, scalar_arg_list,
                                    vector_size, sub_vector_size)

    vector_output_format = vectorize_format(scalar_output_format, vector_size)

    vec_res = Variable("vec_res",
                       precision=vector_output_format,
                       var_type=Variable.Local)

    vector_mask.set_attributes(tag="vector_mask", debug=debug_multi)

    callback_name = "scalar_callback"
    scalar_callback_fct = generate_function_from_optree(
        name_factory, scalar_scheme, scalar_arg_list, callback_name,
        scalar_output_format)
    scalar_callback = scalar_callback_fct.get_function_object()

    if no_scalar_fallback_required(vector_mask):
        function_scheme = Statement(
            Return(vector_scheme, precision=vector_output_format))
    else:
        function_scheme = generate_c_vector_wrapper(vector_size, vec_arg_list,
                                                    vector_scheme, vector_mask,
                                                    vec_res, scalar_callback)

    return vec_res, vec_arg_list, function_scheme, scalar_callback, scalar_callback_fct
Example #16
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)
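        # Cody-and-Waite style splitting: log(2) ~ log2_hi + log2_lo, where
        # log2_hi keeps only log2_hi_precision bits so that the product
        # k * log2_hi below (|k| bounded by interval_k) remains exact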

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)
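        # at this point r ~ vx - k * log(2) with |r| <= log(2)/2, so that
        # exp(vx) = 2^k * exp(r); keeping exact_hi_part and exact_lo_part
        # separate preserves the accuracy of the reduction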

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environment")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
Example #17
    def generate_scheme(self):
        """ main scheme generation """

        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        expected_interval = {}

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        x_interval = Interval(-10.3, 10.7)
        var_x.set_interval(x_interval)
        expected_interval[var_x] = x_interval

        var_y = self.implementation.add_input_signal("y", input_precision)
        y_interval = Interval(-17.9, 17.2)
        var_y.set_interval(y_interval)
        expected_interval[var_y] = y_interval

        var_z = self.implementation.add_input_signal("z", input_precision)
        z_interval = Interval(-7.3, 7.7)
        var_z.set_interval(z_interval)
        expected_interval[var_z] = z_interval

        cst = Constant(42.5, tag = "cst")
        expected_interval[cst] = Interval(42.5)

        conv_ceil = Ceil(var_x, tag = "ceil")
        expected_interval[conv_ceil] = sollya.ceil(x_interval)
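        # e.g. ceil([-10.3, 10.7]) = [-10, 11]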

        conv_floor = Floor(var_y, tag = "floor")
        expected_interval[conv_floor] = sollya.floor(y_interval)

        mult = var_z * var_x
        mult.set_tag("mult")
        mult_interval = z_interval * x_interval
        expected_interval[mult] = mult_interval

        large_add = (var_x + var_y) - mult
        large_add.set_attributes(tag = "large_add")
        large_add_interval = (x_interval + y_interval) - mult_interval
        expected_interval[large_add] = large_add_interval

        var_x_lzc = CountLeadingZeros(var_x, tag="var_x_lzc")
        expected_interval[var_x_lzc] = Interval(0, input_precision.get_bit_size())

        reduced_result = Max(0, Min(large_add, 13))
        reduced_result.set_tag("reduced_result")
        reduced_result_interval = interval_max(
            Interval(0),
            interval_min(
                large_add_interval,
                Interval(13)
            )
        )
        expected_interval[reduced_result] = reduced_result_interval

        select_result = Select(
            var_x > var_y,
            reduced_result,
            var_z,
            tag="select_result"
        )
        select_interval = interval_union(reduced_result_interval, z_interval)
        expected_interval[select_result] = select_interval

        # floating-point operation on mantissa and exponents
        fp_x_range = Interval(-0.01, 100)

        unbound_fp_var = Variable("fp_x", precision=ML_Binary32, interval=fp_x_range)
        mant_fp_x = MantissaExtraction(unbound_fp_var, tag="mant_fp_x", precision=ML_Binary32)
        exp_fp_x = ExponentExtraction(unbound_fp_var, tag="exp_fp_x", precision=ML_Int32)
        ins_exp_fp_x = ExponentInsertion(exp_fp_x, tag="ins_exp_fp_x", precision=ML_Binary32)

        expected_interval[unbound_fp_var] = fp_x_range
        expected_interval[exp_fp_x] = Interval(
            sollya.floor(sollya.log2(sollya.inf(abs(fp_x_range)))),
            sollya.floor(sollya.log2(sollya.sup(abs(fp_x_range))))
        )
        expected_interval[mant_fp_x] = Interval(1, 2)
        expected_interval[ins_exp_fp_x] = Interval(
            S2**sollya.inf(expected_interval[exp_fp_x]),
            S2**sollya.sup(expected_interval[exp_fp_x])
        )


        # checking interval evaluation
        for var in [var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x, ins_exp_fp_x, cst, var_x, var_y, mult, large_add, reduced_result, select_result, conv_ceil, conv_floor]:
            interval = evaluate_range(var)
            expected = expected_interval[var]
            print("{}: {}".format(var.get_tag(), interval))
            print("  vs expected {}".format(expected))
            assert interval is not None
            assert interval == expected


        return [self.implementation]
Example #18
        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(var_node, poly_node, precision=mul_format),
            precision=add_format), add_format.epsilon  # TODO: local error only
    else:
        Log.report(Log.Error, "poly degree must be positive or null. {}, {}",
                   poly_object.degree, poly_object)


if __name__ == "__main__":
    implem_results = []
    for eps_target in [S2**-40, S2**-50, S2**-55, S2**-60, S2**-65]:
        approx_interval = Interval(-S2**-5, S2**-5)
        ctx = MLL_Context(ML_Binary64, approx_interval)
        vx = Variable("x",
                      precision=ctx.variableFormat,
                      interval=approx_interval)
        # guessing the best degree
        poly_degree = int(
            sup(
                sollya.guessdegree(sollya.exp(sollya.x), approx_interval,
                                   eps_target)))
        # asking sollya to provide the approximation
        poly_object = Polynomial.build_from_approximation(
            sollya.exp(sollya.x), poly_degree,
            [sollya.doubledouble] * (poly_degree + 1), vx.interval)
        print("poly object is {}".format(poly_object))
        poly_graph, poly_epsilon = mll_implementpoly_horner(
            ctx, poly_object, eps_target, vx)
        print("poly_graph is {}".format(
            poly_graph.get_str(depth=None, display_precision=True)))
Example #19
                Statement(
                    expand_sub_ndrange(var_range_list, kernel),
                    # loop iterator increment
                    ReferenceAssign(var_range.var_index, var_range.var_index +
                                    var_range.index_step)),
            )
        return scheme

    return expand_sub_ndrange(ndrange.var_range_list, ndrange.kernel)


if __name__ == "__main__":
    size_format = ML_Int32

    # Matrix sizes
    n = Variable("n", precision=size_format)
    m = Variable("m", precision=size_format)
    p = Variable("p", precision=size_format)

    from metalibm_core.core.ml_formats import ML_Binary32
    precision = ML_Binary32

    # A is a (n x p) matrix in row-major
    tA = Tensor(None, TensorDescriptor([p, n], [1, p], precision))
    # B is a (p x m) matrix in row-major
    tB = Tensor(None, TensorDescriptor([m, p], [1, m], precision))
    # C is a (n x m) matrix in row-major
    tC = Tensor(None, TensorDescriptor([m, n], [1, m], precision))

    index_format = ML_Int32
Example #20
    def generate_array_check_loop(self, input_tables, output_array,
                                  table_size_offset_array, array_offset,
                                  array_len, test_id):
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_input_function = self.get_printf_input_function()

        printf_error_template = "printf(\"max %s error is %s \\n\", %s)" % (
            self.function_name,
            self.precision.get_display_format().format_string,
            self.precision.get_display_format().pre_process_fct("{0}"))
        printf_error_op = TemplateOperatorFormat(printf_error_template,
                                                 arity=1,
                                                 void_function=True,
                                                 require_header=["stdio.h"])

        printf_error_function = FunctionObject("printf", [self.precision],
                                               ML_Void, printf_error_op)

        printf_max_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"max %s error is reached at input number %s \\n \"" %
                (self.function_name, "%d"),
                1:
                FO_Arg(0)
            },
            void_function=True,
            require_header=["stdio.h"])
        printf_max_function = FunctionObject("printf", [self.precision],
                                             ML_Void, printf_max_op)

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_table = self.generate_expected_table(input_tables,
                                                      table_size_offset_array)

        # inputs for the (vj)-th entry of the sub-array
        local_inputs = tuple(
            TableLoad(input_tables[in_id], array_offset + vj)
            for in_id in range(NUM_INPUT_ARRAY))
        # expected values for the (vj)-th entry of the sub-array
        expected_values = [
            TableLoad(expected_table, array_offset + vj, i)
            for i in range(self.accuracy.get_num_output_value())
        ]
        # local result for the (vj)-th entry of the sub-array
        local_result = TableLoad(output_array, array_offset + vj)

        if self.break_error:
            return_statement_break = Statement(
                printf_input_function(*((vj, ) + local_inputs +
                                        (local_result, ))),
                self.accuracy.get_output_print_call(self.function_name,
                                                    expected_values))
        else:
            return_statement_break = Statement(
                printf_input_function(*((vj, ) + local_inputs +
                                        (local_result, ))),
                self.accuracy.get_output_print_call(self.function_name,
                                                    expected_values),
                Return(Constant(1, precision=ML_Int32)))

        # loop implementation checking the validity of the sub-array
        # results at array_offset
        check_array_loop = Loop(
            ReferenceAssign(vj, 0), vj < array_len,
            Statement(
                ConditionBlock(
                    self.accuracy.get_output_check_test(
                        local_result, expected_values),
                    return_statement_break),
                ReferenceAssign(vj, vj + 1),
            ))
        return check_array_loop
Example #21
    def generate_scalar_scheme(self, vx, vy):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        vy.set_attributes(tag="y")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #
        # pow(x, y) = x^y
        #           = exp(y * log(x))
        #           = 2^(y * log2(x))
        #           = 2^(y * (log2(m) + e))
        #
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        m = MantissaExtraction(vx, tag="m", precision=self.precision)

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter= lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x) # /sollya.log(self.basis)



        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)


        # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
        #
        # log_approx = log2(Abs(m))
        # r = y * log_approx ~ y * log2(m)
        #
        # NOTES: manage cases where e is negative and
        # (y * log2(m)) AND (y * e) could cancel out
        # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
        # be of opposite signs

        # log2(m) in [0, 1[ so cancellation can occur only if e == -1
        # we split 2^x in 2^x = 2^t0 * 2^t1
        # if e < 0: t0 = y * (log2(m) + e), t1=0
        # else:     t0 = y * log2(m), t1 = y * e

        t_cond = e < 0

        # e_y ~ e * y
        e_f = Conversion(e, precision=self.precision)
        #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0")
        #NearestInteger(t0, precision=self.precision, tag="t0_int")

        EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
        LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
        t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
        t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY),
                         EY - t0_int, tag="t0_frac")
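        # FMA(e_f, vy, -EY) and FMA(log_approx, vy, -LY) recover the residues
        # e*y - EY and log2(m)*y - LY left over by the NearestInteger
        # roundings above, each with a single rounding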
        #t0_frac.set_attributes(tag="t0_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)

        exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
        exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

        exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int")

        t1 = Select(t_cond, Constant(0, precision=self.precision), r)
        exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
        exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

        result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        y_int = NearestInteger(vy, precision=self.precision)
        y_is_integer = Equal(y_int, vy)
        y_is_even = LogicalOr(
            # if y is a finite number greater than 2**(mantissa_size+1),
            # then it is an integer multiple of 2 => even
            Abs(vy) >= 2**(self.precision.get_mantissa_size()+1),
            LogicalAnd(
                # Python's "and" would not build an operation node here,
                # so LogicalAnd is required to combine the two conditions
                LogicalAnd(y_is_integer,
                           Abs(vy) < 2**(self.precision.get_mantissa_size()+1)),
                # we want to limit the modulo computation to an integer input
                Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
            )
        )
        y_is_odd = LogicalAnd(
            LogicalAnd(
                Abs(vy) < 2**(self.precision.get_mantissa_size()+1),
                y_is_integer
            ),
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1)
        )


        # special cases management
        special_case_results = Statement(
            # x is sNaN OR y is sNaN
            ConditionBlock(
                LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)),
                Return(FP_QNaN(self.precision))
            ),
            # pow(x, ±0) is 1 if x is not a signaling NaN
            ConditionBlock(
                Test(vy, specifier=Test.IsZero),
                Return(Constant(1.0, precision=self.precision))
            ),
            # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)),
                Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))),
            ),
            # pow(±0, −∞) is +∞ with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, +∞) is +0 with no exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is ±0 for finite y>0 an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)),
                Return(vx),
            ),
            # pow(−1, ±∞) is 1 with no exception
            ConditionBlock(
                LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(+1, y) is 1 for any y (even a quiet NaN)
            ConditionBlock(
                Equal(vx, 1),
                Return(Constant(1.0, precision=self.precision)),
            ),
            # pow(x, +∞) is +0 for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +∞ for −1<x<1
            ConditionBlock(
                LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
            ConditionBlock(
                LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +0 for a number y < 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
                Return(FP_PlusZero(self.precision))
            ),
            # pow(+∞, y) is +∞ for a number y > 0
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
                Return(FP_PlusInfty(self.precision))
            ),
            # pow(−∞, y) is −0 for finite y < 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)),
                Return(FP_MinusZero(self.precision)),
            ),
            # pow(−∞, y) is −∞ for finite y > 0 an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)),
                Return(FP_MinusInfty(self.precision)),
            ),
            # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusZero(self.precision)),
            ),
            # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
            # TODO: check y is finite
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
            # TODO: signal divideByZero exception
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
                Return(FP_PlusInfty(self.precision)),
            ),
            # pow(±0, y) is +0 for finite y>0 and not an odd integer
            ConditionBlock(
                LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
                Return(FP_PlusZero(self.precision)),
            ),
        )

        result = Statement(
            special_case_results,
            # fallback default cases
            Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
        return result
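
As a rough cross-check of the split driven by t_cond, here is a minimal
plain-float sketch (assuming x > 0, and eliding the table-based log2 and
the FMA error compensation used above):

import math

def pow_sketch(x, y):
    m, e = math.frexp(x)                 # x = m * 2^e with m in [0.5, 1)
    m, e = m * 2.0, e - 1                # renormalize to m in [1, 2)
    log2m = math.log2(m)
    if e < 0:
        t0, t1 = y * (log2m + e), 0.0    # merged to absorb cancellation
    else:
        t0, t1 = y * e, y * log2m        # opposite signs are impossible
    t0_int = round(t0)
    return 2.0 ** t1 * math.ldexp(2.0 ** (t0 - t0_int), t0_int)

print(pow_sketch(0.75, 3.0))             # ~0.421875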
Example #22
    def solve_eval_error(self, gappa_init_approx, gappa_current_approx,
                         div_approx, gappa_vx, gappa_vy, inv_iteration_list,
                         div_iteration_list, seed_accuracy, seed_interval):
        """ compute the evaluation error of reciprocal approximation of
            (1 / gappa_vy)

            :param seed_accuracy: absolute error for seed value
            :type seed_accuracy: SollyaObject

        """
        seed_var = Variable("seed",
                            precision=self.precision,
                            interval=seed_interval)
        cg_eval_error_copy_map = {
            gappa_init_approx.get_handle().get_node():
            seed_var,
            gappa_vy.get_handle().get_node():
            Variable("y", precision=self.precision, interval=Interval(1, 2)),
            gappa_vx.get_handle().get_node():
            Variable("x", precision=self.precision, interval=Interval(1, 2)),
        }

        yerr_last = div_iteration_list[-1].yerr

        # copying cg_eval_error_copy_map to allow mutation during
        # optimise_scheme while keeping a clean copy for later use
        optimisation_copy_map = cg_eval_error_copy_map.copy()
        gappa_current_approx = self.optimise_scheme(gappa_current_approx,
                                                    copy=optimisation_copy_map)
        div_approx = self.optimise_scheme(div_approx,
                                          copy=optimisation_copy_map)
        yerr_last = self.optimise_scheme(yerr_last, copy=optimisation_copy_map)
        yerr_last.get_handle().set_node(yerr_last)
        G1 = Constant(1, precision=ML_Exact)
        exact_recp = G1 / gappa_vy
        exact_recp.set_precision(ML_Exact)
        exact_recp.set_tag("exact_recp")
        recp_approx_error_goal = gappa_current_approx - exact_recp
        recp_approx_error_goal.set_attributes(precision=ML_Exact,
                                              tag="recp_approx_error_goal")

        gappacg = GappaCodeGenerator(self.processor,
                                     declare_cst=False,
                                     disable_debug=True)
        gappa_code = GappaCodeObject()

        exact_div = gappa_vx * exact_recp
        exact_div.set_attributes(precision=ML_Exact, tag="exact_div")
        div_approx_error_goal = div_approx - exact_div
        div_approx_error_goal.set_attributes(precision=ML_Exact,
                                             tag="div_approx_error_goal")

        bound_list = [op for op in cg_eval_error_copy_map]

        gappacg.add_goal(gappa_code, yerr_last)

        gappa_code = gappacg.get_interval_code(
            [recp_approx_error_goal, div_approx_error_goal],
            bound_list,
            cg_eval_error_copy_map,
            gappa_code=gappa_code,
            register_bound_hypothesis=False)

        for node in bound_list:
            gappacg.add_hypothesis(gappa_code, cg_eval_error_copy_map[node],
                                   cg_eval_error_copy_map[node].get_interval())

        new_exact_recp_node = exact_recp.get_handle().get_node()
        new_exact_div_node = exact_div.get_handle().get_node()

        # adding specific hints for Newton-Raphson reciprocal iteration
        for nr in inv_iteration_list:
            nr.get_hint_rules(gappacg, gappa_code, new_exact_recp_node)

        for div_iter in div_iteration_list:
            div_iter.get_hint_rules(gappacg, gappa_code, new_exact_recp_node,
                                    new_exact_div_node)

        seed_wrt_exact = seed_var - new_exact_recp_node
        seed_wrt_exact.set_attributes(precision=ML_Exact, tag="seed_wrt_exact")
        gappacg.add_hypothesis(gappa_code, seed_wrt_exact,
                               Interval(-seed_accuracy, seed_accuracy))

        try:
            gappa_results = execute_gappa_script_extract(
                gappa_code.get(gappacg))
            recp_eval_error = gappa_results["recp_approx_error_goal"]
            div_eval_error = gappa_results["div_approx_error_goal"]
            print("eval error(s): recp={}, div={}".format(
                recp_eval_error, div_eval_error))
        except Exception:
            # log the failure and propagate the exception
            print("error during gappa run")
            raise
        return recp_eval_error, div_eval_error
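
For intuition about the bounds requested here, a toy Newton-Raphson
reciprocal refinement (illustrative only, not the generated code) shows the
seed error roughly squaring at each iteration, which is the behaviour the
hints attached from inv_iteration_list encode:

def nr_recp(y, seed, steps=1):
    # Newton-Raphson reciprocal iteration: r' = r * (2 - y * r)
    r = seed
    for _ in range(steps):
        r = r * (2.0 - y * r)
    return r

y = 1.5
seed = 0.66                               # |seed - 1/y| ~ 6.7e-3
print(abs(nr_recp(y, seed) - 1.0 / y))    # ~ 6.7e-5: error squared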
Example #23
    def generate_scalar_scheme(self, vx, n):
        # fixing inputs' node tag
        vx.set_attributes(tag="x")
        n.set_attributes(tag="n")

        int_precision = self.precision.get_integer_format()

        # assuming x = m.2^e (m in [1, 2[)
        #          n, a non-zero integer
        #
        # rootn(x, n) = x^(1/n)
        #             = exp(1/n * log(x))
        #             = 2^(1/n * log2(x))
        #             = 2^(1/n * (log2(m) + e))
        #

        # approximation log2(m)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

        use_reciprocal = False

        # non-scaled vx used to compute vx^1
        unmodified_vx = vx

        is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
        exp_correction_factor = self.precision.get_mantissa_size()
        mantissa_factor = Constant(2**exp_correction_factor,
                                   tag="mantissa_factor")
        vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

        m = MantissaExtraction(vx, tag="m", precision=self.precision)
        e = ExponentExtraction(vx, tag="e", precision=int_precision)
        e = Select(is_subnormal,
                   e - exp_correction_factor,
                   e,
                   tag="corrected_e")

        ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                     basis=2)
        ml_log = ML_GenericLog(ml_log_args)
        log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
            log_f, inv_approx_table)
        log_approx = ml_log.generate_reduced_log_split(
            Abs(m, precision=self.precision), log_f, inv_approx_table,
            log_table)
        # floating-point version of n
        n_f = Conversion(n, precision=self.precision, tag="n_f")
        inv_n = Division(Constant(1, precision=self.precision), n_f)

        log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                            log_approx)
        log_approx.set_attributes(tag="log_approx", debug=debug_multi)
        if use_reciprocal:
            r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
        else:
            r = Division(log_approx, n_f, tag="r", debug=debug_multi)

        # e_n ~ e / n
        e_f = Conversion(e, precision=self.precision, tag="e_f")
        if use_reciprocal:
            e_n = Multiplication(e_f, inv_n, tag="e_n")
        else:
            e_n = Division(e_f, n_f, tag="e_n")
        error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
        e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
        pre_e_n_frac = e_n - e_n_int
        pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
        e_n_frac = pre_e_n_frac + error_e_n * inv_n
        e_n_frac.set_attributes(tag="e_n_frac")

        ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
        ml_exp2 = ML_Exp2(ml_exp2_args)
        exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
        exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

        exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                       inline_select=True)
        exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

        exp2_e_n_int = ExponentInsertion(Conversion(e_n_int,
                                                    precision=int_precision),
                                         precision=self.precision,
                                         tag="exp2_e_n_int")

        n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
        n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
        result_sign = Select(
            n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)

        # managing n == -1
        if self.expand_div:
            ml_division_args = ML_Division.get_default_args(
                precision=self.precision, input_formats=[self.precision] * 2)
            ml_division = ML_Division(ml_division_args)
            self.division_implementation = ml_division.implementation
            self.division_implementation.set_scheme(
                ml_division.generate_scheme())
            ml_division_fct = self.division_implementation.get_function_object()
        else:
            ml_division_fct = Division

        # manage n=1 separately to avoid catastrophic propagation of errors
        # between log2 and exp2 to eventually compute the identity function
        # test-case #3
        result = ConditionBlock(
            LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
                      LogicalAnd(n_is_even, vx < 0)),
            Return(FP_QNaN(self.precision)),
            Statement(
                ConditionBlock(
                    Equal(n, -1, tag="n_is_mone"),
                    #Return(Division(Constant(1, precision=self.precision), unmodified_vx, tag="div_res", precision=self.precision)),
                    Return(
                        ml_division_fct(Constant(1, precision=self.precision),
                                        unmodified_vx,
                                        tag="div_res",
                                        precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn(±∞, n) is ±0 for n < 0 and ±∞ for n > 0.
                    Test(vx, specifier=Test.IsInfty),
                    Statement(
                        ConditionBlock(
                            n < 0,
                            #LogicalAnd(n_is_odd, n < 0),
                            Return(
                                Select(Test(vx,
                                            specifier=Test.IsPositiveInfty),
                                       Constant(FP_PlusZero(self.precision),
                                                precision=self.precision),
                                       Constant(FP_MinusZero(self.precision),
                                                precision=self.precision),
                                       precision=self.precision)),
                            Return(vx),
                        ), ),
                ),
                ConditionBlock(
                    # rootn(±0, n) is ±∞ for odd n < 0.
                    LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                               Equal(vx, 0),
                               tag="n_is_odd_and_neg"),
                    Return(
                        Select(Test(vx, specifier=Test.IsPositiveZero),
                               Constant(FP_PlusInfty(self.precision),
                                        precision=self.precision),
                               Constant(FP_MinusInfty(self.precision),
                                        precision=self.precision),
                               precision=self.precision)),
                ),
                ConditionBlock(
                    # rootn(±0, n) is +∞ for even n < 0.
                    LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                    Return(FP_PlusInfty(self.precision))),
                ConditionBlock(
                    # rootn(±0, n) is +0 for even n > 0.
                    LogicalAnd(n_is_even, Equal(vx, 0)),
                    Return(vx)),
                ConditionBlock(
                    Equal(n, 1), Return(unmodified_vx),
                    Return(result_sign * exp2_r * exp2_e_n_int *
                           exp2_e_n_frac))))
        return result
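
A minimal plain-float sketch of the same split (assuming x > 0 and a
non-zero integer n, eliding the table-based log2 and the error_e_n
compensation):

import math

def rootn_sketch(x, n):
    m, e = math.frexp(x)                  # x = m * 2^e with m in [0.5, 1)
    m, e = m * 2.0, e - 1                 # renormalize to m in [1, 2)
    r = math.log2(m) / n                  # reduced part
    e_n = e / n
    e_n_int = round(e_n)
    e_n_frac = e_n - e_n_int
    return 2.0 ** r * math.ldexp(2.0 ** e_n_frac, e_n_int)

print(rootn_sketch(27.0, 3))              # ~3.0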
Example #24
    def generate_scheme(self):
        # retrieving input precisions (data pointer and index formats)
        precision_ptr = self.get_input_precision(0)
        index_format = self.get_input_precision(2)

        dst = self.implementation.add_input_variable("dst", precision_ptr)
        src = self.implementation.add_input_variable("src", precision_ptr)
        n = self.implementation.add_input_variable("len", index_format)

        i = Variable("i", precision=index_format, var_type=Variable.Local)
        CU1 = Constant(1, precision=index_format)
        CU0 = Constant(0, precision=index_format)
        inc = i + CU1

        elt_input = TableLoad(src, i, precision=self.precision)

        local_exp = Variable("local_exp",
                             precision=self.precision,
                             var_type=Variable.Local)

        if self.use_libm_function:
            libm_exp_operator = FunctionOperator("expf", arity=1)
            libm_exp = FunctionObject("expf", [ML_Binary32], ML_Binary32,
                                      libm_exp_operator)

            elt_result = ReferenceAssign(local_exp, libm_exp(elt_input))
        else:
            exponential_args = ML_Exponential.get_default_args(
                precision=self.precision,
                libm_compliant=False,
                debug=False,
            )

            meta_exponential = ML_Exponential(exponential_args)
            exponential_scheme = meta_exponential.generate_scheme()

            elt_result = inline_function(
                exponential_scheme,
                local_exp,
                {meta_exponential.implementation.arg_list[0]: elt_input},
            )

        elt_acc = Variable("elt_acc",
                           precision=self.precision,
                           var_type=Variable.Local)

        exp_loop = Loop(
            ReferenceAssign(i, CU0),
            i < n,
            Statement(ReferenceAssign(local_exp, 0), elt_result,
                      TableStore(local_exp, dst, i, precision=ML_Void),
                      ReferenceAssign(elt_acc, elt_acc + local_exp),
                      ReferenceAssign(i, i + CU1)),
        )

        sum_rcp = Division(1,
                           elt_acc,
                           precision=self.precision,
                           tag="sum_rcp",
                           debug=debug_multi)

        div_loop = Loop(
            ReferenceAssign(i, CU0),
            i < n,
            Statement(
                TableStore(Multiplication(
                    TableLoad(dst, i, precision=self.precision), sum_rcp),
                           dst,
                           i,
                           precision=ML_Void), ReferenceAssign(i, inc)),
        )

        main_scheme = Statement(ReferenceAssign(elt_acc, 0), exp_loop, sum_rcp,
                                div_loop)

        return main_scheme
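
The two loops implement a softmax-style normalization; an equivalent
plain-Python formulation (like the generated code, it skips the usual
max-subtraction trick, so large inputs can overflow):

import math

def softmax_sketch(src):
    # exp_loop: exponentiate every element and accumulate the sum
    dst = [math.exp(v) for v in src]
    # sum_rcp and div_loop: scale each element by the reciprocal of the sum
    sum_rcp = 1.0 / sum(dst)
    return [v * sum_rcp for v in dst]

print(softmax_sketch([0.0, 1.0, 2.0]))    # ~[0.090, 0.245, 0.665]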
Example #25
    def generate_bench(self, processor, test_num=1000, unroll_factor=10):
        """ generate performance bench for self.op_class """
        initial_inputs = [
            Constant(random.uniform(inf(self.init_interval),
                                    sup(self.init_interval)),
                     precision=precision)
            for i, precision in enumerate(self.input_precisions)
        ]

        var_inputs = [
            Variable("var_%d" % i,
                     precision=FormatAttributeWrapper(precision, ["volatile"]),
                     var_type=Variable.Local)
            for i, precision in enumerate(self.input_precisions)
        ]

        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0: "\"%s[%s] %%lld elts computed "\
                   "in %%lld cycles =>\\n     %%.3f CPE \\n\"" %
                (
                    self.bench_name,
                    self.output_precision.get_display_format()
                ),
                1: FO_Arg(0),
                2: FO_Arg(1),
                3: FO_Arg(2),
                4: FO_Arg(3)
            }, void_function=True
        )
        printf_timing_function = FunctionObject(
            "printf", [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
            ML_Void, printf_timing_op)
        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

        void_function_op = FunctionOperator("(void)",
                                            arity=1,
                                            void_function=True)
        void_function = FunctionObject("(void)", [self.output_precision],
                                       ML_Void, void_function_op)

        # initialization of operation inputs
        init_assign = metaop.Statement()
        for var_input, init_value in zip(var_inputs, initial_inputs):
            init_assign.push(ReferenceAssign(var_input, init_value))

        # test loop
        loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
        test_num_cst = Constant(test_num // unroll_factor,
                                precision=ML_Int64,
                                tag="test_num")

        # goal: build a chain of dependent operations to measure
        # elementary operation latency
        local_inputs = tuple(var_inputs)
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
        for i in range(unroll_factor - 1):
            local_inputs = tuple([local_result] + var_inputs[1:])
            local_result = self.op_class(*local_inputs,
                                         precision=self.output_precision,
                                         unbreakable=True)
        # renormalisation
        local_result = self.renorm_function(local_result)

        # variable assignment to build the dependency chain
        var_assign = Statement()
        var_assign.push(ReferenceAssign(var_inputs[0], local_result))
        final_value = var_inputs[0]

        # loop increment value
        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(loop_i, Constant(0, precision=ML_Int64)),
            loop_i < test_num_cst,
            Statement(var_assign,
                      ReferenceAssign(loop_i, loop_i + loop_increment)),
        )

        # bench scheme
        test_scheme = Statement(
            ReferenceAssign(timer, processor.get_current_timestamp()),
            init_assign,
            test_loop,
            ReferenceAssign(
                timer,
                Subtraction(processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            # prevent intermediary variable simplification
            void_function(final_value),
            printf_timing_function(
                final_value, Constant(test_num, precision=ML_Int64), timer,
                Division(Conversion(timer, precision=ML_Binary64),
                         Constant(test_num, precision=ML_Binary64),
                         precision=ML_Binary64))
            # ,Return(Constant(0, precision = ML_Int32))
        )

        return test_scheme
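
The same measurement idea as a hypothetical plain-Python analogue
(wall-clock nanoseconds standing in for processor timestamps): each result
feeds the next operation, so the chain measures latency rather than
throughput.

import time

def latency_bench_sketch(op, x, test_num=1000, unroll_factor=10):
    acc = x
    start = time.perf_counter_ns()
    for _ in range(test_num // unroll_factor):
        for _ in range(unroll_factor):
            acc = op(acc, x)              # dependency chain
    elapsed = time.perf_counter_ns() - start
    return acc, elapsed / float(test_num)

_, ns_per_op = latency_bench_sketch(lambda a, b: a * b, 1.0000001)
print("%.2f ns per operation" % ns_per_op)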
Example #26
    def get_array_test_wrapper(self,
                               test_num,
                               tested_function,
                               table_size_offset_array,
                               input_tables,
                               output_array,
                               acc_num,
                               post_statement_generator,
                               NUM_INPUT_ARRAY=1):
        """ generate a test loop for multi-array tests
             @param test_num number of elementary array tests to be executed
             @param tested_function FunctionObject to be tested
             @param table_size_offset_array ML_NewTable object containing
                    (table-size, offset) pairs for multi-array testing
             @param input_table ML_NewTable containing multi-array test inputs
             @param output_table ML_NewTable containing multi-array test outputs
             @param post_statement_generator is generator used to generate
                    a statement executed at the end of the test of one of the
                    arrays of the multi-test. It expects 6 arguments:
                    (input_tables, output_array, table_size_offset_array,
                     array_offset, array_len, test_id)
             @param printf_function FunctionObject to print error case
        """
        test_id = Variable("test_id",
                           precision=ML_Int32,
                           var_type=Variable.Local)
        test_num_cst = Constant(test_num, precision=ML_Int32, tag="test_num")

        array_len = Variable("len",
                             precision=ML_UInt32,
                             var_type=Variable.Local)

        array_offset = TableLoad(table_size_offset_array, test_id, 1)

        def pointer_add(table_addr, offset):
            pointer_format = table_addr.get_precision_as_pointer_format()
            return Addition(table_addr, offset, precision=pointer_format)

        array_inputs = tuple(
            pointer_add(input_tables[in_id], array_offset)
            for in_id in range(NUM_INPUT_ARRAY))
        function_call = tested_function(
            *((pointer_add(output_array, array_offset), ) + array_inputs +
              (array_len, )))

        post_statement = post_statement_generator(input_tables, output_array,
                                                  table_size_offset_array,
                                                  array_offset, array_len,
                                                  test_id)

        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(test_id, Constant(0, precision=ML_Int32)),
            test_id < test_num_cst,
            Statement(
                ReferenceAssign(array_len,
                                TableLoad(table_size_offset_array, test_id,
                                          0)),
                function_call,
                post_statement,
                ReferenceAssign(
                    acc_num, acc_num +
                    Conversion(array_len, precision=acc_num.precision)),
                ReferenceAssign(test_id, test_id + loop_increment),
            ),
        )

        test_statement = Statement()

        # adding functional test_loop to test statement
        test_statement.add(test_loop)

        return test_statement
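
A plain-Python mirror of the generated loop (with hypothetical helper
names): each row of the size/offset table drives one sub-array test and the
processed element count is accumulated.

def run_array_tests(table_size_offset, call_one_test):
    # each row holds (array_len, array_offset), as TableLoad reads above
    acc_num = 0
    for test_id, (array_len, array_offset) in enumerate(table_size_offset):
        call_one_test(array_offset, array_len, test_id)
        acc_num += array_len              # mirrors the acc_num update
    return acc_num

print(run_array_tests([(4, 0), (8, 4)], lambda off, n, tid: None))  # 12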
Example #27
Log.report(LOG_PASS_INFO, "Registering generate Basic-Blocks pass")
Pass.register(Pass_GenerateBasicBlock)
# registering ssa translation pass
Log.report(LOG_PASS_INFO, "Registering ssa translation pass")
Pass.register(Pass_SSATranslate)
# registering basic-block simplification pass
Log.report(LOG_PASS_INFO, "Registering basic-block simplification pass")
Pass.register(Pass_BBSimplification)

if __name__ == "__main__":
    bb_root = BasicBlock(tag="bb_root")
    bb_1 = BasicBlock(tag="bb_1")
    bb_2 = BasicBlock(tag="bb_2")
    bb_3 = BasicBlock(tag="bb_3")

    var_x = Variable("x", precision=None)
    var_y = Variable("y", precision=None)

    bb_root.add(ReferenceAssign(var_x, 1))
    bb_root.add(ReferenceAssign(var_y, 2))
    bb_root.add(ConditionalBranch(var_x > var_y, bb_1, bb_2))

    bb_1.add(ReferenceAssign(var_x, 2))
    bb_1.add(UnconditionalBranch(bb_3))

    bb_2.add(ReferenceAssign(var_y, 3))
    bb_2.add(UnconditionalBranch(bb_3))

    bb_3.add(ReferenceAssign(var_y, var_x))
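
The four blocks form a diamond CFG; a sketch of the SSA form
Pass_SSATranslate would be expected to produce, assuming standard phi
placement at the join block:

# bb_root: x0 = 1; y0 = 2; branch (x0 > y0) ? bb_1 : bb_2
# bb_1:    x1 = 2; goto bb_3
# bb_2:    y1 = 3; goto bb_3
# bb_3:    x2 = phi(x1, x0); y2 = phi(y0, y1); y3 = x2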

Example #28
    def generate_scheme(self):
        # retrieving input precisions (data pointer and index formats)
        precision_ptr = self.get_input_precision(0)
        index_format = self.get_input_precision(2)
        multi_elt_num = self.multi_elt_num

        dst = self.implementation.add_input_variable("dst", precision_ptr)
        src = self.implementation.add_input_variable("src", precision_ptr)
        n = self.implementation.add_input_variable("len", index_format)

        i = Variable("i", precision=index_format, var_type=Variable.Local)
        CU0 = Constant(0, precision=index_format)

        element_format = self.precision

        self.function_list = []

        if multi_elt_num > 1:
            element_format = VECTOR_TYPE_MAP[self.precision][multi_elt_num]

        elt_input = TableLoad(src, i, precision=element_format)

        local_exp = Variable("local_exp",
                             precision=element_format,
                             var_type=Variable.Local)

        if self.use_libm_function:
            libm_fct_operator = FunctionOperator(self.use_libm_function,
                                                 arity=1)
            libm_fct = FunctionObject(self.use_libm_function, [ML_Binary32],
                                      ML_Binary32, libm_fct_operator)

            if multi_elt_num > 1:
                result_list = [
                    libm_fct(
                        VectorElementSelection(elt_input,
                                               Constant(elt_id,
                                                        precision=ML_Integer),
                                               precision=self.precision))
                    for elt_id in range(multi_elt_num)
                ]
                result = VectorAssembling(*result_list,
                                          precision=element_format)
            else:
                result = libm_fct(elt_input)
            elt_result = ReferenceAssign(local_exp, result)
        else:
            if multi_elt_num > 1:
                scalar_result = Variable("scalar_result",
                                         precision=self.precision,
                                         var_type=Variable.Local)
                fct_ctor_args = self.function_ctor.get_default_args(
                    precision=self.precision,
                    libm_compliant=False,
                )

                meta_function = self.function_ctor(fct_ctor_args)
                exponential_scheme = meta_function.generate_scheme()

                # instantiating required passes for typing
                pass_inst_abstract_prec = PassInstantiateAbstractPrecision(
                    self.processor)
                pass_inst_prec = PassInstantiatePrecision(
                    self.processor, default_precision=None)

                # executing format instantiation passes on the optree
                exponential_scheme = pass_inst_abstract_prec.execute_on_optree(
                    exponential_scheme)
                exponential_scheme = pass_inst_prec.execute_on_optree(
                    exponential_scheme)

                vectorizer = StaticVectorizer()

                # extracting the scalar argument from the scalar meta function
                scalar_input = meta_function.implementation.arg_list[0]

                # vectorize scalar scheme
                vector_result, vec_arg_list, vector_scheme, scalar_callback, scalar_callback_fct = vectorize_function_scheme(
                    vectorizer,
                    self.get_main_code_object(), exponential_scheme,
                    element_format.get_scalar_format(), [scalar_input],
                    multi_elt_num)

                elt_result = inline_function(vector_scheme, vector_result,
                                             {vec_arg_list[0]: elt_input})

                local_exp = vector_result

                self.function_list.append(scalar_callback_fct)
                libm_fct = scalar_callback

            else:
                scalar_input = elt_input
                scalar_result = local_exp

                elt_result = generate_inline_fct_scheme(
                    self.function_ctor, scalar_result, [scalar_input], {
                        "precision": self.precision,
                        "libm_compliant": False
                    })

        CU1 = Constant(1, precision=index_format)

        local_exp_init_value = Constant(0, precision=self.precision)
        if multi_elt_num > 1:
            local_exp_init_value = Constant([0] * multi_elt_num,
                                            precision=element_format)
            remain_n = Modulo(n, multi_elt_num, precision=index_format)
            iter_n = n - remain_n
            CU_ELTNUM = Constant(multi_elt_num, precision=index_format)
            inc = i + CU_ELTNUM
        else:
            remain_n = None
            iter_n = n
            inc = i + CU1

        # main loop processing multi_elt_num element(s) per iteration
        main_loop = Loop(
            ReferenceAssign(i, CU0),
            i < iter_n,
            Statement(ReferenceAssign(local_exp, local_exp_init_value),
                      elt_result,
                      TableStore(local_exp, dst, i, precision=ML_Void),
                      ReferenceAssign(i, inc)),
        )
        # epilog to process remaining item (when the length is not a multiple
        # of multi_elt_num)
        if remain_n is not None:
            # TODO/FIXME: try alternative method for processing epilog
            #             by using full vector length and mask
            epilog_loop = Loop(
                Statement(), i < n,
                Statement(
                    TableStore(libm_fct(
                        TableLoad(src, i, precision=self.precision)),
                               dst,
                               i,
                               precision=ML_Void),
                    ReferenceAssign(i, i + CU1),
                ))
            main_loop = Statement(main_loop, epilog_loop)

        return main_loop
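
The main-loop/epilog split, restated in plain Python (fct_vec standing in
for the vectorized scheme and fct_scalar for scalar_callback):

def map_with_epilog(fct_vec, fct_scalar, src, multi_elt_num):
    n = len(src)
    iter_n = n - n % multi_elt_num        # largest multiple of multi_elt_num
    dst = []
    for i in range(0, iter_n, multi_elt_num):
        dst.extend(fct_vec(src[i:i + multi_elt_num]))    # main_loop
    for i in range(iter_n, n):
        dst.append(fct_scalar(src[i]))                   # epilog_loop
    return dst

print(map_with_epilog(lambda v: [2 * x for x in v], lambda x: 2 * x,
                      [1, 2, 3, 4, 5], 2))               # [2, 4, 6, 8, 10]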
Example #29
    def generate_bench_wrapper(self,
                               test_num=1,
                               loop_num=100000,
                               test_ranges=[Interval(-1.0, 1.0)],
                               debug=False):
        # interval from which the array length is randomly chosen
        index_range = self.test_index_range

        auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

        tested_function = self.implementation.get_function_object()
        function_name = self.implementation.get_name()

        failure_report_op = FunctionOperator("report_failure")
        failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                                 failure_report_op)

        printf_success_op = FunctionOperator(
            "printf",
            arg_map={0: "\"test successful %s\\n\"" % function_name},
            void_function=True)
        printf_success_function = FunctionObject("printf", [], ML_Void,
                                                 printf_success_op)

        output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

        # standard test cases are prepended to the randomly generated ones
        test_total = test_num + len(self.standard_test_cases)

        # number of arrays expected as inputs for tested_function
        NUM_INPUT_ARRAY = 1
        # position of the input array in tested_function operands (generally
        # equals 1, as the 0-th operand is often the destination array)
        INPUT_INDEX_OFFSET = 1

        # concatenating standard test array at the beginning of randomly
        # generated array
        TABLE_SIZE_VALUES = [
            len(std_table) for std_table in self.standard_test_cases
        ] + [
            random.randrange(index_range[0], index_range[1] + 1)
            for i in range(test_num)
        ]
        OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

        table_size_offset_array = generate_2d_table(
            test_total,
            2,
            ML_UInt32,
            self.uniquify_name("table_size_array"),
            value_gen=(lambda row_id:
                       (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

        INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

        # TODO/FIXME: implement proper input range depending on input index
        # assuming a single input array
        input_precisions = [self.get_input_precision(1).get_data_precision()]
        rng_map = [
            get_precision_rng(precision, inf(test_range), sup(test_range))
            for precision, test_range in zip(input_precisions, test_ranges)
        ]

        # generated table of inputs
        input_tables = [
            generate_1d_table(
                INPUT_ARRAY_SIZE,
                self.get_input_precision(INPUT_INDEX_OFFSET +
                                         table_id).get_data_precision(),
                self.uniquify_name("input_table_arg%d" % table_id),
                value_gen=(
                    lambda _: input_precisions[table_id].round_sollya_object(
                        rng_map[table_id].get_new_value(), sollya.RN)))
            for table_id in range(NUM_INPUT_ARRAY)
        ]

        # generate output_array
        output_array = generate_1d_table(
            INPUT_ARRAY_SIZE,
            output_precision,
            self.uniquify_name("output_array"),
            #value_gen=(lambda _: FP_QNaN(self.precision))
            value_gen=(lambda _: None),
            const=False,
            empty=True)

        # accumulate element number
        acc_num = Variable("acc_num",
                           precision=ML_Int64,
                           var_type=Variable.Local)

        def empty_post_statement_gen(input_tables, output_array,
                                     table_size_offset_array, array_offset,
                                     array_len, test_id):
            return Statement()

        test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                                table_size_offset_array,
                                                input_tables, output_array,
                                                acc_num,
                                                empty_post_statement_gen)

        timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
        printf_timing_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\""
                % function_name,
                1:
                FO_Arg(0),
                2:
                FO_Arg(1),
                3:
                FO_Arg(2)
            },
            void_function=True)
        printf_timing_function = FunctionObject(
            "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
            printf_timing_op)

        vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
        loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
        loop_increment = 1

        # bench measure of clock per element
        cpe_measure = Division(
            Conversion(timer, precision=ML_Binary64),
            Conversion(acc_num, precision=ML_Binary64),
            precision=ML_Binary64,
            tag="cpe_measure",
        )

        # common test scheme between scalar and vector functions
        test_scheme = Statement(
            self.processor.get_init_timestamp(),
            ReferenceAssign(timer, self.processor.get_current_timestamp()),
            ReferenceAssign(acc_num, 0),
            Loop(
                ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
                vj < loop_num_cst,
                Statement(test_loop, ReferenceAssign(vj,
                                                     vj + loop_increment))),
            ReferenceAssign(
                timer,
                Subtraction(self.processor.get_current_timestamp(),
                            timer,
                            precision=ML_Int64)),
            printf_timing_function(
                Conversion(acc_num, precision=ML_Int64),
                timer,
                cpe_measure,
            ),
            Return(cpe_measure),
            # Return(Constant(0, precision = ML_Int32))
        )
        auto_test.set_scheme(test_scheme)
        return FunctionGroup([auto_test])
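
The size/offset table construction, restated as a hypothetical plain-Python
helper (each offset is the running sum of the preceding lengths):

import random

def build_size_offset_table(std_sizes, test_num, index_range):
    sizes = list(std_sizes) + [
        random.randrange(index_range[0], index_range[1] + 1)
        for _ in range(test_num)]
    offsets = [sum(sizes[:i]) for i in range(len(sizes))]
    return list(zip(sizes, offsets))

print(build_size_offset_table([3], 2, (4, 8)))  # e.g. [(3, 0), (5, 3), (7, 8)]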
Example #30
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", ML_Binary32)
        # declaring specific interval for input variable <x>
        vx.set_interval(Interval(-1, 1))

        # declaring free Variable y
        vy = Variable("y", precision=ML_Exact)

        # declaring expression with vx variable
        expr = vx * vx - vx * 2
        # declaring second expression with vx variable
        expr2 = vx * vx - vx

        # optimizing expressions (defining every unknown precision as the
        # default one + some optimizations, such as FMA merging, if enabled)
        opt_expr = self.optimise_scheme(expr)
        opt_expr2 = self.optimise_scheme(expr2)

        # setting specific tag names for the optimized expressions (to be
        # extracted from the gappa script)
        opt_expr.set_tag("goal")
        opt_expr2.set_tag("new_goal")

        # defining default goal to gappa execution
        gappa_goal = opt_expr

        # declaring EXACT expression to be used as hint in Gappa's script
        annotation = self.opt_engine.exactify(vy * (1 / vy))

        # the dict var_bound is used to limit the part of the DAG explored
        # when generating the gappa script; each pair (key, value) indicates
        # a node to stop at (<key>) and the node to replace it with during
        # generation (<value>); <value> must be a Variable instance with a
        # defined interval
        # vx.get_handle().get_node() is used to retrieve the node
        # instantiating the abstract node <vx> after the call to
        # self.optimise_scheme
        var_bound = {
            vx.get_handle().get_node():
            Variable("x", precision=ML_Binary32, interval=vx.get_interval())
        }
        # generating gappa code to determine interval for <opt_expr>
        # NOTES: the var_bound keys must be converted from a dict view to a
        # list to avoid implicit modification by get_interval_code
        gappa_code = self.gappa_engine.get_interval_code(
            [opt_expr], list(var_bound.keys()), var_bound)

        # add a manual hint to the gappa code
        # which states that vy * (1 / vy) -> 1 { vy <> 0 };
        self.gappa_engine.add_hint(
            gappa_code, annotation, Constant(1, precision=ML_Exact),
            Comparison(vy,
                       Constant(0, precision=ML_Integer),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))

        # adding the expression <opt_expr2> as an extra goal in the gappa script
        self.gappa_engine.add_goal(gappa_code, opt_expr2)

        # executing gappa on the script generated from <gappa_code>,
        # extracting the results and storing them into <gappa_result>,
        # a dict indexed by the goals' tags
        if is_gappa_installed():
            gappa_result = execute_gappa_script_extract(
                gappa_code.get(self.gappa_engine))
            Log.report(Log.Info, "eval error: ", gappa_result["new_goal"])
        else:
            Log.report(
                Log.Warning,
                "gappa was not installed: unable to check execute_gappa_script_extract"
            )

        # dummy scheme to make code generation functional
        scheme = Statement(Return(vx))

        return scheme
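
For reference, the interval Gappa should report for the default goal can be
cross-checked numerically (ignoring the rounding effects Gappa actually
accounts for): goal = vx * vx - vx * 2 over [-1, 1] ranges over [-1, 3].

xs = [i / 1000.0 for i in range(-1000, 1001)]
vals = [x * x - x * 2 for x in xs]
print(min(vals), max(vals))               # ~(-1.0, 3.0)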