def generate_tensor_check_loop(self, tensor_descriptors, input_tables,
                                   output_tables):
        # unpack tensor descriptors tuple
        (input_tensor_descriptor_list,
         output_tensor_descriptor_list) = tensor_descriptors
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_error_detail_function = self.get_printf_error_detail_fct(
            output_tensor_descriptor_list[0])

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_tables = self.generate_expected_table(tensor_descriptors,
                                                       input_tables)

        # global statement to list all checks
        check_statement = Statement()

        # implement check for each output tensor
        for out_id, out_td in enumerate(output_tensor_descriptor_list):
            # expected values for the (vj)-th entry of the sub-array
            expected_values = [
                TableLoad(expected_tables[out_id], vj, i)
                for i in range(self.accuracy.get_num_output_value())
            ]
            # local result for the (vj)-th entry of the sub-array
            local_result = TableLoad(output_tables[out_id], vj)

            array_len = out_td.get_bounding_size()

            if self.break_error:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, output_values))
            else:
                return_statement_break = Statement(
                    printf_error_detail_function(*((vj, ) + (local_result, ))),
                    self.accuracy.get_output_print_call(
                        self.function_name, expected_values),
                    Return(Constant(1, precision=ML_Int32)))
            check_array_loop = Loop(
                ReferenceAssign(vj, 0), vj < array_len,
                Statement(
                    ConditionBlock(
                        self.accuracy.get_output_check_test(
                            local_result, expected_values),
                        return_statement_break),
                    ReferenceAssign(vj, vj + 1),
                ))
            check_statement.add(check_array_loop)
        return check_statement
示例#2
0
def simplify_inverse(optree, processor):
    dummy_var = Variable("dummy_var_seed", precision = optree.get_precision())
    dummy_div_seed = DivisionSeed(dummy_var, precision = optree.get_precision())
    inv_approx_table = processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

    seed_input = optree.inputs[0]
    c0 = Constant(0, precision = ML_Int32)

    if optree.get_precision() == inv_approx_table.get_storage_precision():
        return TableLoad(inv_approx_table, inv_approx_table.get_index_function()(seed_input), c0, precision = optree.get_precision()) 
    else:
        return Conversion(TableLoad(inv_approx_table, inv_approx_table.get_index_function()(seed_input), c0, precision = inv_approx_table.get_storage_precision()), precision = optree.get_precision()) 
示例#3
0
def legalize_invsqrt_seed(optree):
    """ Legalize an InverseSquareRootSeed optree """
    assert isinstance(optree, ReciprocalSquareRootSeed)
    op_prec = optree.get_precision()
    # input = 1.m_hi-m_lo * 2^e
    # approx = 2^(-int(e/2)) * approx_insqrt(1.m_hi) * (e % 2 ? 1.0 : ~2**-0.5)
    op_input = optree.get_input(0)
    convert_back = False
    approx_prec = ML_Binary32

    if op_prec != approx_prec:
        op_input = Conversion(op_input, precision=ML_Binary32)
        convert_back = True

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    op_exp = ExponentExtraction(op_input,
                                tag="op_exp",
                                debug=debug_multi,
                                precision=ML_Int32)
    neg_half_exp = Division(Negation(op_exp, precision=ML_Int32),
                            Constant(2, precision=ML_Int32),
                            precision=ML_Int32)
    approx_exp = ExponentInsertion(neg_half_exp,
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=approx_prec)
    op_exp_parity = Modulo(op_exp,
                           Constant(2, precision=ML_Int32),
                           precision=ML_Int32)
    approx_exp_correction = Select(Equal(op_exp_parity,
                                         Constant(0, precision=ML_Int32)),
                                   Constant(1.0, precision=approx_prec),
                                   Select(Equal(
                                       op_exp_parity,
                                       Constant(-1, precision=ML_Int32)),
                                          Constant(S2**0.5,
                                                   precision=approx_prec),
                                          Constant(S2**-0.5,
                                                   precision=approx_prec),
                                          precision=approx_prec),
                                   precision=approx_prec,
                                   tag="approx_exp_correction",
                                   debug=debug_multi)
    table_index = invsqrt_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="invsqrt_index", debug=debug_multi)
    approx = Multiplication(TableLoad(invsqrt_approx_table,
                                      table_index,
                                      precision=approx_prec),
                            Multiplication(approx_exp_correction,
                                           approx_exp,
                                           precision=approx_prec),
                            tag="invsqrt_approx",
                            debug=debug_multi,
                            precision=approx_prec)
    if approx_prec != op_prec:
        return Conversion(approx, precision=op_prec)
    else:
        return approx
示例#4
0
    def generate_scheme(self):
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.get_input_precision(0))

        bf16_params = ML_NewTable(dimensions=[self.table_size], storage_precision=BFloat16)
        for i in range(self.table_size):
            bf16_params[i] = 1.1**i

        conv_vx = Conversion(TableLoad(bf16_params, vx), precision=ML_Binary32, tag="conv_vx", debug=debug_multi)

        result = conv_vx

        scheme = Return(result, precision=self.precision, debug=debug_multi)

        return scheme
示例#5
0
def legalize_reciprocal_seed(optree):
    """ Legalize an ReciprocalSeed optree """
    assert isinstance(optree, ReciprocalSeed)
    op_prec = optree.get_precision()
    initial_prec = op_prec
    back_convert = False
    op_input = optree.get_input(0)

    INV_APPROX_TABLE_FORMAT = generic_inv_approx_table.get_storage_precision()

    if op_prec != INV_APPROX_TABLE_FORMAT:
        op_input = Conversion(op_input, precision=INV_APPROX_TABLE_FORMAT)
        op_prec = INV_APPROX_TABLE_FORMAT
        back_convert = True
    # input = 1.m_hi-m_lo * 2^e
    # approx = 2^(-int(e/2)) * approx_insqrt(1.m_hi) * (e % 2 ? 1.0 : ~2**-0.5)

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    int_prec = op_prec.get_integer_format()
    op_sign = CopySign(op_input,
                       Constant(1.0, precision=op_prec),
                       precision=op_prec)
    op_exp = ExponentExtraction(op_input,
                                tag="op_exp",
                                debug=debug_multi,
                                precision=int_prec)
    neg_exp = Negation(op_exp, precision=int_prec)
    approx_exp = ExponentInsertion(neg_exp,
                                   tag="approx_exp",
                                   debug=debug_multi,
                                   precision=op_prec)
    table_index = generic_inv_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="inv_index", debug=debug_multi)
    approx = Multiplication(TableLoad(generic_inv_approx_table,
                                      table_index,
                                      precision=op_prec),
                            Multiplication(approx_exp,
                                           op_sign,
                                           precision=op_prec),
                            tag="inv_approx",
                            debug=debug_multi,
                            precision=op_prec)
    if back_convert:
        return Conversion(approx, precision=initial_prec)
    else:
        return approx
def expand_accessor(accessor):
    """ Expand an accessor node into a valid MDL description """
    if isinstance(accessor, ReadAccessor):
        # check dimensionnality: the number of sub-indexes in ReadAccessor's
        # index_expr must match the dimensionnality of ReadAccessor's tensor
        # tensor_descriptor
        return TableLoad(accessor.tensor.base_buffer,
                         accessor.tensor.descriptor.generate_linearized_offset(
                             accessor.index_expr),
                         precision=accessor.value_format)
    elif isinstance(accessor, WriteAccessor):
        return TableStore(
            expand_kernel_expr(accessor.value_expr),
            accessor.tensor.base_buffer,
            accessor.tensor.descriptor.generate_linearized_offset(
                accessor.index_expr),
            precision=ML_Void,
        )
    else:
        raise NotImplementedError
示例#7
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any sub-interval
        :param error_threshold: error bound for an approximation on any sub-interval

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1, 2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(cst_value, local_function,
                                                     local_interval,
                                                     sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    Log.report(
                        Log.error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio, precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
示例#8
0
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx):
    """ generate the meta approximation for @p offset_fct over several
        intervals defined by @p indexing object
        For each sub-interval, a polynomial approximation with
        maximal_error @p target_eps is tabulated, and evaluated using format
        @p coeff_precision.
        The input variable is @p vx """
    # computing degree for a different polynomial approximation on each
    # sub-interval
    poly_degree_list = [
        int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps)))
        for offset, sub_interval in indexing.get_offseted_sub_list()
    ]
    poly_max_degree = max(poly_degree_list)

    # tabulating polynomial coefficients on split_num sub-interval of interval
    poly_table = ML_NewTable(
        dimensions=[indexing.split_num, poly_max_degree + 1],
        storage_precision=coeff_precision,
        const=True)
    offset_table = ML_NewTable(dimensions=[indexing.split_num],
                               storage_precision=coeff_precision,
                               const=True)
    max_error = 0.0

    for sub_index in range(indexing.split_num):
        poly_degree = poly_degree_list[sub_index]
        offset, approx_interval = indexing.get_offseted_sub_interval(sub_index)
        offset_table[sub_index] = offset
        if poly_degree == 0:
            # managing constant approximation separately since it seems
            # to break sollya
            local_approx = coeff_precision.round_sollya_object(
                offset_fct(offset)(inf(approx_interval)))
            poly_table[sub_index][0] = local_approx
            for monomial_index in range(1, poly_max_degree + 1):
                poly_table[sub_index][monomial_index] = 0
            approx_error = sollya.infnorm(
                offset_fct(offset) - local_approx, approx_interval)

        else:
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                offset_fct(offset), poly_degree,
                [coeff_precision] * (poly_degree + 1), approx_interval,
                sollya.relative)

            for monomial_index in range(poly_max_degree + 1):
                if monomial_index <= poly_degree:
                    poly_table[sub_index][
                        monomial_index] = poly_object.coeff_map[monomial_index]
                else:
                    poly_table[sub_index][monomial_index] = 0
        max_error = max(approx_error, max_error)

    Log.report(Log.Debug, "max approx error is {}", max_error)

    # indexing function: derive index from input @p vx value
    poly_index = indexing.get_index_node(vx)
    poly_index.set_attributes(tag="poly_index", debug=debug_multi)

    ext_precision = get_extended_fp_precision(coeff_precision)

    # building polynomial evaluation scheme
    offset = TableLoad(offset_table,
                       poly_index,
                       precision=coeff_precision,
                       tag="offset",
                       debug=debug_multi)
    poly = TableLoad(poly_table,
                     poly_index,
                     poly_max_degree,
                     precision=coeff_precision,
                     tag="poly_init",
                     debug=debug_multi)
    red_vx = Subtraction(vx,
                         offset,
                         precision=vx.precision,
                         tag="red_vx",
                         debug=debug_multi)
    for monomial_index in range(poly_max_degree, -1, -1):
        coeff = TableLoad(poly_table,
                          poly_index,
                          monomial_index,
                          precision=coeff_precision,
                          tag="poly_%d" % monomial_index,
                          debug=debug_multi)
        #fma_precision = coeff_precision if monomial_index > 1 else ext_precision
        fma_precision = coeff_precision
        poly = FMA(red_vx, poly, coeff, precision=fma_precision)

    #return Conversion(poly, precision=coeff_precision)
    #return poly.hi
    return poly
示例#9
0
    def generate_scheme(self):
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        max_bound = sup(abs(self.input_intervals[0]))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table
        fused_table = ML_NewTable(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(
                local_x
            )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

            sin_local = sin(
                local_x
            )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.get_output_precision()
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                         scaling_power -
                                         (table_size_log - max_bound_log),
                                         precision=ML_Int32,
                                         tag="table_index",
                                         debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(sollya.x), [0, 2],
            [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2], [0] +
            [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.get_output_precision())

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))
        scheme = Statement(Return(result))

        return scheme
示例#10
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        precision_ptr = self.get_input_precision(0)
        index_format = self.get_input_precision(2)
        multi_elt_num = self.multi_elt_num

        dst = self.implementation.add_input_variable("dst", precision_ptr)
        src = self.implementation.add_input_variable("src", precision_ptr)
        n = self.implementation.add_input_variable("len", index_format)

        i = Variable("i", precision=index_format, var_type=Variable.Local)
        CU0 = Constant(0, precision=index_format)

        element_format = self.precision

        self.function_list = []

        if multi_elt_num > 1:
            element_format = VECTOR_TYPE_MAP[self.precision][multi_elt_num]

        elt_input = TableLoad(src, i, precision=element_format)

        local_exp = Variable("local_exp",
                             precision=element_format,
                             var_type=Variable.Local)

        if self.use_libm_function:
            libm_fct_operator = FunctionOperator(self.use_libm_function,
                                                 arity=1)
            libm_fct = FunctionObject(self.use_libm_function, [ML_Binary32],
                                      ML_Binary32, libm_fct_operator)

            if multi_elt_num > 1:
                result_list = [
                    libm_fct(
                        VectorElementSelection(elt_input,
                                               Constant(elt_id,
                                                        precision=ML_Integer),
                                               precision=self.precision))
                    for elt_id in range(multi_elt_num)
                ]
                result = VectorAssembling(*result_list,
                                          precision=element_format)
            else:
                result = libm_fct(elt_input)
            elt_result = ReferenceAssign(local_exp, result)
        else:
            if multi_elt_num > 1:
                scalar_result = Variable("scalar_result",
                                         precision=self.precision,
                                         var_type=Variable.Local)
                fct_ctor_args = self.function_ctor.get_default_args(
                    precision=self.precision,
                    libm_compliant=False,
                )

                meta_function = self.function_ctor(fct_ctor_args)
                exponential_scheme = meta_function.generate_scheme()

                # instanciating required passes for typing
                pass_inst_abstract_prec = PassInstantiateAbstractPrecision(
                    self.processor)
                pass_inst_prec = PassInstantiatePrecision(
                    self.processor, default_precision=None)

                # exectuting format instanciation passes on optree
                exponential_scheme = pass_inst_abstract_prec.execute_on_optree(
                    exponential_scheme)
                exponential_scheme = pass_inst_prec.execute_on_optree(
                    exponential_scheme)

                vectorizer = StaticVectorizer()

                # extracting scalar argument from meta_exponential meta function
                scalar_input = meta_function.implementation.arg_list[0]

                # vectorize scalar scheme
                vector_result, vec_arg_list, vector_scheme, scalar_callback, scalar_callback_fct = vectorize_function_scheme(
                    vectorizer,
                    self.get_main_code_object(), exponential_scheme,
                    element_format.get_scalar_format(), [scalar_input],
                    multi_elt_num)

                elt_result = inline_function(vector_scheme, vector_result,
                                             {vec_arg_list[0]: elt_input})

                local_exp = vector_result

                self.function_list.append(scalar_callback_fct)
                libm_fct = scalar_callback

            else:
                scalar_input = elt_input
                scalar_result = local_exp

                elt_result = generate_inline_fct_scheme(
                    self.function_ctor, scalar_result, [scalar_input], {
                        "precision": self.precision,
                        "libm_compliant": False
                    })

        CU1 = Constant(1, precision=index_format)

        local_exp_init_value = Constant(0, precision=self.precision)
        if multi_elt_num > 1:
            local_exp_init_value = Constant([0] * multi_elt_num,
                                            precision=element_format)
            remain_n = Modulo(n, multi_elt_num, precision=index_format)
            iter_n = n - remain_n
            CU_ELTNUM = Constant(multi_elt_num, precision=index_format)
            inc = i + CU_ELTNUM
        else:
            remain_n = None
            iter_n = n
            inc = i + CU1

        # main loop processing multi_elt_num element(s) per iteration
        main_loop = Loop(
            ReferenceAssign(i, CU0),
            i < iter_n,
            Statement(ReferenceAssign(local_exp, local_exp_init_value),
                      elt_result,
                      TableStore(local_exp, dst, i, precision=ML_Void),
                      ReferenceAssign(i, inc)),
        )
        # epilog to process remaining item (when the length is not a multiple
        # of multi_elt_num)
        if not remain_n is None:
            # TODO/FIXME: try alternative method for processing epilog
            #             by using full vector length and mask
            epilog_loop = Loop(
                Statement(), i < n,
                Statement(
                    TableStore(libm_fct(
                        TableLoad(src, i, precision=self.precision)),
                               dst,
                               i,
                               precision=ML_Void),
                    ReferenceAssign(i, i + CU1),
                ))
            main_loop = Statement(main_loop, epilog_loop)

        return main_loop
示例#11
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        precision_ptr = self.get_input_precision(0)
        index_format = self.get_input_precision(2)

        dst = self.implementation.add_input_variable("dst", precision_ptr)
        src = self.implementation.add_input_variable("src", precision_ptr)
        n = self.implementation.add_input_variable("len", index_format)

        i = Variable("i", precision=index_format, var_type=Variable.Local)
        CU1 = Constant(1, precision=index_format)
        CU0 = Constant(0, precision=index_format)
        inc = i + CU1

        elt_input = TableLoad(src, i, precision=self.precision)

        local_exp = Variable("local_exp",
                             precision=self.precision,
                             var_type=Variable.Local)

        if self.use_libm_function:
            libm_exp_operator = FunctionOperator("expf", arity=1)
            libm_exp = FunctionObject("expf", [ML_Binary32], ML_Binary32,
                                      libm_exp_operator)

            elt_result = ReferenceAssign(local_exp, libm_exp(elt_input))
        else:
            exponential_args = ML_Exponential.get_default_args(
                precision=self.precision,
                libm_compliant=False,
                debug=False,
            )

            meta_exponential = ML_Exponential(exponential_args)
            exponential_scheme = meta_exponential.generate_scheme()

            elt_result = inline_function(
                exponential_scheme,
                local_exp,
                {meta_exponential.implementation.arg_list[0]: elt_input},
            )

        elt_acc = Variable("elt_acc",
                           precision=self.precision,
                           var_type=Variable.Local)

        exp_loop = Loop(
            ReferenceAssign(i, CU0),
            i < n,
            Statement(ReferenceAssign(local_exp, 0), elt_result,
                      TableStore(local_exp, dst, i, precision=ML_Void),
                      ReferenceAssign(elt_acc, elt_acc + local_exp),
                      ReferenceAssign(i, i + CU1)),
        )

        sum_rcp = Division(1,
                           elt_acc,
                           precision=self.precision,
                           tag="sum_rcp",
                           debug=debug_multi)

        div_loop = Loop(
            ReferenceAssign(i, CU0),
            i < n,
            Statement(
                TableStore(Multiplication(
                    TableLoad(dst, i, precision=self.precision), sum_rcp),
                           dst,
                           i,
                           precision=ML_Void), ReferenceAssign(i, inc)),
        )

        main_scheme = Statement(ReferenceAssign(elt_acc, 0), exp_loop, sum_rcp,
                                div_loop)

        return main_scheme
示例#12
0
    def get_array_test_wrapper(self,
                               test_num,
                               tested_function,
                               table_size_offset_array,
                               input_tables,
                               output_array,
                               acc_num,
                               post_statement_generator,
                               NUM_INPUT_ARRAY=1):
        """ generate a test loop for multi-array tests
             @param test_num number of elementary array tests to be executed
             @param tested_function FunctionObject to be tested
             @param table_size_offset_array ML_NewTable object containing
                    (table-size, offset) pairs for multi-array testing
             @param input_table ML_NewTable containing multi-array test inputs
             @param output_table ML_NewTable containing multi-array test outputs
             @param post_statement_generator is generator used to generate
                    a statement executed at the end of the test of one of the
                    arrays of the multi-test. It expects 6 arguments:
                    (input_tables, output_array, table_size_offset_array,
                     array_offset, array_len, test_id)
             @param printf_function FunctionObject to print error case
        """
        test_id = Variable("test_id",
                           precision=ML_Int32,
                           var_type=Variable.Local)
        test_num_cst = Constant(test_num, precision=ML_Int32, tag="test_num")

        array_len = Variable("len",
                             precision=ML_UInt32,
                             var_type=Variable.Local)

        array_offset = TableLoad(table_size_offset_array, test_id, 1)

        def pointer_add(table_addr, offset):
            pointer_format = table_addr.get_precision_as_pointer_format()
            return Addition(table_addr, offset, precision=pointer_format)

        array_inputs = tuple(
            pointer_add(input_tables[in_id], array_offset)
            for in_id in range(NUM_INPUT_ARRAY))
        function_call = tested_function(
            *((pointer_add(output_array, array_offset), ) + array_inputs +
              (array_len, )))

        post_statement = post_statement_generator(input_tables, output_array,
                                                  table_size_offset_array,
                                                  array_offset, array_len,
                                                  test_id)

        loop_increment = 1

        test_loop = Loop(
            ReferenceAssign(test_id, Constant(0, precision=ML_Int32)),
            test_id < test_num_cst,
            Statement(
                ReferenceAssign(array_len,
                                TableLoad(table_size_offset_array, test_id,
                                          0)),
                function_call,
                post_statement,
                ReferenceAssign(
                    acc_num, acc_num +
                    Conversion(array_len, precision=acc_num.precision)),
                ReferenceAssign(test_id, test_id + loop_increment),
            ),
        )

        test_statement = Statement()

        # adding functional test_loop to test statement
        test_statement.add(test_loop)

        return test_statement
示例#13
0
    def generate_array_check_loop(self, input_tables, output_array,
                                  table_size_offset_array, array_offset,
                                  array_len, test_id):
        # internal array iterator index
        vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

        printf_input_function = self.get_printf_input_function()

        printf_error_template = "printf(\"max %s error is %s \\n\", %s)" % (
            self.function_name,
            self.precision.get_display_format().format_string,
            self.precision.get_display_format().pre_process_fct("{0}"))
        printf_error_op = TemplateOperatorFormat(printf_error_template,
                                                 arity=1,
                                                 void_function=True,
                                                 require_header=["stdio.h"])

        printf_error_function = FunctionObject("printf", [self.precision],
                                               ML_Void, printf_error_op)

        printf_max_op = FunctionOperator(
            "printf",
            arg_map={
                0:
                "\"max %s error is reached at input number %s \\n \"" %
                (self.function_name, "%d"),
                1:
                FO_Arg(0)
            },
            void_function=True,
            require_header=["stdio.h"])
        printf_max_function = FunctionObject("printf", [self.precision],
                                             ML_Void, printf_max_op)

        NUM_INPUT_ARRAY = len(input_tables)

        # generate the expected table for the whole multi-array
        expected_table = self.generate_expected_table(input_tables,
                                                      table_size_offset_array)

        # inputs for the (vj)-th entry of the sub-arrat
        local_inputs = tuple(
            TableLoad(input_tables[in_id], array_offset + vj)
            for in_id in range(NUM_INPUT_ARRAY))
        # expected values for the (vj)-th entry of the sub-arrat
        expected_values = [
            TableLoad(expected_table, array_offset + vj, i)
            for i in range(self.accuracy.get_num_output_value())
        ]
        # local result for the (vj)-th entry of the sub-arrat
        local_result = TableLoad(output_array, array_offset + vj)

        if self.break_error:
            return_statement_break = Statement(
                printf_input_function(*((vj, ) + local_inputs +
                                        (local_result, ))),
                self.accuracy.get_output_print_call(self.function_name,
                                                    output_values))
        else:
            return_statement_break = Statement(
                printf_input_function(*((vj, ) + local_inputs +
                                        (local_result, ))),
                self.accuracy.get_output_print_call(self.function_name,
                                                    expected_values),
                Return(Constant(1, precision=ML_Int32)))

        # loop implementation to check sub-array array_offset
        # results validity
        check_array_loop = Loop(
            ReferenceAssign(vj, 0), vj < array_len,
            Statement(
                ConditionBlock(
                    self.accuracy.get_output_check_test(
                        local_result, expected_values),
                    return_statement_break),
                ReferenceAssign(vj, vj + 1),
            ))
        return check_array_loop
示例#14
0
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=sollya.S2**-24):
    """ To be documented """
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(dimensions=[num_intervals, max_degree + 1],
                              storage_precision=precision,
                              tag="coeff_table")

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        #local_function = function(sollya.x)
        #local_interval = Interval(subint_low, subint_high)
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = sollya.guessdegree(local_function, local_interval,
                                          error_threshold)
        degree = min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            degree_list = range(1, degree + 1)
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x),
                degree_list, [precision] * len(degree_list),
                Interval(-subint_high, subint_high),
                sollya.absolute,
                error_function=error_function)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                print("degree: {}".format(degree))
                raise err
        for ci in range(degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        max_approx_error = max(max_approx_error, abs(approx_error))
    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       precision=precision)
    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    index = Max(0,
                Min(
                    NearestInteger(Multiplication(diff,
                                                  delta_ratio,
                                                  precision=precision),
                                   precision=ML_Int32), num_intervals - 1),
                tag="index",
                debug=True,
                precision=ML_Int32)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=True)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error