示例#1
0
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
###############################################################################
# last-modified:    Mar  7th, 2018
# Author(s): Nicolas Brunie <*****@*****.**>
###############################################################################
import sys

import sollya

from sollya import (Interval, ceil, floor, round, inf, sup, log, exp, expm1,
                    log2, cosh, guessdegree, dirtyinfnorm, RN, acosh, RD)
S2 = sollya.SollyaObject(2)
from sollya import parse as sollya_parse

from metalibm_core.core.ml_operations import *
from metalibm_core.core.ml_formats import *
from metalibm_core.core.ml_table import ML_NewTable
from metalibm_core.code_generation.generic_processor import GenericProcessor
from metalibm_core.core.polynomials import *
from metalibm_core.core.ml_function import (ML_Function, ML_FunctionBasis,
                                            DefaultArgTemplate)
from metalibm_core.code_generation.generator_utility import (FunctionOperator,
                                                             FO_Result, FO_Arg)
from metalibm_core.core.ml_complex_formats import ML_Mpfr_t
from metalibm_core.core.special_values import FP_PlusInfty
from metalibm_core.core.simple_scalar_function import ScalarUnaryFunction
示例#2
0
 def computeBoundMultiplication(self, out_format, input_format_lhs,
                                input_format_rhs):
     """Return the relative error bound of a multiplication as a sollya object.

     The bound is obtained from
     ``out_format.meta_block.local_relative_error_eval`` called with the
     ``mp_node`` of the left-hand and right-hand operand formats, and the
     value it returns is wrapped into a ``sollya.SollyaObject``.
     """
     wrap = sollya.SollyaObject
     lhs_node = input_format_lhs.mp_node
     rhs_node = input_format_rhs.mp_node
     return wrap(
         out_format.meta_block.local_relative_error_eval(lhs_node, rhs_node))
示例#3
0
    def generate_scheme(self):
        """Build the operation graph approximating cosh(x) in self.precision.

        The input is replaced by its absolute value and reduced as
        x = k * (log(2) / 2**index_size) + r. The result is then rebuilt from
        a table of 2**(+/- l / 2**index_size) values (each split in a hi and
        a lo part), a polynomial approximation of exp over the reduced
        interval, and two powers of two built with ExponentInsertion.

        Returns:
            A Statement whose Return selects +infinity when the overflow
            comparison is true and the reconstructed value otherwise.
        """
        # declaring the input variable of the implementation

        vx = self.implementation.add_input_variable("x", self.precision)

        # NOTE(review): this reconfigures Log as a side effect of building
        # the scheme
        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        # NOTE(review): ExpRaiseReturn is not referenced anywhere else in
        # this method; it reads vx when called, i.e. after the Abs() below
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # number of low bits of k used to index the table
        # (the table below has 2 * 2**index_size rows)
        index_size = 3

        # cosh is even: the rest of the scheme only works on |x|
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        # reduction step: log(2) / 2**index_size, and its rounded inverse
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        # NOTE(review): the 1024 exponent is hard-coded and does not depend
        # on self.precision -- presumably sized for binary64; confirm
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        # hi part is rounded on a reduced number of bits, lo part holds the
        # remainder rounded to the working precision
        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        # k = trunc(|x| * inv_log2), kept in the floating-point precision
        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        # evaluation error of r_hi, with vx and k replaced by bounded
        # variables
        # NOTE(review): r_eval_error is not used below in this method, and
        # the [0, 715] / [0, 1024] bounds are hard-coded
        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=self.precision)
            })

        # polynomial approximation of exp on [-step, step] with an absolute
        # error goal of 2**-precision
        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        # first coefficient format is 1, the poly_degree following ones use
        # self.precision
        precision_list = [1] + [self.precision] * (poly_degree)

        # integer view of k, split into k_hi (k >> index_size) and
        # k_lo (k mod 2**index_size)
        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        # NOTE(review): pow_exp is not used below in this method
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        # table rows: 2 * 2**index_size entries of 4 values
        #   column 0 / 1: hi / lo parts of 2**(-input_value / 2**index_size)
        #   column 2 / 3: hi / lo parts of 2**(+input_value / 2**index_size)
        # rows i < 2**index_size use input_value = i, the other rows use
        # input_value = i - 2**index_size
        exp_table = ML_NewTable(dimensions=[2 * 2**index_size, 4],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            # hi parts are rounded on about 2/3 of the mantissa size
            # (value does not depend on i)
            reduced_hi_prec = int(self.precision.get_mantissa_size() * 2 / 3.0)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)  = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)  = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #
        # cosh(x) = S * (pos_value_hi + pos_value_lo) * (1 + poly_pos(r))
        #         + T * (neg_value_hi + neg_value_lo) * (1 + poly_neg(r))
        # which is evaluated below as (pos_exp + neg_exp) + hi_terms

        # approximation error is measured as the dirty infinity norm of
        # (function - polynomial) on the approximation interval
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # the same polynomial, without its degree-0 term, is evaluated with
        # a Horner scheme at r and at -r
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        # table row is k_lo + 2**index_size, i.e. always in the second half
        # of the table
        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        # exponents of S and T: (k_hi - 1) and (-k_hi - 1), each bounded
        # from below by the format's get_emin_normal() value
        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus,
                                        precision=self.precision,
                                        tag="pow_exp_pos",
                                        debug=debug_multi)
        pow_exp_neg = ExponentInsertion(k_neg,
                                        precision=self.precision,
                                        tag="pow_exp_neg",
                                        debug=debug_multi)

        # dominant terms: table hi parts scaled by their power of two
        hi_terms = (pos_value_load_hi * pow_exp_pos +
                    neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag="hi_terms")

        # remaining terms of the exp(r) product (everything but hi * 1)
        pos_exp = (
            pos_value_load_hi * poly_pos +
            (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        # remaining terms of the exp(-r) product (everything but hi * 1)
        neg_exp = (
            neg_value_load_hi * poly_neg +
            (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        # the correction terms are summed first, then added to hi_terms
        result = Addition(Addition(
            pos_exp,
            neg_exp,
            precision=self.precision,
        ),
                          hi_terms,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value
        # overflow threshold: acosh of the format's maximal value, rounded
        # down (RD); vx already holds Abs(x) at this point
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater,
                             tag="ov_flag")

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme
示例#4
0
    @property
    def tag(self):
        """Tag of this object: the ``function_name`` exposed by ``self.ctor``."""
        constructor = self.ctor
        return constructor.function_name

# Argument dictionaries for the generic logarithm in base e, 2 and 10.
# Each dictionary owns its own "extra_passes" list (no sharing between them).
GEN_LOG_ARGS = {
    "basis": sollya.exp(1),
    "function_name": "ml_genlog",
    "extra_passes": ["beforecodegen:fuse_fma"],
}
GEN_LOG2_ARGS = {
    "basis": 2,
    "function_name": "ml_genlog2",
    "extra_passes": ["beforecodegen:fuse_fma"],
}
GEN_LOG10_ARGS = {
    "basis": 10,
    "function_name": "ml_genlog10",
    "extra_passes": ["beforecodegen:fuse_fma"],
}

class LibmFunctionTest(FunctionTest):
    """FunctionTest whose tag combines its title and a benched function name."""

    @property
    def tag(self):
        """Return ``<title>_<bench_function_name>``.

        NOTES/FIXME: only the 0-th element of self.arg_map_list is used to
        determine the tag; the other elements are not considered.
        """
        prefix = self.title + "_"
        first_arg_map = self.arg_map_list[0]
        return prefix + first_arg_map["bench_function_name"]

# Sollya constants used as exponentiation bases by the emulation helpers.
S2 = sollya.SollyaObject(2)
S10 = sollya.SollyaObject(10)


def emulate_exp2(v):
    """Return S2 raised to the power v (emulation of exp2)."""
    return pow(S2, v)


def emulate_exp10(v):
    """Return S10 raised to the power v (emulation of exp10)."""
    return pow(S10, v)

# libm functions
LIBM_FUNCTION_LIST = [
    # single precision
    LibmFunctionTest(metalibm_functions.external_bench.ML_ExternalBench, [{"bench_function_name": fname, "emulate": emulate, "auto_test": 0, "headers": ["math.h"]}], title="libm")
    for fname, emulate in [
        ("expf", sollya.exp), ("exp2f", emulate_exp2), ("exp10f", emulate_exp10), ("expm1f", sollya.expm1),
        ("logf", sollya.log), ("log2f", sollya.log2), ("log10f", sollya.log10), ("log1p", sollya.log1p),
        ("cosf", sollya.cos), ("sinf", sollya.sin), ("tanf", sollya.tan), ("atanf", sollya.atan),
        ("coshf", sollya.cosh), ("sinhf", sollya.sinh), ("tanhf", sollya.tanh),
示例#5
0
    def generate_bipartite_approx_module(self, vx):
        """Build a two-table (initial value + offset) approximation of
        self.function over self.interval.

        The input is mapped to an unsigned fixed-point value with
        alpha + beta + gamma fractional bits, which is split into three index
        fields. A table of initial values is indexed by (alpha, beta), a
        table of offsets by (alpha, gamma), and the result is the sum of the
        two loaded values converted to self.precision. The maximal absolute
        approximation error over all table points and the table sizes are
        reported through Log.

        Args:
            vx: input value; its interval is set to self.interval

        Returns:
            the Conversion node (tag "vr_out") of the sum of both table
            loads, in self.precision
        """
        debug_fixed, debug_std = self.get_debug_utils()
        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5

        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)

        # bounds of the input interval and function values at these bounds
        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        # the width of the input interval must be an exact power of two
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        assert 2**range_size_log2 == range_size

        # (vx - range_lo) shifted by log2(range_size), converted to an
        # unsigned fixed-point format with 0 integer bits and
        # alpha + beta + gamma fractional bits
        reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                                  range_size_log2),
                               precision=fixed_point(0,
                                                     alpha + beta + gamma,
                                                     signed=False),
                               tag="reduced_x",
                               debug=debug_fixed)

        # three index fields extracted from reduced_x; positions are given
        # relative to the MSB or the LSB through align_hi / align_lo
        alpha_index = self.get_fixed_slice(
            reduced_x,
            0,
            alpha - 1,
            align_hi=FixedPointPosition.FromMSBToLSB,
            align_lo=FixedPointPosition.FromMSBToLSB,
            tag="alpha_index",
            debug=debug_std)
        gamma_index = self.get_fixed_slice(
            reduced_x,
            gamma - 1,
            0,
            align_hi=FixedPointPosition.FromLSBToLSB,
            align_lo=FixedPointPosition.FromLSBToLSB,
            tag="gamma_index",
            debug=debug_std)

        beta_index = self.get_fixed_slice(
            reduced_x,
            alpha,
            gamma,
            align_hi=FixedPointPosition.FromMSBToLSB,
            align_lo=FixedPointPosition.FromLSBToLSB,
            tag="beta_index",
            debug=debug_std)

        # Assuming monotonic function
        # (extreme magnitudes are then reached at the interval bounds)
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))

        # storage format sizing: MSB from the largest magnitude, LSB from
        # the smallest magnitude extended by the output bit size and the
        # guard bits
        # NOTE(review): log2(f_absmin) is evaluated as-is; a function value
        # of 0 at an interval bound is not handled here
        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - self.precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb

        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index,
                                  beta_index,
                                  tag="tiv_index",
                                  debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index,
                                 gamma_index,
                                 tag="to_index",
                                 debug=debug_std)

        tiv_index_size = int(alpha + beta)
        to_index_size = int(alpha + gamma)

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        # one slope per alpha sub-interval, estimated by a centered finite
        # difference of width delta_u
        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            # true division: the quotient is truncated by int() when used
            # as an index into slope_table below
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(
                iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        # (the commented-out loop below is the explicit form of the
        # min() / max() calls that follow it)
        #tiv_min = table_iv[0]
        #tiv_max = table_iv[0]
        #for i in range(1, 2**tiv_index_size):
        #    tiv_min = min(tiv_min, table_iv[i])
        #    tiv_max = max(tiv_max, table_iv[i])
        tiv_min = min(table_iv)
        tiv_max = max(table_iv)
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        # offset entry (i, j) = slope of sub-interval i times j finest steps
        # (values are not rounded yet at this point)
        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        # from here on, initial_value names the graph node loading from
        # table_iv (the name was previously used for table entries)
        initial_value = TableLoad(table_iv,
                                  tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        # the offset table gets its own storage format, derived from the
        # interval of its values
        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        Log.report(
            Log.Verbose, "offset_precision is {} ({} bits)".format(
                offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value
        # NOTE(review): the loop starts at index 1, so entry 0 is not passed
        # through round_sollya_object; entry 0 was set to
        # slope_table[0] * 0 * offset_step above
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset,
                                 to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value), evaluate_range(offset_value)))

        # NOTE(review): final_add and round_bit are not used below; the
        # result is built from a second initial_value + offset_value sum
        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        result = Conversion(initial_value + offset_value,
                            precision=self.precision,
                            tag="vr_out",
                            debug=debug_fixed)
        # Approximation error evaluation
        # maximal |function(x) - (initial value + offset)| over every point
        # of the (alpha, beta, gamma) grid, using the table contents
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta +
                                                                 gamma)
                    local_error = abs(self.function(table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        Log.report(
            Log.Verbose, "approx_error is {}, error_log2 is {}".format(
                float(approx_error), error_log2))

        # table size
        # (number of entries of both tables, reported for information only)
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        Log.report(
            Log.Verbose,
            "tables' size are {} entries".format(table_iv_size +
                                                 table_offset_size))
        return result
示例#6
0
 def __eq__(lhs, rhs):
     """Compare lhs and rhs as sollya objects.

     Returns False when rhs is not a numeric value (as decided by
     is_numeric_value); otherwise both operands are wrapped into
     sollya.SollyaObject and compared with SollyaObject.__eq__.
     """
     if is_numeric_value(rhs):
         return sollya.SollyaObject.__eq__(sollya.SollyaObject(lhs),
                                           sollya.SollyaObject(rhs))
     return False
示例#7
0
 def numeric_emulate(self, input_value):
     """Return 2 raised to the power input_value, computed on a sollya object."""
     two = sollya.SollyaObject(2)
     return two ** input_value
示例#8
0
 def get_sollya_object(self):
     """Return a freshly built sollya object holding the value 0."""
     zero = sollya.SollyaObject(0)
     return zero