Example No. 1
def dirty_multi_node_expand(node, precision, mem_map=None, fma=True):
    """ Dirty expand node into Hi and Lo part, storing
        already processed temporary values in mem_map """
    mem_map = mem_map or {}
    if node in mem_map:
        return mem_map[node]
    elif isinstance(node, Constant):
        value = node.get_value()
        value_hi = sollya.round(value, precision.sollya_object, sollya.RN)
        value_lo = sollya.round(value - value_hi, precision.sollya_object,
                                sollya.RN)
        ch = Constant(value_hi, tag=node.get_tag() + "hi", precision=precision)
        cl = Constant(value_lo, tag=node.get_tag() +
                      "lo", precision=precision) if value_lo != 0 else None
        if cl is None:
            Log.report(Log.Info, "simplified constant")
        result = ch, cl
        mem_map[node] = result
        return result
    else:
        # Case of Addition or Multiplication nodes:
        # 1. retrieve inputs
        # 2. dirty convert inputs recursively
        # 3. forward to the right metamacro
        assert isinstance(node, Addition) or isinstance(node, Multiplication)
        lhs = node.get_input(0)
        rhs = node.get_input(1)
        op1h, op1l = dirty_multi_node_expand(lhs, precision, mem_map, fma)
        op2h, op2l = dirty_multi_node_expand(rhs, precision, mem_map, fma)
        if isinstance(node, Addition):
            if op1l is not None and op2l is not None:
                result = Add222(op1h, op1l, op2h, op2l)
            elif op1l is None and op2l is not None:
                result = Add212(op1h, op2h, op2l)
            elif op1l is not None and op2l is None:
                result = Add212(op2h, op1h, op1l)
            else:
                result = Add211(op1h, op2h)
            mem_map[node] = result
            return result

        elif isinstance(node, Multiplication):
            if op1l is not None and op2l is not None:
                result = Mul222(op1h, op1l, op2h, op2l, fma=fma)
            elif op1l is None and op2l is not None:
                result = Mul212(op1h, op2h, op2l, fma=fma)
            elif op1l is not None and op2l is None:
                result = Mul212(op2h, op1h, op1l, fma=fma)
            else:
                result = Mul211(op1h, op2h, fma=fma)
            mem_map[node] = result
            return result
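
The Add2xx/Mul2xx calls above are Metalibm metamacros for double-word arithmetic: every value is carried as an unevaluated (hi, lo) pair. As a rough sketch of what Add211 and Mul211 compute (plain Python floats, no FMA; the helper names and the demo value are illustrative, not Metalibm API):

def two_sum(a, b):
    # Knuth's TwoSum: s = fl(a + b) plus the rounding error e, with a + b == s + e
    s = a + b
    bb = s - a
    e = (a - (s - bb)) + (b - bb)
    return s, e

def split(a):
    # Dekker's splitting of a binary64 value into two non-overlapping halves
    c = (2.0**27 + 1.0) * a
    hi = c - (c - a)
    return hi, a - hi

def two_prod(a, b):
    # Dekker's TwoProd: p = fl(a * b) plus the rounding error e, with a * b == p + e
    p = a * b
    ah, al = split(a)
    bh, bl = split(b)
    e = ((ah * bh - p) + ah * bl + al * bh) + al * bl
    return p, e

# the (hi, lo) pair keeps the bits a plain float product would discard
hi, lo = two_prod(1.0 + 2.0**-30, 1.0 + 2.0**-30)

Add222/Mul222 combine two such pairs into a new pair, and Add212/Mul212 mix a single word with a pair, which is why the expansion above dispatches on whether each lo part is None.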
Example No. 2
        def compute_log(_vx, exp_corr_factor=None):
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          precision=self.precision,
                                          debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

            table_index = inv_approx_table.index_function(_vx_mant)

            table_index.set_attributes(tag="table_index", debug=debug_multi)

            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho",
                         debug=debug_multi)

            rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
            r = Multiplication(rcp,
                               _vx_mant,
                               precision=self.precision,
                               tag="r")

            int_format = self.precision.get_integer_format()

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by the targeted architecture
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(ReciprocalSeed(_vx_mant,
                                        precision=self.precision,
                                        tag="seed",
                                        debug=debug_multi,
                                        silent=True),
                         precision=int_format),
                Constant(-2, precision=int_format),
                precision=int_format),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_multi)

            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx",
                                   debug=debug_multi,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log(1 + sollya.x) / sollya.x, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log(1 + x) / x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, "{}", poly_object.get_sollya_object())

            corr_exp = Conversion(_vx_exp if exp_corr_factor is None else
                                  _vx_exp + exp_corr_factor,
                                  precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # _poly approximates log(1+r)/r with its constant term removed, so
            # _red_vx * _poly + _red_vx approximates log(1 + _red_vx)

            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
                return rh
            else:
                return rh
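
compute_log implements the usual table-driven reduction: with x = 2**e * m and m in [1, 2), a reciprocal seed r ~ 1/m gives log(x) = e*log(2) - log(r) + log(1 + u), where u = r*m - 1 is small enough for a short polynomial. A rough plain-float analogue for positive normal x (math.log stands in for ReciprocalSeed and the log_table lookups; this is not the double-word scheme above, and the function is mine, for illustration only):

import math

def log_via_reduction(x, seed_bits=7):
    m, e = math.frexp(x)      # x = m * 2**e with m in [0.5, 1)
    m *= 2.0
    e -= 1                    # now m is in [1, 2)
    # crude reciprocal seed: 1/m rounded to seed_bits fractional bits
    r = round((1.0 / m) * 2.0**seed_bits) / 2.0**seed_bits
    u = r * m - 1.0           # reduced argument, |u| <= 2**-seed_bits
    # short polynomial for log(1 + u) on the reduced interval
    poly = u - u*u/2.0 + u**3/3.0 - u**4/4.0
    return e * math.log(2.0) - math.log(r) + poly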
Example No. 3
  def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The requested precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)


    def default_bool_convert(optree, precision=None, **kw):
        return bool_convert(optree, precision, -1, 0, **kw) \
                if isinstance(self.processor, VectorBackend) \
                else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()


    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
      log2_hi = Constant(
        round(log(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log(2) - round(log(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    elif self.log_radix == 10:
      log2_hi = Constant(
        round(log10(2), precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_hi')
      log2_lo = Constant(
        round(log10(2) - round(log10(2), precision, sollya.RN),
              precision, sollya.RN),
        precision = self.precision,
        tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals, zero excluded.
    # That is why we subtract 1 from the integer bitstring of the input and
    # then compare the resulting integer bitstring for Less (strict) against
    # this mask, e.g. 0x7fffff for binary32.
    if not self.no_subnormal:
      subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                precision = int_prec, tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one  = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
            1 << (self.precision.field_size - table_index_size - 1),
            precision = int_prec
            )
    table_s_exp_index_mask = Constant(
            ~((table_mantissa_half_ulp.get_value() << 1) - 1),
            precision = uint_prec
            )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
    sollya_logtbl = [
      (-log1p(float(i) / table_nb_elements)
      + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
      for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
            round(sollya_logtbl[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
            round(sollya_logtbl[i] - init_logtbl_hi[i],
                  self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
    ]
    # interleave the two tables: hi[0], lo[0], hi[1], lo[1], ...
    init_logtbl = [tmp[i]
                   for i in range(len(init_logtbl_hi))
                   for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
      sollya_rcptbl = [
        (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
        for i in range(table_nb_elements)
      ]
      init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
      ]
      rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                              storage_precision = self.precision,
                              init_data = init_rcptbl,
                              tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if not self.no_subnormal:
      vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
      # Avoid the 0.0 case by subtracting 1 from vx_as_int
      tmp = Comparison(vx_as_int - 1, subnormal_mask,
                       specifier = Comparison.Less)
      is_subnormal = default_bool_convert(
        tmp, # Will catch negative values as well as NaNs with sign bit set
        precision = int_prec)
      is_subnormal.set_attributes(tag = "is_subnormal")
      if not(isinstance(self.processor, VectorBackend)):
        is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                   is_subnormal,
                                   precision = int_prec)

      #################################################
      # Vectorizable integer based subnormal handling #
      #################################################
      # 1. lzcnt
      # custom lzcount-like for subnormal numbers using FPU (see draft article)
      Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi")
      Zf = Subtraction(
        TypeCast(Zi, precision = self.precision),
        fp_one,
        precision = self.precision,
        tag="Zf")
      # Zf exponent is -(nlz(x) - exponent_size).
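      # (ORing with 1.0's bit pattern drops the subnormal fraction bits into a
      # value in [1, 2); subtracting 1.0 then renormalizes it, so Zf's exponent
      # encodes the leading-zero count without an integer lzcnt instruction.)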
      # 2. compute shift value
      # A vector comparison on x86+sse/avx yields a mask such as
      # '|0x00|0xff|0x00|0x00|', which is why we use Negate.
      # For scalar code generation the comparison instead yields 0 or 1 in C,
      # so the mask below would not be correct for a scalar implementation.
      # FIXME: Can we know which backend will be called and choose accordingly?
      # Should we make something arch-agnostic instead?
      #
      n_value = BitLogicAnd(
        Addition(
          DirtyExponentExtraction(Zf, self.precision),
          Constant(
            self.precision.get_bias(),
            precision = int_prec),
          precision = int_prec),
        is_subnormal,
        precision = int_prec,
        tag = "n_value")
      alpha = Negation(n_value, tag="alpha")
      #
      # 3. shift left
      # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
      normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
      # 4. set exponent to the right value
      # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
      # The final "+ 1" comes from the fact that once renormalized, the
      # floating-point datum has a biased exponent of 1
      #tmp0 = Subtraction(
      #        field_size,
      #        value,
      #        precision = int_prec,
      #        tag="tmp0")
      # Set the value to 0 if the number is not subnormal
      #tmp1 = BitLogicAnd(tmp0, is_subnormal)
      #renormalized_exponent = BitLogicLeftShift(
      #        tmp1,
      #        field_size
      #        )
    else: # no_subnormal == True
      normal_vx_as_int = vx_as_int
      
    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division:
      rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif not self.no_rcp:
      rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
      if not self.processor.is_supported_operation(rcp_m):
        if self.precision == ML_Binary64:
          # Try using a binary32 FastReciprocal
          binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
          rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
          rcp_m = Conversion(rcp_m, precision = ML_Binary64)
        if not self.processor.is_supported_operation(rcp_m):
          # FIXME An approximation table could be used instead but for vector
          # implementations another GATHER would be required.
          # However this may well be better than a division...
          rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
      rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1)
      rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size))
      rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                        debug = debug_multi)
    #  
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    ri_round = TypeCast(
            Addition(
                TypeCast(rcp_m, precision = int_prec),
                table_mantissa_half_ulp,
                precision = int_prec
                ),
            precision = uint_prec
            )
    ri_fast_rndn = BitLogicAnd(
            ri_round,
            table_s_exp_index_mask,
            tag = 'ri_fast_rndn',
            precision = uint_prec
            )
    # u = m * ri - 1
    ul = None
    if self.no_rcp:  # ... u does not fit on a single word
      tmp_u, tmp_ul = Mul211(vx_mantissa,         
                             TypeCast(ri_fast_rndn, precision = self.precision), 
                             fma = (self.no_fma == False))
      fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one')
      u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)      
      u.set_attributes(tag='uh')
      ul.set_attributes(tag='ul')
    elif not self.no_fma:
      u = FusedMultiplyAdd(
        vx_mantissa,
        TypeCast(ri_fast_rndn, precision = self.precision),
        fp_one,
        specifier = FusedMultiplyAdd.Subtract,
        tag = 'u')
    else: # disable FMA
      # tmph + tmpl = m * ri, where tmph ~ 1
      tmph, tmpl = Mul211(vx_mantissa,         
                          TypeCast(ri_fast_rndn, precision = self.precision), 
                          fma = False)
      # u_tmp = tmph - 1 ... exact due to Sterbenz
      u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
      # u = u_tmp + tmpl ... exact since the result u is representable as a single word
      u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')
    
    unneeded_bits = Constant(
            self.precision.field_size - table_index_size,
            precision=uint_prec,
            tag="unneeded_bits"
            )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
            ri_fast_rndn,
            unneeded_bits,
            precision = uint_prec,
            tag = "ri_bits"
            )
    # Retrieve the mantissa MSBs plus the first exponent bit, for the tau
    # computation when the exponent is 0 (i.e. biased 127, so the first
    # exponent bit is set). In that particular case i = 0 but tau is 1.
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
            (1 << (table_index_size + 1)) - 1,
            precision = size_t_prec
            )
    table_index = BitLogicAnd(
            Conversion(ri_bits, precision = size_t_prec),
            table_index_mask,
            tag = 'table_index',
            precision = size_t_prec
            )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
            Comparison(
                TypeCast(table_index, precision = signed_size_t_prec),
                Constant(tau_index_limit, precision = signed_size_t_prec),
                specifier = Comparison.Greater
                if isinstance(self.processor, VectorBackend)
                else Comparison.LessOrEqual
                ),
            precision = signed_size_t_prec,
            tag="tmp"
            )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add")
            if isinstance(self.processor, VectorBackend)
            else tmp,
            precision=int_prec,
            tag="pre_tau"
        )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
            table_index,
            Constant((1 << table_index_size) - 1, precision = size_t_prec),
            precision = size_t_prec
            )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_hi"
            )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
            table_index_hi,
            Constant(1, precision = size_t_prec),
            precision = size_t_prec,
            tag = "table_index_lo"
            )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    if not self.no_subnormal:
      tmp_eptau = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
      exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
      exponent = Addition(
        Addition(
          BitLogicRightShift(
            normal_vx_as_int,
            field_size,
            tag = 'exponent',
            interval = self.precision.get_exponent_interval(),
            precision = int_prec),
          Constant(
            self.precision.get_bias(),
            precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
      sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
      sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
      sollya_function = log10(1 + sollya.x)
    # ...
    if self.force_division:  # rcp accuracy is 2^(-p)
      boundrcp = 2**(-self.precision.get_precision())
    else:
      boundrcp = 1.5 * 2**(-12)           # ... see Intel intrinsics guide
      if self.precision in [ML_Binary64]:
        if not self.processor.is_supported_operation(rcp_m):
          boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
        else:
          boundrcp = 2**(-14)             # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1)
    if not self.no_rcp:
      approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
      approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1))
    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
            guessdegree(
                sollya_function,
                approx_interval,
                max_eps,
                )
            )
    Log.report(Log.Info, "poly degree is ", poly_degree)
    if self.log_radix == EXP_1:
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
        approx_interval,
        sollya.absolute,
        0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
      poly_object = Polynomial.build_from_approximation(
        sollya_function,
        range(1, int(poly_degree) + 1), # Force 1st coeff to 0
        # Emulate double-self.precision coefficient formats
        [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
        approx_interval,
        sollya.absolute,
        0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
            else ML_DoubleDouble if self.precision == ML_Binary64 \
            else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision, scheme_id = cgpe_scheme_idx
                )
    else:
        Log.report(Log.Warning,
                "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object,
                u,
                unified_precision = self.precision,
                constant_precision = constant_precision
                )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        return dirty_multi_node_expand(
          node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul,
                                                                use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    # Compute log(2) * (e + tau - alpha)
    if self.log_radix != 2: # 'e' or '10'
      log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, 
                                  fma = (self.no_fma == False))
   
    # Add log1p(u)
    if self.log_radix != 2: # 'e' or '10'
      tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                      log1pu_poly_hi, log1pu_poly_lo)
    else:
      tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                      log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme
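
The ri_round / ri_fast_rndn pair above rounds the reciprocal seed to the nearest point of the 2**table_index_size grid purely with integer arithmetic: add half a grid ULP to the bit pattern, then mask off the bits below the grid. A standalone binary32 sketch of that trick (the function name, its arguments, and the demo values are mine, for illustration only):

import struct

def fast_round_to_grid(x, table_index_size, field_size=23):
    # reinterpret the binary32 value as an unsigned integer
    u = struct.unpack('<I', struct.pack('<f', x))[0]
    half_ulp = 1 << (field_size - table_index_size - 1)
    mask = ~((half_ulp << 1) - 1) & 0xFFFFFFFF
    # add half a grid ULP then truncate: round-to-nearest onto the grid
    u = (u + half_ulp) & mask
    return struct.unpack('<f', struct.pack('<I', u))[0]

# e.g. snap a reciprocal seed onto a 7-bit grid before deriving table_index
r_i = fast_round_to_grid(1.0 / 1.37, 7)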
Example No. 4
    def generate_reduced_log_split(self,
                                   _vx_mant,
                                   log_f,
                                   inv_approx_table,
                                   log_table,
                                   log_table_tho=None,
                                   corr_exp=None,
                                   tho_cond=None):
        """ Generate a logarithm approximation (log_f(_vx_mant) + corr_exp) for a reduced argument
            _vx_mant which is assumed to be within [1, 2[ (i.e. an extracted mantissa)

            Adding an exponent correction is optional. """

        log2_hi_value = round(
            log_f(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        table_index = inv_approx_table.index_function(_vx_mant)

        table_index.set_attributes(tag="table_index", debug=debug_multi)

        rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
        r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r")

        int_format = self.precision.get_integer_format()

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        pre_arg_red_index = TypeCast(BitLogicAnd(
            TypeCast(ReciprocalSeed(_vx_mant,
                                    precision=self.precision,
                                    tag="seed",
                                    debug=debug_multi,
                                    silent=True),
                     precision=int_format),
            Constant(-2, precision=int_format),
            precision=int_format),
                                     precision=self.precision,
                                     tag="pre_arg_red_index",
                                     debug=debug_multi)

        C0 = Constant(0, precision=table_index.get_precision())
        index_comp_0 = Equal(table_index,
                             C0,
                             tag="index_comp_0",
                             debug=debug_multi)

        arg_red_index = Select(index_comp_0,
                               1.0,
                               pre_arg_red_index,
                               tag="arg_red_index",
                               debug=debug_multi)
        #_red_vx        = arg_red_index * _vx_mant - 1.0
        _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
        inv_err = S2**-6
        red_interval = Interval(1 - inv_err, 1 + inv_err)
        _red_vx.set_attributes(tag="_red_vx",
                               debug=debug_multi,
                               interval=red_interval)

        # return in case of standard (non-special) input
        if tho_cond is not None:
            assert log_table_tho is not None
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo",
                                 debug=debug_multi)

            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi",
                                 debug=debug_multi)
        else:
            assert log_table_tho is None
            _log_inv_lo = TableLoad(log_table, table_index, 1)
            _log_inv_hi = TableLoad(log_table, table_index, 0)

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval,
                S2**-(self.precision.get_field_size() + 1))) + 1
        global_poly_object = Polynomial.build_from_approximation(
            log(1 + x) / x, poly_degree, [self.precision] * (poly_degree + 1),
            approx_interval, sollya.absolute)
        poly_object = global_poly_object.sub_poly(start_index=1)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, _red_vx, unified_precision=self.precision)
        _poly.set_attributes(tag="poly", debug=debug_multi)
        Log.report(Log.Info, "{}", poly_object.get_sollya_object())

        # _poly approximates log(1+r)/r with its constant term removed, so
        # _red_vx * _poly + _red_vx approximates log(1 + _red_vx)

        m0h, m0l = Mul211(_red_vx, _poly)
        m0h, m0l = Add212(_red_vx, m0h, m0l)
        m0h.set_attributes(tag="m0h", debug=debug_multi)
        m0l.set_attributes(tag="m0l")
        if corr_exp is not None:
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")
            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
        else:
            # bypass the exponent addition when no exponent correction is provided
            rh, rl = m0h, m0l
        rh.set_attributes(tag="rh0", debug=debug_multi)
        rl.set_attributes(tag="rl0", debug=debug_multi)
        rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
        rh.set_attributes(tag="rh", debug=debug_multi)
        rl.set_attributes(tag="rl", debug=debug_multi)

        # FIXME: log<self.basis>(vx) is computed as log(vx) / log(self.basis)
        # which could be optimized for some value of self.basis (e.g. 2)
        if sollya.log(self.basis) != 1.0:
            lbh = self.precision.round_sollya_object(1 /
                                                     sollya.log(self.basis))
            lbl = self.precision.round_sollya_object(1 /
                                                     sollya.log(self.basis) -
                                                     lbh)
            rh, rl = Mul222(rh, rl, lbh, lbl)
            return rh
        else:
            return rh
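
In the constant setup above, log2_hi_value is deliberately rounded to only field_size - (exponent_size + 1) bits so that the later product corr_exp * log2_hi stays exact (a short significand times a small integer exponent still fits in the working precision), while log2_lo_value carries the remainder. A rough plain-float sketch of that splitting for binary64 (round_to_bits is mine; the real code rounds Sollya's exact log(2), whereas math.log(2.0) here is already a binary64 approximation):

import math

def round_to_bits(x, bits):
    # round x to 'bits' significant bits (round to nearest; sketch only)
    m, e = math.frexp(x)                  # x = m * 2**e with 0.5 <= |m| < 1
    return math.floor(m * 2.0**bits + 0.5) * 2.0**(e - bits)

# binary64: field_size = 52, exponent_size = 11 -> keep 52 - 12 = 40 bits,
# so exponent * log2_hi (|exponent| < 2**11) needs at most 51 bits: exact.
log2_hi = round_to_bits(math.log(2.0), 52 - 12)
log2_lo = math.log(2.0) - log2_hi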