def dirty_multi_node_expand(node, precision, mem_map=None, fma=True):
    """Dirty expand node into a (hi, lo) pair of nodes.

    Already processed nodes are looked up in, and stored into, mem_map so
    that a node reachable through several paths is only expanded once.

    Args:
        node: node to expand. It must either already be a key of mem_map,
            or be a Constant, an Addition or a Multiplication.
        precision: format of the generated Constant nodes; its
            sollya_object attribute is used to round constant values.
        mem_map: optional dict mapping nodes to their (hi, lo) expansion.
            It is updated in place; a new dict is created when None.
        fma: forwarded as the ``fma`` keyword of the Mul2xx meta-macros.

    Returns:
        The (hi, lo) expansion of node. For a Constant, lo is None when
        the rounded low part is zero.
    """
    # Test identity, not truthiness: `mem_map or {}` would replace an empty
    # dict handed over by the caller (or by the parent recursion level) with
    # a private one, and every result memoized in it would be lost.
    if mem_map is None:
        mem_map = {}

    if node in mem_map:
        return mem_map[node]

    if isinstance(node, Constant):
        value = node.get_value()
        value_hi = sollya.round(value, precision.sollya_object, sollya.RN)
        value_lo = sollya.round(
            value - value_hi, precision.sollya_object, sollya.RN)
        ch = Constant(value_hi, tag=node.get_tag() + "hi", precision=precision)
        # the low word is only materialized when it is non-zero
        if value_lo != 0:
            cl = Constant(
                value_lo, tag=node.get_tag() + "lo", precision=precision)
        else:
            cl = None
            Log.report(Log.Info, "simplified constant")
        result = ch, cl
        mem_map[node] = result
        return result

    # Case of Addition or Multiplication nodes:
    # 1. retrieve inputs
    # 2. dirty convert inputs recursively
    # 3. forward to the right metamacro
    assert isinstance(node, Addition) or isinstance(node, Multiplication)
    lhs = node.get_input(0)
    rhs = node.get_input(1)
    op1h, op1l = dirty_multi_node_expand(lhs, precision, mem_map, fma)
    op2h, op2l = dirty_multi_node_expand(rhs, precision, mem_map, fma)

    if isinstance(node, Addition):
        if op1l is not None and op2l is not None:
            result = Add222(op1h, op1l, op2h, op2l)
        elif op2l is not None:
            # only the right operand has a low word
            result = Add212(op1h, op2h, op2l)
        elif op1l is not None:
            # only the left operand has a low word: swap the operands
            result = Add212(op2h, op1h, op1l)
        else:
            result = Add211(op1h, op2h)
        mem_map[node] = result
        return result
    elif isinstance(node, Multiplication):
        if op1l is not None and op2l is not None:
            result = Mul222(op1h, op1l, op2h, op2l, fma=fma)
        elif op2l is not None:
            # only the right operand has a low word
            result = Mul212(op1h, op2h, op2l, fma=fma)
        elif op1l is not None:
            # only the left operand has a low word: swap the operands
            result = Mul212(op2h, op1h, op1l, fma=fma)
        else:
            result = Mul211(op1h, op2h, fma=fma)
        mem_map[node] = result
        return result
def compute_log(_vx, exp_corr_factor=None):
    """Build the operation graph evaluating the logarithm of _vx.

    This function reads several names from its enclosing scope: self,
    inv_approx_table, log_table, log_table_tho, log2_hi, log2_lo and
    debug_multi.

    Args:
        _vx: input node whose mantissa and exponent are extracted.
        exp_corr_factor: optional term added to the extracted exponent
            before it is converted to self.precision.

    Returns:
        The high word of the result; when sollya.log(self.basis) is not 1
        the (hi, lo) pair is first multiplied by the rounded split of
        1 / log(self.basis).
    """
    _vx_mant = MantissaExtraction(
        _vx, tag="_vx_mant", precision=self.precision, debug=debug_multi)
    _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi)

    table_index = inv_approx_table.index_function(_vx_mant)
    table_index.set_attributes(tag="table_index", debug=debug_multi)

    # tho is 1.0 when the mantissa is above sqrt(2), 0.0 otherwise
    tho_cond = _vx_mant > Constant(sollya.sqrt(2), precision=self.precision)
    tho = Select(
        tho_cond,
        Constant(1.0, precision=self.precision),
        Constant(0.0, precision=self.precision),
        precision=self.precision, tag="tho", debug=debug_multi)

    # NOTE(review): rcp and r are not used below; kept in case building
    # the nodes matters elsewhere -- confirm before removing.
    rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
    r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r")

    int_format = self.precision.get_integer_format()

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the
    # targeted architecture
    pre_arg_red_index = TypeCast(
        BitLogicAnd(
            TypeCast(
                ReciprocalSeed(
                    _vx_mant, precision=self.precision, tag="seed",
                    debug=debug_multi, silent=True),
                precision=int_format),
            # -2 clears the least significant bit of the seed bit pattern
            Constant(-2, precision=int_format),
            precision=int_format),
        precision=self.precision, tag="pre_arg_red_index", debug=debug_multi)

    arg_red_index = Select(
        Equal(table_index, 0), 1.0, pre_arg_red_index,
        tag="arg_red_index", debug=debug_multi)
    _red_vx = arg_red_index * _vx_mant - 1.0
    inv_err = S2**-6
    # NOTE(review): this interval is centred on 1 although _red_vx has 1.0
    # subtracted and approx_interval below is centred on 0 -- confirm.
    red_interval = Interval(1 - inv_err, 1 + inv_err)
    _red_vx.set_attributes(
        tag="_red_vx", debug=debug_multi, interval=red_interval)

    # return in case of standard (non-special) input
    _log_inv_lo = Select(
        tho_cond,
        TableLoad(log_table_tho, table_index, 1),
        TableLoad(log_table, table_index, 1),
        tag="log_inv_lo", debug=debug_multi)
    _log_inv_hi = Select(
        tho_cond,
        TableLoad(log_table_tho, table_index, 0),
        TableLoad(log_table, table_index, 0),
        tag="log_inv_hi", debug=debug_multi)

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(
        guessdegree(
            log(1 + sollya.x) / sollya.x, approx_interval,
            S2**-(self.precision.get_field_size() + 1))) + 1
    global_poly_object = Polynomial.build_from_approximation(
        log(1 + x) / x, poly_degree,
        [self.precision] * (poly_degree + 1),
        approx_interval, sollya.absolute)
    # the degree-0 coefficient is dropped here; _red_vx is added back
    # separately by the Add212 below
    poly_object = global_poly_object.sub_poly(start_index=1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
        poly_object, _red_vx, unified_precision=self.precision)
    _poly.set_attributes(tag="poly", debug=debug_multi)
    # the message must be a string: the polynomial is passed as an argument,
    # as generate_reduced_log_split does
    Log.report(Log.Info, "{}", poly_object.get_sollya_object())

    # `is None` is an identity test and cannot be affected by operator
    # overloading on the node classes
    if exp_corr_factor is None:
        corrected_exp = _vx_exp
    else:
        corrected_exp = _vx_exp + exp_corr_factor
    corr_exp = Conversion(corrected_exp, precision=self.precision) + tho
    corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

    # _poly approximates log10(1+r)/r
    # _poly * red_vx approximates log10(x)
    m0h, m0l = Mul211(_red_vx, _poly)
    m0h, m0l = Add212(_red_vx, m0h, m0l)
    m0h.set_attributes(tag="m0h", debug=debug_multi)
    m0l.set_attributes(tag="m0l")

    l0_h = corr_exp * log2_hi
    l0_l = corr_exp * log2_lo
    l0_h.set_attributes(tag="l0_h")
    l0_l.set_attributes(tag="l0_l")

    rh, rl = Add222(l0_h, l0_l, m0h, m0l)
    rh.set_attributes(tag="rh0", debug=debug_multi)
    rl.set_attributes(tag="rl0", debug=debug_multi)

    # subtract the two-word table value
    rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
    rh.set_attributes(tag="rh", debug=debug_multi)
    rl.set_attributes(tag="rl", debug=debug_multi)

    # change of basis, skipped when log(self.basis) is 1
    if sollya.log(self.basis) != 1.0:
        lbh = self.precision.round_sollya_object(
            1 / sollya.log(self.basis))
        lbl = self.precision.round_sollya_object(
            1 / sollya.log(self.basis) - lbh)
        rh, rl = Mul222(rh, rl, lbh, lbl)
    return rh
def generate_scheme(self):
    """Produce an abstract scheme for the logarithm.

    This abstract scheme will be used by the code generation backend.

    The method reads the following attributes of self: precision,
    implementation, processor, cgpe_index, tbl_index_size, log_radix,
    no_subnormal, no_rcp, force_division and no_fma.

    Returns:
        A Return node wrapping the high word of the computed logarithm.
    """
    if self.precision not in [ML_Binary32, ML_Binary64]:
        Log.report(Log.Error, "The demanded precision is not supported")

    vx = self.implementation.add_input_variable("x", self.precision)

    def default_bool_convert(optree, precision=None, **kw):
        # a true comparison is converted to -1 on vector backends and to 1
        # otherwise; false is 0 in both cases
        return bool_convert(optree, precision, -1, 0, **kw) \
            if isinstance(self.processor, VectorBackend) \
            else bool_convert(optree, precision, 1, 0, **kw)

    precision = self.precision.sollya_object
    int_prec = self.precision.get_integer_format()
    Log.report(Log.Info, "int_prec is %s" % int_prec)
    uint_prec = self.precision.get_unsigned_integer_format()

    Log.report(Log.Info, "MDL constants")
    cgpe_scheme_idx = int(self.cgpe_index)
    table_index_size = int(self.tbl_index_size)
    #
    table_nb_elements = 2**(table_index_size)
    table_dimensions = [2*table_nb_elements]  # two values are stored for each element
    field_size = Constant(self.precision.get_field_size(),
                          precision = int_prec,
                          tag = 'field_size')
    if self.log_radix == EXP_1:
        log2_hi = Constant(
            round(log(2), precision, sollya.RN),
            precision = self.precision,
            tag = 'log2_hi')
        log2_lo = Constant(
            round(log(2) - round(log(2), precision, sollya.RN),
                  precision, sollya.RN),
            precision = self.precision,
            tag = 'log2_lo')
    elif self.log_radix == 10:
        log2_hi = Constant(
            round(log10(2), precision, sollya.RN),
            precision = self.precision,
            tag = 'log2_hi')
        log2_lo = Constant(
            round(log10(2) - round(log10(2), precision, sollya.RN),
                  precision, sollya.RN),
            precision = self.precision,
            tag = 'log2_lo')
    # ... if log_radix == '2' then log2(2) == 1

    # subnormal_mask aims at trapping positive subnormals except zero.
    # That's why we will subtract 1 to the integer bitstring of the input, and
    # then compare for Less (strict) the resulting integer bitstring to this
    # mask, e.g.  0x7fffff for binary32.
    if self.no_subnormal == False:
        subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1,
                                  precision = int_prec,
                                  tag = 'subnormal_mask')
    fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one')
    fp_one_as_uint = TypeCast(fp_one, precision = uint_prec,
                              tag = 'fp_one_as_uint')
    # NOTE(review): int_zero and int_one are not referenced below.
    int_zero = Constant(0, precision = int_prec, tag = 'int_zero')
    int_one = Constant(1, precision = int_prec, tag = 'int_one')
    table_mantissa_half_ulp = Constant(
        1 << (self.precision.field_size - table_index_size - 1),
        precision = int_prec
    )
    table_s_exp_index_mask = Constant(
        ~((table_mantissa_half_ulp.get_value() << 1) - 1),
        precision = uint_prec
    )

    Log.report(Log.Info, "MDL table")
    # The table holds approximations of -log(2^tau * r_i) so we first compute
    # the index value for which tau changes from 1 to 0.
    cut = sqrt(2.)
    tau_index_limit = floor(table_nb_elements * (2./cut - 1))
    sollya_logtbl = [
        (-log1p(float(i) / table_nb_elements)
         + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix)
        for i in range(table_nb_elements)
    ]
    # ...
    init_logtbl_hi = [
        round(sollya_logtbl[i],
              self.precision.get_mantissa_size(),
              sollya.RN)
        for i in range(table_nb_elements)
    ]
    init_logtbl_lo = [
        round(sollya_logtbl[i] - init_logtbl_hi[i],
              self.precision.get_mantissa_size(),
              sollya.RN)
        for i in range(table_nb_elements)
    ]
    # interleave the two lists: hi(0), lo(0), hi(1), lo(1), ...
    init_logtbl = [tmp[i]
                   for i in range(len(init_logtbl_hi))
                   for tmp in [init_logtbl_hi, init_logtbl_lo]]
    log1p_table = ML_NewTable(dimensions = table_dimensions,
                              storage_precision = self.precision,
                              init_data = init_logtbl,
                              tag = 'ml_log1p_table')
    # ...
    if self.no_rcp:
        # the reciprocal table is only built (and only read) when no_rcp is set
        sollya_rcptbl = [
            (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size))))
            for i in range(table_nb_elements)
        ]
        init_rcptbl = [
            round(sollya_rcptbl[i],
                  int(self.tbl_index_size)+1,
                  # self.precision.get_mantissa_size(),
                  sollya.RN)
            for i in range(table_nb_elements)
        ]
        rcp_table = ML_NewTable(dimensions = [table_nb_elements],
                                storage_precision = self.precision,
                                init_data = init_rcptbl,
                                tag = 'ml_rcp_table')
    # ...

    Log.report(Log.Info, 'MDL unified subnormal handling')
    vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int')
    if self.no_subnormal == False:
        vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint')
        # Avoid the 0.0 case by subtracting 1 from vx_as_int
        tmp = Comparison(vx_as_int - 1, subnormal_mask,
                         specifier = Comparison.Less)
        is_subnormal = default_bool_convert(
            tmp, # Will catch negative values as well as NaNs with sign bit set
            precision = int_prec)
        is_subnormal.set_attributes(tag = "is_subnormal")
        if not(isinstance(self.processor, VectorBackend)):
            # turn the scalar 0/1 result into a 0/-1 mask
            is_subnormal = Subtraction(Constant(0, precision = int_prec),
                                       is_subnormal,
                                       precision = int_prec)

        #################################################
        # Vectorizable integer based subnormal handling #
        #################################################
        # 1. lzcnt
        # custom lzcount-like for subnormal numbers using FPU (see draft article)
        Zi = BitLogicOr(vx_as_uint, fp_one_as_uint,
                        precision = uint_prec, tag="Zi")
        Zf = Subtraction(
            TypeCast(Zi, precision = self.precision),
            fp_one,
            precision = self.precision,
            tag="Zf")
        # Zf exponent is -(nlz(x) - exponent_size).
        # 2. compute shift value
        # Vectorial comparison on x86+sse/avx is going to look like
        # '|0x00|0xff|0x00|0x00|' and that's why we use Negate.
        # But for scalar code generation, comparison will rather be either 0 or 1
        # in C. Thus mask below won't be correct for a scalar implementation.
        # FIXME: Can we know the backend that will be called and choose in
        # consequence? Should we make something arch-agnostic instead?
        #
        n_value = BitLogicAnd(
            Addition(
                DirtyExponentExtraction(Zf, self.precision),
                Constant(
                    self.precision.get_bias(),
                    precision = int_prec),
                precision = int_prec),
            is_subnormal,
            precision = int_prec,
            tag = "n_value")
        alpha = Negation(n_value, tag="alpha")
        #
        # 3. shift left
        # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value)
        normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha)
        # 4. set exponent to the right value
        # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value
        # The final "+ 1" comes from the fact that once renormalized, the
        # floating-point datum has a biased exponent of 1
        #tmp0 = Subtraction(
        #        field_size,
        #        value,
        #        precision = int_prec,
        #        tag="tmp0")
        # Set the value to 0 if the number is not subnormal
        #tmp1 = BitLogicAnd(tmp0, is_subnormal)
        #renormalized_exponent = BitLogicLeftShift(
        #        tmp1,
        #        field_size
        #        )
    else: # no_subnormal == True
        normal_vx_as_int = vx_as_int

    #normal_vx_as_int = renormalized_mantissa + renormalized_exponent
    normal_vx = TypeCast(normal_vx_as_int, precision = self.precision,
                         tag = 'normal_vx')

    # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha')
    # XXX Extract the mantissa, see if this is supported in the x86 vector
    # backend or if it still uses the support_lib.
    vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision)

    Log.report(Log.Info, "MDL scheme")
    if self.force_division == True:
        rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    elif self.no_rcp == False:
        rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision)
        if not self.processor.is_supported_operation(rcp_m):
            if self.precision == ML_Binary64:
                # Try using a binary32 FastReciprocal
                binary32_m = Conversion(vx_mantissa, precision = ML_Binary32)
                rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32)
                rcp_m = Conversion(rcp_m, precision = ML_Binary64)
            # NOTE(review): in the binary64 case rcp_m is now the Conversion
            # node, so this tests support for the conversion -- confirm.
            if not self.processor.is_supported_operation(rcp_m):
                # FIXME An approximation table could be used instead but for vector
                # implementations another GATHER would be required.
                # However this may well be better than a division...
                rcp_m = Division(fp_one, vx_mantissa, precision = self.precision)
    else: # ... use a look-up table
        rcp_shift = BitLogicLeftShift(normal_vx_as_int,
                                      self.precision.get_exponent_size() + 1)
        rcp_idx = BitLogicRightShift(
            rcp_shift,
            self.precision.get_exponent_size() + 1
            + self.precision.get_field_size()
            - int(self.tbl_index_size))
        rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx',
                          debug = debug_multi)
    #
    rcp_m.set_attributes(tag = 'rcp_m')

    # exponent is normally either 0 or -1, since m is in [1, 2). Possible
    # optimization?
    # exponent = ExponentExtraction(rcp_m, precision = self.precision,
    #         tag = 'exponent')

    # add half an ulp of the table index then mask the low bits away
    ri_round = TypeCast(
        Addition(
            TypeCast(rcp_m, precision = int_prec),
            table_mantissa_half_ulp,
            precision = int_prec
        ),
        precision = uint_prec
    )
    ri_fast_rndn = BitLogicAnd(
        ri_round,
        table_s_exp_index_mask,
        tag = 'ri_fast_rndn',
        precision = uint_prec
    )
    # u = m * ri - 1
    ul = None
    if self.no_rcp == True: # ... u does not fit on a single word
        tmp_u, tmp_ul = Mul211(vx_mantissa,
                               TypeCast(ri_fast_rndn, precision = self.precision),
                               fma = (self.no_fma == False))
        fp_minus_one = Constant(-1.0, precision = self.precision,
                                tag = 'fp_minus_one')
        u, ul = Add212(fp_minus_one, tmp_u, tmp_ul)
        u.set_attributes(tag='uh')
        ul.set_attributes(tag='ul')
    elif self.no_fma == False:
        u = FusedMultiplyAdd(
            vx_mantissa,
            TypeCast(ri_fast_rndn, precision = self.precision),
            fp_one,
            specifier = FusedMultiplyAdd.Subtract,
            tag = 'u')
    else: # disable FMA
        # tmph + tmpl = m * ri, where tmph ~ 1
        tmph, tmpl = Mul211(vx_mantissa,
                            TypeCast(ri_fast_rndn, precision = self.precision),
                            fma = False)
        # u_tmp = tmph - 1 ... exact due to Sterbenz
        u_tmp = Subtraction(tmph, fp_one, precision = self.precision)
        # u = u_tmp - tmpl ... exact since the result u is representable as a single word
        u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u')

    unneeded_bits = Constant(
        self.precision.field_size - table_index_size,
        precision=uint_prec,
        tag="unneeded_bits"
    )
    assert self.precision.field_size - table_index_size >= 0
    ri_bits = BitLogicRightShift(
        ri_fast_rndn,
        unneeded_bits,
        precision = uint_prec,
        tag = "ri_bits"
    )
    # Retrieve mantissa's MSBs + first bit of exponent, for tau computation in case
    # exponent is 0 (i.e. biased 127, i.e. first bit of exponent is set.).
    # In this particular case, i = 0 but tau is 1
    # table_index does not need to be as long as uint_prec might be,
    # try and keep it the size of size_t.
    size_t_prec = ML_UInt32
    signed_size_t_prec = ML_Int32
    table_index_mask = Constant(
        (1 << (table_index_size + 1)) - 1,
        precision = size_t_prec
    )
    table_index = BitLogicAnd(
        Conversion(ri_bits, precision = size_t_prec),
        table_index_mask,
        tag = 'table_index',
        precision = size_t_prec
    )
    # Compute tau using the tau_index_limit value.
    tmp = default_bool_convert(
        Comparison(
            TypeCast(table_index, precision = signed_size_t_prec),
            Constant(tau_index_limit, precision = signed_size_t_prec),
            specifier = Comparison.Greater
            if isinstance(self.processor, VectorBackend)
            else Comparison.LessOrEqual
        ),
        precision = signed_size_t_prec,
        tag="tmp"
    )
    # A true tmp will typically be -1 for VectorBackends, but 1 for standard C.
    tau = Conversion(
        Addition(tmp, Constant(1, precision=signed_size_t_prec),
                 precision = signed_size_t_prec, tag="pre_add")
        if isinstance(self.processor, VectorBackend)
        else tmp,
        precision=int_prec,
        tag="pre_tau"
    )
    tau.set_attributes(tag = 'tau')
    # Update table_index: keep only table_index_size bits
    table_index_hi = BitLogicAnd(
        table_index,
        Constant((1 << table_index_size) - 1, precision = size_t_prec),
        precision = size_t_prec
    )
    # table_index_hi = table_index_hi << 1
    table_index_hi = BitLogicLeftShift(
        table_index_hi,
        Constant(1, precision = size_t_prec),
        precision = size_t_prec,
        tag = "table_index_hi"
    )
    # table_index_lo = table_index_hi + 1
    table_index_lo = Addition(
        table_index_hi,
        Constant(1, precision = size_t_prec),
        precision = size_t_prec,
        tag = "table_index_lo"
    )

    tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi',
                       debug = debug_multi)
    tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo',
                       debug = debug_multi)
    # Compute exponent e + tau - alpha, but first subtract the bias.
    # e + tau is built the same way in both modes; alpha is only defined,
    # and only subtracted, when subnormal handling is enabled.
    tmp_eptau = Addition(
        Addition(
            BitLogicRightShift(
                normal_vx_as_int,
                field_size,
                tag = 'exponent',
                interval = self.precision.get_exponent_interval(),
                precision = int_prec),
            Constant(
                self.precision.get_bias(),
                precision = int_prec)),
        tau,
        tag = 'tmp_eptau',
        precision = int_prec)
    if self.no_subnormal == False:
        exponent = Subtraction(tmp_eptau, alpha, precision = int_prec)
    else:
        exponent = tmp_eptau
    #
    fp_exponent = Conversion(exponent, precision = self.precision,
                             tag = 'fp_exponent')

    Log.report(Log.Info, 'MDL polynomial approximation')
    if self.log_radix == EXP_1:
        sollya_function = log(1 + sollya.x)
    elif self.log_radix == 2:
        sollya_function = log2(1 + sollya.x)
    elif self.log_radix == 10:
        sollya_function = log10(1 + sollya.x)
    else:
        # report the problem here rather than failing below on an unbound
        # sollya_function
        Log.report(Log.Error,
                   "unsupported log_radix: {}".format(self.log_radix))
    # ...
    if self.force_division == True:
        # rcp accuracy is 2^(-p)
        boundrcp = 2**(-self.precision.get_precision())
    else:
        boundrcp = 1.5 * 2**(-12) # ... see Intel intrinsics guide
        if self.precision in [ML_Binary64]:
            if not self.processor.is_supported_operation(rcp_m):
                boundrcp = (1+boundrcp)*(1+2**(-24)) - 1
            else:
                boundrcp = 2**(-14) # ... see Intel intrinsics guide
    arg_red_mag = boundrcp + 2**(-table_index_size-1) \
        + boundrcp * 2**(-table_index_size-1)
    if self.no_rcp == False:
        approx_interval = Interval(-arg_red_mag, arg_red_mag)
    else:
        approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),
                                   2**(-int(self.tbl_index_size)+1))

    max_eps = 2**-(2*(self.precision.get_field_size()))
    Log.report(Log.Info,
               "max acceptable error for polynomial = {}".format(float.hex(max_eps)))
    poly_degree = sup(
        guessdegree(
            sollya_function,
            approx_interval,
            max_eps,
        )
    )
    # the placeholder is required for the degree to appear in the message
    Log.report(Log.Info, "poly degree is {}", poly_degree)
    if self.log_radix == EXP_1:
        poly_object = Polynomial.build_from_approximation(
            sollya_function,
            range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp.
            # Emulate double-self.precision coefficient formats
            [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1),
            approx_interval,
            sollya.absolute,
            0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp.
    else: # ... == '2' or '10'
        poly_object = Polynomial.build_from_approximation(
            sollya_function,
            range(1, int(poly_degree) + 1), # Force 1st coeff to 0
            # Emulate double-self.precision coefficient formats
            [self.precision.get_mantissa_size()*2 + 1]*(poly_degree),
            approx_interval,
            sollya.absolute,
            0) # Force the first coefficients to 0

    Log.report(Log.Info, str(poly_object))

    constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \
        else ML_DoubleDouble if self.precision == ML_Binary64 \
        else None
    if is_cgpe_available():
        log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme(
            poly_object,
            u,
            unified_precision = self.precision,
            constant_precision = constant_precision,
            scheme_id = cgpe_scheme_idx
        )
    else:
        Log.report(Log.Warning,
                   "CGPE not available, falling back to std poly evaluator")
        log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object,
            u,
            unified_precision = self.precision,
            constant_precision = constant_precision
        )

    # XXX Dirty implementation of double-(self.precision) poly
    def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma):
        # seed the memoization map so that the polynomial variable expands
        # to the (variable_h, variable_l) pair
        return dirty_multi_node_expand(
            node, self.precision,
            mem_map={variable_h: (variable_h, variable_l)},
            fma=use_fma)
    log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(
        log1pu_poly, u, ul, use_fma=(self.no_fma == False))

    log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi')
    log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo')

    if self.log_radix != 2: # 'e' or '10'
        # Compute log(2) * (e + tau - alpha)
        log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo,
                                    fma = (self.no_fma == False))
        # Add log1p(u)
        tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo,
                                        log1pu_poly_hi, log1pu_poly_lo)
    else:
        # radix 2: the exponent term is added directly, then log1p(u)
        tmp_res_hi, tmp_res_lo = Add212(fp_exponent,
                                        log1pu_poly_hi, log1pu_poly_lo)

    # Add -log(2^(tau)/m) approximation retrieved by two table lookups
    logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0]
    logx_hi.set_attributes(tag = 'logx_hi')

    scheme = Return(logx_hi, precision = self.precision)

    return scheme
def generate_reduced_log_split(self, _vx_mant, log_f, inv_approx_table, log_table, log_table_tho=None, corr_exp=None, tho_cond=None):
    """Generate a logarithm approximation (log_f(_vx_mant) + corr_exp).

    The reduced argument _vx_mant is assumed to be within [1, 2[ (i.e. an
    extracted mantissa). Adding the exponent correction is optional.

    Args:
        _vx_mant: reduced argument node.
        log_f: function called as log_f(2) to obtain the constant that
            multiplies corr_exp.
        inv_approx_table: object whose index_function(_vx_mant) gives the
            table index node.
        log_table: table read at (index, 0) for the high word and at
            (index, 1) for the low word.
        log_table_tho: alternative table selected when tho_cond holds;
            must be given if and only if tho_cond is given.
        corr_exp: optional exponent correction node; when None the
            exponent term is skipped.
        tho_cond: optional condition node choosing between log_table_tho
            and log_table.

    Returns:
        The high word of the result (after the change of basis when
        sollya.log(self.basis) is not 1).
    """
    # two-word split of log_f(2): a shortened high word, then the remainder
    log_of_two = log_f(2)
    hi_word_bits = self.precision.get_field_size() \
        - (self.precision.get_exponent_size() + 1)
    ln2_hi_val = round(log_of_two, hi_word_bits, RN)
    ln2_lo_val = round(
        log_f(2) - ln2_hi_val, self.precision.sollya_object, RN)
    log2_hi = Constant(ln2_hi_val, precision=self.precision)
    log2_lo = Constant(ln2_lo_val, precision=self.precision)

    table_index = inv_approx_table.index_function(_vx_mant)
    table_index.set_attributes(tag="table_index", debug=debug_multi)

    # NOTE(review): neither of these two nodes is used afterwards.
    rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp")
    r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r")

    int_format = self.precision.get_integer_format()

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the
    # targeted architecture
    seed = ReciprocalSeed(
        _vx_mant, precision=self.precision, tag="seed",
        debug=debug_multi, silent=True)
    seed_as_int = TypeCast(seed, precision=int_format)
    # -2 clears the least significant bit of the seed bit pattern
    lsb_clear_mask = Constant(-2, precision=int_format)
    masked_seed = BitLogicAnd(
        seed_as_int, lsb_clear_mask, precision=int_format)
    pre_arg_red_index = TypeCast(
        masked_seed, precision=self.precision,
        tag="pre_arg_red_index", debug=debug_multi)

    zero_index = Constant(0, precision=table_index.get_precision())
    index_is_zero = Equal(
        table_index, zero_index, tag="index_comp_0", debug=debug_multi)
    arg_red_index = Select(
        index_is_zero, 1.0, pre_arg_red_index,
        tag="arg_red_index", debug=debug_multi)
    #_red_vx = arg_red_index * _vx_mant - 1.0
    _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
    inv_err = S2**-6
    # NOTE(review): interval centred on 1 while _red_vx has 1.0 subtracted
    # and approx_interval below is centred on 0 -- confirm.
    red_interval = Interval(1 - inv_err, 1 + inv_err)
    _red_vx.set_attributes(
        tag="_red_vx", debug=debug_multi, interval=red_interval)

    # return in case of standard (non-special) input
    if tho_cond is None:
        # single-table mode: no alternative table may be supplied
        assert log_table_tho is None
        _log_inv_lo = TableLoad(log_table, table_index, 1)
        _log_inv_hi = TableLoad(log_table, table_index, 0)
    else:
        assert log_table_tho is not None
        lo_from_tho = TableLoad(log_table_tho, table_index, 1)
        lo_from_std = TableLoad(log_table, table_index, 1)
        _log_inv_lo = Select(
            tho_cond, lo_from_tho, lo_from_std,
            tag="log_inv_lo", debug=debug_multi)
        hi_from_tho = TableLoad(log_table_tho, table_index, 0)
        hi_from_std = TableLoad(log_table, table_index, 0)
        _log_inv_hi = Select(
            tho_cond, hi_from_tho, hi_from_std,
            tag="log_inv_hi", debug=debug_multi)

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    degree_target = log(1 + sollya.x) / sollya.x
    poly_degree = sup(
        guessdegree(
            degree_target, approx_interval,
            S2**-(self.precision.get_field_size() + 1))) + 1
    coeff_formats = [self.precision] * (poly_degree + 1)
    global_poly_object = Polynomial.build_from_approximation(
        log(1 + x) / x, poly_degree, coeff_formats,
        approx_interval, sollya.absolute)
    # the degree-0 coefficient is dropped; _red_vx is added back below
    poly_object = global_poly_object.sub_poly(start_index=1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
        poly_object, _red_vx, unified_precision=self.precision)
    _poly.set_attributes(tag="poly", debug=debug_multi)
    Log.report(Log.Info, "{}", poly_object.get_sollya_object())

    # _poly approximates log10(1+r)/r
    # _poly * red_vx approximates log10(x)
    prod_hi, prod_lo = Mul211(_red_vx, _poly)
    sum_hi, sum_lo = Add212(_red_vx, prod_hi, prod_lo)
    sum_hi.set_attributes(tag="m0h", debug=debug_multi)
    sum_lo.set_attributes(tag="m0l")

    if corr_exp is None:
        # bypass exponent addition when there is no exponent correction
        rh, rl = sum_hi, sum_lo
    else:
        exp_term_hi = corr_exp * log2_hi
        exp_term_lo = corr_exp * log2_lo
        exp_term_hi.set_attributes(tag="l0_h")
        exp_term_lo.set_attributes(tag="l0_l")
        rh, rl = Add222(exp_term_hi, exp_term_lo, sum_hi, sum_lo)

    rh.set_attributes(tag="rh0", debug=debug_multi)
    rl.set_attributes(tag="rl0", debug=debug_multi)

    # subtract the two-word table value
    neg_table_hi = -_log_inv_hi
    neg_table_lo = -_log_inv_lo
    rh, rl = Add222(neg_table_hi, neg_table_lo, rh, rl)

    rh.set_attributes(tag="rh", debug=debug_multi)
    rl.set_attributes(tag="rl", debug=debug_multi)

    # FIXME: log<self.basis>(vx) is computed as log(vx) / log(self.basis)
    # which could be optimized for some value of self.basis (e.g. 2)
    if sollya.log(self.basis) != 1.0:
        lbh = self.precision.round_sollya_object(
            1 / sollya.log(self.basis))
        lbl = self.precision.round_sollya_object(
            1 / sollya.log(self.basis) - lbh)
        rh, rl = Mul222(rh, rl, lbh, lbl)
    return rh