def generate_scheme(self):
    """Build a scheme evaluating 2 + x * x together with its error-eval nodes.

    The input "x" is declared with FIXED_FORMAT and the interval [-1, 1].
    Every node produced by runtime_error_eval.generate_error_eval_graph is
    appended to the returned Statement, followed by a Return of the sum.
    """
    vx = self.implementation.add_input_variable("x", FIXED_FORMAT)
    # restrict the input variable <x> to a specific interval
    vx.set_interval(Interval(-1, 1))

    # format shared by the constant, the product and the sum
    accumulator_format = ML_Custom_FixedPoint_Format(6, 58, False)

    two = Constant(2, precision=accumulator_format, tag="C2")
    square = Multiplication(vx, vx, precision=accumulator_format, tag="mul")
    result = Addition(two, square, precision=accumulator_format, tag="add")

    # value substituted for the input when building the error graph
    input_mapping = {vx: vx.get_precision().round_sollya_object(0.125)}
    error_eval_map = runtime_error_eval.generate_error_eval_graph(
        result, input_mapping)

    # dummy scheme so that code generation has something functional to emit
    scheme = Statement()
    for evaluated_node in error_eval_map:
        scheme.add(error_eval_map[evaluated_node])
    scheme.add(Return(result))
    return scheme
def Add211(x, y, precision=None):
    """Multi-precision Addition (2sum): HI, LO = x + y.

    Builds zh = x + y, t1 = zh - x and zl = y - t1.

    TODO: missing assumption on input order

    :param x: first operand node
    :param y: second operand node
    :param precision: optional format forwarded to each generated node;
        when left to None the nodes are built exactly as before, without
        any precision argument
    :return: tuple (zh, zl) of operation nodes
    """
    # Only forward the keyword when the caller supplied a format, so that
    # two-argument callers keep getting the very same node constructions.
    node_attrs = {} if precision is None else {"precision": precision}
    zh = Addition(x, y, **node_attrs)
    t1 = Subtraction(zh, x, **node_attrs)
    zl = Subtraction(y, t1, **node_attrs)
    return zh, zl
def Mul212(x, yh, yl, precision=None, fma=True):
    """Multi-precision Multiplication: HI, LO = x * [yh:yl].

    :param x: single-word operand
    :param yh: high word of the two-word operand
    :param yl: low word of the two-word operand
    :param precision: format forwarded to every generated node
    :param fma: forwarded to Mul211
    :return: the value returned by Add211 on (high product, low sum)
    """
    # x * yh as a (high, low) pair
    prod_hi, prod_lo = Mul211(x, yh, precision, fma)
    # contribution of the low word of y
    cross_term = Multiplication(x, yl, precision=precision)
    low_sum = Addition(prod_lo, cross_term, precision=precision)
    renormalized = Add211(prod_hi, low_sum, precision)
    return renormalized
def Mul211(x, y, precision=None, fma=True):
    """Multi-precision Multiplication: HI, LO = x * y.

    :param x: first operand node
    :param y: second operand node
    :param precision: format forwarded to every generated node
    :param fma: when equal to True the low word is a single FMS node,
        otherwise it is rebuilt from the Split halves of both operands
    :return: tuple (zh, zl)
    """
    fmt = {"precision": precision}
    zh = Multiplication(x, y, **fmt)

    # equality test kept on purpose: only values equal to True take this path
    if fma == True:
        return zh, FMS(x, y, zh, **fmt)

    # no FMS: split both operands and accumulate the partial products
    x_hi, x_lo = Split(x, **fmt)
    y_hi, y_lo = Split(y, **fmt)
    hi_hi = Multiplication(x_hi, y_hi, **fmt)
    residual = Subtraction(hi_hi, zh, **fmt)
    hi_lo = Multiplication(x_hi, y_lo, **fmt)
    lo_hi = Multiplication(x_lo, y_hi, **fmt)
    lo_lo = Multiplication(x_lo, y_lo, **fmt)
    acc = Addition(residual, hi_lo, **fmt)
    acc = Addition(acc, lo_hi, **fmt)
    zl = Addition(acc, lo_lo, **fmt)
    return zh, zl
def Mul211(x, y, fma=True):
    """Multi-precision Multiplication: HI, LO = x * y.

    :param x: first operand node
    :param y: second operand node
    :param fma: when equal to True the low word is a single FMS node,
        otherwise it is rebuilt from the Split halves of both operands
    :return: tuple (zh, zl)
    """
    zh = Multiplication(x, y)

    # equality test kept on purpose: only values equal to True take this path
    if fma == True:
        return zh, FMS(x, y, zh)

    # no FMS: split both operands and accumulate the partial products
    x_hi, x_lo = Split(x)
    y_hi, y_lo = Split(y)
    hi_hi = Multiplication(x_hi, y_hi)
    residual = Subtraction(hi_hi, zh)
    hi_lo = Multiplication(x_hi, y_lo)
    lo_hi = Multiplication(x_lo, y_hi)
    lo_lo = Multiplication(x_lo, y_lo)
    acc = Addition(residual, hi_lo)
    acc = Addition(acc, lo_hi)
    zl = Addition(acc, lo_lo)
    return zh, zl
def Add212(xh, yh, yl, precision=None):
    """Multi-precision Addition: HI, LO = xh + [yh:yl].

    Node sequence:
        r  = xh + yh
        s  = ((xh - r) + yh) + yl
        zh = r + s
        zl = (r - zh) + s

    :param precision: format forwarded to every generated node
    :return: tuple (zh, zl)
    """
    head = Addition(xh, yh, precision=precision)
    delta = Subtraction(xh, head, precision=precision)
    delta = Addition(delta, yh, precision=precision)
    tail = Addition(delta, yl, precision=precision)
    zh = Addition(head, tail, precision=precision)
    head_error = Subtraction(head, zh, precision=precision)
    zl = Addition(head_error, tail, precision=precision)
    return zh, zl
def Split(a, precision=None):
    """... splitting algorithm for Dekker TwoMul

    :param a: node to split; its precision must be ML_Binary32 or
        ML_Binary64 (any other format raises KeyError on the table lookup)
    :param precision: format forwarded to the generated operation nodes
    :return: tuple (ah, al)
    """
    # splitting constant per input format: 4097 = 2**12 + 1,
    # 134217729 = 2**27 + 1
    split_constants = {ML_Binary32: 4097, ML_Binary64: 134217729}
    splitter = Constant(
        split_constants[a.precision],
        precision=a.get_precision(),
        tag='fp_split')

    scaled = Multiplication(splitter, a, precision=precision)
    difference = Subtraction(a, scaled, precision=precision)
    ah = Addition(difference, scaled, precision=precision)
    al = Subtraction(a, ah, precision=precision)
    return ah, al
def expand_sub(self, node):
    """Expand a subtraction node by rewriting x - y as x + (-y).

    :param node: two-input node to expand
    :return: the result of self.expand_node on the rewritten Addition
    """
    minuend = node.get_input(0)
    subtrahend = node.get_input(1)
    # NOTE(review): the tag is read here but not used below
    tag = node.get_tag()
    out_format = node.get_precision()

    # the negation keeps the format of the operand it negates
    negated = Negation(subtrahend, precision=subtrahend.precision)
    rewritten = Addition(minuend, negated, precision=out_format)
    forward_attributes(node, rewritten)
    return self.expand_node(rewritten)
def Mul222(xh, xl, yh, yl, fma=True):
    """Multi-precision Multiplication: HI, LO = [xh:xl] * [yh:yl].

    :param xh: high word of the first operand
    :param xl: low word of the first operand
    :param yh: high word of the second operand
    :param yl: low word of the second operand
    :param fma: when equal to True the product is built from FMS/FMA
        nodes, otherwise from Mul211 / Add211
    :return: tuple (zh, zl)
    """
    if fma == True:
        ph = Multiplication(xh, yh)
        # low part of xh * yh, then accumulate both cross products
        pl = FMS(xh, yh, ph)
        pl = FMA(xh, yl, pl)
        pl = FMA(xl, yh, pl)

        # final renormalization of (ph, pl)
        zh = Addition(ph, pl)
        zl = Subtraction(ph, zh)
        zl = Addition(zl, pl)
    else:
        # fma is passed by keyword so that it cannot be bound to another
        # positional parameter (e.g. precision) of Mul211
        t1, t2 = Mul211(xh, yh, fma=fma)
        t3 = Multiplication(xh, yl)
        t4 = Multiplication(xl, yh)
        t5 = Addition(t3, t4)
        t6 = Addition(t2, t5)
        zh, zl = Add211(t1, t6)

    return zh, zl
def generate_fasttwosum(vx, vy):
    """Return two optrees for a FastTwoSum operation.

    Precondition: |vx| >= |vy|.
    The return value is a tuple (sum, error).

    Node sequence:
        s = vx + vy
        b = s - vx
        e = vy - b
    """
    s = Addition(vx, vy)
    # the subtraction must use the sum computed above (the previous code
    # referenced an undefined name here)
    b = Subtraction(s, vx)
    e = Subtraction(vy, b)
    return s, e
def Split(a):
    """... splitting algorithm for Dekker TwoMul

    :param a: node to split
    :return: tuple (ah, al)
    """
    # NOTE(review): the splitting constant is always 4097 (2**12 + 1).
    # A disabled variant of this code selected 4097 for ML_Binary32 and
    # 134217729 for ML_Binary64 -- confirm whether binary64 inputs are
    # expected to reach this function.
    splitter = Constant(4097, precision=a.get_precision(), tag='fp_split')

    scaled = Multiplication(splitter, a)
    difference = Subtraction(a, scaled)
    ah = Addition(difference, scaled)
    al = Subtraction(a, ah)
    return ah, al
def generate_scheme(self):
    """Build a scheme computing and returning y + x * z.

    The three inputs "x", "y" and "z" and both operations use
    self.precision.
    """
    in_x = self.implementation.add_input_variable("x", self.precision)
    in_y = self.implementation.add_input_variable("y", self.precision)
    in_z = self.implementation.add_input_variable("z", self.precision)

    product = Multiplication(in_x, in_z, precision=self.precision)
    total = Addition(in_y, product, precision=self.precision)

    # the sum appears both as a statement and as the returned value
    return Statement(total, Return(total))
def Add222(xh, xl, yh, yl):
    """Multi-precision Addition: HI, LO = [xh:xl] + [yh:yl].

    Node sequence:
        r  = xh + yh
        s  = (((xh - r) + yh) + yl) + xl
        zh = r + s
        zl = (r - zh) + s

    :return: tuple (zh, zl)
    """
    head = Addition(xh, yh)
    delta = Subtraction(xh, head)
    delta = Addition(delta, yh)
    delta = Addition(delta, yl)
    tail = Addition(delta, xl)
    zh = Addition(head, tail)
    head_error = Subtraction(head, zh)
    zl = Addition(head_error, tail)
    return zh, zl
def Add222(xh, xl, yh, yl, precision=None):
    """Multi-precision Addition: HI, LO = [xh:xl] + [yh:yl].

    Node sequence:
        r  = xh + yh
        s  = (((xh - r) + yh) + yl) + xl
        zh = r + s
        zl = (r - zh) + s

    :param precision: format forwarded to every generated node
    :return: tuple (zh, zl)
    """
    head = Addition(xh, yh, precision=precision)
    delta = Subtraction(xh, head, precision=precision)
    delta = Addition(delta, yh, precision=precision)
    delta = Addition(delta, yl, precision=precision)
    tail = Addition(delta, xl, precision=precision)
    zh = Addition(head, tail, precision=precision)
    head_error = Subtraction(head, zh, precision=precision)
    zl = Addition(head_error, tail, precision=precision)
    return zh, zl
def generate_scheme(self):
    """ main scheme generation

    Declares the input signals "x" and "y", builds a nested Select tree
    and a Max node, and registers the converted sum of both as output
    signal "vr_out".

    :return: one-element list containing self.implementation
    """
    Log.report(Log.Info, "width parameter is {}".format(self.width))

    # 3 integer bits, the remaining width is fractional
    int_size = 3
    frac_size = self.width - int_size
    input_precision = fixed_point(int_size, frac_size)
    output_precision = fixed_point(int_size, frac_size)

    # main input signals
    var_x = self.implementation.add_input_signal("x", input_precision)
    var_y = self.implementation.add_input_signal("y", input_precision)
    var_x.set_attributes(debug=debug_fixed)
    var_y.set_attributes(debug=debug_fixed)

    # comparison node, only tagged and flagged for debug here
    test = (var_x > 1)
    test.set_attributes(tag="test", debug=debug_std)

    sub = var_x - var_y
    zero = Constant(0)

    # conditions and selections are built outermost condition first,
    # innermost selection first
    outer_cond = zero > sub
    middle_cond = zero < var_y
    x_below_zero = zero > var_x
    y_above_zero = zero < var_y
    last_cond = LogicalAnd(x_below_zero, y_above_zero, tag="last_lev_cond")
    last_select = Select(last_cond, var_x, zero, tag="last_lev_sel")
    middle_select = Select(middle_cond, sub, last_select, tag="pre_select")
    pre_result_select = Select(
        outer_cond, middle_select, var_y, tag="pre_result_select")

    pre_result = Max(0, var_x - var_y, tag="pre_result")

    total = Addition(pre_result, pre_result_select, tag="add")
    result = Conversion(total, precision=output_precision)

    self.implementation.add_output_signal("vr_out", result)
    return [self.implementation]
def expand_kernel_expr(kernel, iterator_format=ML_Int32):
    """ Expand a kernel expression into the corresponding MDL graph

    :param kernel: kernel expression to expand; generic operation nodes
        are modified in place (their inputs are replaced by their
        expanded versions)
    :param iterator_format: format argument, forwarded unchanged to every
        recursive expansion
    :return: the expanded node
    """
    if isinstance(kernel, NDRange):
        return expand_ndrange(kernel)

    if isinstance(kernel, Sum):
        var_iter = kernel.index_iter_range.var_index
        # TODO/FIXME to be uniquified
        acc = Variable("acc", var_type=Variable.Local,
                       precision=kernel.precision)
        # TODO/FIXME implement proper acc init
        if kernel.precision.is_vector_format():
            C0 = Constant([0] * kernel.precision.get_vector_size(),
                          precision=kernel.precision)
        else:
            C0 = Constant(0, precision=kernel.precision)
        scheme = Loop(
            # loop initialization: iterator and accumulator
            Statement(
                ReferenceAssign(var_iter, kernel.index_iter_range.first_index),
                ReferenceAssign(acc, C0)),
            # loop condition
            var_iter <= kernel.index_iter_range.last_index,
            # loop body
            Statement(
                ReferenceAssign(
                    acc,
                    Addition(
                        acc,
                        # keep the caller's iterator format in the
                        # nested expansion
                        expand_kernel_expr(kernel.elt_operation,
                                           iterator_format=iterator_format),
                        precision=kernel.precision)),
                # loop iterator increment
                ReferenceAssign(
                    var_iter,
                    var_iter + kernel.index_iter_range.index_step)))
        return PlaceHolder(acc, scheme)

    if isinstance(kernel, (ReadAccessor, WriteAccessor)):
        return expand_accessor(kernel)

    if is_leaf_node(kernel):
        return kernel

    # vanilla metalibm ops are left unmodified (except
    # recursive expansion)
    for index, op in enumerate(kernel.inputs):
        new_op = expand_kernel_expr(op, iterator_format=iterator_format)
        kernel.set_input(index, new_op)
    return kernel
def expand_sub(self, node):
    """Expand a subtraction node by rewriting x - y as x + (-y).

    The rewritten node and its expanded form are reported at
    LOG_LEVEL_EXPAND_VERBOSE.

    :param node: two-input node to expand
    :return: the result of self.expand_node on the rewritten Addition
    """
    minuend = node.get_input(0)
    subtrahend = node.get_input(1)
    # NOTE(review): the tag is read here but not used below
    tag = node.get_tag()
    out_format = node.get_precision()

    # Subtraction x - y is transformed into x + (-y)
    # WARNING: if y is not expandable (e.g. scalar precision)
    #          this could stop expansion
    negated = Negation(subtrahend, precision=subtrahend.precision)
    rewritten = Addition(minuend, negated, precision=out_format)
    forward_attributes(node, rewritten)
    expanded_node = self.expand_node(rewritten)

    # textual dump of each element of the expanded form
    expanded_dump = ", ".join(
        [limb.get_str(display_precision=True, depth=None)
         for limb in expanded_node])
    Log.report(
        LOG_LEVEL_EXPAND_VERBOSE,
        "expanding Subtraction {} into {} with expanded form {}",
        node, rewritten, expanded_dump)
    return expanded_node
def generate_exp_extraction(optree):
    """Build the node adding the format bias to the raw exponent of optree.

    :param optree: node whose precision selects the bias and the integer
        format of the result; vector precisions must be one of the formats
        listed in the table below (KeyError otherwise)
    :return: Addition of generate_raw_exp_extraction(optree) and the bias
        constant, in the matching integer format
    """
    in_format = optree.precision
    if in_format.is_vector_format():
        base_precision = in_format.get_scalar_format()
        vector_size = in_format.get_vector_size()
        # integer vector format associated with each float vector format
        vector_int_formats = {
            v2float32: v2int32,
            v2float64: v2int64,
            v4float32: v4int32,
            v4float64: v4int64,
            v8float32: v8int32,
            v8float64: v8int64,
        }
        int_precision = vector_int_formats[optree.precision]
        # one bias value per vector lane
        bias_cst = [base_precision.get_bias()] * vector_size
    else:
        base_precision = in_format
        int_precision = base_precision.get_integer_format()
        bias_cst = base_precision.get_bias()

    raw_exponent = generate_raw_exp_extraction(optree)
    bias_node = Constant(bias_cst, precision=int_precision)
    return Addition(raw_exponent, bias_node, precision=int_precision)
def generate_exp_insertion(optree, result_precision):
    """ generate the expanded version of ExponentInsertion
        with @p optree as input and assuming @p result_precision
        as output precision

        The expansion is TypeCast((optree + (-bias)) << field_size), with
        bias and field_size taken from @p result_precision and the
        integer arithmetic done in optree's own precision.
    """
    int_format = optree.precision
    if result_precision.is_vector_format():
        scalar_format = int_format.get_scalar_format()
        vector_size = int_format.get_vector_size()
        # constants are replicated once per vector lane
        bias_cst = [
            -result_precision.get_scalar_format().get_bias()
        ] * vector_size
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * vector_size
    else:
        scalar_format = int_format
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()
    assert is_std_integer_format(scalar_format)

    bias_node = Constant(bias_cst, precision=optree.precision)
    biased_exponent = Addition(optree, bias_node, precision=optree.precision)
    shift_node = Constant(shift_cst, precision=optree.precision)
    shifted = BitLogicLeftShift(
        biased_exponent, shift_node, precision=optree.precision)
    return TypeCast(shifted, precision=result_precision)
def generate_exp_insertion(optree, result_precision):
    """ generate the expanded version of ExponentInsertion
        with @p optree as input and assuming @p result_precision
        as output precision

        The expansion is TypeCast((optree + (-bias)) << field_size), with
        the integer arithmetic done in a working format derived from
        @p result_precision; @p optree is converted to that format when
        its precision is a different object.
    """
    if result_precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        vector_size = optree.precision.get_vector_size()
        # determine the working format (for expression)
        scalar_int_format = (
            result_precision.get_scalar_format().get_integer_format())
        work_format = VECTOR_TYPE_MAP[scalar_int_format][vector_size]
        # constants are replicated once per vector lane
        bias_cst = [
            -result_precision.get_scalar_format().get_bias()
        ] * vector_size
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * vector_size
    else:
        scalar_format = optree.precision
        work_format = result_precision.get_integer_format()
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()

    if not is_std_integer_format(scalar_format):
        Log.report(
            Log.Error,
            "{} should be a std integer format in generate_exp_insertion {} with precision {}",
            scalar_format, optree, result_precision)
    assert is_std_integer_format(scalar_format)

    # bring the input exponent into the working format when needed
    if optree.precision is not work_format:
        exponent = Conversion(optree, precision=work_format)
    else:
        exponent = optree

    bias_node = Constant(bias_cst, precision=work_format)
    biased_exponent = Addition(exponent, bias_node, precision=work_format)
    shift_node = Constant(shift_cst, precision=work_format)
    shifted = BitLogicLeftShift(
        biased_exponent, shift_node, precision=work_format)
    return TypeCast(shifted, precision=result_precision)
def legalizer(exp_insertion_node):
    """Expand an exponent-insertion node into add / shift / cast nodes.

    The expansion is TypeCast((input + (-bias)) << field_size), where the
    input is the first operand of @p exp_insertion_node.

    NOTE: result_precision is not a parameter; it is resolved from a scope
    outside this function.
    """
    optree = exp_insertion_node.get_input(0)
    int_format = optree.precision
    if result_precision.is_vector_format():
        scalar_format = int_format.get_scalar_format()
        vector_size = int_format.get_vector_size()
        # constants are replicated once per vector lane
        bias_cst = [
            -result_precision.get_scalar_format().get_bias()
        ] * vector_size
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * vector_size
    else:
        scalar_format = int_format
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()
    assert is_std_integer_format(scalar_format)

    bias_node = Constant(bias_cst, precision=optree.precision)
    biased_exponent = Addition(optree, bias_node, precision=optree.precision)
    shift_node = Constant(shift_cst, precision=optree.precision)
    shifted = BitLogicLeftShift(
        biased_exponent, shift_node, precision=optree.precision)
    return TypeCast(shifted, precision=result_precision)
def mll_implementpoly_horner(ctx, poly_object, eps, variable):
    """ generate an implementation of polynomial @p poly_object of @p variable
        whose evaluation error is bounded by @p eps. @p variable must have
        an interval and a precision set

        :param ctx: multi-word precision context to use
        :type ctx: MLL_Context
        :param poly_object: polynomial object to implement (its degree and
            coeff_map attributes and its sub_poly method are used)
        :param eps: target relative error bound
        :param variable: polynomial input variable
        :type variable: ML_Operation
        :return: <implementation node>, <real relative error>
        :rtype: tuple(ML_Operation, SollyaObject)
    """
    if poly_object.degree == 0:
        # constant only
        cst = poly_object.coeff_map[0]
        rounded_cst = ctx.roundConstant(cst, eps)
        cst_format = ctx.computeConstantFormat(rounded_cst)
        # the node carries the rounded value for which cst_format was
        # computed, as in the higher-degree branches
        return Constant(rounded_cst, precision=cst_format), cst_format.epsilon
    elif poly_object.degree == 1:
        # cst0 + cst1 * var
        # final relative error is
        # (cst0 (1 + e0) + cst1 * var (1 + e1) (1 + ev) (1 + em))(1 + ea)
        # (cst0 + e0 * cst0 + cst1 * var (1 + e1 + ev + e1 * ev) (1 + em))(1 + ea)
        # (cst0 + e0 * cst0 + cst1 * var (1 + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em) )(1 + ea)
        # (cst0 + cst1 * var) (1 + ea) (1 + e0 * cst0 + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em)
        # em is epsilon for the multiplication
        # ea is epsilon for the addition
        cst0 = poly_object.coeff_map[0]
        cst1 = poly_object.coeff_map[1]
        eps_mul = eps / 4
        eps_add = eps / 2

        cst1_rounded = ctx.roundConstant(cst1, eps / 4)
        cst1_format = ctx.computeConstantFormat(cst1_rounded)
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(
            variable.interval, eps_var, variable.precision)
        var_node = legalize_node_format(variable, var_format)

        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, cst1_format, var_format)
        add_format = ctx.computeOutputFormatAddition(
            eps_add, cst0_format, mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(
                Constant(cst1_rounded, precision=cst1_format),
                var_node,
                precision=mul_format),
            precision=add_format
        ), add_format.epsilon  # TODO: local error only
    elif poly_object.degree > 1:
        # cst0 + var * poly
        cst0 = poly_object.coeff_map[0]
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(
            variable.interval, eps_var, variable.precision)
        var_node = legalize_node_format(variable, var_format)

        # remaining polynomial (coefficients of index >= 1, shifted down
        # by one), implemented recursively from the original variable
        sub_poly = poly_object.sub_poly(start_index=1, offset=1)
        eps_poly = eps / 4
        poly_node, _ = mll_implementpoly_horner(
            ctx, sub_poly, eps_poly, variable)

        eps_mul = eps / 4
        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, var_format, poly_node.precision)
        eps_add = eps / 4
        add_format = ctx.computeOutputFormatAddition(
            eps_add, cst0_format, mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(var_node, poly_node, precision=mul_format),
            precision=add_format
        ), add_format.epsilon  # TODO: local error only
    else:
        Log.report(Log.Error, "poly degree must be positive or null. {}, {}",
                   poly_object.degree, poly_object)
# }, # FusedMultiplyAdd.DotProduct: { # lambda optree: True: # lambda optree, processor: Addition(Multiplication(optree.inputs[0], optree.inputs[1], precision = optree.get_precision()), Multiplication(optree.inputs[2], optree.inputs[3], precision = optree.get_precision()), precision = optree.get_precision()), # }, # FusedMultiplyAdd.DotProductNegate: { # lambda optree: True: # lambda optree, processor: Subtraction(Multiplication(optree.inputs[0], optree.inputs[1], precision = optree.get_precision()), Multiplication(optree.inputs[2], optree.inputs[3], precision = optree.get_precision()), precision = optree.get_precision()), # }, #}, Subtraction: { None: { lambda optree: True: lambda optree, processor: Addition( optree.inputs[0], Negation(optree.inputs[1], precision=optree.inputs[1].get_precision()), precision=optree.get_precision()) }, }, DivisionSeed: { None: { lambda optree: True: simplify_inverse, }, }, } def silence_fp_operations(optree, force=False, memoization_map=None): """ ensure that all floating-point operations from optree root have the silent attribute set to True """
def Add1111(x, y, z, precision=None):
    """Build the node graph for the three-operand sum x + y + z.

    y and z are first combined with Add211, x is then combined with the
    high word of that result, both low words are summed with
    Add_round_to_odd and the final Addition adds the remaining high word.

    :param precision: format forwarded to every helper and node
    :return: the final Addition node
    """
    yz_hi, yz_lo = Add211(y, z, precision=precision)
    sum_hi, sum_lo = Add211(x, yz_hi, precision=precision)
    low_words = Add_round_to_odd(sum_lo, yz_lo, precision=precision)
    result = Addition(low_words, sum_hi, precision=precision)
    return result
def generate_expr(self, code_object, optree, folded=True, result_var=None,
                  initial=False, __exact=None, language=None,
                  strip_outer_parenthesis=False, force_variable_storing=False,
                  next_block=None):
    """ code generation function

    Emits code for @p optree into @p code_object, dispatching on the node
    class.

    :param code_object: object receiving the generated code (through <<)
    :param optree: node to generate
    :param folded: forwarded to recursive generation and to the processor
    :param result_var: forwarded to the processor for generic operations
    :param language: forwarded to recursive generation
    :param strip_outer_parenthesis: when set and the result is a
        CodeExpression, its outer parentheses are stripped
    :return: the memoized result when @p optree was already processed;
        None for control-flow nodes (BasicBlock, ConditionalBranch,
        UnconditionalBranch, BasicBlockList, PhiNode, ReferenceAssign);
        otherwise the generated code object for the node

    initial, __exact, force_variable_storing and next_block are accepted
    but not used in this function.
    """
    # search if <optree> has already been processed
    if self.has_memoization(optree):
        return self.get_memoization(optree)

    result = None
    # implementation generation
    if isinstance(optree, CodeVariable):
        # adding LLVM variable "%" prefix (also covers an empty name)
        if not optree.name.startswith("%"):
            optree.name = "%" + optree.name
        result = optree

    elif isinstance(optree, Variable):
        result = CodeVariable("%" + optree.get_tag(), optree.get_precision())

    elif isinstance(optree, Constant):
        result = generate_Constant_expr(optree)

    elif isinstance(optree, BasicBlock):
        bb_label = self.get_bb_label(code_object, optree)
        code_object << (bb_label + ":")
        code_object.open_level(header="")
        for op in optree.inputs:
            self.generate_expr(code_object, op, folded=folded, initial=True,
                               language=language)
        code_object.close_level(footer="", cr="")
        return None

    elif isinstance(optree, ConditionalBranch):
        cond = optree.get_input(0)
        if_bb = optree.get_input(1)
        else_bb = optree.get_input(2)
        if_label = self.get_bb_label(code_object, if_bb)
        else_label = self.get_bb_label(code_object, else_bb)

        cond_code = self.generate_expr(
            code_object, cond, folded=folded, language=language)

        code_object << "br i1 {cond} , label %{if_label}, label %{else_label}\n".format(
            cond=cond_code.get(),
            if_label=if_label,
            else_label=else_label
        )
        # destination basic blocks are not generated from here
        return None

    elif isinstance(optree, UnconditionalBranch):
        dest_bb = optree.get_input(0)
        code_object << "br label %{}\n".format(
            self.get_bb_label(code_object, dest_bb))
        # destination basic block is not generated from here
        return None

    elif isinstance(optree, BasicBlockList):
        for bb in optree.inputs:
            self.generate_expr(code_object, bb, folded=folded,
                               language=language)
        return None

    elif isinstance(optree, Statement):
        Log.report(Log.Error,
                   "Statement are not supported in LLVM-IR codegen. "
                   "They must be translated to BB (e.g. through gen_basic_block pass). "
                   "faulty node: {}", optree)

    elif isinstance(optree, ConditionBlock):
        Log.report(Log.Error,
                   "ConditionBlock are not supported in LLVM-IR codegen. "
                   "They must be translated to BB (e.g. through gen_basic_block pass). "
                   "faulty node: {}", optree)

    elif isinstance(optree, Loop):
        Log.report(Log.Error,
                   "Loop are not supported in LLVM-IR codegen. "
                   "They must be translated to BB (e.g. through gen_basic_block pass). "
                   "faulty node: {}", optree)

    elif isinstance(optree, PhiNode):
        output_var = optree.get_input(0)
        output_var_code = self.generate_expr(
            code_object, output_var, folded=folded, language=language)

        # inputs after the first one come in (variable, basic block) pairs
        value_list = []
        for input_var, bb_var in zip(optree.get_inputs()[1::2],
                                     optree.get_inputs()[2::2]):
            assert isinstance(input_var, Variable)
            assert isinstance(bb_var, BasicBlock)
            input_var = self.generate_expr(
                code_object, input_var, folded=folded, language=language
            )
            bb_label = self.get_bb_label(code_object, bb_var)
            value_list.append("[{var}, %{bb}]".format(var=input_var.get(),
                                                      bb=bb_label))

        code_object << "{output_var} = phi {precision} {value_list}\n".format(
            output_var=output_var_code.get(),
            precision=llvm_ir_format(precision=output_var.get_precision()),
            value_list=(", ".join(value_list))
        )
        return None

    elif isinstance(optree, ReferenceAssign):
        output_var = optree.get_input(0)
        result_value = optree.get_input(1)

        # In LLVM it is illegal to assign a constant value directly to a
        # variable, so a dummy add with 0 is inserted
        if isinstance(result_value, Constant):
            cst_precision = result_value.get_precision()
            result_value = Addition(
                result_value,
                Constant(0, precision=cst_precision),
                precision=cst_precision)

        # TODO/FIXME: fix single static assignation enforcement
        # the assigned value is generated directly into the output variable
        result_value_code = self.generate_expr(
            code_object, result_value, folded=folded,
            result_var="%" + output_var.get_tag(), language=language
        )
        assert isinstance(result_value_code, CodeVariable)
        # debug msg generation is not supported in LLVM code generator
        return None

    else:
        # generic operation: delegated to the processor
        # NOTE(review): self.language is forwarded here, not the
        # language argument -- confirm this is intended
        result = self.processor.generate_expr(
            self, code_object, optree, optree.inputs, folded=folded,
            result_var=result_var, language=self.language)
        # each operation is generated on a separate line

    # registering result into memoization table
    self.add_memoization(optree, result)

    # debug management
    if optree.get_debug() and not self.disable_debug:
        code_object << self.generate_debug_msg(optree, result)

    if strip_outer_parenthesis and isinstance(result, CodeExpression):
        result.strip_outer_parenthesis()
    return result
def generate_scheme(self):
    """Build a scheme chaining operations on two-word formats.

    Each intermediate node is tagged after the expansion it is meant to
    exercise (see the per-node comments). The last node is converted
    back to self.precision and returned.

    self.precision must be ML_Binary32 or ML_Binary64 (KeyError on the
    format table otherwise).
    """
    # declaring function input variable
    v_x = []
    for index in range(self.arity):
        v_x.append(self.implementation.add_input_variable(
            "x%d" % index, self.get_input_precision(index)))

    wide_formats = {
        ML_Binary32: ML_SingleSingle,
        ML_Binary64: ML_DoubleDouble
    }
    double_format = wide_formats[self.precision]

    def wide(op_class, *operands, **attributes):
        """ build an <op_class> node in the two-word format """
        return op_class(*operands, precision=double_format, **attributes)

    def narrow(op_class, *operands, **attributes):
        """ build an <op_class> node in the single-word format """
        return op_class(*operands, precision=self.precision, **attributes)

    # testing Add211
    exact_add = wide(Addition, v_x[0], v_x[1], tag="exact_add")
    # testing Mul211
    exact_mul = wide(Multiplication, v_x[0], v_x[1], tag="exact_mul")
    # testing Sub211
    exact_sub = wide(Subtraction, v_x[1], v_x[0], tag="exact_sub")
    # testing Add222
    multi_add = wide(Addition, exact_add, exact_sub, tag="multi_add")
    # testing Mul222
    multi_mul = wide(Multiplication, multi_add, exact_mul, tag="multi_mul")
    # testing Add221 and Add212 and Sub222
    add221 = wide(Addition, exact_sub, v_x[1], tag="add221")
    add212 = wide(Addition, v_x[0], multi_mul, tag="add212")
    multi_sub = wide(Subtraction, add221, add212, tag="sub222")
    # testing Mul212 and Mul221
    mul212 = wide(Multiplication, multi_sub, v_x[0], tag="mul212")
    mul221 = wide(Multiplication, exact_mul, v_x[1], tag="mul221")
    # testing Sub221 and Sub212
    sub221 = wide(Subtraction, mul212, mul221.hi, tag="sub221")
    sub212 = wide(Subtraction, sub221, mul212.lo, tag="sub212")
    # testing FMA2111
    fma2111 = wide(FMA, sub221.lo, sub212.hi, mul221.hi, tag="fma2111")
    # testing FMA2112
    fma2112 = wide(FMA, fma2111.lo, fma2111.hi, fma2111, tag="fma2112")
    # testing FMA2212
    fma2212 = wide(FMA, fma2112, fma2112.hi, fma2112, tag="fma2212")
    # testing FMA2122
    fma2122 = wide(FMA, fma2212.lo, fma2212, fma2212, tag="fma2122")
    # testing FMA22222
    fma2222 = wide(FMA, fma2122, fma2212, fma2111, tag="fma2222")
    # testing Add122
    add122 = narrow(Addition, fma2222, fma2222, tag="add122")
    # testing Add112
    add112 = narrow(Addition, add122, fma2222, tag="add112")
    # testing Add121
    add121 = narrow(Addition, fma2222, add112, tag="add121")
    # testing subnormalization
    subnormalize_input = Addition(add121, add112, precision=double_format)
    subnormalize_arg = Constant(
        3, precision=self.precision.get_integer_format())
    multi_subnormalize = SpecificOperation(
        subnormalize_input,
        subnormalize_arg,
        specifier=SpecificOperation.Subnormalize,
        precision=double_format,
        tag="multi_subnormalize")
    result = Conversion(multi_subnormalize, precision=self.precision)

    return Statement(Return(result))
def generate_cos_scheme(self, computation_precision, tabulated_cos,
                        tabulated_sin, sin_C2, cos_C2, red_vx_lo):
    """Build the node graph

        tabulated_cos
        + (tabulated_cos * cos_C2) * u^2
        - tabulated_sin * u
        - (sin_C2 * u^2) * (tabulated_sin * u)

    with u = red_vx_lo.

    The two products by cos_C2 / sin_C2 use the fixed-point format
    ML_Custom_FixedPoint_Format(-1, 32, signed=True); every other node
    uses computation_precision.

    :return: the final Subtraction node
    """
    def coefficient_format():
        """ format of the products involving the C2 coefficients """
        return ML_Custom_FixedPoint_Format(-1, 32, signed=True)

    scaled_cos_C2 = Multiplication(
        tabulated_cos, cos_C2,
        precision=coefficient_format(),
        tag="cos_C2")
    u_squared = Multiplication(
        red_vx_lo, red_vx_lo,
        precision=computation_precision,
        tag="u2")
    sin_times_u = Multiplication(
        tabulated_sin, red_vx_lo,
        precision=computation_precision,
        tag="sin_u")
    cos_quadratic = Multiplication(
        scaled_cos_C2, u_squared,
        precision=computation_precision,
        tag="cos_C2_u2")
    sin_C2_u2 = Multiplication(
        sin_C2, u_squared,
        precision=coefficient_format(),
        tag="S2_u2")
    sin_cubic = Multiplication(
        sin_C2_u2, sin_times_u,
        precision=computation_precision,
        tag="S2_u3_sin")

    # accumulate the terms: cos part first, then both sin terms subtracted
    partial = Addition(
        tabulated_cos, cos_quadratic,
        precision=computation_precision,
        tag="cos_C2_u2_P_cos")
    partial = Subtraction(
        partial, sin_times_u,
        precision=computation_precision)
    return Subtraction(
        partial, sin_cubic,
        precision=computation_precision)
def generate_scheme(self):
    """Build the operation graph implementing the exponential function.

    Structure of the generated scheme:

    1. Special inputs (NaN / infinity) are handled first.
    2. Early overflow / underflow: inputs beyond bounds derived from the
       format's emax / subnormal emin return +inf / +0 directly.
    3. Argument reduction: ``k = nearest_integer(x / log(2))`` and
       ``r = (x - k * log2_hi) - k * log2_lo``.
    4. A polynomial approximating ``expm1`` on ``[-log(2)/2, log(2)/2]``
       is searched by increasing its degree until the approximation plus
       evaluation error (measured with gappa when it is installed) meets
       the goal derived from ``self.accuracy``.
    5. The result is ``2^k * poly``, with dedicated rescaled paths for
       late overflow and late underflow.

    Returns:
        The top-level ConditionBlock of the scheme.
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info,
               "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        if self.libm_compliant:
            return RaiseReturn(*args, precision=self.precision, **kwords)
        else:
            # non-compliant mode: plain return, no exception is raised
            return Return(kwords["return_value"], precision=self.precision)

    test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           debug=debug_multi, tag="nan_or_inf")
    test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi,
                    tag="is_nan_test")
    test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual,
                               debug=debug_multi, tag="inf_sign")

    test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN,
                              debug=debug_multi, tag="is_signaling_nan")
    return_snan = Statement(
        ExpRaiseReturn(ML_FPE_Invalid,
                       return_value=FP_QNaN(self.precision)))

    # return in case of infinity input: +inf -> +inf, -inf -> +0
    infty_return = Statement(
        ConditionBlock(
            test_positive,
            Return(FP_PlusInfty(self.precision), precision=self.precision),
            Return(FP_PlusZero(self.precision), precision=self.precision)))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(
        test_nan,
        ConditionBlock(
            test_signaling_nan,
            return_snan,
            Return(FP_QNaN(self.precision), precision=self.precision)),
        infty_return)
    # return in case of standard (non-special) input

    # exclusion of early overflow and underflow cases
    precision_emax = self.precision.get_emax()
    precision_max_value = S2 * S2**precision_emax
    exp_overflow_bound = sollya.ceil(log(precision_max_value))
    early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False,
                                     specifier=Comparison.Greater)
    early_overflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2**precision_emin
    exp_underflow_bound = floor(log(precision_min_value))

    early_underflow_test = Comparison(vx, exp_underflow_bound, likely=False,
                                      specifier=Comparison.Less)
    early_underflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow,
                       return_value=FP_PlusZero(self.precision)))

    # constant computation
    invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

    interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)),
                          sollya.ceil(sup(interval_fk)))

    # log2_hi keeps few enough bits that k * log2_hi is flagged exact below
    log2_hi_precision = self.precision.get_field_size() - (
        sollya.ceil(log2(sup(abs(interval_k)))) + 2)
    Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
    # NOTE(review): invlog2_cst is never read afterwards; construction is
    # kept in case it has side effects -- confirm before removing.
    invlog2_cst = Constant(invlog2, precision=self.precision)
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = self.precision.round_sollya_object(
        log(2) - log2_hi, sollya.RN)

    # argument reduction
    unround_k = vx * invlog2
    unround_k.set_attributes(tag="unround_k", debug=debug_multi)
    k = NearestInteger(unround_k, precision=self.precision,
                       debug=debug_multi)
    ik = NearestInteger(unround_k,
                        precision=self.precision.get_integer_format(),
                        debug=debug_multi, tag="ik")
    ik.set_tag("ik")
    k.set_tag("k")
    exact_pre_mul = (k * log2_hi)
    exact_pre_mul.set_attributes(exact=True)
    exact_hi_part = vx - exact_pre_mul
    exact_hi_part.set_attributes(exact=True, tag="exact_hi",
                                 debug=debug_multi,
                                 prevent_optimization=True)
    exact_lo_part = -k * log2_lo
    exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi,
                                 prevent_optimization=True)
    r = exact_hi_part + exact_lo_part
    r.set_tag("r")
    r.set_attributes(debug=debug_multi)

    approx_interval = Interval(-log(2) / 2, log(2) / 2)

    # three sub-intervals used for the dichotomy of the evaluation error
    approx_interval_half = approx_interval / 2
    approx_interval_split = [
        Interval(-log(2) / 2, inf(approx_interval_half)),
        approx_interval_half,
        Interval(sup(approx_interval_half), log(2) / 2)
    ]

    # TODO: should be computed automatically
    exact_hi_interval = approx_interval
    exact_lo_interval = -interval_k * log2_lo

    opt_r = self.optimise_scheme(r, copy={})

    tag_map = {}
    self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

    cg_eval_error_copy_map = {
        vx: Variable("x", precision=self.precision, interval=interval_vx),
        tag_map["k"]: Variable("k", interval=interval_k,
                               precision=self.precision)
    }

    if is_gappa_installed():
        eval_error = self.gappa_engine.get_eval_error_v2(
            self.opt_engine, opt_r, cg_eval_error_copy_map,
            gappa_filename="red_arg.g")
    else:
        eval_error = 0.0
        Log.report(Log.Warning,
                   "gappa is not installed in this environnement")
    Log.report(Log.Info, "eval error: %s" % eval_error)

    local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
    # FIXME refactor error_goal from accuracy
    Log.report(Log.Info, "accuracy: %s" % self.accuracy)
    if isinstance(self.accuracy, ML_Faithful):
        error_goal = local_ulp
    elif isinstance(self.accuracy, ML_CorrectlyRounded):
        error_goal = S2**-1 * local_ulp
    elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
        error_goal = self.accuracy.goal
    elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
        error_goal = self.accuracy.goal
    else:
        # NOTE(review): error_goal stays unbound here; this presumably
        # relies on an Error-level report aborting -- confirm.
        Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

    # half of the error budget goes to the polynomial approximation
    error_goal_approx = S2**-1 * error_goal

    Log.report(Log.Info,
               "\033[33;1m building mathematical polynomial \033[0m\n")
    poly_degree = max(
        sup(
            guessdegree(
                expm1(sollya.x) / sollya.x, approx_interval,
                error_goal_approx)) - 1, 2)

    def error_function(p, f, ai, mod, t):
        return dirtyinfnorm(f - p, ai)

    polynomial_scheme_builder = \
        PolynomialSchemeEvaluator.generate_estrin_scheme

    # search for the smallest degree meeting the error goal
    while True:
        Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
        precision_list = [1] + [self.precision] * (poly_degree)
        poly_object, poly_approx_error = \
            Polynomial.build_from_approximation_with_error(
                expm1(sollya.x), poly_degree, precision_list,
                approx_interval, sollya.absolute,
                error_function=error_function)
        Log.report(Log.Info, "polynomial: %s " % poly_object)
        sub_poly = poly_object.sub_poly(start_index=2)
        Log.report(Log.Info, "polynomial: %s " % sub_poly)

        Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

        Log.report(
            Log.Info,
            "\033[33;1m generating polynomial evaluation scheme \033[0m")
        pre_poly = polynomial_scheme_builder(
            poly_object, r, unified_precision=self.precision)
        pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

        pre_sub_poly = polynomial_scheme_builder(
            sub_poly, r, unified_precision=self.precision)
        pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

        # 1 + r + sub_poly(r), with r kept split into its hi and lo parts
        poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
        poly.set_tag("poly")

        # optimizing poly before evaluation error computation
        opt_poly = self.optimise_scheme(poly)
        opt_sub_poly = self.optimise_scheme(pre_sub_poly)

        # evaluating error of the polynomial approximation
        # NOTE(review): r_gappa_var, vx_gappa_var and k_gappa_var are not
        # read afterwards; kept in case construction has side effects.
        r_gappa_var = Variable("r", precision=self.precision,
                               interval=approx_interval)
        exact_hi_gappa_var = Variable("exact_hi", precision=self.precision,
                                      interval=exact_hi_interval)
        exact_lo_gappa_var = Variable("exact_lo", precision=self.precision,
                                      interval=exact_lo_interval)
        vx_gappa_var = Variable("x", precision=self.precision,
                                interval=interval_vx)
        k_gappa_var = Variable("k", interval=interval_k,
                               precision=self.precision)

        sub_poly_error_copy_map = {
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
        }

        poly_error_copy_map = {
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
        }

        if is_gappa_installed():
            sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine, opt_sub_poly, sub_poly_error_copy_map,
                gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

            # one gappa case per sub-interval of exact_hi
            dichotomy_map = [
                {exact_hi_part.get_handle().get_node(): sub_interval}
                for sub_interval in approx_interval_split
            ]
            poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                self.opt_engine, opt_poly, poly_error_copy_map,
                gappa_filename="gappa_poly.g", dichotomy=dichotomy_map)

            poly_eval_error = max(
                [sup(abs(err)) for err in poly_eval_error_dico])
        else:
            poly_eval_error = 0.0
            sub_poly_eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
            Log.report(Log.Info, "stopping autonomous degree research")
            # without gappa the evaluation error cannot be measured: the
            # polynomial built for the current degree is kept as is
            break
        Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
        Log.report(Log.Info,
                   "sub poly evaluation error: %s" % sub_poly_eval_error)

        # worst relative error (approximation + evaluation) over the cases
        global_rel_poly_error = None

        for case_index in range(3):
            poly_error = poly_approx_error + poly_eval_error_dico[
                case_index]
            rel_poly_error = sup(
                abs(poly_error /
                    sollya.exp(approx_interval_split[case_index])))
            # identity test: never compare a numeric object to None with ==
            if (global_rel_poly_error is None
                    or rel_poly_error > global_rel_poly_error):
                global_rel_poly_error = rel_poly_error

        if error_goal > global_rel_poly_error:
            break
        poly_degree += 1

    late_overflow_test = Comparison(ik, self.precision.get_emax(),
                                    specifier=Comparison.Greater,
                                    likely=False, debug=debug_multi,
                                    tag="late_overflow_test")
    # floor division: the offset is used as an integer exponent, and true
    # division would yield a float (fractional for an odd field size)
    overflow_exp_offset = (self.precision.get_emax() -
                           self.precision.get_field_size() // 2)
    diff_k = Subtraction(
        ik,
        Constant(overflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        debug=debug_multi,
        tag="diff_k",
    )
    # 2^(ik - offset) * poly * 2^offset: the scaling is split in two
    # factors so that each exponent insertion stays in range
    late_overflow_result = (ExponentInsertion(
        diff_k, precision=self.precision) * poly) * ExponentInsertion(
            overflow_exp_offset, precision=self.precision)
    late_overflow_result.set_attributes(silent=False,
                                        tag="late_overflow_result",
                                        debug=debug_multi,
                                        precision=self.precision)
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
        ExpRaiseReturn(ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)),
        Return(late_overflow_result, precision=self.precision))

    late_underflow_test = Comparison(k, self.precision.get_emin_normal(),
                                     specifier=Comparison.LessOrEqual,
                                     likely=False)
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_exp = Addition(
        ik,
        Constant(underflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        tag="corrected_exp")
    # 2^(ik + offset) * poly * 2^(-offset)
    late_underflow_result = (
        ExponentInsertion(corrected_exp, precision=self.precision) *
        poly) * ExponentInsertion(-underflow_exp_offset,
                                  precision=self.precision)
    late_underflow_result.set_attributes(debug=debug_multi,
                                         tag="late_underflow_result",
                                         silent=False)
    test_subnormal = Test(late_underflow_result,
                          specifier=Test.IsSubnormal)
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow,
                           return_value=late_underflow_result)),
        Return(late_underflow_result, precision=self.precision))

    twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi,
                             precision=self.precision)
    std_result = twok * poly
    std_result.set_attributes(tag="std_result", debug=debug_multi)
    result_scheme = ConditionBlock(
        late_overflow_test, late_overflow_return,
        ConditionBlock(late_underflow_test, late_underflow_return,
                       Return(std_result, precision=self.precision)))
    std_return = ConditionBlock(
        early_overflow_test, early_overflow_return,
        ConditionBlock(early_underflow_test, early_underflow_return,
                       result_scheme))

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = ConditionBlock(
        test_nan_or_inf,
        Statement(ClearException() if self.libm_compliant else Statement(),
                  specific_return),
        std_return)

    return scheme
def pointer_add(table_addr, offset):
    """Return an Addition node computing ``table_addr + offset``.

    The node's precision is the format returned by
    ``table_addr.get_precision_as_pointer_format()``.
    """
    return Addition(
        table_addr,
        offset,
        precision=table_addr.get_precision_as_pointer_format())