示例#1
0
    def generate_scheme(self):
        """Build the scheme computing 2 + x * x together with its error nodes.

        The input "x" is declared with FIXED_FORMAT and the interval [-1, 1].
        Every node returned by runtime_error_eval.generate_error_eval_graph is
        added to the returned Statement before the final Return.
        """
        x_input = self.implementation.add_input_variable("x", FIXED_FORMAT)
        # restrict the input variable <x> to a specific interval
        x_input.set_interval(Interval(-1, 1))

        accumulator_format = ML_Custom_FixedPoint_Format(6, 58, False)

        two = Constant(2, precision=accumulator_format, tag="C2")
        square = Multiplication(x_input, x_input,
                                precision=accumulator_format, tag="mul")
        result = Addition(two, square,
                          precision=accumulator_format, tag="add")

        # the error graph is evaluated for x = 0.125 rounded to x's format
        rounded_input = x_input.get_precision().round_sollya_object(0.125)
        error_nodes = runtime_error_eval.generate_error_eval_graph(
            result, {x_input: rounded_input})

        # dummy scheme so that code generation has something functional to emit
        scheme = Statement()
        for key in error_nodes:
            scheme.add(error_nodes[key])
        scheme.add(Return(result))
        return scheme
示例#2
0
def Add211(x, y, precision=None):
    """ Multi-precision Addition (2sum) HI, LO = x + y

        Builds zh = x + y, t1 = zh - x and zl = y - t1.

        :param x: first operand node
        :param y: second operand node
        :param precision: format forwarded to each node built here.
            NOTE(review): assumes precision=None behaves like omitting the
            keyword, as in the sibling helpers that default to None - confirm.
        :return: tuple (zh, zl)

        TODO: missing assumption on input order """
    zh = Addition(x, y, precision=precision)
    t1 = Subtraction(zh, x, precision=precision)
    zl = Subtraction(y, t1, precision=precision)
    return zh, zl
示例#3
0
def Mul212(x, yh, yl, precision=None, fma=True):
    """ Multi-precision Multiplication:
        HI, LO = x * [yh:yl]

        x * yh is expanded by Mul211; x * yl is added to its low part and
        the pair is recombined by Add211. """
    prod_hi, prod_lo = Mul211(x, yh, precision, fma)
    cross_term = Multiplication(x, yl, precision=precision)
    low_sum = Addition(prod_lo, cross_term, precision=precision)
    return Add211(prod_hi, low_sum, precision)
示例#4
0
def Mul211(x, y, precision=None, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y

        With fma equal to True the low part is a single FMS node; otherwise
        both operands go through Split and the low part is accumulated from
        the partial products. """
    zh = Multiplication(x, y, precision=precision)
    # equality test (not plain truthiness) kept as in the historical code
    if fma == True:
        return zh, FMS(x, y, zh, precision=precision)

    x_hi, x_lo = Split(x, precision=precision)
    y_hi, y_lo = Split(y, precision=precision)
    hi_hi = Multiplication(x_hi, y_hi, precision=precision)
    residual = Subtraction(hi_hi, zh, precision=precision)
    hi_lo = Multiplication(x_hi, y_lo, precision=precision)
    lo_hi = Multiplication(x_lo, y_hi, precision=precision)
    lo_lo = Multiplication(x_lo, y_lo, precision=precision)
    partial = Addition(residual, hi_lo, precision=precision)
    partial = Addition(partial, lo_hi, precision=precision)
    zl = Addition(partial, lo_lo, precision=precision)
    return zh, zl
示例#5
0
def Mul211(x, y, fma=True):
    """ Multi-precision Multiplication HI, LO = x * y

        With fma equal to True the low part is a single FMS node; otherwise
        both operands go through Split and the low part is accumulated from
        the partial products. """
    zh = Multiplication(x, y)
    # equality test (not plain truthiness) kept as in the historical code
    if fma == True:
        return zh, FMS(x, y, zh)

    x_hi, x_lo = Split(x)
    y_hi, y_lo = Split(y)
    hi_hi = Multiplication(x_hi, y_hi)
    residual = Subtraction(hi_hi, zh)
    hi_lo = Multiplication(x_hi, y_lo)
    lo_hi = Multiplication(x_lo, y_hi)
    lo_lo = Multiplication(x_lo, y_lo)
    partial = Addition(residual, hi_lo)
    partial = Addition(partial, lo_hi)
    zl = Addition(partial, lo_lo)
    return zh, zl
示例#6
0
def Add212(xh, yh, yl, precision=None):
    """ Multi-precision Addition:
        HI, LO = xh + [yh:yl]

        Sequence built (every node uses <precision>):
            r  = xh + yh
            s  = ((xh - r) + yh) + yl
            zh = r + s
            zl = (r - zh) + s """
    def add(lhs, rhs):
        return Addition(lhs, rhs, precision=precision)

    def sub(lhs, rhs):
        return Subtraction(lhs, rhs, precision=precision)

    r = add(xh, yh)
    s = add(add(sub(xh, r), yh), yl)
    zh = add(r, s)
    zl = add(sub(r, zh), s)
    return zh, zl
示例#7
0
def Split(a, precision=None):
    """... splitting algorithm for Dekker TwoMul

        Returns (ah, al) with ah = (a - c) + c and al = a - ah, where
        c = split_constant * a. Only ML_Binary32 and ML_Binary64 inputs have
        a split constant; any other a.precision raises KeyError. """
    # 4097 = 2**12 + 1 and 134217729 = 2**27 + 1
    split_constants = {ML_Binary32: 4097, ML_Binary64: 134217729}
    factor = Constant(split_constants[a.precision],
                      precision=a.get_precision(),
                      tag='fp_split')
    scaled = Multiplication(factor, a, precision=precision)
    delta = Subtraction(a, scaled, precision=precision)
    ah = Addition(delta, scaled, precision=precision)
    al = Subtraction(a, ah, precision=precision)
    return ah, al
 def expand_sub(self, node):
     """ Expand a subtraction node by rewriting it as an addition.

         :param node: two-input node; input 0 is the left-hand side and
             input 1 the right-hand side
         :return: whatever self.expand_node returns for the rewritten node
     """
     lhs = node.get_input(0)
     rhs = node.get_input(1)
     precision = node.get_precision()
     # x - y is rebuilt as x + (-y); the negation keeps y's own precision
     new_node = Addition(lhs,
                         Negation(rhs, precision=rhs.precision),
                         precision=precision)
     forward_attributes(node, new_node)
     return self.expand_node(new_node)
示例#9
0
def Mul222(xh, xl, yh, yl, fma=True):
    """ Multi-precision Multiplication:
        HI, LO = [xh:xl] * [yh:yl]

        The xl * yl term is not part of either code path. """
    # equality test (not plain truthiness) kept as in the historical code
    if fma == True:
        ph = Multiplication(xh, yh)
        # low part: error of xh * yh, then the two cross terms folded in
        product_error = FMS(xh, yh, ph)
        with_first_cross = FMA(xh, yl, product_error)
        pl = FMA(xl, yh, with_first_cross)
        zh = Addition(ph, pl)
        renorm = Subtraction(ph, zh)
        zl = Addition(renorm, pl)
        return zh, zl

    main_hi, main_lo = Mul211(xh, yh, fma)
    cross_a = Multiplication(xh, yl)
    cross_b = Multiplication(xl, yh)
    cross_sum = Addition(cross_a, cross_b)
    low_sum = Addition(main_lo, cross_sum)
    zh, zl = Add211(main_hi, low_sum)
    return zh, zl
示例#10
0
def generate_fasttwosum(vx, vy):
    """Return two optrees for a FastTwoSum operation.

    Precondition: |vx| >= |vy|.
    The return value is a tuple (sum, error) where
        sum   = vx + vy
        error = vy - (sum - vx)
    """
    s = Addition(vx, vy)
    # the correction term is derived from the sum node itself; the previous
    # code referenced an undefined name here and raised NameError
    b = Subtraction(s, vx)
    e = Subtraction(vy, b)
    return s, e
示例#11
0
def Split(a):
    """... splitting algorithm for Dekker TwoMul

        Returns (ah, al) with ah = (a - c) + c and al = a - ah, where
        c = 4097 * a. """
    # 4097 = 2**12 + 1 is used whatever the format of <a>.
    # NOTE(review): a format-dependent constant (134217729 for ML_Binary64)
    # was sketched here but left disabled - confirm before using this
    # helper on binary64 inputs.
    factor = Constant(4097, precision=a.get_precision(), tag='fp_split')
    scaled = Multiplication(factor, a)
    delta = Subtraction(a, scaled)
    ah = Addition(delta, scaled)
    al = Subtraction(a, ah)
    return ah, al
示例#12
0
    def generate_scheme(self):
        """Build the scheme returning y + x * z for the inputs x, y and z.

        All inputs and both operations use self.precision.
        """
        x_in = self.implementation.add_input_variable("x", self.precision)
        y_in = self.implementation.add_input_variable("y", self.precision)
        z_in = self.implementation.add_input_variable("z", self.precision)

        product = Multiplication(x_in, z_in, precision=self.precision)
        total = Addition(y_in, product, precision=self.precision)

        # the sum node appears both as a statement and as the returned value
        return Statement(total, Return(total))
示例#13
0
def Add222(xh, xl, yh, yl):
    """ Multi-precision Addition:
        HI, LO = [xh:xl] + [yh:yl]

        Sequence built:
            r  = xh + yh
            s  = (((xh - r) + yh) + yl) + xl
            zh = r + s
            zl = (r - zh) + s """
    r = Addition(xh, yh)
    high_error = Addition(Subtraction(xh, r), yh)
    s = Addition(Addition(high_error, yl), xl)
    zh = Addition(r, s)
    renorm = Subtraction(r, zh)
    zl = Addition(renorm, s)
    return zh, zl
示例#14
0
def Add222(xh, xl, yh, yl, precision=None):
    """ Multi-precision Addition:
        HI, LO = [xh:xl] + [yh:yl]

        Sequence built (every node uses <precision>):
            r  = xh + yh
            s  = (((xh - r) + yh) + yl) + xl
            zh = r + s
            zl = (r - zh) + s """
    def add(lhs, rhs):
        return Addition(lhs, rhs, precision=precision)

    def sub(lhs, rhs):
        return Subtraction(lhs, rhs, precision=precision)

    r = add(xh, yh)
    s = add(add(add(sub(xh, r), yh), yl), xl)
    zh = add(r, s)
    zl = add(sub(r, zh), s)
    return zh, zl
示例#15
0
    def generate_scheme(self):
        """ main scheme generation

            Declares the fixed-point input signals "x" and "y" (3 integer
            bits, self.width - 3 fractional bits), builds a nested Select
            expression plus a Max node, and registers their converted sum as
            the output signal "vr_out".
        """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        integer_bits = 3
        fractional_bits = self.width - integer_bits

        in_format = fixed_point(integer_bits, fractional_bits)
        out_format = fixed_point(integer_bits, fractional_bits)

        # main input signals
        x_sig = self.implementation.add_input_signal("x", in_format)
        y_sig = self.implementation.add_input_signal("y", in_format)
        for signal in (x_sig, y_sig):
            signal.set_attributes(debug=debug_fixed)

        # comparison node carrying debug attributes; it does not feed the output
        x_above_one = (x_sig > 1)
        x_above_one.set_attributes(tag="test", debug=debug_std)

        diff = x_sig - y_sig
        zero = Constant(0)

        # nested selection, built from the outermost condition inwards
        diff_is_negative = zero > diff
        y_is_positive = zero < y_sig
        innermost_cond = LogicalAnd(zero > x_sig,
                                    zero < y_sig,
                                    tag="last_lev_cond")
        innermost_sel = Select(innermost_cond, x_sig, zero,
                               tag="last_lev_sel")
        inner_sel = Select(y_is_positive, diff, innermost_sel,
                           tag="pre_select")
        pre_result_select = Select(diff_is_negative, inner_sel, y_sig,
                                   tag="pre_result_select")
        pre_result = Max(0, x_sig - y_sig, tag="pre_result")

        total = Addition(pre_result, pre_result_select, tag="add")
        result = Conversion(total, precision=out_format)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]
def expand_kernel_expr(kernel, iterator_format=ML_Int32):
    """ Expand a kernel expression into the corresponding MDL graph

        :param kernel: expression to expand; NDRange, Sum and accessor nodes
            are rewritten, leaf nodes are returned untouched and any other
            node has its inputs expanded in place
        :param iterator_format: forwarded unchanged to nested expansions;
            this function does not read it otherwise
        :return: the expanded node (a PlaceHolder wrapping the accumulator
            and its loop when <kernel> is a Sum)
    """
    if isinstance(kernel, NDRange):
        return expand_ndrange(kernel)
    elif isinstance(kernel, Sum):
        var_iter = kernel.index_iter_range.var_index
        # TODO/FIXME to be uniquified
        acc = Variable("acc",
                       var_type=Variable.Local,
                       precision=kernel.precision)
        # TODO/FIXME implement proper acc init
        if kernel.precision.is_vector_format():
            C0 = Constant([0] * kernel.precision.get_vector_size(),
                          precision=kernel.precision)
        else:
            C0 = Constant(0, precision=kernel.precision)
        scheme = Loop(
            # loop initialisation: iterator start and accumulator reset
            Statement(
                ReferenceAssign(var_iter, kernel.index_iter_range.first_index),
                ReferenceAssign(acc, C0)),
            # loop condition
            var_iter <= kernel.index_iter_range.last_index,
            # loop body
            Statement(
                ReferenceAssign(
                    acc,
                    Addition(acc,
                             expand_kernel_expr(kernel.elt_operation,
                                                iterator_format),
                             precision=kernel.precision)),
                # loop iterator increment
                ReferenceAssign(var_iter, var_iter +
                                kernel.index_iter_range.index_step)))
        return PlaceHolder(acc, scheme)
    elif isinstance(kernel, (ReadAccessor, WriteAccessor)):
        return expand_accessor(kernel)
    elif is_leaf_node(kernel):
        return kernel
    else:
        # vanilla metalibm ops are left unmodified (except
        # recursive expansion)
        for index, op in enumerate(kernel.inputs):
            new_op = expand_kernel_expr(op, iterator_format)
            kernel.set_input(index, new_op)
        return kernel
 def expand_sub(self, node):
     """ Expand a subtraction node by rewriting it as an addition.

         :param node: two-input node; input 0 is the left-hand side and
             input 1 the right-hand side
         :return: the value of self.expand_node for the rewritten node
             (iterated over when the verbose log message is built)
     """
     lhs = node.get_input(0)
     rhs = node.get_input(1)
     precision = node.get_precision()
     # Subtraction x - y is transformed into x + (-y)
     # WARNING: if y is not expandable (e.g. scalar precision)
     #          this could stop expansion
     new_node = Addition(lhs,
                         Negation(rhs, precision=rhs.precision),
                         precision=precision)
     forward_attributes(node, new_node)
     expanded_node = self.expand_node(new_node)
     Log.report(
         LOG_LEVEL_EXPAND_VERBOSE,
         "expanding Subtraction {} into {} with expanded form {}", node,
         new_node, ", ".join(
             (op.get_str(display_precision=True, depth=None))
             for op in expanded_node))
     return expanded_node
示例#18
0
def generate_exp_extraction(optree):
    """ Build the node adding the format bias to the raw exponent of
        <optree> (as returned by generate_raw_exp_extraction).

        Vector inputs use a per-lane bias list and an integer vector format
        taken from a fixed table; a vector format missing from that table
        raises KeyError. """
    if optree.precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        lane_count = optree.precision.get_vector_size()
        vector_int_formats = {
            v2float32: v2int32,
            v2float64: v2int64,
            v4float32: v4int32,
            v4float64: v4int64,
            v8float32: v8int32,
            v8float64: v8int64,
        }
        int_precision = vector_int_formats[optree.precision]
        bias_cst = [scalar_format.get_bias()] * lane_count
    else:
        scalar_format = optree.precision
        int_precision = scalar_format.get_integer_format()
        bias_cst = scalar_format.get_bias()

    raw_exponent = generate_raw_exp_extraction(optree)
    bias_node = Constant(bias_cst, precision=int_precision)
    return Addition(raw_exponent, bias_node, precision=int_precision)
示例#19
0
def generate_exp_insertion(optree, result_precision):
    """ generate the expanded version of ExponentInsertion
        with @p optree as input and assuming @p result_precision
        as output precision

        The result is (optree - bias) << field_size, computed in optree's
        own format and then type-cast to @p result_precision. """
    if result_precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        lane_count = optree.precision.get_vector_size()
        # one constant per lane
        bias_cst = [
            -result_precision.get_scalar_format().get_bias()
        ] * lane_count
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * lane_count
    else:
        scalar_format = optree.precision
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()
    assert is_std_integer_format(scalar_format)

    bias_node = Constant(bias_cst, precision=optree.precision)
    biased_exponent = Addition(optree, bias_node, precision=optree.precision)
    shift_node = Constant(shift_cst, precision=optree.precision)
    shifted = BitLogicLeftShift(biased_exponent, shift_node,
                                precision=optree.precision)
    return TypeCast(shifted, precision=result_precision)
示例#20
0
def generate_exp_insertion(optree, result_precision):
    """ generate the expanded version of ExponentInsertion
        with @p optree as input and assuming @p result_precision
        as output precision

        The arithmetic is done in a working integer format derived from
        @p result_precision; @p optree is converted to it when its own
        precision is a different object. """
    if result_precision.is_vector_format():
        scalar_format = optree.precision.get_scalar_format()
        lane_count = optree.precision.get_vector_size()
        # determine the working format (for expression)
        result_int_format = (
            result_precision.get_scalar_format().get_integer_format())
        work_format = VECTOR_TYPE_MAP[result_int_format][lane_count]
        bias_cst = [
            -result_precision.get_scalar_format().get_bias()
        ] * lane_count
        shift_cst = [
            result_precision.get_scalar_format().get_field_size()
        ] * lane_count
    else:
        scalar_format = optree.precision
        work_format = result_precision.get_integer_format()
        bias_cst = -result_precision.get_bias()
        shift_cst = result_precision.get_field_size()

    if not is_std_integer_format(scalar_format):
        Log.report(
            Log.Error,
            "{} should be a std integer format in generate_exp_insertion {} with precision {}",
            scalar_format, optree, result_precision)
    assert is_std_integer_format(scalar_format)

    # bring the exponent into the working format when needed
    if optree.precision is not work_format:
        exponent_input = Conversion(optree, precision=work_format)
    else:
        exponent_input = optree
    bias_node = Constant(bias_cst, precision=work_format)
    biased_exponent = Addition(exponent_input, bias_node,
                               precision=work_format)
    shift_node = Constant(shift_cst, precision=work_format)
    shifted = BitLogicLeftShift(biased_exponent, shift_node,
                                precision=work_format)
    return TypeCast(shifted, precision=result_precision)
示例#21
0
 def legalizer(exp_insertion_node):
     """ Expand @p exp_insertion_node into (input - bias) << field_size,
         computed in the input's own format and type-cast to
         result_precision (a name taken from the enclosing scope). """
     optree = exp_insertion_node.get_input(0)
     if result_precision.is_vector_format():
         scalar_format = optree.precision.get_scalar_format()
         lane_count = optree.precision.get_vector_size()
         # one constant per lane
         bias_cst = [
             -result_precision.get_scalar_format().get_bias()
         ] * lane_count
         shift_cst = [
             result_precision.get_scalar_format().get_field_size()
         ] * lane_count
     else:
         scalar_format = optree.precision
         bias_cst = -result_precision.get_bias()
         shift_cst = result_precision.get_field_size()
     assert is_std_integer_format(scalar_format)

     bias_node = Constant(bias_cst, precision=optree.precision)
     biased_exponent = Addition(optree, bias_node,
                                precision=optree.precision)
     shift_node = Constant(shift_cst, precision=optree.precision)
     shifted = BitLogicLeftShift(biased_exponent, shift_node,
                                 precision=optree.precision)
     return TypeCast(shifted, precision=result_precision)
示例#22
0
def mll_implementpoly_horner(ctx, poly_object, eps, variable):
    """ generate an implementation of polynomial @p poly_object of @p variable
        whose evaluation error is bounded by @p eps. @p variable must have an
        interval and a precision set

        The polynomial is processed recursively as cst0 + var * sub_poly,
        with @p eps split between the constants, the variable, the
        multiplication and the addition.

        :param ctx: multi-word precision context to use
        :type ctx: MLL_Context
        :param poly_object: polynomial object to implement
        :type poly_object:
        :param eps: target relative error bound
        :param variable: polynomial input variable
        :type variable: ML_Operation

        :return: <implementation node>, <real relative error>
            (nothing is returned explicitly when the degree is rejected)
        :rtype: tuple(ML_Operation, SollyaObject)
    """
    if poly_object.degree == 0:
        # constant only
        cst = poly_object.coeff_map[0]
        rounded_cst = ctx.roundConstant(cst, eps)
        cst_format = ctx.computeConstantFormat(rounded_cst)
        # the node carries the rounded value, i.e. the one its format was
        # computed from (same convention as the other branches)
        return Constant(rounded_cst, precision=cst_format), cst_format.epsilon

    elif poly_object.degree == 1:
        # cst0 + cst1 * var
        # final relative error is
        # (cst0 (1 + e0) + cst1 * var (1 + e1) (1 + ev) (1 + em))(1 + ea)
        # (cst0  + e0 * cst0  + cst1 * var (1 + e1 + ev + e1 * ev) (1 + em))(1 + ea)
        # (cst0  + e0 * cst0  + cst1 * var (1 + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em) )(1 + ea)
        # (cst0 + cst1 * var) (1 + ea) (1 + e0 * cst0 + e1 + ev + e1 * ev + em + e1 * em + ev * em + e1 * ev * em)
        # em is epsilon for the multiplication
        # ea is epsilon for the addition
        cst0 = poly_object.coeff_map[0]
        cst1 = poly_object.coeff_map[1]
        eps_mul = eps / 4
        eps_add = eps / 2

        cst1_rounded = ctx.roundConstant(cst1, eps / 4)
        cst1_format = ctx.computeConstantFormat(cst1_rounded)
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(variable.interval,
                                                     eps_var,
                                                     variable.precision)
        var_node = legalize_node_format(variable, var_format)
        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, cst1_format, var_format)
        add_format = ctx.computeOutputFormatAddition(eps_add, cst0_format,
                                                     mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(Constant(cst1_rounded, precision=cst1_format),
                           var_node,
                           precision=mul_format),
            precision=add_format), add_format.epsilon  # TODO: local error only

    elif poly_object.degree > 1:
        # cst0 + var * poly
        cst0 = poly_object.coeff_map[0]
        cst0_rounded = ctx.roundConstant(cst0, eps / 4)
        cst0_format = ctx.computeConstantFormat(cst0_rounded)

        eps_var = eps / 4
        var_format = ctx.computeNeededVariableFormat(variable.interval,
                                                     eps_var,
                                                     variable.precision)
        var_node = legalize_node_format(variable, var_format)

        # remaining coefficients, shifted down by one degree
        sub_poly = poly_object.sub_poly(start_index=1, offset=1)
        eps_poly = eps / 4
        # the accuracy reported by the recursive call is not used here
        poly_node, _ = mll_implementpoly_horner(
            ctx, sub_poly, eps_poly, variable)

        eps_mul = eps / 4
        mul_format = ctx.computeOutputFormatMultiplication(
            eps_mul, var_format, poly_node.precision)

        eps_add = eps / 4
        add_format = ctx.computeOutputFormatAddition(eps_add, cst0_format,
                                                     mul_format)

        return Addition(
            Constant(cst0_rounded, precision=cst0_format),
            Multiplication(var_node, poly_node, precision=mul_format),
            precision=add_format), add_format.epsilon  # TODO: local error only
    else:
        Log.report(Log.Error, "poly degree must be positive or null. {}, {}",
                   poly_object.degree, poly_object)
示例#23
0
    #    },
    #    FusedMultiplyAdd.DotProduct: {
    #        lambda optree: True:
    #            lambda optree, processor: Addition(Multiplication(optree.inputs[0], optree.inputs[1], precision = optree.get_precision()), Multiplication(optree.inputs[2], optree.inputs[3], precision = optree.get_precision()), precision = optree.get_precision()),
    #    },
    #    FusedMultiplyAdd.DotProductNegate: {
    #        lambda optree: True:
    #            lambda optree, processor: Subtraction(Multiplication(optree.inputs[0], optree.inputs[1], precision = optree.get_precision()), Multiplication(optree.inputs[2], optree.inputs[3], precision = optree.get_precision()), precision = optree.get_precision()),
    #    },
    #},
    Subtraction: {
        None: {
            lambda optree: True:
            lambda optree, processor: Addition(
                optree.inputs[0],
                Negation(optree.inputs[1],
                         precision=optree.inputs[1].get_precision()),
                precision=optree.get_precision())
        },
    },
    DivisionSeed: {
        None: {
            lambda optree: True: simplify_inverse,
        },
    },
}


def silence_fp_operations(optree, force=False, memoization_map=None):
    """ ensure that all floating-point operations from optree root
        have the silent attribute set to True """
示例#24
0
def Add1111(x, y, z, precision=None):
    """ Three-operand addition x + y + z returning a single node.

        y + z and x + hi(y + z) are expanded by Add211; the two low parts
        are combined by Add_round_to_odd and added to the remaining
        high part. """
    yz_hi, yz_lo = Add211(y, z, precision=precision)
    total_hi, total_lo = Add211(x, yz_hi, precision=precision)
    low_parts = Add_round_to_odd(total_lo, yz_lo, precision=precision)
    return Addition(low_parts, total_hi, precision=precision)
示例#25
0
    def generate_expr(self, code_object, optree, folded=True, result_var=None, initial=False, __exact=None, language=None, strip_outer_parenthesis=False, force_variable_storing=False, next_block=None):
        """ code generation function

            Emit the LLVM-IR code for <optree> into <code_object>.

            :param code_object: receiver of the emitted code (through ``<<``)
            :param optree: node to generate
            :param folded: forwarded to recursive calls and to the processor
            :param result_var: forwarded to the processor for generic nodes
            :param language: forwarded to recursive calls (the processor
                fallback is given self.language instead)
            :param strip_outer_parenthesis: when set, a CodeExpression result
                has its outer parenthesis stripped before being returned
            :return: the memoized result when <optree> was already processed;
                None for the control-flow nodes handled here (BasicBlock,
                branches, BasicBlockList, PhiNode, ReferenceAssign), which
                are not memoized; the generated code object otherwise

            initial, __exact, force_variable_storing and next_block are not
            read by this implementation.
        """

        # search if <optree> has already been processed
        if self.has_memoization(optree):
            return self.get_memoization(optree)

        result = None
        # implementation generation
        if isinstance(optree, CodeVariable):
            # adding LLVM variable "%" prefix (startswith also copes with
            # an empty name)
            if not optree.name.startswith("%"):
                optree.name = "%" + optree.name
            result = optree

        elif isinstance(optree, Variable):
            result = CodeVariable("%" + optree.get_tag(), optree.get_precision())

        elif isinstance(optree, Constant):
            result = generate_Constant_expr(optree)

        elif isinstance(optree, BasicBlock):
            bb_label = self.get_bb_label(code_object, optree)
            code_object << (bb_label + ":")
            code_object.open_level(header="")
            for op in optree.inputs:
                self.generate_expr(code_object, op, folded=folded,
                    initial=True, language=language)
            code_object.close_level(footer="", cr="")
            return None

        elif isinstance(optree, ConditionalBranch):
            cond = optree.get_input(0)
            if_bb = optree.get_input(1)
            else_bb = optree.get_input(2)
            if_label = self.get_bb_label(code_object, if_bb)
            else_label = self.get_bb_label(code_object, else_bb)

            cond_code = self.generate_expr(
                code_object, cond, folded=folded, language=language)

            code_object << "br i1 {cond} , label %{if_label}, label %{else_label}\n".format(
                cond=cond_code.get(),
                if_label=if_label,
                else_label=else_label
            )
            # destination basic blocks are not generated from here
            return None

        elif isinstance(optree, UnconditionalBranch):
            dest_bb = optree.get_input(0)
            code_object << "br label %{}\n".format(self.get_bb_label(code_object, dest_bb))
            # the destination basic block is not generated from here
            return None

        elif isinstance(optree, BasicBlockList):
            for bb in optree.inputs:
                self.generate_expr(code_object, bb, folded=folded, language=language)
            return None

        elif isinstance(optree, Statement):
            Log.report(Log.Error, "Statement are not supported in LLVM-IR codegen. "
                "They must be translated to BB (e.g. through gen_basic_block pass), "
                "faulty node: {}", optree)

        elif isinstance(optree, ConditionBlock):
            Log.report(Log.Error, "ConditionBlock are not supported in LLVM-IR codegen. "
                "They must be translated to BB (e.g. through gen_basic_block pass), "
                "faulty node: {}", optree)

        elif isinstance(optree, Loop):
            Log.report(Log.Error, "Loop are not supported in LLVM-IR codegen. "
                "They must be translated to BB (e.g. through gen_basic_block pass), "
                "faulty node: {}", optree)

        elif isinstance(optree, PhiNode):
            output_var = optree.get_input(0)
            output_var_code = self.generate_expr(
                code_object, output_var, folded=folded, language=language)

            # inputs after the first one come as (variable, basic block) pairs
            value_list = []
            for input_var, bb_var in zip(optree.get_inputs()[1::2], optree.get_inputs()[2::2]):
                assert isinstance(input_var, Variable)
                assert isinstance(bb_var, BasicBlock)
                input_var = self.generate_expr(
                    code_object, input_var, folded=folded, language=language
                )
                bb_label = self.get_bb_label(code_object, bb_var)
                value_list.append("[{var}, %{bb}]".format(var=input_var.get(), bb=bb_label))

            code_object << "{output_var} = phi {precision} {value_list}\n".format(
                output_var=output_var_code.get(),
                precision=llvm_ir_format(precision=output_var.get_precision()),
                value_list=(", ".join(value_list))
            )

            return None

        elif isinstance(optree, ReferenceAssign):
            output_var = optree.get_input(0)
            result_value = optree.get_input(1)

            # In LLVM it is illegal to assign a constant value directly to a
            # variable, so we insert a dummy add with 0
            if isinstance(result_value, Constant):
                cst_precision = result_value.get_precision()
                result_value = Addition(
                    result_value,
                    Constant(0, precision=cst_precision),
                    precision=cst_precision)

            # TODO/FIXME: fix single static assignation enforcement
            # (the output variable is not generated on its own: its name is
            #  passed as result_var of the assigned value)
            result_value_code = self.generate_expr(
                code_object, result_value, folded=folded, result_var="%"+output_var.get_tag(), language=language
            )
            assert isinstance(result_value_code, CodeVariable)
            # debug msg generation is not supported in LLVM code generator

            return None

        else:
            result = self.processor.generate_expr(self, code_object, optree, optree.inputs, folded = folded, result_var = result_var, language = self.language)
            # each operation is generated on a separate line

        # registering result into memoization table
        self.add_memoization(optree, result)

        # debug management
        if optree.get_debug() and not self.disable_debug:
            code_object << self.generate_debug_msg(optree, result)

        if strip_outer_parenthesis and isinstance(result, CodeExpression):
            result.strip_outer_parenthesis()
        return result
示例#26
0
    def generate_scheme(self):
        """Build a chain of multi-word operations over the function inputs.

        self.arity inputs named "x<index>" are declared; the operations are
        built in the double-word format matching self.precision
        (ML_SingleSingle for ML_Binary32, ML_DoubleDouble for ML_Binary64;
        any other precision raises KeyError). The chain ends with a
        Subnormalize operation converted back to self.precision, and the
        returned Statement holds a single Return of that value.

        The "testing ..." comments name the multi-word routine each node is
        meant to exercise.
        """
        # declaring function input variable
        v_x = [
            self.implementation.add_input_variable(
                "x%d" % index, self.get_input_precision(index))
            for index in range(self.arity)
        ]

        # double-word format associated with the scalar precision
        double_format = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble
        }[self.precision]

        # testing Add211
        exact_add = Addition(v_x[0],
                             v_x[1],
                             precision=double_format,
                             tag="exact_add")
        # testing Mul211
        exact_mul = Multiplication(v_x[0],
                                   v_x[1],
                                   precision=double_format,
                                   tag="exact_mul")
        # testing Sub211
        exact_sub = Subtraction(v_x[1],
                                v_x[0],
                                precision=double_format,
                                tag="exact_sub")
        # testing Add222
        multi_add = Addition(exact_add,
                             exact_sub,
                             precision=double_format,
                             tag="multi_add")
        # testing Mul222
        multi_mul = Multiplication(multi_add,
                                   exact_mul,
                                   precision=double_format,
                                   tag="multi_mul")
        # testing Add221 and Add212 and Sub222
        multi_sub = Subtraction(Addition(exact_sub,
                                         v_x[1],
                                         precision=double_format,
                                         tag="add221"),
                                Addition(v_x[0],
                                         multi_mul,
                                         precision=double_format,
                                         tag="add212"),
                                precision=double_format,
                                tag="sub222")
        # testing Mul212 and Mul221
        mul212 = Multiplication(multi_sub,
                                v_x[0],
                                precision=double_format,
                                tag="mul212")
        mul221 = Multiplication(exact_mul,
                                v_x[1],
                                precision=double_format,
                                tag="mul221")
        # testing Sub221 and Sub212
        sub221 = Subtraction(mul212,
                             mul221.hi,
                             precision=double_format,
                             tag="sub221")
        sub212 = Subtraction(sub221,
                             mul212.lo,
                             precision=double_format,
                             tag="sub212")
        # testing FMA2111
        fma2111 = FMA(sub221.lo,
                      sub212.hi,
                      mul221.hi,
                      precision=double_format,
                      tag="fma2111")
        # testing FMA2112
        fma2112 = FMA(fma2111.lo,
                      fma2111.hi,
                      fma2111,
                      precision=double_format,
                      tag="fma2112")
        # testing FMA2212
        fma2212 = FMA(fma2112,
                      fma2112.hi,
                      fma2112,
                      precision=double_format,
                      tag="fma2212")
        # testing FMA2122
        fma2122 = FMA(fma2212.lo,
                      fma2212,
                      fma2212,
                      precision=double_format,
                      tag="fma2122")
        # testing FMA2222
        fma2222 = FMA(fma2122,
                      fma2212,
                      fma2111,
                      precision=double_format,
                      tag="fma2222")
        # testing Add122
        add122 = Addition(fma2222,
                          fma2222,
                          precision=self.precision,
                          tag="add122")
        # testing Add112
        add112 = Addition(add122,
                          fma2222,
                          precision=self.precision,
                          tag="add112")
        # testing Add121
        add121 = Addition(fma2222,
                          add112,
                          precision=self.precision,
                          tag="add121")
        # testing subnormalization
        multi_subnormalize = SpecificOperation(
            Addition(add121, add112, precision=double_format),
            Constant(3, precision=self.precision.get_integer_format()),
            specifier=SpecificOperation.Subnormalize,
            precision=double_format,
            tag="multi_subnormalize")
        # final result is brought back to the scalar precision
        result = Conversion(multi_subnormalize, precision=self.precision)

        scheme = Statement(Return(result))

        return scheme
示例#27
0
    def generate_cos_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, sin_C2, cos_C2, red_vx_lo):
        """Assemble the operation graph of the cosine reconstruction.

        With u = red_vx_lo, the returned node evaluates

            tabulated_cos + (tabulated_cos * cos_C2) * u^2
                - tabulated_sin * u
                - (sin_C2 * u^2) * (tabulated_sin * u)

        The two coefficient products (tagged "cos_C2" and "S2_u2") are
        computed in a signed custom fixed-point format (-1, 32); every
        other node uses <computation_precision>.
        """

        def coeff_format():
            # a fresh format object is built for each node using it
            return ML_Custom_FixedPoint_Format(-1, 32, signed=True)

        u = red_vx_lo

        # tabulated_cos * cos_C2
        scaled_cos_coeff = Multiplication(tabulated_cos, cos_C2,
                                          precision=coeff_format(),
                                          tag="cos_C2")
        # u^2
        u_squared = Multiplication(u, u,
                                   precision=computation_precision,
                                   tag="u2")
        # tabulated_sin * u
        sin_times_u = Multiplication(tabulated_sin, u,
                                     precision=computation_precision,
                                     tag="sin_u")
        # (tabulated_cos * cos_C2) * u^2
        cos_quadratic = Multiplication(scaled_cos_coeff, u_squared,
                                       precision=computation_precision,
                                       tag="cos_C2_u2")
        # sin_C2 * u^2
        sin_coeff_u2 = Multiplication(sin_C2, u_squared,
                                      precision=coeff_format(),
                                      tag="S2_u2")
        # (sin_C2 * u^2) * (tabulated_sin * u)
        sin_cubic = Multiplication(sin_coeff_u2, sin_times_u,
                                   precision=computation_precision,
                                   tag="S2_u3_sin")

        # accumulation: cosine terms first, then both sine terms removed
        cos_sum = Addition(tabulated_cos, cos_quadratic,
                           precision=computation_precision,
                           tag="cos_C2_u2_P_cos")
        cos_sum_minus_sin_u = Subtraction(cos_sum, sin_times_u,
                                          precision=computation_precision)
        return Subtraction(cos_sum_minus_sin_u, sin_cubic,
                           precision=computation_precision)
示例#28
0
    def generate_scheme(self):
        """Build the operation graph of the exponential implementation.

        The generated scheme is organised as follows:
          * special inputs (NaN, signaling NaN, +/- infinity) are filtered
            first and mapped to dedicated return statements;
          * inputs above <exp_overflow_bound> or below
            <exp_underflow_bound> return early (+infinity / +0);
          * otherwise the input is reduced as r = x - k * log(2), with
            log(2) split into <log2_hi> + <log2_lo>, and a polynomial
            approximating expm1 on [-log(2)/2, log(2)/2] is evaluated;
          * the result is rebuilt as 2^k * poly, with dedicated paths for
            late overflow and late underflow.

        The polynomial degree is searched iteratively: starting from the
        degree suggested by guessdegree, it is incremented until the sum
        of approximation and evaluation errors (the latter evaluated by
        gappa) meets the error goal derived from self.accuracy. When
        gappa is not installed the search stops after the first attempt.

        Returns:
            the root ConditionBlock of the implementation scheme
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            """ Return statement raising the exception flags listed in
                <args> when libm compliance is requested, plain Return
                of <return_value> otherwise """
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        # (+infinity for a positive input, +0 otherwise)
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        # range of the input reaching the standard path, and matching
        # range of the integer k = nearest(x / log(2))
        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log2_hi is rounded to fewer bits than the format field size,
        # the number of dropped bits being derived from the magnitude of k
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is the nearest integer in floating-point format, ik the same
        # value in integer format
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # three-way split of the approximation interval, used as
        # dichotomy for the evaluation error computation
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        # evaluation error of the argument reduction
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # half of the error budget is given to the polynomial approximation
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        # polynomial degree search: the degree is incremented until the
        # overall error meets <error_goal>
        while True:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            # sub-polynomial made of the coefficients of index 2 and above
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            # 1 + r + sub_poly(r), with r kept as its (hi, lo) pair
            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            # only exact_hi and exact_lo are substituted by interval
            # variables in the error evaluation graphs
            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                # one evaluation per sub-interval of <approx_interval_split>
                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # worst relative error over the three sub-intervals
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: the stored value may be a numeric object
                # with its own comparison operators
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        # late overflow: the scaling by 2^ik is split in two exponent
        # insertions, 2^(ik - offset) and 2^offset
        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division keeps the offset an integer whichever Python
        # version is used (true division would yield a fractional
        # exponent when the field size is odd)
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        # late underflow: the exponent is first shifted up by
        # <underflow_exp_offset>, the product is then scaled back down
        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        # standard path: 2^ik * poly
        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
示例#29
0
 def pointer_add(table_addr, offset):
     """ Build the node <table_addr> + <offset>, using the pointer format
         given by table_addr.get_precision_as_pointer_format() as the
         result precision """
     return Addition(
         table_addr,
         offset,
         precision=table_addr.get_precision_as_pointer_format())