Exemplo n.º 1
def classification_delta_kernel(ctx, outputs, targets, deltas):
    """Store ``onehot(targets) - outputs`` into ``deltas`` on the device.

    For every element ``(row, col)`` of ``deltas`` the kernel writes
    ``1 - outputs[row, col]`` when ``targets[row] == col`` and
    ``-outputs[row, col]`` otherwise.

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like cache of compiled
            kernels) and ``thread`` (handed to ``kernel.compile``).
        outputs: 2-D array of network outputs (kernel input).
        targets: 1-D ``numpy.int32`` array, one class index per row.
        deltas: array receiving the result; its first two dimensions must
            match those of ``outputs``.

    Raises:
        AssertionError: if shapes or the dtype of ``targets`` do not match.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    # One compiled kernel per output shape.
    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('outputs', Annotation(outputs, 'i')),
            Parameter('targets', Annotation(targets, 'i')),
            Parameter('deltas', Annotation(deltas, 'o'))
        ], """
        ${outputs.ctype} out = ${outputs.load_same};
        SIZE_T t = ${targets.load_idx}(${idxs[0]});
        SIZE_T idx = ${idxs[1]};
        ${deltas.ctype} d;
        if (t == idx) {
            d = 1.0f - out;
        } else {
            d = -out;
        }
        ${deltas.store_same}(d);
        """, guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
Exemplo n.º 2
def classification_delta_kernel(ctx, outputs, targets, deltas):
    """Compute classification deltas (one-hot target minus output) on the device.

    Each element ``deltas[row, col]`` becomes ``1 - outputs[row, col]`` if
    ``targets[row] == col``, else ``-outputs[row, col]``.

    Args:
        ctx: object with a ``kernel_cache`` mapping and a ``thread`` that is
            passed to ``kernel.compile``.
        outputs: 2-D input array of network outputs.
        targets: 1-D ``numpy.int32`` array of class indices (one per row).
        deltas: output array; first two dimensions match ``outputs``.

    Raises:
        AssertionError: on inconsistent shapes or a non-int32 ``targets``.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        parameters = [
            Parameter('outputs', Annotation(outputs, 'i')),
            Parameter('targets', Annotation(targets, 'i')),
            Parameter('deltas', Annotation(deltas, 'o')),
        ]
        code = """
        ${outputs.ctype} out = ${outputs.load_same};
        SIZE_T t = ${targets.load_idx}(${idxs[0]});
        SIZE_T idx = ${idxs[1]};
        ${deltas.ctype} d;
        if (t == idx) {
            d = 1.0f - out;
        } else {
            d = -out;
        }
        ${deltas.store_same}(d);
        """
        kernel = PureParallel(parameters, code, guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
Exemplo n.º 3
def logistic_derivative(context, activations, delta, dest=None):
    """Multiply ``delta`` element-wise by the logistic derivative ``a * (1 - a)``.

    Args:
        context: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        activations: array of logistic activations ``a`` (kernel input).
        delta: array of incoming deltas (kernel input).
        dest: array receiving ``delta * a * (1 - a)``; defaults to ``delta``,
            i.e. the update happens in place.
    """
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            # Annotate with ``delta`` itself so its own dtype/shape is used.
            Parameter('delta', Annotation(delta, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ], """
        ${activations.ctype} a = ${activations.load_same};
        ${delta.ctype} d = ${delta.load_same};

        d = d*a*(1.0f - a);

        ${dest.store_same}(d);
        """, guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
Exemplo n.º 4
def logistic(context, activations, bias, dest=None):
    """Apply the logistic function to ``activations + bias`` on the device.

    The pre-activation is clamped to ``[-45, 45]`` before ``exp`` is taken.

    Args:
        context: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        activations: 2-D input array.
        bias: 1-D array indexed by the second axis of ``activations``.
        dest: output array; defaults to ``activations`` (in-place update).

    Returns:
        The array the result was written to (``dest``).

    Raises:
        AssertionError: on first compilation, if
            ``activations.shape[1] != bias.shape[0]``.
    """
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (logistic, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        assert activations.shape[1] == bias.shape[0]

        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ], """
        ${activations.ctype} a = ${activations.load_same};
        ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});

        a += b;
        a = min(max(-45.0f, a), 45.0f);
        a = 1.0f / (1.0f + exp(-a));

        ${dest.store_same}(a);
        """, guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
Exemplo n.º 5
def logistic(context, activations, bias, dest=None):
    """Compute ``sigmoid(activations + bias)`` with a device kernel.

    The biased value is clamped to ``[-45, 45]`` before exponentiation.

    Args:
        context: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        activations: 2-D input array.
        bias: 1-D array, one entry per column of ``activations``.
        dest: output array; when omitted the result overwrites
            ``activations``.

    Returns:
        ``dest`` (or ``activations`` when ``dest`` was not given).

    Raises:
        AssertionError: on first compilation, if the bias length does not
            match ``activations.shape[1]``.
    """
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (logistic, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        assert activations.shape[1] == bias.shape[0]

        parameters = [
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ]
        code = """
        ${activations.ctype} a = ${activations.load_same};
        ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});

        a += b;
        a = min(max(-45.0f, a), 45.0f);
        a = 1.0f / (1.0f + exp(-a));

        ${dest.store_same}(a);
        """
        kernel = PureParallel(parameters, code, guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
Exemplo n.º 6
def test_guiding_output(thr):
    """Check a PureParallel kernel whose guiding array is its output.

    The kernel adds the two rows of a ``(2, N)`` input into an ``N``-element
    output and the result is compared with ``a[0] + a[1]`` computed on host.
    """

    N = 1000
    dtype = numpy.float32

    p = PureParallel(
        [
            Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
            Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i'))],
        """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """,
        guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
Exemplo n.º 7
def logistic_derivative(context, activations, delta, dest=None):
    """Scale ``delta`` by the derivative of the logistic function.

    Writes ``delta * a * (1 - a)`` element-wise, where ``a`` is the
    corresponding entry of ``activations``.

    Args:
        context: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        activations: input array of logistic activations.
        delta: input array of deltas.
        dest: output array; when omitted, ``delta`` is overwritten.
    """
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        parameters = [
            Parameter('activations', Annotation(activations, 'i')),
            # ``delta`` is described by its own array, not by ``activations``.
            Parameter('delta', Annotation(delta, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ]
        code = """
        ${activations.ctype} a = ${activations.load_same};
        ${delta.ctype} d = ${delta.load_same};

        d = d*a*(1.0f - a);

        ${dest.store_same}(d);
        """
        kernel = PureParallel(parameters, code, guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
Exemplo n.º 8
def test_guiding_output(thr):
    """Verify PureParallel when the output array guides the iteration.

    A kernel sums the two rows of a ``(2, N)`` input into an ``N``-element
    output; the device result must match ``a[0] + a[1]``.
    """

    N = 1000
    dtype = numpy.float32

    parameters = [
        Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i')),
    ]
    code = """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """
    p = PureParallel(parameters, code, guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
Exemplo n.º 9
def class_errors(ctx, expected, actual, errors):
    """Flag misclassified rows: ``errors[i]`` is 1 on error, 0 otherwise.

    For each row ``i`` the kernel takes the arg-max over ``actual[i, :]``.
    The row counts as an error when the maximum is below
    ``1 / n_classes + 0.001`` (low confidence) or when the arg-max differs
    from ``expected[i]``.

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        expected: 1-D ``numpy.int32`` array of class indices.
        actual: 2-D ``numpy.float32`` array, one row per sample.
        errors: 1-D ``numpy.int32`` output array, same shape as ``expected``.

    Raises:
        AssertionError: on first compilation, if shapes or dtypes mismatch.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (class_errors, expected.shape)

    if key not in kernel_cache:
        # target should be an integer
        logging.info("compiling " + str(key))
        assert expected.shape == errors.shape  # one neuron per class
        assert expected.shape == (actual.shape[0], )  # index of the class
        assert actual.dtype == numpy.float32
        assert expected.dtype == numpy.int32
        assert errors.dtype == numpy.int32
        # NOTE(review): the stores into ``errors`` are inferred from the
        # comments in the template - confirm the 0/1 convention with callers.
        kernel = PureParallel([
            Parameter('expected', Annotation(expected, 'i')),
            Parameter('actual', Annotation(actual, 'i')),
            Parameter('errors', Annotation(errors, 'o'))
        ], """
            SIZE_T expected = ${expected.load_idx}(${idxs[0]});;
            float maximum=0.0f;
            float value;
            SIZE_T maxindex = 0;

            SIZE_T tl = ${target_length};

            // calculate argmax
            for(SIZE_T j=0; j < tl; j++) {
                value = ${actual.load_idx}(${idxs[0]}, j);

                if (value > maximum) {
                    maximum = value;
                    maxindex = j;
                }
            }

            // If the confidence is too low, return an error
            if (maximum < (1.0f / ${target_length}.0f + 0.001f)) {
                ${errors.store_same}(1);
                return;
            }

            // compare argmax
            if (maxindex != expected) {
                ${errors.store_same}(1);
            } else {
                ${errors.store_same}(0);
            }
        """,
            guiding_array='expected',
            render_kwds={'target_length': numpy.int32(actual.shape[1])})

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](expected, actual, errors)
Exemplo n.º 10
def convolve2d_propagation(ctx, array, weights, dest):
    """Forward pass of a 2-D convolution layer, written into ``dest``.

    The kernel iterates over ``dest`` with five indices
    ``(number, channel, filter, xout, yout)`` and accumulates
    ``array[number, channel, x, y] * weights[channel, filter, i, j]`` over
    the filter window, with ``x = xout - i + (w0 - 1)`` and
    ``y = yout - j + (w1 - 1)``.

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        array: 4-D input array.
        weights: 4-D filter array ``(channels, filters, w0, w1)``.
        dest: output array that guides the kernel iteration.

    Returns:
        ``dest``.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_propagation, weights.shape, array.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling" + str(key))

        render_kwds = {
            'w0': weights.shape[2],
            'w1': weights.shape[3],
            'a0': array.shape[2],
            'a1': array.shape[3],
            'off0': int(weights.shape[2] - 1),
            'off1': int(weights.shape[3] - 1)
        }

        kernel_conv = PureParallel([
            Parameter('array', Annotation(array, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('dest', Annotation(dest, 'o'))
        ], """
        // Array dimensions:
        // array : (channels, width, height)
        // weights: (channels, filters, fwidth, fheight)
        // dest (channels, filters, owidth, oheight)

        float a = 0.0f;
        SIZE_T x, y, i, j;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${w0}; i++){
            for (j=0; j < ${w1}; j++){
                x = xout - i  + ${off0};
                y = yout - j  + ${off1};
                a += ${array.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, i, j); // channel, filter, i, j
            }
        }

        ${dest.store_same}(a);
        """, guiding_array='dest', render_kwds=render_kwds)

        kernel_cache[key] = kernel_conv.compile(thread, fast_math=True)

    # run convolution
    kernel_cache[key](array, weights, dest)

    return dest
Exemplo n.º 11
def convolve2d_propagation(ctx, array, weights, dest):
    """Run the convolution forward kernel and store the result in ``dest``.

    ``dest`` guides the iteration with indices
    ``(number, channel, filter, xout, yout)``; each element is the sum over
    the filter window of
    ``array[number, channel, xout - i + w0 - 1, yout - j + w1 - 1]
    * weights[channel, filter, i, j]``.

    Args:
        ctx: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        array: 4-D input array.
        weights: 4-D filter array ``(channels, filters, w0, w1)``.
        dest: output array.

    Returns:
        ``dest``.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_propagation, weights.shape, array.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling" + str(key))

        render_kwds = {
            'w0': weights.shape[2],
            'w1': weights.shape[3],
            'a0': array.shape[2],
            'a1': array.shape[3],
            'off0': int(weights.shape[2] - 1),
            'off1': int(weights.shape[3] - 1),
        }

        parameters = [
            Parameter('array', Annotation(array, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ]
        code = """
        // Array dimensions:
        // array : (channels, width, height)
        // weights: (channels, filters, fwidth, fheight)
        // dest (channels, filters, owidth, oheight)

        float a = 0.0f;
        SIZE_T x, y, i, j;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${w0}; i++){
            for (j=0; j < ${w1}; j++){
                x = xout - i  + ${off0};
                y = yout - j  + ${off1};
                a += ${array.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, i, j); // channel, filter, i, j
            }
        }

        ${dest.store_same}(a);
        """
        kernel_conv = PureParallel(
            parameters, code, guiding_array='dest', render_kwds=render_kwds)
        kernel_cache[key] = kernel_conv.compile(
            thread, fast_math=True)

    # run convolution
    kernel_cache[key](array, weights, dest)

    return dest
Exemplo n.º 12
def class_errors(ctx, expected, actual, errors):
    """Mark each sample as correctly (0) or wrongly (1) classified.

    Per row ``i`` the kernel computes the arg-max of ``actual[i, :]``; the
    sample is an error if the maximum is smaller than
    ``1 / n_classes + 0.001`` or if the arg-max is not ``expected[i]``.

    Args:
        ctx: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        expected: 1-D ``numpy.int32`` array of class indices.
        actual: 2-D ``numpy.float32`` array of per-class outputs.
        errors: 1-D ``numpy.int32`` output array shaped like ``expected``.

    Raises:
        AssertionError: on first compilation, if shapes or dtypes mismatch.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (class_errors, expected.shape)

    if key not in kernel_cache:
        # target should be an integer
        logging.info("compiling " + str(key))
        assert expected.shape == errors.shape # one neuron per class
        assert expected.shape == (actual.shape[0],) # index of the class
        assert actual.dtype == numpy.float32
        assert expected.dtype == numpy.int32
        assert errors.dtype == numpy.int32
        parameters = [
            Parameter('expected', Annotation(expected, 'i')),
            Parameter('actual', Annotation(actual, 'i')),
            Parameter('errors', Annotation(errors, 'o')),
        ]
        # NOTE(review): the stores into ``errors`` are inferred from the
        # template comments - confirm the 0/1 convention with callers.
        code = """
            SIZE_T expected = ${expected.load_idx}(${idxs[0]});;
            float maximum=0.0f;
            float value;
            SIZE_T maxindex = 0;

            SIZE_T tl = ${target_length};

            // calculate argmax
            for(SIZE_T j=0; j < tl; j++) {
                value = ${actual.load_idx}(${idxs[0]}, j);

                if (value > maximum) {
                    maximum = value;
                    maxindex = j;
                }
            }

            // If the confidence is too low, return an error
            if (maximum < (1.0f / ${target_length}.0f + 0.001f)) {
                ${errors.store_same}(1);
                return;
            }

            // compare argmax
            if (maxindex != expected) {
                ${errors.store_same}(1);
            } else {
                ${errors.store_same}(0);
            }
        """
        kernel = PureParallel(
            parameters, code, guiding_array='expected',
            render_kwds={'target_length' : numpy.int32(actual.shape[1])})

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](expected, actual, errors)
Exemplo n.º 13
def softmax(ctx, activations, bias, dest=None):
    """Softmax activation over the second axis of ``activations + bias``.

    One work item per row: it first stores ``exp(clamp(x + b, -45, 45))``
    for every column while summing the values, then divides each stored
    value by that sum.

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        activations: 2-D input array.
        bias: 1-D array indexed by column.
        dest: output array; defaults to ``activations`` (in-place update).
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    if dest is None:
        dest = activations

    key = (softmax, activations.shape)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))
        # Regression hidden layer
        kernel_softmax = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ], """
            float x;
            float b;
            float s = 0.0f;
            SIZE_T tl = ${target_length};
            for(SIZE_T j=0; j < tl; j++) {
                x = ${activations.load_idx}(${idxs[0]}, j);
                b = ${bias.load_idx}(j);
                x += b;
                x = exp(min(max(x, -45.0f), 45.0f));
                ${dest.store_idx}(${idxs[0]}, j, x);

                s += x;
            }

            // divide by sum
            for(SIZE_T j=0; j < tl; j++) {
                x = ${dest.load_idx}(${idxs[0]}, j);
                x /= s;
                ${dest.store_idx}(${idxs[0]}, j, x);
            }
        """,
            guiding_array=(activations.shape[0], ),
            render_kwds={'target_length': numpy.int32(activations.shape[1])})

        kernel_cache[key] = kernel_softmax.compile(thread)

    kernel_cache[key](activations, bias, dest)
Exemplo n.º 14
def softmax(ctx, activations, bias, dest=None):
    """Row-wise softmax of ``activations + bias`` computed on the device.

    Each work item handles one row: it writes
    ``exp(clamp(x + b, -45, 45))`` for every column while accumulating the
    sum, then normalises the stored values by that sum.

    Args:
        ctx: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        activations: 2-D input array.
        bias: 1-D array, one entry per column.
        dest: output array; when omitted ``activations`` is overwritten.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    if dest is None:
        dest = activations

    key = (softmax, activations.shape)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))
        # Regression hidden layer
        parameters = [
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ]
        code = """
            float x;
            float b;
            float s = 0.0f;
            SIZE_T tl = ${target_length};
            for(SIZE_T j=0; j < tl; j++) {
                x = ${activations.load_idx}(${idxs[0]}, j);
                b = ${bias.load_idx}(j);
                x += b;
                x = exp(min(max(x, -45.0f), 45.0f));
                ${dest.store_idx}(${idxs[0]}, j, x);

                s += x;
            }

            // divide by sum
            for(SIZE_T j=0; j < tl; j++) {
                x = ${dest.load_idx}(${idxs[0]}, j);
                x /= s;
                ${dest.store_idx}(${idxs[0]}, j, x);
            }
        """
        kernel_softmax = PureParallel(
            parameters, code, guiding_array=(activations.shape[0],),
            render_kwds={'target_length' : numpy.int32(activations.shape[1])})

        kernel_cache[key] = kernel_softmax.compile(thread)

    kernel_cache[key](activations, bias, dest)
Exemplo n.º 15
def test_zero_length_shape(thr):
    """Check PureParallel with zero-dimensional (empty-shape) arrays.

    The kernel doubles a single value; the device result is compared with
    ``(a * 2)`` computed on the host.
    """

    dtype = numpy.float32

    p = PureParallel(
        [
            Parameter('output', Annotation(Type(dtype, shape=tuple()), 'o')),
            Parameter('input', Annotation(Type(dtype, shape=tuple()), 'i'))],
        """
        float t = ${input.load_idx}();
        ${output.store_idx}(t * 2);
        """,
        guiding_array=tuple())

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = (a * 2).astype(dtype)

    assert diff_is_negligible(res_dev.get(), res_ref)
Exemplo n.º 16
def test_zero_length_shape(thr):
    """Verify that PureParallel works for arrays with an empty shape.

    A kernel doubles the single input value; the result read back from the
    device must match ``a * 2`` cast to ``float32``.
    """

    dtype = numpy.float32

    parameters = [
        Parameter('output', Annotation(Type(dtype, shape=tuple()), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=tuple()), 'i')),
    ]
    code = """
        float t = ${input.load_idx}();
        ${output.store_idx}(t * 2);
        """
    p = PureParallel(parameters, code, guiding_array=tuple())

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = (a * 2).astype(dtype)

    assert diff_is_negligible(res_dev.get(), res_ref)
Exemplo n.º 17
def get_procs(thr, N):
    """Build and compile the four device computations used on length-N data.

    Args:
        thr: thread object the computations are compiled for.
        N: length of the 1-D complex arrays.

    Returns:
        Tuple ``(fft_unimod, mag_square, apply_mask, combine_mag_phi)`` of
        compiled computations:

        * ``fft_unimod`` - FFT with an output transformation attached.
        * ``mag_square`` - stores ``|input|^2`` in the real part, 0 in the
          imaginary part.
        * ``apply_mask`` - multiplies a complex array by a real mask.
        * ``combine_mag_phi`` - combines ``sqrt(max(mag_square.x, 0))`` with
          the phase angle of ``phase`` into a complex number.
    """
    fft = FFTFactory.create(thr, (N,), compile_=False)
    # NOTE(review): the branch structure of this transformation (zero for
    # idx > N/2, unit phasor otherwise) is inferred - confirm with upstream.
    unimod_trans = Transformation(
        [Parameter('output', Annotation(Type(np.complex128, N), 'o')),
         Parameter('input', Annotation(Type(np.complex128, N), 'i'))],
        """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """,
        render_kwds=dict(polar_unit=functions.polar_unit(dtype=np.float64), N=N)
    )
    fft.parameter.output.connect(unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)
    mag_square = PureParallel(
        [Parameter('output', Annotation(Type(np.complex128, N), 'o')),
         Parameter('input', Annotation(Type(np.complex128, N), 'i'))],
        """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
        """
    )
    mag_square = mag_square.compile(thr)
    apply_mask = PureParallel(
        [Parameter('output', Annotation(Type(np.complex128, N), 'o')),
         Parameter('origin', Annotation(Type(np.complex128, N), 'i')),
         Parameter('mask', Annotation(Type(np.double, N), 'i'))],
        """
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));
        """,
        render_kwds=dict(mul=functions.mul(np.complex128, np.double))
    )
    apply_mask = apply_mask.compile(thr)
    combine_mag_phi = PureParallel(
        [Parameter('output', Annotation(Type(np.complex128, N), 'o')),
         Parameter('mag_square', Annotation(Type(np.complex128, N), 'i')),
         Parameter('phase', Annotation(Type(np.complex128, N), 'i'))],
        """
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
        """,
        render_kwds=dict(pow=functions.pow(np.double), polar=functions.polar(np.double))
    )
    combine_mag_phi = combine_mag_phi.compile(thr)
    return fft_unimod, mag_square, apply_mask, combine_mag_phi
Exemplo n.º 18
def convolve2d_gradient(ctx, prev_deltas, deltas, gradient_intermediate):
    """Fill ``gradient_intermediate`` with per-position gradient products.

    For every index ``(number, dx, dy, channel, filter, fx, fy)`` the kernel
    stores ``deltas[number, filter, dx, dy]
    * prev_deltas[number, channel, dx + fx, dy + fy]``. The sum over the
    delta positions is not computed here (see the comment in the template).

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        prev_deltas: 4-D input ``(n, channels, p_width, p_height)``.
        deltas: 4-D input ``(n, filters, d_width, d_height)``.
        gradient_intermediate: 7-D output
            ``(n, d_width, d_height, channels, filters, f_width, f_height)``.

    Returns:
        ``gradient_intermediate``.

    Raises:
        AssertionError: on first compilation, if the shapes are inconsistent.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_gradient, prev_deltas.shape, deltas.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        n, channels, p_width, p_height = prev_deltas.shape
        n_1, filters, d_width, d_height = deltas.shape
        n, d_width_1, d_height_1, channels_1, filters_1, f_width, f_height = gradient_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(prev_deltas, deltas, 'gradient')
        assert expected_shape == gradient_intermediate.shape
        assert d_width_1 == d_width
        assert d_height_1 == d_height

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        kernel = PureParallel([
            Parameter('prev_deltas', Annotation(prev_deltas, 'i')),
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('gradient_intermediate',
                      Annotation(gradient_intermediate, 'o'))
        ], """

        const SIZE_T number = ${idxs[0]};
        const SIZE_T dx = ${idxs[1]};
        const SIZE_T dy = ${idxs[2]};
        const SIZE_T channel = ${idxs[3]};
        const SIZE_T filter = ${idxs[4]};
        const SIZE_T fx = ${idxs[5]};
        const SIZE_T fy = ${idxs[6]};

        // weight gradient at the weight position fx, fy is defined by the sum
        //       (deltas * prev_deltas[fx:d_width+fx, fy:fy+d_height]).sum()
        // alternatively we can store all delta positions and sum in a separate kernel - this is what we do now.

        float g = ${deltas.load_idx}(number, filter, dx, dy) * ${prev_deltas.load_idx}(number, channel, dx+fx, dy+fy);

        ${gradient_intermediate.store_same}(g);
        """, guiding_array='gradient_intermediate', render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](prev_deltas, deltas, gradient_intermediate)

    return gradient_intermediate
Exemplo n.º 19
def convolve2d_backprop(ctx, deltas, weights, deltas_intermediate):
    """Back-propagate ``deltas`` through the filters into ``deltas_intermediate``.

    For every index ``(number, channel, filter, xout, yout)`` the kernel
    accumulates ``deltas[...] * weights[channel, filter, fi, fj]`` over the
    filter window, reading the weights in flipped order and skipping
    positions that fall outside ``d_width`` x ``d_height``.

    Args:
        ctx: object exposing ``kernel_cache`` (dict-like) and ``thread``
            (passed to ``kernel.compile``).
        deltas: 4-D input ``(n, filters, d_width, d_height)``.
        weights: 4-D input ``(channels, filters, f_width, f_height)``.
        deltas_intermediate: 5-D output
            ``(n, channels, filters, p_width, p_height)``.

    Returns:
        ``deltas_intermediate``.

    Raises:
        AssertionError: on first compilation, if the shapes are inconsistent.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_backprop, deltas.shape, weights.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        channels, filters, f_width, f_height = weights.shape
        n_1, filters_1, d_width, d_height = deltas.shape
        n, channels_1, filters_2, p_width, p_height = deltas_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_2 == filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(deltas, weights, 'backprop')
        assert expected_shape == deltas_intermediate.shape

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # NOTE(review): ``deltas`` is unpacked above as (n, filters, ...) but
        # the template indexes its second axis with ``channel`` - confirm
        # whether ``filter`` was intended.
        # The kernel
        kernel = PureParallel([
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('deltas_intermediate',
                      Annotation(deltas_intermediate, 'o'))
        ], """
        float d = 0.0f;
        SIZE_T x, y, i, j, fi, fj;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${f_width}; i++){
            for (j=0; j < ${f_height}; j++){
                x = xout - i;
                if (x < 0) continue;
                if (x >= ${d_width}) continue;
                y = yout - j;
                if (y < 0) continue;
                if (y >= ${d_height}) continue;
                // acces weights in flipped order!
                fi = ${f_width} - i - 1;
                fj = ${f_height} - j - 1;
                d += ${deltas.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, fi, fj);
            }
        }

        ${deltas_intermediate.store_same}(d);
        """, guiding_array='deltas_intermediate', render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](deltas, weights, deltas_intermediate)

    return deltas_intermediate
Exemplo n.º 20
def get_procs(thr, N):
    """Create the compiled device computations for length-``N`` signals.

    Args:
        thr: thread object used to compile every computation.
        N: number of elements in the 1-D complex arrays.

    Returns:
        Tuple ``(fft_unimod, mag_square, apply_mask, combine_mag_phi)``:

        * ``fft_unimod`` - FFT whose output passes through a transformation.
        * ``mag_square`` - writes ``|input|^2`` to the real part and zero to
          the imaginary part.
        * ``apply_mask`` - element-wise product of a complex array and a
          real mask.
        * ``combine_mag_phi`` - builds a complex value from
          ``sqrt(max(mag_square.x, 0))`` and the phase angle of ``phase``.
    """
    fft = FFTFactory.create(thr, (N, ), compile_=False)
    # NOTE(review): the if/else layout of this transformation (zero for
    # idx > N/2, unit phasor otherwise) is inferred - confirm with upstream.
    unimod_trans = Transformation([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('input', Annotation(Type(np.complex128, N), 'i'))
    ], """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
    """,
        render_kwds=dict(polar_unit=functions.polar_unit(dtype=np.float64), N=N))
    fft.parameter.output.connect(
        unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    mag_square = PureParallel([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('input', Annotation(Type(np.complex128, N), 'i'))
    ], '''
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
    ''')
    mag_square = mag_square.compile(thr)

    apply_mask = PureParallel([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('origin', Annotation(Type(np.complex128, N), 'i')),
        Parameter('mask', Annotation(Type(np.double, N), 'i'))
    ], '''
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));
    ''',
        render_kwds=dict(mul=functions.mul(np.complex128, np.double)))
    apply_mask = apply_mask.compile(thr)

    combine_mag_phi = PureParallel([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('mag_square', Annotation(Type(np.complex128, N), 'i')),
        Parameter('phase', Annotation(Type(np.complex128, N), 'i'))
    ], '''
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
    ''',
        render_kwds=dict(pow=functions.pow(np.double), polar=functions.polar(np.double)))
    combine_mag_phi = combine_mag_phi.compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi
Exemplo n.º 21
def convolve2d_backprop(ctx, deltas, weights, deltas_intermediate):
    """Compute the backward convolution into ``deltas_intermediate``.

    The kernel iterates over ``(number, channel, filter, xout, yout)`` and
    sums ``deltas[...] * weights[channel, filter, fi, fj]`` over the filter
    window with the weights accessed in flipped order; window positions
    outside ``d_width`` x ``d_height`` are skipped.

    Args:
        ctx: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        deltas: 4-D input ``(n, filters, d_width, d_height)``.
        weights: 4-D input ``(channels, filters, f_width, f_height)``.
        deltas_intermediate: 5-D output
            ``(n, channels, filters, p_width, p_height)``.

    Returns:
        ``deltas_intermediate``.

    Raises:
        AssertionError: on first compilation, if the shapes are inconsistent.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_backprop, deltas.shape, weights.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        channels, filters, f_width, f_height = weights.shape
        n_1, filters_1, d_width, d_height = deltas.shape
        n, channels_1, filters_2, p_width, p_height = deltas_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_2 == filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(deltas, weights, 'backprop')
        assert expected_shape == deltas_intermediate.shape

        # Render keywords
        render_kwds = {
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # NOTE(review): ``deltas`` is unpacked above as (n, filters, ...) but
        # the template indexes its second axis with ``channel`` - confirm
        # whether ``filter`` was intended.
        # The kernel
        parameters = [
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('deltas_intermediate', Annotation(deltas_intermediate, 'o')),
        ]
        code = """
        float d = 0.0f;
        SIZE_T x, y, i, j, fi, fj;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${f_width}; i++){
            for (j=0; j < ${f_height}; j++){
                x = xout - i;
                if (x < 0) continue;
                if (x >= ${d_width}) continue;
                y = yout - j;
                if (y < 0) continue;
                if (y >= ${d_height}) continue;
                // acces weights in flipped order!
                fi = ${f_width} - i - 1;
                fj = ${f_height} - j - 1;
                d += ${deltas.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, fi, fj);
            }
        }

        ${deltas_intermediate.store_same}(d);
        """
        kernel = PureParallel(
            parameters, code,
            guiding_array='deltas_intermediate', render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(
            thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](deltas, weights, deltas_intermediate)

    return deltas_intermediate
Exemplo n.º 22
def convolve2d_gradient(ctx, prev_deltas, deltas, gradient_intermediate):
    """Store the un-summed weight-gradient products on the device.

    Each element ``(number, dx, dy, channel, filter, fx, fy)`` of
    ``gradient_intermediate`` receives ``deltas[number, filter, dx, dy]
    * prev_deltas[number, channel, dx + fx, dy + fy]``. Summation over the
    delta positions is left to a separate step (see the template comment).

    Args:
        ctx: object with ``kernel_cache`` (dict-like) and ``thread``
            (forwarded to ``kernel.compile``).
        prev_deltas: 4-D input ``(n, channels, p_width, p_height)``.
        deltas: 4-D input ``(n, filters, d_width, d_height)``.
        gradient_intermediate: 7-D output
            ``(n, d_width, d_height, channels, filters, f_width, f_height)``.

    Returns:
        ``gradient_intermediate``.

    Raises:
        AssertionError: on first compilation, if the shapes are inconsistent.
    """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_gradient, prev_deltas.shape, deltas.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        n, channels, p_width, p_height = prev_deltas.shape
        n_1, filters, d_width, d_height = deltas.shape
        n, d_width_1, d_height_1, channels_1, filters_1, f_width, f_height = gradient_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(prev_deltas, deltas, 'gradient')
        assert expected_shape == gradient_intermediate.shape
        assert d_width_1 == d_width
        assert d_height_1 == d_height

        # Render keywords
        render_kwds = {
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        parameters = [
            Parameter('prev_deltas', Annotation(prev_deltas, 'i')),
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('gradient_intermediate', Annotation(gradient_intermediate, 'o')),
        ]
        code = """

        const SIZE_T number = ${idxs[0]};
        const SIZE_T dx = ${idxs[1]};
        const SIZE_T dy = ${idxs[2]};
        const SIZE_T channel = ${idxs[3]};
        const SIZE_T filter = ${idxs[4]};
        const SIZE_T fx = ${idxs[5]};
        const SIZE_T fy = ${idxs[6]};

        // weight gradient at the weight position fx, fy is defined by the sum
        //       (deltas * prev_deltas[fx:d_width+fx, fy:fy+d_height]).sum()
        // alternatively we can store all delta positions and sum in a separate kernel - this is what we do now.

        float g = ${deltas.load_idx}(number, filter, dx, dy) * ${prev_deltas.load_idx}(number, channel, dx+fx, dy+fy);

        ${gradient_intermediate.store_same}(g);
        """
        kernel = PureParallel(
            parameters, code,
            guiding_array='gradient_intermediate', render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(
            thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](prev_deltas, deltas, gradient_intermediate)

    return gradient_intermediate