Example #1
def logistic_derivative(context, activations, delta, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache.keys():
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('delta', Annotation(activations, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${delta.ctype} d = ${delta.load_same};

        d = d*a*(1.0f - a);

        ${dest.store_same}(d);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
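For reference, a minimal host-side NumPy sketch of what this kernel computes (a hypothetical helper, assuming float32 arrays of matching shape; it is not the GPU code path):

import numpy

def logistic_derivative_reference(activations, delta):
    # activations already holds f(x) for the logistic f, so f'(x) = a * (1 - a)
    return delta * activations * (1.0 - activations)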
Example #2
def renormalize_kernel(ctx, array, norm, constraint):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    constraint = numpy.float32(constraint)

    key = (renormalize_kernel, array.shape, norm.shape, thread._context)
    if key not in kernel_cache.keys():
        comp = PureParallel([
            Parameter('array', Annotation(array, 'io')),
            Parameter('norm', Annotation(norm, 'i')),
            Parameter('constraint', Annotation(constraint))
        ],
                            """
        // Renormalize if necessary
        float n = ${norm.load_idx}(${idxs[1]});
        float c = ${constraint};
        if ( n > c ) {  
            float a = ${array.load_same};
            a = a * c / n;
            ${array.store_same}(a);
        }
            
        """,
                            guiding_array='array')

        kernel_cache[key] = comp.compile(thread)

    kernel_cache[key](array, norm, constraint)
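As a host-side sketch of the same max-norm constraint (a hypothetical helper; it assumes array is 2-D and norm holds one value per column, which is what the kernel's use of idxs[1] implies):

import numpy

def renormalize_reference(array, norm, constraint):
    # rescale only the columns whose norm exceeds the constraint
    scale = numpy.ones_like(norm)
    over = norm > constraint
    scale[over] = constraint / norm[over]
    return array * scale[numpy.newaxis, :]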
Example #3
def classification_delta_kernel(ctx, outputs, targets, deltas):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache.keys():
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('outputs', Annotation(outputs, 'i')),
            Parameter('targets', Annotation(targets, 'i')),
            Parameter('deltas', Annotation(deltas, 'o'))
        ],
                              """
        ${outputs.ctype} out = ${outputs.load_same};
        SIZE_T t = ${targets.load_idx}(${idxs[0]});
        SIZE_T idx = ${idxs[1]};
        ${deltas.ctype} d;
        if (t == idx) {
            d = 1.0f - out;
        } else {
            d = -out;
        }
        ${deltas.store_same}(d);
        """,
                              guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
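The kernel above produces the usual softmax/cross-entropy output delta, one-hot(targets) minus outputs. A hypothetical NumPy restatement:

import numpy

def classification_delta_reference(outputs, targets):
    # one-hot(targets) - outputs
    one_hot = numpy.zeros_like(outputs)
    one_hot[numpy.arange(outputs.shape[0]), targets] = 1.0
    return one_hot - outputs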
Example #4
def linear(context, activations, bias, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (linear, activations.shape, thread)
    if key not in kernel_cache.keys():
        log.info("compiling " + str(key))
        assert activations.shape[1] == bias.shape[0]

        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});

        a += b;

        ${dest.store_same}(a);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
Example #5
    def sub(self, mat1, mat2, dest):
        """
        Subtract mat2 from mat1.
        ATTENTION: if a value is nan, the result will be zero.
        """
        kernel_cache = self.kernel_cache
        thread = self.thread
        key = (self.sub, mat1.dtype, mat1.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert mat1.shape == mat2.shape == dest.shape
            kernel_delta_output = PureParallel([
                Parameter('mat1', Annotation(mat1, 'i')),
                Parameter('mat2', Annotation(mat2, 'i')),
                Parameter('dest', Annotation(dest, 'o'))
            ],
                                               """
            // Element-wise subtraction; NaN in either operand yields zero
            ${mat1.ctype} m1 = ${mat1.load_same};
            ${mat2.ctype} m2 = ${mat2.load_same};
            if (isnan(m1) || isnan(m2)) {
                ${dest.store_same}(0.0f);
            } else {                
                ${dest.ctype} d = m1 - m2;
                ${dest.store_same}(d);
            }
            """,
                                               guiding_array='dest')

            kernel_cache[key] = kernel_delta_output.compile(thread)

        kernel_cache[key](mat1, mat2, dest)
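A hypothetical NumPy equivalent of the NaN-aware subtraction (host-side sketch only):

import numpy

def sub_reference(mat1, mat2):
    # mat1 - mat2, with positions where either operand is NaN forced to zero
    nan_mask = numpy.isnan(mat1) | numpy.isnan(mat2)
    return numpy.where(nan_mask, numpy.float32(0.0), mat1 - mat2)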
Example #6
    def softplus(self, activations, bias, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = activations

        key = (self.softplus, activations.shape, thread)
        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert activations.shape[1] == bias.shape[0]

            kernel = PureParallel([
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('bias', Annotation(bias, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${activations.ctype} a = ${activations.load_same};
            ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});
            
            a += b;   
            a = min(max(-45.0f, a), 45.0f);     
            a = log(1.0f + exp(a));
            
            ${dest.store_same}(a);
            """,
                                  guiding_array='activations')

            kernel_cache[key] = kernel.compile(thread, fast_math=True)

        # Run kernel
        kernel_cache[key](activations, bias, dest)

        return dest
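The kernel adds the per-column bias, clamps the argument to [-45, 45] to keep exp() finite, and applies softplus. A hypothetical NumPy sketch (log1p is used here purely for numerical convenience):

import numpy

def softplus_reference(activations, bias):
    a = numpy.clip(activations + bias, -45.0, 45.0)
    return numpy.log1p(numpy.exp(a))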
Example #7
    def softplus_derivative(self, activations, delta, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = delta

        key = (self.softplus_derivative, activations.shape, thread)
        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            kernel = PureParallel([
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('delta', Annotation(activations, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${activations.ctype} a = ${activations.load_same};
            ${delta.ctype} d = ${delta.load_same};
            
            // The softplus function has already been applied to the
            // activations, so we need to apply the inverse of softplus
            // chained with the logistic function
            // (the logistic function is the derivative of softplus).
            a = min(max(-45.0f, a), 45.0f);
            a = 1.0f / (1.0f / (exp(a) - 1.0f) + 1.0f);
            d = d*a;
            
            ${dest.store_same}(d);
            """,
                                  guiding_array='activations')

            kernel_cache[key] = kernel.compile(thread)

        # Run kernel
        kernel_cache[key](activations, delta, dest)
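To see why the formula works: the stored value is a = softplus(x) = log(1 + exp(x)), so exp(a) - 1 = exp(x) and logistic(x) = 1 / (1 + exp(-x)) = 1 / (1 / (exp(a) - 1) + 1), which is exactly the expression in the kernel. A hypothetical host-side sketch:

import numpy

def softplus_derivative_reference(softplus_activations, delta):
    # recover logistic(x) from a = softplus(x), mirroring the kernel's clamping
    a = numpy.clip(softplus_activations, -45.0, 45.0)
    logistic = 1.0 / (1.0 / (numpy.exp(a) - 1.0) + 1.0)
    return delta * logistic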
Example #8
    def nan_to_zeros(self, array, dest=None):
        kernel_cache, thread = self.kernel_cache, self.thread

        if dest is None:
            dest = array

        key = (self.nan_to_zeros, array.shape, thread)
        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))

            kernel = PureParallel([
                Parameter('array', Annotation(array, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
                                  """
            ${array.ctype} a = ${array.load_same};
            if (isnan(a)) {
                ${dest.store_same}(0.0f);
            } else {
                // copy the value through so a separate dest array is fully written
                ${dest.store_same}(a);
            }
            """,
                                  guiding_array='array')

            kernel_cache[key] = kernel.compile(thread, fast_math=True)

        # Run kernel
        kernel_cache[key](array, dest)

        return dest
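A hypothetical one-line NumPy equivalent of the kernel (including the pass-through of non-NaN values):

import numpy

def nan_to_zeros_reference(array):
    return numpy.where(numpy.isnan(array), numpy.float32(0.0), array)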
Example #9
    def add(self, mat1, mat2, dest):
        kernel_cache = self.kernel_cache
        thread = self.thread
        key = (self.add, mat1.dtype, mat1.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert mat1.shape == mat2.shape == dest.shape
            kernel_delta_output = PureParallel([
                Parameter('mat1', Annotation(mat1, 'i')),
                Parameter('mat2', Annotation(mat2, 'i')),
                Parameter('dest', Annotation(dest, 'o'))
            ],
                                               """
            // Element-wise addition
            ${mat1.ctype} m1 = ${mat1.load_same};
            ${mat2.ctype} m2 = ${mat2.load_same};
            ${dest.ctype} d = m1 + m2;
            ${dest.store_same}(d);
            """,
                                               guiding_array='dest')

            kernel_cache[key] = kernel_delta_output.compile(thread)

        kernel_cache[key](mat1, mat2, dest)
Example #10
def class_errors(ctx, expected, actual, errors):
    """ expected int32, actual float, errors int32 """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (class_errors, expected.shape)

    if key not in kernel_cache.keys():
        # target should be an integer
        logging.info("compiling " + str(key))
    assert expected.shape == errors.shape  # one error flag per sample
    assert expected.shape == (actual.shape[0], )  # one class index per sample
        assert actual.dtype == numpy.float32
        assert expected.dtype == numpy.int32
        assert errors.dtype == numpy.int32
        kernel = PureParallel(
            [
                Parameter('expected', Annotation(expected, 'i')),
                Parameter('actual', Annotation(actual, 'i')),
                Parameter('errors', Annotation(errors, 'o'))
            ],
            """
            SIZE_T expected = ${expected.load_idx}(${idxs[0]});
            float maximum=0.0f;
            float value;
            SIZE_T maxindex = 0;

            SIZE_T tl = ${target_length};

            // calculate argmax
            for(SIZE_T j=0; j < tl; j++) {
                value = ${actual.load_idx}(${idxs[0]}, j);

                if (value > maximum) {
                    maximum = value;
                    maxindex = j;
                }
            }

            // If the confidence is too low, return an error
            if (maximum < (1.0f / ${target_length}.0f + 0.001f)) {
                ${errors.store_same}(1);
                return;
            };

            // compare argmax
            if (maxindex != expected) {
                ${errors.store_same}(1);
            } else {
                ${errors.store_same}(0);
            }

        """,
            guiding_array='expected',
            render_kwds={'target_length': numpy.int32(actual.shape[1])})

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](expected, actual, errors)
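A hypothetical NumPy restatement of the error rule, assuming non-negative activations (e.g. softmax outputs), which matches the kernel's initialisation of maximum to 0:

import numpy

def class_errors_reference(expected, actual):
    num_classes = actual.shape[1]
    predicted = numpy.argmax(actual, axis=1)
    confidence = numpy.max(actual, axis=1)
    # an error if the prediction is wrong or barely above chance level
    too_uncertain = confidence < (1.0 / num_classes + 0.001)
    return ((predicted != expected) | too_uncertain).astype(numpy.int32)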
Example #11
def convolve2d_propagation(ctx, array, weights, dest):
    """ The output is the valid discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_propagation, weights.shape, array.shape, thread)
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))

        channels, filters, owidth, oheight = weights.shape[0], weights.shape[
            1], dest.shape[1], dest.shape[2]

        render_kwds = {
            'w0': weights.shape[2],
            'w1': weights.shape[3],
            'a0': array.shape[2],
            'a1': array.shape[3],
            'off0': int(weights.shape[2] - 1),
            'off1': int(weights.shape[3] - 1)
        }

        kernel_conv = PureParallel([
            Parameter('array', Annotation(array, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('dest', Annotation(dest, 'o'))
        ],
                                   """
        // Array dimensions:
        // array  : (number, channels, width, height)
        // weights: (channels, filters, fwidth, fheight)
        // dest   : (number, channels, filters, owidth, oheight)

        float a = 0.0f;
        SIZE_T x, y, i, j;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${w0}; i++){
            for (j=0; j < ${w1}; j++){
                x = xout - i  + ${off0};
                y = yout - j  + ${off1};
                a += ${array.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, i, j); // channel, filter, i, j
            }
        }

        ${dest.store_same}(a);

        """,
                                   guiding_array='dest',
                                   render_kwds=render_kwds)
        kernel_cache[key] = kernel_conv.compile(thread, fast_math=True)

    # run convolution
    kernel_cache[key](array, weights, dest)

    return dest
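As a cross-check of the indexing, the kernel computes, for every (sample, channel, filter) triple, the 'valid' 2-D convolution of the input plane with the corresponding filter. A slow hypothetical reference (it assumes SciPy is available and the 5-D dest layout implied by the kernel's five indices):

import numpy
from scipy.signal import convolve2d

def convolve2d_propagation_reference(array, weights):
    # array:   (n, channels, width, height)
    # weights: (channels, filters, fwidth, fheight)
    n, channels = array.shape[:2]
    filters = weights.shape[1]
    owidth = array.shape[2] - weights.shape[2] + 1
    oheight = array.shape[3] - weights.shape[3] + 1
    dest = numpy.zeros((n, channels, filters, owidth, oheight), dtype=numpy.float32)
    for s in range(n):
        for c in range(channels):
            for f in range(filters):
                dest[s, c, f] = convolve2d(array[s, c], weights[c, f], mode='valid')
    return dest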
Example #12
    def _scalar(self, val):
        """
        Adds a persistent scalar to the plan, and returns the corresponding
        :py:class:`KernelArgument`.
        """
        name = self._translator(self._persistent_value_idgen())
        ann = Annotation(val)
        self._internal_annotations[name] = ann
        self._persistent_values[name] = ann.type(val)
        return KernelArgument(name, ann.type)
Example #13
    def _process_kernel_arguments(self, args):
        """
        Scan through kernel arguments passed by the user, check types,
        and wrap ad hoc values if necessary.

        Does not change the plan state.
        """
        processed_args = []
        adhoc_idgen = IdGen('_adhoc')
        adhoc_values = {}

        for arg in args:
            if not isinstance(arg, KernelArgument):
                if hasattr(arg, 'shape') and hasattr(arg, 'dtype'):
                    if len(arg.shape) > 0:
                        raise ValueError(
                            "Arrays are not allowed as ad hoc arguments")

                    # Not creating a new persistent scalar with _scalar(),
                    # because the kernel compilation may fail,
                    # in which case we would have to roll back the plan state.
                    # These arguments are local to this kernel anyway,
                    # so there's no need in registering them in the plan.
                    name = self._translator(adhoc_idgen())
                    adhoc_values[name] = arg
                    annotation = Annotation(Type(arg.dtype))
                    arg = KernelArgument(name, annotation.type)
                else:
                    raise TypeError("Unknown argument type: " + str(type(arg)))
            else:
                annotation = self._get_annotation(arg.name)

            processed_args.append(Parameter(arg.name, annotation))

        return processed_args, adhoc_values
Example #14
def softmax(ctx, activations, bias, dest=None):
    """ Softmax Activation Function """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    if dest is None:
        dest = activations

    key = (softmax, activations.shape)
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))
        # Regression hidden layer
        kernel_softmax = PureParallel(
            [
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('bias', Annotation(bias, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
            """
            float x;
            float b;
            float s = 0.0f;
            SIZE_T tl = ${target_length};
            for(SIZE_T j=0; j < tl; j++) {
                x = ${activations.load_idx}(${idxs[0]}, j);
                b = ${bias.load_idx}(j);
                x += b;
                x = exp(min(max(x, -45.0f), 45.0f));
                ${dest.store_idx}(${idxs[0]}, j, x);

                s += x;
            }

            // divide by sum
            for(SIZE_T j=0; j < tl; j++) {
                x = ${dest.load_idx}(${idxs[0]}, j);
                x /= s;
                ${dest.store_idx}(${idxs[0]}, j, x);
            }
        """,
            guiding_array=(activations.shape[0], ),
            render_kwds={'target_length': numpy.int32(activations.shape[1])})

        kernel_cache[key] = kernel_softmax.compile(thread)

    kernel_cache[key](activations, bias, dest)
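A hypothetical NumPy sketch of the row-wise softmax with bias and clamping performed above:

import numpy

def softmax_reference(activations, bias):
    z = numpy.clip(activations + bias, -45.0, 45.0)
    e = numpy.exp(z)
    return e / e.sum(axis=1, keepdims=True)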
Example #15
    def persistent_array(self, arr):
        """
        Adds a persistent GPU array to the plan, and returns the corresponding
        :py:class:`KernelArgument`.
        """
        name = self._translator(self._persistent_value_idgen())
        ann = Annotation(arr, 'i')
        self._internal_annotations[name] = ann
        self._persistent_values[name] = self._thread.to_device(arr)
        return KernelArgument(name, ann.type)
Example #16
    def _connect(self, ntr):

        # At this point we assume that ``ntr`` describes a valid connection.
        # All sanity checks are performed in ``connect()``.

        for tr_param in ntr.trf.signature.parameters.values():
            node_name = ntr.node_from_tr[tr_param.name]

            if node_name == ntr.connector_node_name:
                ann = self.leaf_parameters[node_name].annotation
                if ann.input and ann.output:
                    # splitting the 'io' leaf
                    updated_role = 'i' if ntr.output else 'o'

                    # Since it is an array parameter, we do not need to worry
                    # about preserving the default value (it can't have one).
                    self.leaf_parameters[node_name] = Parameter(
                        node_name, Annotation(ann.type, role=updated_role))
                else:
                    # 'i' or 'o' leaf is hidden by the transformation
                    del self.leaf_parameters[node_name]

            else:
                if (node_name in self.leaf_parameters
                        and self.leaf_parameters[node_name].annotation.array):
                    ann = self.leaf_parameters[node_name].annotation
                    if (ann.input and ntr.output) or (ann.output
                                                      and not ntr.output):
                        # Joining 'i' and 'o' paths into an 'io' leaf.
                        # Since it is an array parameter, we do not need to worry
                        # about preserving the default value (it can't have one).
                        self.leaf_parameters[node_name] = Parameter(
                            node_name, Annotation(ann.type, role='io'))
                else:
                    self.leaf_parameters[node_name] = tr_param.rename(
                        node_name)

            if node_name not in self.nodes:
                self.nodes[node_name] = Node()

        self.nodes[ntr.connector_node_name] = self.nodes[
            ntr.connector_node_name].connect(ntr)
Example #17
    def temp_array(self, shape, dtype, strides=None):
        """
        Adds a temporary GPU array to the plan, and returns the corresponding
        :py:class:`KernelArgument`.
        Temporary arrays can share physical memory, but in such a way that
        their contents are guaranteed to persist between the first and the last
        use in a kernel during the execution of the plan.
        """
        name = self._translator(self._temp_array_idgen())
        ann = Annotation(Type(dtype, shape=shape, strides=strides), 'io')
        self._internal_annotations[name] = ann
        self._temp_arrays.add(name)
        return KernelArgument(name, ann.type)
Example #18
def dropout(ctx, mat, rand, probability):
    kernel_cache = ctx.kernel_cache
    probability = numpy.float32(probability)
    thread = ctx.thread
    key = (dropout, mat.dtype, mat.shape)

    if key not in kernel_cache.keys():
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('mat', Annotation(mat, 'o')),
            Parameter('rand', Annotation(mat, 'i')),
            Parameter('probability', Annotation(probability))
        ],
                              """
        ${rand.ctype} r = ${rand.load_same};
        if (r < ${probability}) {            
            ${mat.store_same}(0.0f);
        }
        """,
                              guiding_array='mat')

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](mat, rand, probability)
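The kernel zeroes, in place, every unit whose pre-drawn uniform random number falls below the dropout probability. A hypothetical out-of-place NumPy sketch:

import numpy

def dropout_reference(mat, rand, probability):
    return numpy.where(rand < probability, numpy.float32(0.0), mat)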
Example #19
    def scale(self, mat, scalar):
        kernel_cache = self.kernel_cache
        scalar = numpy.float32(scalar)
        thread = self.thread
        key = (self.scale, mat.dtype, mat.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            kernel = PureParallel([
                Parameter('mat', Annotation(mat, 'io')),
                Parameter('scalar', Annotation(scalar))
            ],
                                  """
            // Scale each element by the scalar
            ${mat.ctype} m = ${mat.load_same};
            ${mat.ctype} s = ${scalar};
            m *= s;
            ${mat.store_same}(m);
            """,
                                  guiding_array='mat')

            kernel_cache[key] = kernel.compile(thread)

        kernel_cache[key](mat, scalar)
Example #20
    def copy_minibatch(self, array, indices, minibatch):
        kernel_cache, thread = self.kernel_cache, self.thread

        key = (self.copy_minibatch, minibatch.dtype, minibatch.shape,
               array.shape)

        if key not in kernel_cache.keys():
            log.info("compiling " + str(key))
            assert minibatch.shape[0] == indices.shape[0]
            assert indices.dtype == numpy.int32

            dimensions = numpy.int32(len(array.shape))
            assert minibatch.shape[0] == indices.shape[0]
            kernel = PureParallel([
                Parameter('array', Annotation(array, 'i')),
                Parameter('indices', Annotation(indices, 'i')),
                Parameter('minibatch', Annotation(minibatch, 'o'))
            ],
                                  """
            SIZE_T idx = ${indices.load_idx}(${idxs[0]});
            %if dimensions == 2:
            ${minibatch.store_same}(${array.load_idx}(idx, ${idxs[1]}));
            %elif dimensions == 3:
            ${minibatch.store_same}(${array.load_idx}(idx, ${idxs[1]}, ${idxs[2]}));
            %else:
            ${minibatch.store_same}(${array.load_idx}(idx));        
            %endif           
            """,
                                  guiding_array='minibatch',
                                  render_kwds=dict(dimensions=dimensions))
            log.info(array.shape)
            log.info(indices.shape)
            log.info(minibatch.shape)
            kernel_cache[key] = kernel.compile(thread)

        kernel_cache[key](array, indices, minibatch)
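On the host, the same gather is plain fancy indexing (a hypothetical sketch; indices selects slices along the first axis regardless of how many trailing dimensions array has):

def copy_minibatch_reference(array, indices):
    return array[indices]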
Example #21
def lwta(ctx, mat, lwta_size):
    kernel_cache = ctx.kernel_cache
    lwta_size = numpy.float32(lwta_size)
    thread = ctx.thread
    key = (lwta, mat.dtype, mat.shape, lwta_size)

    if key not in kernel_cache.keys():
        num_units = mat.shape[1]
        log.info("compiling " + str(key))
        kernel = PureParallel([Parameter('mat', Annotation(mat, 'io'))],
                              """
        SIZE_T this_idx = ${idxs[1]};
        SIZE_T group_size = ${lwta_size};
        // only the first thread per group computes anything
        if (this_idx % group_size == 0) {
            SIZE_T argmax = ${idxs[1]};
            SIZE_T candidate_idx;
            ${mat.ctype} ma = ${mat.load_same};
            ${mat.ctype} candidate_value;
            // find the argmax in the group
            for (SIZE_T i=1; i < group_size; i++) {
                candidate_idx = this_idx + i;
                if (candidate_idx >= ${num_units}) break;
                candidate_value = ${mat.load_idx}(${idxs[0]}, candidate_idx);
                if ( candidate_value > ma) {
                    ma = candidate_value;
                    argmax = candidate_idx;
                }
            }
            // second pass: zero all except argmax
            for (SIZE_T i=0; i < group_size; i++) {
                candidate_idx = this_idx + i;
                if (candidate_idx >= ${num_units}) break;
                if ( candidate_idx != argmax ) {
                    ${mat.store_idx}(${idxs[0]}, candidate_idx, 0.0f);
                }
            }
        }
            
        """,
                              guiding_array='mat',
                              render_kwds=dict(lwta_size=lwta_size,
                                               num_units=num_units))

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](mat)
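A hypothetical NumPy sketch of the same local winner-take-all rule, assuming lwta_size is a small integer group size (ties resolve to the first maximum, as in the kernel):

import numpy

def lwta_reference(mat, lwta_size):
    out = mat.copy()
    for start in range(0, mat.shape[1], lwta_size):
        group = out[:, start:start + lwta_size]  # a view into out
        winners = numpy.argmax(group, axis=1)
        mask = numpy.zeros_like(group)
        mask[numpy.arange(group.shape[0]), winners] = 1.0
        group *= mask  # zero everything in the group except the winner
    return out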
Example #22
    def __init__(self, root_parameters):
        # Preserve order of initial root parameters.
        # These can repeat.
        self.root_names = []

        # Keeping whole parameters, because we want to preserve the default values (if any).
        self.root_parameters = {}

        self.nodes = {}  # all nodes of the tree
        self.leaf_parameters = {}  # nodes available for connection

        for param in root_parameters:
            self.root_names.append(param.name)
            if param.name in self.root_parameters and param != self.root_parameters[
                    param.name]:
                # Could be an 'io' parameter used for separate 'i' and 'o' parameters
                # in a nested computation.
                # Need to check types and merge.

                new_ann = param.annotation
                old_param = self.root_parameters[param.name]
                old_ann = old_param.annotation

                # FIXME: Not sure when these can be raised
                assert old_ann.type == new_ann.type
                assert old_param.default == param.default

                # Given the old_param != param, the only possible combinations of roles are
                # 'i' and 'o', 'i' and 'io', 'o' and 'io'.
                # In all cases the resulting role is 'io'.
                new_param = Parameter(param.name,
                                      Annotation(new_ann.type, 'io'),
                                      default=param.default)
                self.root_parameters[param.name] = new_param
                self.leaf_parameters[param.name] = new_param
            else:
                self.nodes[param.name] = Node()
                self.root_parameters[param.name] = param
                self.leaf_parameters[param.name] = param
Example #23
def convolve2d_gradient(ctx, prev_deltas, deltas, gradient_intermediate):
    """ The output is the full discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_gradient, prev_deltas.shape, deltas.shape, thread)
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        n, channels, p_width, p_height = prev_deltas.shape
        n_1, filters, d_width, d_height = deltas.shape
        n, d_width_1, d_height_1, channels_1, filters_1, f_width, f_height = gradient_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(prev_deltas, deltas, 'gradient')
        assert expected_shape == gradient_intermediate.shape
        assert d_width_1 == d_width
        assert d_height_1 == d_height

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        kernel = PureParallel([
            Parameter('prev_deltas', Annotation(prev_deltas, 'i')),
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('gradient_intermediate',
                      Annotation(gradient_intermediate, 'o'))
        ],
                              """

        const SIZE_T number = ${idxs[0]};
        const SIZE_T dx = ${idxs[1]};
        const SIZE_T dy = ${idxs[2]};
        const SIZE_T channel = ${idxs[3]};
        const SIZE_T filter = ${idxs[4]};
        const SIZE_T fx = ${idxs[5]};
        const SIZE_T fy = ${idxs[6]};


        // The weight gradient at weight position (fx, fy) is defined by the sum
        //
        //       (deltas * prev_deltas[fx:fx+d_width, fy:fy+d_height]).sum()
        //
        // Instead of reducing here, we store the per-position products and
        // sum them in a separate kernel.

        float g = ${deltas.load_idx}(number, filter, dx, dy) * ${prev_deltas.load_idx}(number, channel, dx+fx, dy+fy);

        ${gradient_intermediate.store_same}(g);

        """,
                              guiding_array='gradient_intermediate',
                              render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](prev_deltas, deltas, gradient_intermediate)

    return gradient_intermediate
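The intermediate array stores one product per (sample, output position, channel, filter, weight position); summing it over the samples and output positions gives the weight gradient. A hypothetical NumPy sketch that folds that separate reduction in directly:

import numpy

def convolve2d_gradient_reference(prev_deltas, deltas):
    # prev_deltas: (n, channels, p_width, p_height)
    # deltas:      (n, filters, d_width, d_height)
    n, channels, p_width, p_height = prev_deltas.shape
    _, filters, d_width, d_height = deltas.shape
    f_width = p_width - d_width + 1
    f_height = p_height - d_height + 1
    gradient = numpy.zeros((channels, filters, f_width, f_height), dtype=numpy.float32)
    for fx in range(f_width):
        for fy in range(f_height):
            window = prev_deltas[:, :, fx:fx + d_width, fy:fy + d_height]
            # sum over samples and output positions
            gradient[:, :, fx, fy] = numpy.einsum('nfxy,ncxy->cf', deltas, window)
    return gradient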
Example #24
def convolve2d_backprop(ctx, deltas, weights, deltas_intermediate):
    """ The output is the full discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_backprop, deltas.shape, weights.shape, thread)
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        channels, filters, f_width, f_height = weights.shape
        n_1, filters_1, d_width, d_height = deltas.shape
        n, channels_1, filters_2, p_width, p_height = deltas_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_2 == filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(deltas, weights, 'backprop')
        assert expected_shape == deltas_intermediate.shape

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        kernel = PureParallel([
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('deltas_intermediate',
                      Annotation(deltas_intermediate, 'o'))
        ],
                              """
        float d = 0.0f;
        SIZE_T x, y, i, j, fi, fj;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${f_width}; i++){
            for (j=0; j < ${f_height}; j++){
                x = xout - i;
                if (x < 0) continue;
                if (x >= ${d_width}) continue;
                y = yout - j;
                if (y < 0) continue;
                if (y >= ${d_height}) continue;
                // access the weights in flipped order
                fi = ${f_width} - i - 1;
                fj = ${f_height} - j - 1;
                d += ${deltas.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, fi, fj);
            }
        }

        ${deltas_intermediate.store_same}(d);

        """,
                              guiding_array='deltas_intermediate',
                              render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](deltas, weights, deltas_intermediate)

    return deltas_intermediate
Example #25
def sarprop_kernel(ctx, weights, gradient, last_gradient, step_sizes, noise,
                   parameters):
    """ SARPROP update kernel """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert weights.shape == gradient.shape == last_gradient.shape == step_sizes.shape

    key = (sarprop_kernel, weights.shape, thread._context) + tuple(
        parameters.values())
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('weights', Annotation(weights, 'io')),
            Parameter('gradient', Annotation(gradient, 'i')),
            Parameter('last_gradient', Annotation(last_gradient, 'io')),
            Parameter('step_sizes', Annotation(step_sizes, 'io')),
            Parameter('noise', Annotation(step_sizes, 'i'))
        ],
                              """
        ${weights.ctype} w = ${weights.load_same};
        ${gradient.ctype} g = ${gradient.load_same};
        ${last_gradient.ctype} lg = ${last_gradient.load_same};
        ${step_sizes.ctype} s = ${step_sizes.load_same};
        
        ${noise.ctype} n = ${noise.load_same};
        n = fabs(n);
    
        // Adapt step size
        if (g * lg > 0.0f) {            
            s = min(${reward_factor}f * s, ${max_step_size}f); 
            
            // Apply update
            if (g < 0.0f) {
                w = w - s*n;
            }             
            if (g > 0.0f) {
                w = w + s*n;
            } 
        } else {
            // punish step size
            s = max(${punish_factor}f * s, ${min_step_size}f);
        }

        // If the L1 weight decay is greater than zero, apply it
        % if l1_decay > 0.0:
        if (w > 0.0f) {
            w = max(0.0f, w - ${l1_decay}f);
        }
        if (w < 0.0f) {
            w = min(0.0f, w + ${l1_decay}f);
        }        
        % endif;
 
        // If the L2 weight decay is greater than zero, apply it
        % if l2_decay > 0.0:
        w *= ${1.0 - l2_decay}f;
        % endif;
               
        // Save last gradient
        lg = g;
            
        ${weights.store_same}(w);
        ${last_gradient.store_same}(lg);
        ${step_sizes.store_same}(s);
        """,
                              guiding_array='weights',
                              render_kwds=parameters)

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](weights, gradient, last_gradient, step_sizes, noise)
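For readability, a hypothetical vectorised NumPy restatement of the update rule above (the parameter names mirror the render_kwds used in the template; the real kernel updates the GPU arrays in place):

import numpy

def sarprop_reference(weights, gradient, last_gradient, step_sizes, noise,
                      reward_factor, punish_factor, min_step_size, max_step_size,
                      l1_decay=0.0, l2_decay=0.0):
    w, g, lg, s = weights, gradient, last_gradient, step_sizes
    n = numpy.abs(noise)

    same_sign = g * lg > 0.0
    # reward: grow the step size and apply the signed, noise-scaled step
    s = numpy.where(same_sign, numpy.minimum(reward_factor * s, max_step_size), s)
    w = numpy.where(same_sign & (g < 0.0), w - s * n, w)
    w = numpy.where(same_sign & (g > 0.0), w + s * n, w)
    # punish: shrink the step size, leave the weight untouched this step
    s = numpy.where(~same_sign, numpy.maximum(punish_factor * s, min_step_size), s)

    if l1_decay > 0.0:
        w = numpy.where(w > 0.0, numpy.maximum(0.0, w - l1_decay), w)
        w = numpy.where(w < 0.0, numpy.minimum(0.0, w + l1_decay), w)
    if l2_decay > 0.0:
        w = w * (1.0 - l2_decay)

    # new weights, new last_gradient, new step sizes
    return w, g.copy(), s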