예제 #1
0
class ConvLayer(object):

    """
    Performs convolutions: spatial weight sharing
    http://deeplearning.net/tutorial/lenet.html
    """

    def __init__(self, opt={}):
        self.out_depth = opt['filters']
        self.sx = opt['sx'] # filter size: should be odd if possible
        self.in_depth = opt['in_depth']
        self.in_sx = opt['in_sx']
        self.in_sy = opt['in_sy']

        # optional
        self.sy = getopt(opt, 'sy', self.sx)
        self.stride = getopt(opt, 'stride', 1) # stride at which we apply filters to input volume
        self.pad = getopt(opt, 'pad', 0) # padding to borders of input volume
        self.l1_decay_mul = getopt(opt, 'l1_decay_mul', 0.0)
        self.l2_decay_mul = getopt(opt, 'l2_decay_mul', 1.0)

        """
        Note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
        volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
        final application.
        """
        self.out_sx = int(floor((self.in_sx - self.sx + 2 * self.pad) / self.stride + 1))
        self.out_sy = int(floor((self.in_sy - self.sy + 2 * self.pad) / self.stride + 1))
        self.layer_type = 'conv'

        bias = getopt(opt, 'bias_pref', 0.0)
        self.filters = [ Vol(self.sx, self.sy, self.in_depth) for i in xrange(self.out_depth) ]
        self.biases = Vol(1, 1, self.out_depth, bias)

    def forward(self, V, is_training):
        self.in_act = V
        A = Vol(self.out_sx, self.out_sy, self.out_depth, 0.0)

        v_sx = V.sx
        v_sy = V.sy
        xy_stride = self.stride

        for d in xrange(self.out_depth):
            f = self.filters[d]
            x = -self.pad
            y = -self.pad

            for ay in xrange(self.out_sy):
                x = -self.pad
                for ax in xrange(self.out_sx):
                    # convolve centered at this particular location
                    sum_a = 0.0
                    for fy in xrange(f.sy):
                        off_y = y + fy
                        for fx in xrange(f.sx):
                            # coordinates in the original input array coordinates
                            off_x = x + fx
                            if off_y >= 0 and off_y < V.sy and off_x >= 0 and off_x < V.sx:
                                for fd in xrange(f.depth):
                                    sum_a += f.w[((f.sx * fy) + fx) * f.depth + fd] \
                                    * V.w[((V.sx * off_y) + off_x) * V.depth + fd]

                    sum_a += self.biases.w[d]
                    A.set(ax, ay, d, sum_a)

                    x += xy_stride
                y += xy_stride

        self.out_act = A
        return self.out_act

    def backward(self):
        # compute gradient wrt weights, biases and input data
        V = self.in_act
        V.dw = zeros(len(V.w)) # zero out gradient

        V_sx = V.sx
        V_sy = V.sy
        xy_stride = self.stride
        
        for d in xrange(self.out_depth):
            f = self.filters[d]
            x = -self.pad
            y = -self.pad
            for ay in xrange(self.out_sy):
                x = -self.pad
                for ax in xrange(self.out_sx):
                    # convolve and add up the gradients
                    chain_grad = self.out_act.get_grad(ax, ay, d) # gradient from above, from chain rule
                    for fy in xrange(f.sy):
                        off_y = y + fy
                        for fx in xrange(f.sx):
                            off_x = x + fx
                            if off_y >= 0 and off_y < V_sy and off_x >= 0 and off_x < V_sx:
                                # forward prop calculated: a += f.get(fx, fy, fd) * V.get(ox, oy, fd)
                                #f.add_grad(fx, fy, fd, V.get(off_x, off_y, fd) * chain_grad)
                                #V.add_grad(off_x, off_y, fd, f.get(fx, fy, fd) * chain_grad)
                                for fd in xrange(f.depth):
                                    ix1 = ((V.sx * off_y) + off_x) * V.depth + fd
                                    ix2 = ((f.sx * fy) + fx) * f.depth + fd
                                    f.dw[ix2] += V.w[ix1] * chain_grad
                                    V.dw[ix1] += f.w[ix2] * chain_grad

                    self.biases.dw[d] += chain_grad
                    x += xy_stride
                y += xy_stride

    def getParamsAndGrads(self):
        response = []
        for d in xrange(self.out_depth):
            response.append({
                'params': self.filters[d].w,
                'grads': self.filters[d].dw,
                'l2_decay_mul': self.l2_decay_mul,
                'l1_decay_mul': self.l1_decay_mul
            })
        response.append({
            'params': self.biases.w,
            'grads': self.biases.dw,
            'l2_decay_mul': 0.0,
            'l1_decay_mul': 0.0
        })
        return response

    def toJSON(self):
        return {
            'sx'          : self.sx,
            'sy'          : self.sy,
            'stride'      : self.stride,
            'in_depth'    : self.in_depth,
            'out_depth'   : self.out_depth,
            'out_sx'      : self.out_sx,
            'out_sy'      : self.out_sy,
            'layer_type'  : self.layer_type,
            'l1_decay_mul': self.l1_decay_mul,
            'l2_decay_mul': self.l2_decay_mul,
            'pad'         : self.pad,
            'filters'     : [ f.toJSON() for f in self.filters ],
            'biases'      : self.biases.toJSON()
        }

    def fromJSON(self, json):
        self.sx           = json['sx']
        self.sy           = json['sy']
        self.stride       = json['stride']
        self.in_depth     = json['in_depth']
        self.out_depth    = json['out_depth']
        self.out_sx       = json['out_sx']
        self.out_sy       = json['out_sy']
        self.layer_type   = json['layer_type']
        self.l1_decay_mul = json['l1_decay_mul']
        self.l2_decay_mul = json['l2_decay_mul']
        self.pad          = json['pad']
        self.filters      = [ Vol(0, 0, 0, 0).fromJSON(f) for f in json['filters'] ]
        self.biases       = Vol(0, 0, 0, 0).fromJSON(json['biases'])
예제 #2
0
class FullyConnectedLayer(object):

    """
    Fully connected dot products, ie. multi-layer perceptron net
    Building block for most networks
    http://www.deeplearning.net/tutorial/mlp.html
    """

    def __init__(self, opt={}):
        self.out_depth = opt['num_neurons']
        self.l1_decay_mul = getopt(opt, 'l1_decay_mul', 0.0)
        self.l2_decay_mul = getopt(opt, 'l2_decay_mul', 1.0)

        self.num_inputs = opt['in_sx'] * opt['in_sy'] * opt['in_depth']
        self.out_sx = 1
        self.out_sy = 1
        self.layer_type = 'fc'

        bias = getopt(opt, 'bias_pref', 0.0)
        self.filters = [ Vol(1, 1, self.num_inputs) for i in xrange(self.out_depth) ]
        self.biases = Vol(1, 1, self.out_depth, bias)

    def forward(self, V, in_training):
        self.in_act = V
        A = Vol(1, 1, self.out_depth, 0.0)
        Vw = V.w

        # dot(W, x) + b
        for i in xrange(self.out_depth):
            sum_a = 0.0
            fiw = self.filters[i].w
            for d in xrange(self.num_inputs):
                sum_a += Vw[d] * fiw[d]
            sum_a += self.biases.w[i]
            A.w[i] = sum_a

        self.out_act = A
        return self.out_act

    def backward(self):
        V = self.in_act
        V.dw = zeros(len(V.w)) # zero out gradient

        # compute gradient wrt weights and data
        for i in xrange(self.out_depth):
            fi = self.filters[i]
            chain_grad = self.out_act.dw[i]

            for d in xrange(self.num_inputs):
                V.dw[d] += fi.w[d] * chain_grad #grad wrt input data
                fi.dw[d] += V.w[d] * chain_grad #grad wrt params

            self.biases.dw[i] += chain_grad

    def getParamsAndGrads(self):
        response = []
        for d in xrange(self.out_depth):
            response.append({
                'params': self.filters[d].w,
                'grads': self.filters[d].dw,
                'l2_decay_mul': self.l2_decay_mul,
                'l1_decay_mul': self.l1_decay_mul
            })
        response.append({
            'params': self.biases.w,
            'grads': self.biases.dw,
            'l2_decay_mul': 0.0,
            'l1_decay_mul': 0.0
        })
        return response

    def toJSON(self):
        return {
            'out_depth'   : self.out_depth,
            'out_sx'      : self.out_sx,
            'out_sy'      : self.out_sy,
            'layer_type'  : self.layer_type,
            'num_inputs'  : self.num_inputs,
            'l1_decay_mul': self.l1_decay_mul,
            'l2_decay_mul': self.l2_decay_mul,
            'filters'     : [ f.toJSON() for f in self.filters ],
            'biases'      : self.biases.toJSON()
        }

    def fromJSON(self, json):
        self.out_depth    = json['out_depth']
        self.out_sx       = json['out_sx']
        self.out_sy       = json['out_sy']
        self.layer_type   = json['layer_type']
        self.num_inputs   = json['num_inputs']
        self.l1_decay_mul = json['l1_decay_mul']
        self.l2_decay_mul = json['l2_decay_mul']
        self.filters      = [ Vol(0, 0, 0, 0).fromJSON(f) for f in json['filters'] ]
        self.biases       = Vol(0, 0, 0, 0).fromJSON(json['biases'])
예제 #3
0
class FullyConnectedLayer(object):
    """
    Fully connected dot products, ie. multi-layer perceptron net
    Building block for most networks
    http://www.deeplearning.net/tutorial/mlp.html
    """
    def __init__(self, opt={}):
        self.out_depth = opt['num_neurons']
        self.l1_decay_mul = getopt(opt, 'l1_decay_mul', 0.0)
        self.l2_decay_mul = getopt(opt, 'l2_decay_mul', 1.0)

        self.num_inputs = opt['in_sx'] * opt['in_sy'] * opt['in_depth']
        self.out_sx = 1
        self.out_sy = 1
        self.layer_type = 'fc'

        bias = getopt(opt, 'bias_pref', 0.0)
        self.filters = [
            Vol(1, 1, self.num_inputs) for i in xrange(self.out_depth)
        ]
        self.biases = Vol(1, 1, self.out_depth, bias)

    def forward(self, V, in_training):
        self.in_act = V
        A = Vol(1, 1, self.out_depth, 0.0)
        Vw = V.w

        # dot(W, x) + b
        for i in xrange(self.out_depth):
            sum_a = 0.0
            fiw = self.filters[i].w
            for d in xrange(self.num_inputs):
                sum_a += Vw[d] * fiw[d]
            sum_a += self.biases.w[i]
            A.w[i] = sum_a

        self.out_act = A
        return self.out_act

    def backward(self):
        V = self.in_act
        V.dw = zeros(len(V.w))  # zero out gradient

        # compute gradient wrt weights and data
        for i in xrange(self.out_depth):
            fi = self.filters[i]
            chain_grad = self.out_act.dw[i]

            for d in xrange(self.num_inputs):
                V.dw[d] += fi.w[d] * chain_grad  #grad wrt input data
                fi.dw[d] += V.w[d] * chain_grad  #grad wrt params

            self.biases.dw[i] += chain_grad

    def getParamsAndGrads(self):
        response = []
        for d in xrange(self.out_depth):
            response.append({
                'params': self.filters[d].w,
                'grads': self.filters[d].dw,
                'l2_decay_mul': self.l2_decay_mul,
                'l1_decay_mul': self.l1_decay_mul
            })
        response.append({
            'params': self.biases.w,
            'grads': self.biases.dw,
            'l2_decay_mul': 0.0,
            'l1_decay_mul': 0.0
        })
        return response

    def toJSON(self):
        return {
            'out_depth': self.out_depth,
            'out_sx': self.out_sx,
            'out_sy': self.out_sy,
            'layer_type': self.layer_type,
            'num_inputs': self.num_inputs,
            'l1_decay_mul': self.l1_decay_mul,
            'l2_decay_mul': self.l2_decay_mul,
            'filters': [f.toJSON() for f in self.filters],
            'biases': self.biases.toJSON()
        }

    def fromJSON(self, json):
        self.out_depth = json['out_depth']
        self.out_sx = json['out_sx']
        self.out_sy = json['out_sy']
        self.layer_type = json['layer_type']
        self.num_inputs = json['num_inputs']
        self.l1_decay_mul = json['l1_decay_mul']
        self.l2_decay_mul = json['l2_decay_mul']
        self.filters = [Vol(0, 0, 0, 0).fromJSON(f) for f in json['filters']]
        self.biases = Vol(0, 0, 0, 0).fromJSON(json['biases'])
예제 #4
0
class ConvLayer(object):
    """
    Performs convolutions: spatial weight sharing
    http://deeplearning.net/tutorial/lenet.html
    """
    def __init__(self, opt={}):
        self.out_depth = opt['filters']
        self.sx = opt['sx']  # filter size: should be odd if possible
        self.in_depth = opt['in_depth']
        self.in_sx = opt['in_sx']
        self.in_sy = opt['in_sy']

        # optional
        self.sy = getopt(opt, 'sy', self.sx)
        self.stride = getopt(
            opt, 'stride',
            1)  # stride at which we apply filters to input volume
        self.pad = getopt(opt, 'pad', 0)  # padding to borders of input volume
        self.l1_decay_mul = getopt(opt, 'l1_decay_mul', 0.0)
        self.l2_decay_mul = getopt(opt, 'l2_decay_mul', 1.0)
        """
        Note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
        volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
        final application.
        """
        self.out_sx = int(
            floor((self.in_sx - self.sx + 2 * self.pad) / self.stride + 1))
        self.out_sy = int(
            floor((self.in_sy - self.sy + 2 * self.pad) / self.stride + 1))
        self.layer_type = 'conv'

        bias = getopt(opt, 'bias_pref', 0.0)
        self.filters = [
            Vol(self.sx, self.sx, self.in_depth)
            for i in xrange(self.out_depth)
        ]
        self.biases = Vol(1, 1, self.out_depth, bias)

    def forward(self, V, is_training):
        self.in_act = V
        A = Vol(self.out_sx, self.out_sy, self.out_depth, 0.0)

        v_sx = V.sx
        v_sy = V.sy
        xy_stride = self.stride

        for d in xrange(self.out_depth):
            f = self.filters[d]
            x = -self.pad
            y = -self.pad

            for ay in xrange(self.out_sy):
                x = -self.pad
                for ax in xrange(self.out_sx):
                    # convolve centered at this particular location
                    sum_a = 0.0
                    for fy in xrange(f.sy):
                        off_y = y + fy
                        for fx in xrange(f.sx):
                            # coordinates in the original input array coordinates
                            off_x = x + fx
                            if off_y >= 0 and off_y < V.sy and off_x >= 0 and off_x < V.sx:
                                for fd in xrange(f.depth):
                                    sum_a += f.w[((f.sx * fy) + fx) * f.depth + fd] \
                                    * V.w[((V.sx * off_y) + off_x) * V.depth + fd]

                    sum_a += self.biases.w[d]
                    A.set(ax, ay, d, sum_a)

                    x += xy_stride
                y += xy_stride

        self.out_act = A
        return self.out_act

    def backward(self):
        # compute gradient wrt weights, biases and input data
        V = self.in_act
        V.dw = zeros(len(V.w))  # zero out gradient

        V_sx = V.sx
        V_sy = V.sy
        xy_stride = self.stride

        for d in xrange(self.out_depth):
            f = self.filters[d]
            x = -self.pad
            y = -self.pad
            for ay in xrange(self.out_sy):
                x = -self.pad
                for ax in xrange(self.out_sx):
                    # convolve and add up the gradients
                    chain_grad = self.out_act.get_grad(
                        ax, ay, d)  # gradient from above, from chain rule
                    for fy in xrange(f.sy):
                        off_y = y + fy
                        for fx in xrange(f.sx):
                            off_x = x + fx
                            if off_y >= 0 and off_y < V_sy and off_x >= 0 and off_x < V_sx:
                                # forward prop calculated: a += f.get(fx, fy, fd) * V.get(ox, oy, fd)
                                #f.add_grad(fx, fy, fd, V.get(off_x, off_y, fd) * chain_grad)
                                #V.add_grad(off_x, off_y, fd, f.get(fx, fy, fd) * chain_grad)
                                for fd in xrange(f.depth):
                                    ix1 = (
                                        (V.sx * off_y) + off_x) * V.depth + fd
                                    ix2 = ((f.sx * fy) + fx) * f.depth + fd
                                    f.dw[ix2] += V.w[ix1] * chain_grad
                                    V.dw[ix1] += f.w[ix2] * chain_grad

                    self.biases.dw[d] += chain_grad
                    x += xy_stride
                y += xy_stride

    def getParamsAndGrads(self):
        response = []
        for d in xrange(self.out_depth):
            response.append({
                'params': self.filters[d].w,
                'grads': self.filters[d].dw,
                'l2_decay_mul': self.l2_decay_mul,
                'l1_decay_mul': self.l1_decay_mul
            })
        response.append({
            'params': self.biases.w,
            'grads': self.biases.dw,
            'l2_decay_mul': 0.0,
            'l1_decay_mul': 0.0
        })
        return response

    def toJSON(self):
        return {
            'sx': self.sx,
            'sy': self.sy,
            'stride': self.stride,
            'in_depth': self.in_depth,
            'out_depth': self.out_depth,
            'out_sx': self.out_sx,
            'out_sy': self.out_sy,
            'layer_type': self.layer_type,
            'l1_decay_mul': self.l1_decay_mul,
            'l2_decay_mul': self.l2_decay_mul,
            'pad': self.pad,
            'filters': [f.toJSON() for f in self.filters],
            'biases': self.biases.toJSON()
        }

    def fromJSON(self, json):
        self.sx = json['sx']
        self.sy = json['sy']
        self.stride = json['stride']
        self.in_depth = json['in_depth']
        self.out_depth = json['out_depth']
        self.out_sx = json['out_sx']
        self.out_sy = json['out_sy']
        self.layer_type = json['layer_type']
        self.l1_decay_mul = json['l1_decay_mul']
        self.l2_decay_mul = json['l2_decay_mul']
        self.pad = json['pad']
        self.filters = [Vol(0, 0, 0, 0).fromJSON(f) for f in json['filters']]
        self.biases = Vol(0, 0, 0, 0).fromJSON(json['biases'])
예제 #5
0
class SimilarityLayer(object):

    """
    Computes similarity measures, generalization of dot products 
    http://arxiv.org/pdf/1410.0781.pdf
    """

    def __init__(self, opt={}):
        self.out_depth = opt['num_neurons']
        self.l1_decay_mul = getopt(opt, 'l1_decay_mul', 0.0)
        self.l2_decay_mul = getopt(opt, 'l2_decay_mul', 1.0)

        self.num_inputs = opt['in_sx'] * opt['in_sy'] * opt['in_depth']
        self.out_sx = 1
        self.out_sy = 1
        self.layer_type = 'sim'

        bias = getopt(opt, 'bias_pref', 0.0)
        self.filters = [ Vol(1, 1, self.num_inputs) for i in xrange(self.out_depth) ]
        self.biases = Vol(1, 1, self.out_depth, bias)

    def forward(self, V, in_training):
        self.in_act = V
        A = Vol(1, 1, self.out_depth, 0.0)
        Vw = V.w
        
        def norm(vec):
            return sqrt(sum(c * c for c in vec))
        
        normv = norm(Vw)

        # compute cos sim between V and filters
        for i in xrange(self.out_depth):
            sum_a = 0.0
            fiw = self.filters[i].w
            for d in xrange(self.num_inputs):
                sum_a += Vw[d] * fiw[d]
            sum_a += self.biases.w[i] # dot(W, v) + b
            
            normf = norm(fiw)
            try:
                A.w[i] = sum_a / (normv * normf)
            except:
                A.w[i] = 0

        self.out_act = A
        return self.out_act

    def backward(self):
        V = self.in_act
        V.dw = zeros(len(V.w)) # zero out gradient

        # compute gradient wrt weights and data
        for i in xrange(self.out_depth):
            fi = self.filters[i]
            chain_grad = self.out_act.dw[i]

            for d in xrange(self.num_inputs):
                V.dw[d] += fi.w[d] * chain_grad #grad wrt input data
                fi.dw[d] += V.w[d] * chain_grad #grad wrt params

            self.biases.dw[i] += chain_grad

    def getParamsAndGrads(self):
        response = []
        for d in xrange(self.out_depth):
            response.append({
                'params': self.filters[d].w,
                'grads': self.filters[d].dw,
                'l2_decay_mul': self.l2_decay_mul,
                'l1_decay_mul': self.l1_decay_mul
            })
        response.append({
            'params': self.biases.w,
            'grads': self.biases.dw,
            'l2_decay_mul': 0.0,
            'l1_decay_mul': 0.0
        })
        return response

    def toJSON(self):
        return {
            'out_depth'   : self.out_depth,
            'out_sx'      : self.out_sx,
            'out_sy'      : self.out_sy,
            'layer_type'  : self.layer_type,
            'num_inputs'  : self.num_inputs,
            'l1_decay_mul': self.l1_decay_mul,
            'l2_decay_mul': self.l2_decay_mul,
            'filters'     : [ f.toJSON() for f in self.filters ],
            'biases'      : self.biases.toJSON()
        }

    def fromJSON(self, json):
        self.out_depth    = json['out_depth']
        self.out_sx       = json['out_sx']
        self.out_sy       = json['out_sy']
        self.layer_type   = json['layer_type']
        self.num_inputs   = json['num_inputs']
        self.l1_decay_mul = json['l1_decay_mul']
        self.l2_decay_mul = json['l2_decay_mul']
        self.filters      = [ Vol(0, 0, 0, 0).fromJSON(f) for f in json['filters'] ]
        self.biases       = Vol(0, 0, 0, 0).fromJSON(json['biases'])