def __init__(self, dim_list, eta = 0.1):
        """
        Constructor for network.
        Params:
        dim_list: a list of the number of dimension for each layer.
        eta: learning rate for each gradient descent step
        """
        depth = len(dim_list)
        self.depth = depth
        self.dim_list = dim_list
        self.eta = eta

        # 1. Initialize each layer with its output, partial_output, weight
        #    and bias, although partial_output is unused for the input layer
        #    and weight/bias are unused for the output layer.
        #
        # 2. partial_weight is an internal variable and is not stored in
        #    a layer.
        #
        self.layers = [ {'output':Vector.fromIterable(0 for i in xrange(dim_list[l])),
            'partial_output':Vector.fromIterable(0 for i in xrange(dim_list[l])),
            'weight':Matrix.fromRandom(dim_list[l + 1], dim_list[l]),
            'bias':Vector.fromRandom(dim_list[l + 1])}
            for l in xrange(depth - 1) ]
        
        # output layer: it holds activations only, so weight and bias are None
        self.layers.append({'output':Vector.fromList([0] * dim_list[depth - 1]),
            'partial_output':Vector.fromList([0] * dim_list[depth - 1]),
            'weight': None, 'bias': None})
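
For dim_list = [2, 3, 1] the constructor above yields two trainable entries plus a terminal output entry; a minimal sketch inspecting that structure (row_num/col_num are the Matrix attributes used in _backward below):

import feedforward_network

net = feedforward_network.FeedForwardNetwork(dim_list = [2, 3, 1])
for l, layer in enumerate(net.layers):
    w = layer['weight']
    shape = (w.row_num, w.col_num) if w is not None else None
    print l, len(layer['output']), shape
# 0 2 (3, 2)
# 1 3 (1, 3)
# 2 1 None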
def main(mnist_path):
    # Assumed to be in scope from the surrounding project: the
    # feedforward_network module, the Matrix/Vector classes and the
    # puttime timing helper. mnist_path is unused in this toy example.
    puttime('start loading')

    network = feedforward_network.FeedForwardNetwork(
            dim_list = [2, 2, 2],
            eta = 0.5
            )

    # Overwrite the random initialization with fixed weights and biases
    # (the classic worked backpropagation example with inputs .05/.10
    # and targets .01/.99) so the run is reproducible.
    network.layers[0]['weight'] = Matrix(2, 2, [.15, .20, .25, .30])
    network.layers[0]['bias'] = Vector.fromList([.35, .35])
    network.layers[1]['weight'] = Matrix(2, 2, [.40, .45, .50, .55])
    network.layers[1]['bias'] = Vector.fromList([.60, .60])

    x = Vector([.05, .10])
    y = Vector([.01, .99])

    def generator(x, y):
        # one-shot training stream: yields the single (x, y) sample
        yield (x, y)

    # start training
    puttime('start training')
    network.train(generator(x, y), puttime, limit = 1)

    # testing
    out = network.inference(x)
    print x, '->', out

    # train for one more step
    puttime('start training')
    network.train(generator(x, y), puttime, limit = 1)

    # testing
    out = network.inference(x)
    print x, '->', out
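
For reference, the first forward pass of this configuration is easy to check by hand, assuming the standard logistic sigmoid activation (see vsigmoid below):

from math import exp
s = lambda t: 1.0 / (1.0 + exp(-t))
h1 = s(.15 * .05 + .20 * .10 + .35)          # ~0.5933
h2 = s(.25 * .05 + .30 * .10 + .35)          # ~0.5969
o1 = s(.40 * h1 + .45 * h2 + .60)            # ~0.7514
o2 = s(.50 * h1 + .55 * h2 + .60)            # ~0.7729
loss = (o1 - .01) ** 2 + (o2 - .99) ** 2     # ~0.5967, the unhalved squared error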
    def inference(self, vector):
        # run a forward pass, then one-hot encode the argmax of the
        # output layer; sigmoid activations lie in (0, 1), so starting
        # maxval at 0 is safe
        self._forward(vector)
        output = self.layers[self.depth - 1]['output']
        maxpos, maxval = 0, 0
        for i in xrange(len(output)):
            if maxval < output[i]:
                maxpos, maxval = i, output[i]
        rtn = Vector.fromList([0] * len(output))
        rtn[maxpos] = 1
        return rtn
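
The decision rule is a plain argmax followed by one-hot encoding; the same logic on bare Python lists, for illustration:

scores = [0.7514, 0.7729]    # the untrained activations computed above
winner = max(xrange(len(scores)), key = lambda i: scores[i])
one_hot = [1 if i == winner else 0 for i in xrange(len(scores))]
# one_hot == [0, 1]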
    def _backward(self, x, y):
        # output layer: with E = 1/2 * sum_i (O_i - y_i)^2, the gradient
        # w.r.t. each output O_i is simply O_i - y_i
        layer_id = self.depth - 1
        output = self.layers[layer_id]['output']
        self.layers[layer_id]['partial_output'].assign(Vector.fromIterable(
            output[i] - y[i] for i in xrange(self.dim_list[layer_id])
            ))

        # the returned loss is the plain (unhalved) sum of squared errors
        loss = sum((output[i] - y[i]) ** 2 for i in xrange(self.dim_list[layer_id]))

        # hidden layers and the input layer, walking backward
        for layer_id in xrange(self.depth - 2, -1, -1):
            weight = self.layers[layer_id]['weight']
            bias = self.layers[layer_id]['bias']
            partial_output = self.layers[layer_id]['partial_output']
            output = self.layers[layer_id]['output']
            # activations and partials of the next (downstream) layer
            last_output = self.layers[layer_id + 1]['output']
            last_partial = self.layers[layer_id + 1]['partial_output']

            """
            Partial output for every layer except the output one is:
            \frac {\partial E} {\partial O_k^{(l)}} =
                \sum_i (\frac {\partial E} {\partial O_i^{ (l+1) }}
                    * O_i^{ (l+1) } * (1 - O_i^{ (l+1) }) * w_{ik}^{ (l) } )
            
            But the partial output of the first layer is unnecessary,
            thus we don't compute it.
            """
            if layer_id > 0:
                self.layers[layer_id]['partial_output'].assign(Vector.fromIterable(
                    sum(last_partial[i] * last_output[i] * (1 - last_output[i])
                        * weight.item(i, k)
                        for i in xrange(self.dim_list[layer_id + 1]))
                    / (self.dim_list[layer_id] + 1.0)
                    for k in xrange(self.dim_list[layer_id] )))

            """
            Partial weight for every layer except the output one:
            \frac {\partial E} {\partial w_{ji}^{(l)}} = 
                \frac {\partial E} {\partial O_j^{(l + 1)}}
                    * O_j^{(l + 1)} * (1 - O_j^{(l+1)}) * O_i^{(l)}
            """
            partial_weight = Matrix.fromIterable(weight.row_num, weight.col_num, (
                    last_partial[row_id] 
                    * last_output[row_id] * (1 - last_output[row_id])
                    * output[col_id]
                    / (self.dim_list[layer_id] + 1.0)
                    for row_id in xrange(self.dim_list[layer_id + 1])
                    for col_id in xrange(self.dim_list[layer_id])
                    ))

            self.layers[layer_id]['weight'] -= self.eta * partial_weight

            """
            Partial bias is almost exact as the partial weight,
            but for every item in the bias vector the last item is 1
            \frac {\partial E}{\partial b_j^{(l)}} =
                \frac {\partial E}{\partial O_j^{(l + 1)}}
                    O_j^{(l + 1)} (1 - O_j^{(l+1)}) * 1
            """
            partial_bias = Vector.fromIterable(
                    last_partial[row_id]
                    * last_output[row_id] * (1 - last_output[row_id]) * 1
                    / (self.dim_list[layer_id] + 1.0)
                    for row_id in xrange(self.dim_list[layer_id + 1])
                    )
            self.layers[layer_id]['bias'] -= self.eta * partial_bias

        return loss
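
As a concrete check against the run in main() above, the first output-layer weight update follows the partial_weight formula directly, including this implementation's extra 1 / (dim + 1) scaling; a sketch using the hand-computed activations from earlier:

h1, o1, y1 = 0.5933, 0.7514, 0.01
delta = (o1 - y1) * o1 * (1 - o1)    # dE/dnet_o1, ~0.1385
dw = delta * h1 / (2 + 1.0)          # dE/dw scaled by 1/(dim + 1), ~0.0274
w_new = 0.40 - 0.5 * dw              # eta = 0.5, giving ~0.3863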
from itertools import izip

def sample_wrapper(data):
    # pair each image with its label and one-hot encode the digit
    for (img, label) in izip(data[0], data[1]):
        x = Vector(img)
        y = Vector.fromIterable(1 if pos == label else 0 for pos in xrange(10))
        yield (x, y)
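
A hypothetical call with two made-up 4-pixel images shows the shapes sample_wrapper produces (the (images, labels) layout of data is an assumption):

fake_data = ([[0, 0, 1, 1], [1, 1, 0, 0]], [3, 7])
for x, y in sample_wrapper(fake_data):
    print len(x), len(y)    # 4-dim input vector, 10-dim one-hot target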
def vsigmoid(v):
    # apply the scalar sigmoid elementwise to a Vector
    return Vector.fromIterable(sigmoid(v[i]) for i in xrange(len(v)))
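
vsigmoid assumes a scalar sigmoid helper; a minimal definition, using the standard logistic function:

from math import exp

def sigmoid(t):
    # logistic function: maps any real t into (0, 1)
    return 1.0 / (1.0 + exp(-t))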