Example #1
def compute_grad_by_params(w, b):
    # With plain gradient descent and lr=1, the update is new = old - 1 * grad,
    # so the returned deltas (w - layer.weights, b - layer.biases) are exactly
    # dL/dw and dL/db for the loss sum(layer.forward(x)).
    layer = Dense(32, 64)
    layer.weights = np.array(w)
    layer.biases = np.array(b)
    x = np.linspace(-1, 1, 10 * 32).reshape([10, 32])
    layer.backward(x, np.ones([10, 64]), optim='gd', lr=1)
    return w - layer.weights, b - layer.biases
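
Why returning w - layer.weights works: a vanilla gradient-descent step with lr=1 computes new = old - 1 * grad, so the before/after difference is the gradient itself. A self-contained NumPy sketch of the same idea for a bare linear layer y = x @ W + b with loss sum(y); the Dense class above is not needed here:

import numpy as np

x = np.linspace(-1, 1, 10 * 32).reshape([10, 32])
W = np.random.randn(32, 64) * 0.01
b = np.zeros(64)

# Analytic gradients of loss = (x @ W + b).sum()
grad_W = x.T @ np.ones([10, 64])
grad_b = np.ones([10, 64]).sum(axis=0)

# One vanilla GD step with lr=1: new = old - 1 * grad
W_new = W - 1.0 * grad_W
b_new = b - 1.0 * grad_b

# The parameter delta recovers the gradient exactly, which is what
# compute_grad_by_params returns for the Dense layer.
assert np.allclose(W - W_new, grad_W)
assert np.allclose(b - b_new, grad_b)
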
Example #2
class MnistNetMiniBatch:
    def __init__(self):
        self.d1_layer = Dense(784, 100)
        self.a1_layer = ReLu()
        self.drop1_layer = Dropout(0.5)

        self.d2_layer = Dense(100, 50)
        self.a2_layer = ReLu()
        self.drop2_layer = Dropout(0.25)

        self.d3_layer = Dense(50, 10)
        self.a3_layer = Softmax()

    def forward(self, x, train=True):
        net = self.d1_layer.forward(x)
        net = self.a1_layer.forward(net)
        net = self.drop1_layer.forward(net, train)

        net = self.d2_layer.forward(net)
        net = self.a2_layer.forward(net)
        net = self.drop2_layer.forward(net, train)

        net = self.d3_layer.forward(net)
        net = self.a3_layer.forward(net)

        return net

    def backward(self,
                 dz,
                 learning_rate=0.01,
                 mini_batch=True,
                 update=False,
                 len_mini_batch=None):

        dz = self.a3_layer.backward(dz)
        dz = self.d3_layer.backward(dz,
                                    learning_rate=learning_rate,
                                    mini_batch=mini_batch,
                                    update=update,
                                    len_mini_batch=len_mini_batch)

        dz = self.drop2_layer.backward(dz)
        dz = self.a2_layer.backward(dz)
        dz = self.d2_layer.backward(dz,
                                    learning_rate=learning_rate,
                                    mini_batch=mini_batch,
                                    update=update,
                                    len_mini_batch=len_mini_batch)

        dz = self.drop1_layer.backward(dz)
        dz = self.a1_layer.backward(dz)
        dz = self.d1_layer.backward(dz,
                                    learning_rate=learning_rate,
                                    mini_batch=mini_batch,
                                    update=update,
                                    len_mini_batch=len_mini_batch)

        return dz
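
A sketch of one training step with MnistNetMiniBatch. Two things here are assumptions, not shown in the snippet: the random x_batch / y_batch arrays stand in for real MNIST data, and dz = probs - y_batch assumes a softmax + cross-entropy loss; the mini_batch/update/len_mini_batch flags mean whatever Dense.backward in this codebase implements.

import numpy as np

net = MnistNetMiniBatch()
x_batch = np.random.rand(32, 784)                    # stand-in MNIST batch
y_batch = np.eye(10)[np.random.randint(0, 10, 32)]   # stand-in one-hot labels

probs = net.forward(x_batch, train=True)   # train=True keeps the Dropout layers active
dz = probs - y_batch                        # assumed softmax + cross-entropy gradient

net.backward(dz,
             learning_rate=0.01,
             mini_batch=True,
             update=True,
             len_mini_batch=x_batch.shape[0])   # flags are forwarded to each Dense.backward
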
Example #3
class Sampler:
    def __init__(self, inputDim=1, outputDim=1, optimizer=Adam()):
        self.inputDim = inputDim
        self.outputDim = outputDim
        self.mean = Dense(self.inputDim,
                          self.outputDim,
                          activation=Identity(),
                          optimizer=copy.copy(optimizer))
        self.logVar = Dense(self.inputDim,
                            self.outputDim,
                            activation=Identity(),
                            optimizer=copy.copy(optimizer))

    def feedforward(self, input):
        self.latentMean = self.mean.feedforward(input)
        self.latentLogVar = self.logVar.feedforward(input)

        self.epsilon = np.random.standard_normal(size=(self.outputDim,
                                                       input.shape[1]))
        self.sample = self.latentMean + np.exp(
            self.latentLogVar / 2.) * self.epsilon

        return self.sample

    def backpropagate(self, lastGradient):
        gradLogVar = {}
        gradMean = {}
        tmp = self.outputDim * lastGradient.shape[1]

        # KL divergence gradients
        gradLogVar["KL"] = (np.exp(self.latentLogVar) - 1) / (2 * tmp)
        gradMean["KL"] = self.latentMean / tmp

        # MSE gradients
        gradLogVar["MSE"] = 0.5 * lastGradient * self.epsilon * np.exp(
            self.latentLogVar / 2.)
        gradMean["MSE"] = lastGradient

        # backpropagate gradients through self.mean and self.logVar
        return self.mean.backward(gradMean["KL"] +
                                  gradMean["MSE"]) + self.logVar.backward(
                                      gradLogVar["KL"] + gradLogVar["MSE"])

    def getKLDivergence(self, output):
        # output.shape[1] == batchSize
        return -np.sum(1 + self.latentLogVar - np.square(self.latentMean) -
                       np.exp(self.latentLogVar)) / (2 * self.outputDim *
                                                     output.shape[1])
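
Sampler implements the VAE reparameterization trick: rather than sampling z ~ N(mean, exp(logVar)) directly, it draws epsilon ~ N(0, I) and forms z = mean + exp(logVar / 2) * epsilon, so gradients can flow back into self.mean and self.logVar. getKLDivergence is the closed-form KL between that Gaussian and N(0, I), averaged over output dimension and batch. A self-contained NumPy sketch of both formulas, using made-up (outputDim, batchSize)-shaped tensors in place of the Dense outputs:

import numpy as np

outputDim, batchSize = 4, 8
mean = np.random.randn(outputDim, batchSize)      # stand-in for self.latentMean
logVar = np.random.randn(outputDim, batchSize)    # stand-in for self.latentLogVar

# Reparameterization: z = mean + sigma * epsilon, with epsilon ~ N(0, I)
epsilon = np.random.standard_normal(size=(outputDim, batchSize))
z = mean + np.exp(logVar / 2.) * epsilon

# Closed-form KL( N(mean, exp(logVar)) || N(0, I) ), averaged as in getKLDivergence
kl = -np.sum(1 + logVar - np.square(mean) - np.exp(logVar)) / (2 * outputDim * batchSize)
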
Example #4
def test_dense_layer_NUMERICAL_GRADIENT_CHECK(self):
    # Compare the analytic input gradient from backward() against a
    # finite-difference estimate of d(sum of outputs)/dx; lr=0 leaves the
    # weights untouched while backward() returns the input gradient.
    x = np.linspace(-1, 1, 10 * 32).reshape([10, 32])
    l = Dense(32, 64)
    numeric_grads = eval_numerical_gradient(lambda x: l.forward(x).sum(), x)
    grads = l.backward(x, np.ones([10, 64]), optim='gd', lr=0)
    self.assertTrue(np.allclose(grads, numeric_grads, rtol=1e-5, atol=0),
                    msg="input gradient does not match numeric grad")
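
The test leans on an eval_numerical_gradient helper that is not shown in the snippet. A common way to write such a helper is central differences over every element of x; the sketch below is one plausible implementation, not necessarily the one this codebase uses:

import numpy as np

def eval_numerical_gradient(f, x, h=1e-5):
    # Central differences: nudge each element of x by +/- h and
    # approximate the partial derivative of the scalar f(x).
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        f_plus = f(x)
        x[ix] = old - h
        f_minus = f(x)
        x[ix] = old                        # restore the original value
        grad[ix] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad
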
Example #5
        layer1.forward(trainingData[batch])
        activation1.forward(layer1.outputs)
        layer2.forward(activation1.outputs)
        activation2.forward(layer2.outputs)
        cost.forward(activation2.outputs, labels[batch], 10)

        for sample in range(activation2.outputs.shape[1]):
            if np.argmax(activation2.outputs[:, sample]) == np.argmax(
                    labels[batch, sample]):
                correct += 1

        cost.backward(activation2.outputs, labels[batch], 10)
        activation2.backward(layer2.outputs, layer2.weights.shape[0],
                             BATCH_SIZE)
        layer2.backward(activation1.outputs)
        activation1.backward(layer1.outputs)
        layer1.backward(trainingData[batch])

        delta1 = np.zeros((cost.prime.shape[0], cost.prime.shape[1]))
        for i in range(cost.prime.shape[0]):
            delta1[i] = np.matmul(cost.prime[i], activation2.prime[i])

        delta1_wrt_L2 = np.matmul(delta1, layer2.input_prime)
        delta2 = np.zeros(
            (activation1.prime.shape[0], activation1.prime.shape[2]))
        for i in range(activation1.prime.shape[2]):
            delta2[:, i] = np.matmul(delta1_wrt_L2[i],
                                     activation1.prime[:, :, i])

        C_wrt_W2 = np.zeros(
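
The per-sample loops above (building delta1 row by row and delta2 column by column) are batched matrix products written out by hand. A self-contained NumPy check, on made-up shapes, that such a loop matches np.matmul broadcast over a leading batch axis:

import numpy as np

batch, n_out = 5, 10
vec = np.random.randn(batch, n_out)           # one row vector per sample
jac = np.random.randn(batch, n_out, n_out)    # one Jacobian per sample

# Loop form, as in the snippet above.
out_loop = np.zeros((batch, n_out))
for i in range(batch):
    out_loop[i] = np.matmul(vec[i], jac[i])

# Batched form: np.matmul broadcasts over the leading batch axis.
out_batched = np.matmul(vec[:, None, :], jac)[:, 0, :]

assert np.allclose(out_loop, out_batched)
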
Example #6
    sw1 = swish1.forward(z1)
    sw2 = dense2.forward(sw1)
    y_pre = swish2.forward(sw2)

    # loss = loss_fn.loss(y_true, y_pred)
    # print("loss: ", loss)
    # print("loss's mean: ",np.mean(loss))

    # sigloss = loss_fn.loss(y_true, y_pre)
    # print("SIG loss: ", sigloss)
    # print("sig loss's mean: ",np.mean(sigloss))

    # dldy_pred = loss_fn.gradient(y_true , y_pred)
    # print("lldy: ",dldy_pred)
    # dldz2 = activation2.backward(dldy_pred)
    # print("dldz2: ",dldz2)
    # dLda1 = dense2.backward(dldz2)
    # print("dLda1: ",dLda1)
    # dLz1 = sigmoid.backward(dLda1)
    # dLdw = dense.backward(dLz1)

    d1 = loss_fn.gradient(y_true, y_pre)
    # a = swish2.gradient(d1)
    # print(d1)
    d2 = swish2.backward(d1)
    d3 = dense2.backward(d2)
    d4 = swish1.backward(d3)
    d5 = dense.backward(d4)
    print(d2)
    # Dense -> Activation -> Dense -> Activation -> y_pred

    z1 = dense.forward(x)
    a1 = activation1.forward(z1)
    print("Activation Value:", a1)

    z2 = dense2.forward(a1)
    a2 = activation2.forward(z2)
    y_pred = a2

    loss = loss_func.loss(y_true, y_pred)

    print("Individual Loss:", loss)
    total_loss = np.mean(loss)
    print("Total Loss:", total_loss)

    # Backward propagation
    dLdy_pred = loss_func.gradient(y_true, y_pred)
    print("dLdy:", dLdy_pred)
    # Equivalent manual chain rule:
    #   dydz = activation2.gradient(z2)
    #   dLdz2 = dLdy_pred * dydz

    dLdz2 = activation2.backward(dLdy_pred)
    dLda1 = dense2.backward(dLdz2)
    dLdz1 = activation1.backward(dLda1)   # first activation's backward (forward used activation1)
    dLdw = dense.backward(dLdz1)
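
What the chain-rule note above describes is that an activation's backward is just an elementwise multiply of the upstream gradient by the activation's derivative at the cached pre-activation: dL/dz2 = dL/dy_pred * da2/dz2. A self-contained sketch of that identity using a plain sigmoid as a stand-in (the snippet's own activation classes are not reproduced here):

import numpy as np

def sigmoid_fn(z):
    return 1.0 / (1.0 + np.exp(-z))

class SigmoidLayer:
    def forward(self, z):
        self.out = sigmoid_fn(z)          # cache the output for backward
        return self.out

    def backward(self, upstream):
        # d(sigmoid)/dz expressed via its output: s * (1 - s)
        return upstream * self.out * (1.0 - self.out)

z2 = np.random.randn(4, 3)                # pretend pre-activations from dense2
dLdy_pred = np.random.randn(4, 3)         # pretend upstream loss gradient

act = SigmoidLayer()
act.forward(z2)

# Manual chain rule from the comment: dLdz2 = dLdy_pred * dydz
dydz = sigmoid_fn(z2) * (1.0 - sigmoid_fn(z2))
assert np.allclose(act.backward(dLdy_pred), dLdy_pred * dydz)
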