Example #1
    def test_update(self):
        for vector_size in range(1, 10):
            adam = Adam()
            gnn = GraphNeuralNetwork(vector_size)
            # with no gradients accumulated yet, an update must leave the
            # parameters untouched; snapshot the repr so the comparison is by
            # value rather than by object identity
            expected = repr(gnn.params)
            adam.update(gnn)
            actual = repr(gnn.params)
            self.assertEqual(expected, actual)

            gnn.grads["W"] = np.random.rand(vector_size, vector_size)
            gnn.grads["A"] = np.random.rand(vector_size)
            gnn.grads["b"] = np.random.rand(1)

            v = {}
            m = {}
            for key, grad in gnn.grads.items():
                v[key] = np.zeros_like(grad)
                m[key] = np.zeros_like(grad)
            params = copy.deepcopy(gnn.params)

            for i in range(1, 100):
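                # replay Adam's bias-corrected moment updates by hand and check
                # that the optimizer produced numerically identical parameters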
                gnn.grads["W"] = np.random.rand(vector_size, vector_size)
                gnn.grads["A"] = np.random.rand(vector_size)
                gnn.grads["b"] = np.random.rand(1)
                adam.update(gnn)
                for key, param in params.items():
                    m[key] = adam.beta1 * m[key] + (
                        (1 - adam.beta1) * gnn.grads[key])
                    v[key] = adam.beta2 * v[key] + (
                        (1 - adam.beta2) * gnn.grads[key]**2)
                    m_hat = m[key] / (1 - adam.beta1**i)
                    v_hat = v[key] / (1 - adam.beta2**i)
                    params[key] = param - adam.lr * m_hat / (np.sqrt(v_hat) +
                                                             1.0e-8)
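                    # compare the rounded absolute differences against zeros via
                    # repr, tolerating tiny floating-point discrepancies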
                    expected1 = repr(
                        np.round(np.abs(params[key] - gnn.params[key]), 6))
                    actual1 = repr(np.zeros_like(params[key]))
                    self.assertEqual(expected1, actual1)
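
For context, below is a minimal sketch of an Adam optimizer with the interface this test exercises: an update(gnn) method that reads gnn.grads and rewrites gnn.params, plus lr, beta1 and beta2 attributes. The constructor defaults, the internal m/v/t state names, and the assumption that gnn.grads starts out empty are assumptions of this sketch; the project's actual Adam class is not shown here.

import numpy as np


class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.m = {}   # first-moment estimates, one entry per parameter
        self.v = {}   # second-moment estimates
        self.t = 0    # update counter for bias correction

    def update(self, gnn):
        if not gnn.grads:     # nothing to do before any gradients exist
            return
        self.t += 1
        for key, grad in gnn.grads.items():
            if key not in self.m:
                self.m[key] = np.zeros_like(grad)
                self.v[key] = np.zeros_like(grad)
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grad
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * grad**2
            m_hat = self.m[key] / (1 - self.beta1**self.t)
            v_hat = self.v[key] / (1 - self.beta2**self.t)
            gnn.params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + 1.0e-8)
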
Example #2
File: net.py Project: hunnxx/DNN
class net:
    def __init__(self):
        self.params = dict()
        self.grads = dict()

        self.params['W1'] = args.weight_init_std * np.random.randn(784, 256)
        self.params['B1'] = np.zeros(256)
        self.params['W2'] = args.weight_init_std * np.random.randn(256, 256)
        self.params['B2'] = np.zeros(256)
        self.params['W3'] = args.weight_init_std * np.random.randn(256, 10)
        self.params['B3'] = np.zeros(10)

        self.optimizer = Adam()

    def forward(self, input, target):
        z1 = np.dot(input, self.params['W1']) + self.params['B1']
        x2 = self.relu(z1)
        z2 = np.dot(x2, self.params['W2']) + self.params['B2']
        x3 = self.relu(z2)
        z3 = np.dot(x3, self.params['W3']) + self.params['B3']
        output = self.softmax(z3)

        self.params['X1'] = input
        self.params['Z1'] = z1
        self.params['X2'] = x2
        self.params['Z2'] = z2
        self.params['X3'] = x3
        self.params['Z3'] = z3
        self.params['Y'] = output
        self.params['T'] = target

        return output, self.loss(output, target)

    def backward(self):
        # Backpropagation: W3, W2, W1 below are the layer deltas (errors), not
        # weights. Softmax + cross-entropy gradient derivation:
        # https://stats.stackexchange.com/questions/235528/backpropagation-with-softmax-cross-entropy

        W3 = (self.params['Y'] - self.params['T']) / args.batch_size
        self.grads['W3'] = np.dot(self.params['X3'].T, W3)
        self.grads['B3'] = np.sum(W3, axis=0)

        W2 = np.dot(W3, self.params['W3'].T)
        W2 *= np.where(self.params['Z2'] > 0, 1, 0)
        self.grads['W2'] = np.dot(self.params['X2'].T, W2)
        self.grads['B2'] = np.sum(W2, axis=0)

        W1 = np.dot(W2, self.params['W2'].T)
        W1 *= np.where(self.params['Z1'] > 0, 1, 0)
        self.grads['W1'] = np.dot(self.params['X1'].T, W1)
        self.grads['B1'] = np.sum(W1, axis=0)

        self.optimizer.update(self.params, self.grads)

    def loss(self, output, target):
        # Cross Entropy
        if output.ndim == 1:
            target = target.reshape(1, target.size)
            output = output.reshape(1, output.size)

        # if the training labels are one-hot vectors, convert them to class indices
        if target.size == output.size:
            target = target.argmax(axis=1)

        batch_size = output.shape[0]
        return -np.sum(
            np.log(output[np.arange(batch_size), target] + 1e-7)) / batch_size

    def accuracy(self, output, target):
        return np.sum(np.argmax(output, axis=1) == np.argmax(
            target, axis=1)) / output.shape[0]

    def softmax(self, x):
        if x.ndim == 2:
            x = x.T
            x = x - np.max(x, axis=0)
            y = np.exp(x) / np.sum(np.exp(x), axis=0)
            return y.T

        x = x - np.max(x)  # guard against overflow
        return np.exp(x) / np.sum(np.exp(x))

    def relu(self, x):
        return np.maximum(0, x)

    def relu6(self, x):
        return np.minimum(6, np.maximum(0, x))
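
The (Y - T) / batch_size term in backward() is the standard softmax-plus-cross-entropy gradient. Below is a small self-contained check of that identity against central finite differences; the helper names are illustrative only and not part of the project.

import numpy as np

def softmax(z):
    z = z - np.max(z)
    return np.exp(z) / np.sum(np.exp(z))

def cross_entropy(y, t):
    return -np.sum(t * np.log(y))

z = np.random.randn(10)        # random logits
t = np.eye(10)[3]              # one-hot target for class 3

analytic = softmax(z) - t      # closed-form gradient d(loss)/d(logits)

numeric = np.zeros_like(z)     # central finite differences
eps = 1e-5
for i in range(z.size):
    zp, zm = z.copy(), z.copy()
    zp[i] += eps
    zm[i] -= eps
    numeric[i] = (cross_entropy(softmax(zp), t) -
                  cross_entropy(softmax(zm), t)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))   # should be close to zero (~1e-9)
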
Example #3
train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size // batch_size, 1)

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# optim = SGD(net.params, lr=0.1, momentum=0.9)
# optim = AdaGrad(net.params)
optim = Adam(net.params)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = net.gradient(x_batch, t_batch)
    net.params = optim.update(net.params, grad)

    loss = net.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = net.accuracy(x_train, t_train)
        test_acc = net.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))