import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import log_loss

# local modules from this repo
import activation
import layer
import loss


def test_cross_entropy_loss():
    print("gradient check: Cross Entropy")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    softmax = activation.Softmax()
    x_soft = softmax(x)
    # integer class labels and their one-hot encoding
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 8)).astype('float32')
    y_onehot[range(0, 5), y] = 1.
    print(x)
    # sklearn's log_loss as an independent reference value
    print('log loss: ', log_loss(y, x_soft, labels=[0, 1, 2, 3, 4, 5, 6, 7]))

    cross_entropy_f = loss.CrossEntropyLoss()
    cross_entropy_torch = nn.CrossEntropyLoss()
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    # torch's CrossEntropyLoss applies softmax internally, so it takes the
    # raw logits; ours takes the softmax output and one-hot labels
    ce_loss_torch = cross_entropy_torch(torch_x, torch.LongTensor(y))
    ce_loss = cross_entropy_f(x_soft, y_onehot)
    print("Value:\ntorch:{}, mine:{}, delta:{}"
          .format(ce_loss_torch.item(), ce_loss,
                  ce_loss - ce_loss_torch.item()))

    # compare gradients w.r.t. the logits
    ce_loss_torch.backward()
    torch_x_grad = torch_x.grad.data.numpy()
    x_grad = softmax.backward(cross_entropy_f.backward())
    # print(np.sum(x_grad - torch_x_grad, 0))
    print(x_grad - torch_x_grad)
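# A minimal sketch of what loss.CrossEntropyLoss and activation.Softmax
# presumably implement, inferred from the call pattern above (forward via
# __call__, gradient via backward()); this is NOT the repo's actual code.
# Chaining the two backward passes yields (p - y) / N, which is exactly the
# logit gradient torch's combined softmax + cross-entropy computes.
class CrossEntropyLossSketch:
    def __call__(self, prob, y_onehot):
        # L = -(1/N) * sum(y * log(p)); eps guards against log(0)
        self.prob, self.y = prob, y_onehot
        return -np.sum(y_onehot * np.log(prob + 1e-12)) / prob.shape[0]

    def backward(self):
        # dL/dp = -(1/N) * y / p
        return -self.y / (self.prob + 1e-12) / self.prob.shape[0]


class SoftmaxSketch:
    def __call__(self, x):
        e = np.exp(x - x.max(axis=1, keepdims=True))  # stabilized exp
        self.p = e / e.sum(axis=1, keepdims=True)
        return self.p

    def backward(self, dprob):
        # row-wise Jacobian-vector product:
        # dx_j = p_j * (dp_j - sum_k dp_k * p_k)
        s = np.sum(dprob * self.p, axis=1, keepdims=True)
        return self.p * (dprob - s)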
def __init__(self, X_train, Y_train, Net='LeNet5', opti='SGDMomentum'):
    # Prepare data: load, shuffle, normalization, batching, preprocessing
    self.X_train = X_train
    self.Y_train = Y_train
    self.batch_size = 64
    # D_in: input dimension of the network, 784 = 28*28 grayscale image
    self.D_in = 784
    # D_out: output dimension of the network, 10, one per digit
    self.D_out = 10

    print(' Net: ' + str(Net))
    print(' batch_size: ' + str(self.batch_size))
    print(' D_in: ' + str(self.D_in))
    print(' D_out: ' + str(self.D_out))
    print(' Optimizer: ' + opti)

    # =======================
    if Net == 'TwoLayerNet':
        # H is the size of the one hidden layer.
        H = 400
        self.model = ANN.TwoLayerNet(self.D_in, H, self.D_out)
    elif Net == 'ThreeLayerNet':
        #######################################
        ############    TODO   ################
        #######################################
        # H1, H2 are the sizes of the two hidden layers.
        # self.model = ANN.ThreeLayerNet(self.D_in, H1, H2, self.D_out)
        print('Not Implemented.')
        exit(0)
    elif Net == 'LeNet5':
        self.model = CNN.LeNet5()
    else:
        raise ValueError('Unknown Net: ' + str(Net))

    # store training loss over iterations, for later visualization
    self.losses = []

    if opti == 'SGD':
        self.opti = optimizer.SGD(self.model.get_params(), lr=0.0001, reg=0)
    else:
        self.opti = optimizer.SGDMomentum(self.model.get_params(),
                                          lr=0.0001, momentum=0.80,
                                          reg=0.00003)

    self.criterion = loss.CrossEntropyLoss()
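# A hypothetical sketch of optimizer.SGDMomentum, assuming get_params()
# returns objects exposing get_data()/get_grad() (the interface used in the
# gradient-check tests) and that get_data() hands back the live parameter
# array so it can be updated in place. Not the repo's actual implementation.
import numpy as np

class SGDMomentumSketch:
    def __init__(self, params, lr=1e-4, momentum=0.8, reg=0.0):
        self.params = params
        self.lr, self.momentum, self.reg = lr, momentum, reg
        self.velocity = [np.zeros_like(p.get_data()) for p in params]

    def step(self):
        for p, v in zip(self.params, self.velocity):
            # L2 regularization folds into the gradient; momentum smooths it
            g = p.get_grad() + self.reg * p.get_data()
            v *= self.momentum
            v -= self.lr * g
            p.get_data()[...] += v  # in-place parameter update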
def test_fully_connected():
    print("gradient check: FullyConnected")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 12)).astype('float32')
    y_onehot[range(0, 5), y] = 1.

    # --- mine ---
    fc1 = layer.FullyConnected(8, 10)
    fc2 = layer.FullyConnected(10, 12)
    relu1 = activation.ReLU()
    softmax = activation.Softmax()
    ce_func = loss.CrossEntropyLoss()

    fc_out1 = fc1(x)
    fc_out1 = relu1(fc_out1)
    fc_out2 = fc2(fc_out1)
    fc_out2 = softmax(fc_out2)
    ce_loss = ce_func(fc_out2, y_onehot)

    # --- torch ---
    # copy my weights into the torch layers; torch stores the transpose
    weights1 = fc1.weights.get_data()
    bias1 = fc1.bias.get_data()
    weights2 = fc2.weights.get_data()
    bias2 = fc2.bias.get_data()
    torch_fc1 = nn.Linear(8, 10)
    torch_fc2 = nn.Linear(10, 12)
    torch_fc1.weight.data.copy_(torch.Tensor(weights1.T))
    torch_fc1.bias.data.copy_(torch.Tensor(bias1))
    torch_fc2.weight.data.copy_(torch.Tensor(weights2.T))
    torch_fc2.bias.data.copy_(torch.Tensor(bias2))
    torch_relu = nn.ReLU()
    torch_ce_func = nn.CrossEntropyLoss()

    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    torch_fc_out = torch_fc1(torch_x)
    torch_fc_out1 = torch_relu(torch_fc_out)
    torch_fc_out2 = torch_fc2(torch_fc_out1)
    torch_ce_loss = torch_ce_func(torch_fc_out2, torch.LongTensor(y))

    print("Value:\ntorch:{}, mine:{}, delta:{}".format(
        torch_ce_loss.item(), ce_loss,
        torch_ce_loss.item() - ce_loss))

    # --- my grad ---
    grad_x = ce_func.backward()
    grad_x = softmax.backward(grad_x)
    grad_fc2 = fc2.backward(grad_x)
    grad_w2 = fc2.weights.get_grad()
    grad_b2 = fc2.bias.get_grad()
    grad_x = relu1.backward(grad_fc2)
    grad_x = fc1.backward(grad_x)
    grad_w1 = fc1.weights.get_grad()
    grad_b1 = fc1.bias.get_grad()

    # --- torch grad ---
    torch_ce_loss.backward()
    torch_grad_x = torch_x.grad.data.numpy()
    torch_grad_w1 = torch_fc1.weight.grad.data.numpy()
    torch_grad_b1 = torch_fc1.bias.grad.data.numpy()
    torch_grad_w2 = torch_fc2.weight.grad.data.numpy()
    torch_grad_b2 = torch_fc2.bias.grad.data.numpy()

    # all deltas should be ~0 (up to float32 rounding)
    print("--grad x ---")
    print(grad_x - torch_grad_x)
    print("--grad w1 ---")
    print(grad_w1 - torch_grad_w1.T)
    print("--grad b1 ---")
    print(grad_b1 - torch_grad_b1)
    print("--grad w2 ---")
    print(grad_w2 - torch_grad_w2.T)
    print("--grad b2 ---")
    print(grad_b2 - torch_grad_b2)
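# A framework-free cross-check (hypothetical helper, not in the repo): a
# central-difference numerical gradient. Wrapping the forward pass above in
# a closure over x and comparing against grad_x catches the same errors as
# the torch comparison, without needing torch at all.
def numeric_grad(f, x, eps=1e-4):
    g = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = x[idx]
        x[idx] = old + eps
        fp = f(x)          # loss at x + eps in this coordinate
        x[idx] = old - eps
        fm = f(x)          # loss at x - eps
        x[idx] = old       # restore
        g[idx] = (fp - fm) / (2 * eps)
        it.iternext()
    return g

# e.g., inside test_fully_connected():
# num_gx = numeric_grad(
#     lambda x: ce_func(softmax(fc2(relu1(fc1(x)))), y_onehot), x)
# print(np.max(np.abs(num_gx - grad_x)))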
print(' D_out: ' + str(D_out))

### TWO LAYER NET FORWARD TEST ###
# H = 400
# model = nn.TwoLayerNet(batch_size, D_in, H, D_out)
H1 = 300
H2 = 100
model = nn.ThreeLayerNet(batch_size, D_in, H1, H2, D_out)

losses = []
# optim = optimizer.SGD(model.get_params(), lr=0.0001, reg=0)
optim = optimizer.SGDMomentum(model.get_params(), lr=0.0001,
                              momentum=0.80, reg=0.00003)
criterion = loss.CrossEntropyLoss()

# TRAIN
ITER = 25000
for i in range(ITER):
    # get batch, make onehot
    X_batch, Y_batch = util.get_batch(X_train, Y_train, batch_size)
    Y_batch = util.MakeOneHot(Y_batch, D_out)

    # forward, loss, backward, step
    Y_pred = model.forward(X_batch)
    # use loss_val so the imported loss module isn't shadowed
    loss_val, dout = criterion.get(Y_pred, Y_batch)
    model.backward(dout)
    optim.step()

    if i % 100 == 0:
        # log and record the running loss for later visualization
        print("iter: %s, loss: %s" % (i, loss_val))
        losses.append(loss_val)
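# Hypothetical sketches of util.get_batch and util.MakeOneHot, inferred from
# their call sites above; the repo's actual helpers may differ in details
# such as sampling with or without replacement.
import numpy as np

def get_batch_sketch(X, Y, batch_size):
    # sample a random minibatch of rows
    idx = np.random.choice(X.shape[0], batch_size, replace=False)
    return X[idx], Y[idx]

def make_onehot_sketch(Y, D_out):
    # one row per label, a single 1. in the labeled column
    onehot = np.zeros((Y.shape[0], D_out), dtype='float32')
    onehot[np.arange(Y.shape[0]), Y.astype(int).ravel()] = 1.
    return onehot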