def test_softmax():
    print("gradient check: Softmax")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    y = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')

    # --- mine ---
    softmax = activation.Softmax()
    square_loss_func = loss.SquareLoss()
    softmax_x = softmax(x)
    square_loss = square_loss_func(softmax_x, y)

    # --- torch ---
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    softmax_torch = nn.Softmax(dim=1)
    square_loss_func_torch = nn.MSELoss()
    softmax_x_torch = softmax_torch(torch_x)
    square_loss_torch = square_loss_func_torch(softmax_x_torch, torch.Tensor(y))
    print("Value:\ntorch:{}, mine:{}, delta:{}".format(
        square_loss_torch.item(), square_loss,
        (square_loss_torch.item() - square_loss)))

    # --- my grad ---
    grad_softmax = square_loss_func.backward()
    grad_x = softmax.backward(grad_softmax)

    # --- torch grad ---
    square_loss_torch.backward()
    grad_x_torch = torch_x.grad.data.numpy()
    print(grad_x_torch - grad_x)
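
# NOTE: illustrative extra check, not part of the original test file. The Softmax
# backward above can also be verified without PyTorch by comparing the analytic
# Jacobian J = diag(s) - s*s^T against central finite differences (pure NumPy,
# assumes numpy is imported as np as in the tests above; helper names are invented).
def check_softmax_jacobian(dim=8, eps=1e-5):
    def softmax_row(z):
        e = np.exp(z - np.max(z))  # numerically stable softmax of a 1-D vector
        return e / np.sum(e)

    z = np.random.rand(dim).astype('float64')
    s = softmax_row(z)
    analytic = np.diag(s) - np.outer(s, s)   # analytic Jacobian

    numeric = np.zeros((dim, dim))
    for i in range(dim):                     # finite differences, column by column
        zp, zm = z.copy(), z.copy()
        zp[i] += eps
        zm[i] -= eps
        numeric[:, i] = (softmax_row(zp) - softmax_row(zm)) / (2 * eps)

    print("max |analytic - numeric|:", np.max(np.abs(analytic - numeric)))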
def test_cross_entropy_loss():
    print("gradient check: Cross Entropy")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    softmax = activation.Softmax()
    x_soft = softmax(x)
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 8)).astype('float32')
    y_onehot[range(0, 5), y] = 1.
    print(x)
    print('log loss: ', log_loss(y, x_soft, labels=[0, 1, 2, 3, 4, 5, 6, 7]))

    cross_entropy_f = loss.CrossEntropyLoss()
    cross_entropy_torch = nn.CrossEntropyLoss()
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    ce_loss_torch = cross_entropy_torch(torch_x, torch.LongTensor(y))
    ce_loss = cross_entropy_f(x_soft, y_onehot)
    print("Value:\ntorch:{}, mine:{}, delta:{}".format(
        ce_loss_torch.item(), ce_loss, (ce_loss - ce_loss_torch.item())))

    ce_loss_torch.backward()
    torch_x_grad = torch_x.grad.data.numpy()
    x_grad = softmax.backward(cross_entropy_f.backward())
    # print(np.sum(x_grad - torch_x_grad, 0))
    print(x_grad - torch_x_grad)
def get(self, Y_pred, Y_true):
    N = Y_pred.shape[0]
    softmax = activation.Softmax()
    prob = softmax._forward(Y_pred)
    loss = NLLLoss(prob, Y_true)
    Y_serial = np.argmax(Y_true, axis=1)
    dout = prob.copy()
    dout[np.arange(N), Y_serial] -= 1
    return loss, dout
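
# NOTE: illustrative check, not part of the original class. The `dout` above uses
# the identity that for a (summed) cross-entropy over softmax outputs the gradient
# w.r.t. the logits is softmax(Y_pred) - Y_true, i.e. `prob` with 1 subtracted at
# the true class (divide by N if the loss is averaged). Pure-NumPy finite-difference
# verification of that identity:
def check_softmax_ce_grad(N=5, C=8, eps=1e-6):
    def softmax_rows(Z):
        e = np.exp(Z - Z.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    def ce_sum(Z, Y):
        return -np.sum(Y * np.log(softmax_rows(Z)))  # summed cross-entropy

    Z = np.random.rand(N, C)
    Y = np.zeros((N, C))
    Y[np.arange(N), np.random.randint(0, C, N)] = 1.

    analytic = softmax_rows(Z) - Y           # the `dout` formula above
    numeric = np.zeros_like(Z)
    for i in range(N):
        for j in range(C):
            Zp, Zm = Z.copy(), Z.copy()
            Zp[i, j] += eps
            Zm[i, j] -= eps
            numeric[i, j] = (ce_sum(Zp, Y) - ce_sum(Zm, Y)) / (2 * eps)
    print("max |analytic - numeric|:", np.max(np.abs(analytic - numeric)))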
def __init__(self):
    self.Cin = 1
    self.D_out = 10

    # Cin: input channel
    # Cout: output channel
    # F: kernel size 3x3

    # Conv1: Cin=1, Cout=6, F=3
    self.conv1 = conv_layer.Conv(self.Cin, 6, 3)
    self.ReLU1 = activation.ReLU()
    self.pool1 = pooling.MaxPool(2, 2)

    # Conv2: Cin=6, Cout=16, F=3
    self.conv2 = conv_layer.Conv(6, 16, 3)
    self.ReLU2 = activation.ReLU()
    self.pool2 = pooling.MaxPool(2, 2)

    # FC1: pool2 output flattened to 784 features per sample (e.g. a batch of 64 -> 64x784)
    self.FC1 = nn_layer.FC(784, 120)
    self.ReLU3 = activation.ReLU()
    self.FC2 = nn_layer.FC(120, 84)
    self.ReLU4 = activation.ReLU()
    self.FC3 = nn_layer.FC(84, self.D_out)
    self.Softmax = activation.Softmax()

    self.p2_shape = None
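
# NOTE: the matching forward pass is not shown here. A minimal sketch of what it
# would likely look like, assuming each layer exposes a _forward method (as
# activation.Softmax does in the loss code above) and that p2_shape caches the
# pre-flatten shape for the backward pass -- an assumption, not the original code:
def _forward_sketch(self, X):
    h = self.pool1._forward(self.ReLU1._forward(self.conv1._forward(X)))
    h = self.pool2._forward(self.ReLU2._forward(self.conv2._forward(h)))
    self.p2_shape = h.shape          # remember shape so backward() can un-flatten
    h = h.reshape(X.shape[0], -1)    # flatten to (batch, 784)
    h = self.ReLU3._forward(self.FC1._forward(h))
    h = self.ReLU4._forward(self.FC2._forward(h))
    return self.Softmax._forward(self.FC3._forward(h))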
sys.path.append('../source')
import functions as f
import activation as act
import cost_functions as cost
from data_prep import DataPrep
from NeuralNetwork import NeuralNetwork

# set the parameters
n = 100
n_epochs = 300
n_batches = 100
neurons = [50, 50]
n_outputs = 10
hidden_act = act.Sigmoid()
output_act = act.Softmax()
cost_func = cost.CrossEntropy()
no_hidden = False
seed = 2034

# load the scikit-learn digits dataset (8x8 images)
digits = datasets.load_digits()

# define input data and labels
dataset = digits.images
labels = digits.target.reshape(-1, 1)

# flatten the images
N = len(dataset)
dataset = dataset.reshape(N, -1)
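
# NOTE: illustrative sketch, not part of the original script. The data is presumably
# prepared with DataPrep (its API is not shown here); conceptually the labels must be
# one-hot encoded to match the 10-unit softmax output, e.g. with plain NumPy/sklearn
# (the split ratio below is chosen arbitrarily):
import numpy as np
from sklearn.model_selection import train_test_split

def to_onehot(y, n_classes):
    onehot = np.zeros((len(y), n_classes))
    onehot[np.arange(len(y)), y.ravel()] = 1.0
    return onehot

X_train, X_test, y_train, y_test = train_test_split(
    dataset, labels, test_size=0.2, random_state=seed)
y_train_onehot = to_onehot(y_train, n_outputs)   # shape (n_train, 10)
y_test_onehot = to_onehot(y_test, n_outputs)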
def test_fully_connected():
    print("gradient check: FullyConnected")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    y = np.array([1, 4, 6, 3, 2], dtype='int32')
    y_onehot = np.zeros((5, 12)).astype('float32')
    y_onehot[range(0, 5), y] = 1.

    # --- mine ---
    fc1 = layer.FullyConnected(8, 10)
    fc2 = layer.FullyConnected(10, 12)
    relu1 = activation.ReLU()
    softmax = activation.Softmax()
    ce_func = loss.CrossEntropyLoss()
    fc_out1 = fc1(x)
    fc_out1 = relu1(fc_out1)
    fc_out2 = fc2(fc_out1)
    fc_out2 = softmax(fc_out2)
    ce_loss = ce_func(fc_out2, y_onehot)

    # --- torch ---
    weights1 = fc1.weights.get_data()
    bias1 = fc1.bias.get_data()
    weights2 = fc2.weights.get_data()
    bias2 = fc2.bias.get_data()
    torch_fc1 = nn.Linear(8, 10)
    torch_fc2 = nn.Linear(10, 12)
    torch_fc1.weight.data.copy_(torch.Tensor(weights1.T))
    torch_fc1.bias.data.copy_(torch.Tensor(bias1))
    torch_fc2.weight.data.copy_(torch.Tensor(weights2.T))
    torch_fc2.bias.data.copy_(torch.Tensor(bias2))
    torch_relu = nn.ReLU()
    torch_ce_func = nn.CrossEntropyLoss()
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    torch_fc_out = torch_fc1(torch_x)
    torch_fc_out1 = torch_relu(torch_fc_out)
    torch_fc_out2 = torch_fc2(torch_fc_out1)
    torch_ce_loss = torch_ce_func(torch_fc_out2, torch.LongTensor(y))
    print("Value:\ntorch:{}, mine:{}, delta:{}".format(
        torch_ce_loss.item(), ce_loss, (torch_ce_loss.item() - ce_loss)))

    # --- my grad ---
    grad_x = ce_func.backward()
    grad_x = softmax.backward(grad_x)
    grad_fc2 = fc2.backward(grad_x)
    grad_w2 = fc2.weights.get_grad()
    grad_b2 = fc2.bias.get_grad()
    grad_x = relu1.backward(grad_fc2)
    grad_x = fc1.backward(grad_x)
    grad_w1 = fc1.weights.get_grad()
    grad_b1 = fc1.bias.get_grad()

    # --- torch grad ---
    torch_ce_loss.backward()
    torch_grad_x = torch_x.grad.data.numpy()
    torch_grad_w1 = torch_fc1.weight.grad.data.numpy()
    torch_grad_b1 = torch_fc1.bias.grad.data.numpy()
    torch_grad_w2 = torch_fc2.weight.grad.data.numpy()
    torch_grad_b2 = torch_fc2.bias.grad.data.numpy()

    print("--- grad x ---")
    print(grad_x - torch_grad_x)
    print("--- grad w1 ---")
    print(grad_w1 - torch_grad_w1.T)
    print("--- grad b1 ---")
    print(grad_b1 - torch_grad_b1)
    print("--- grad w2 ---")
    print(grad_w2 - torch_grad_w2.T)
    print("--- grad b2 ---")
    print(grad_b2 - torch_grad_b2)
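
# NOTE: illustrative driver, not part of the original file. Assumes the usual imports
# at the top of this test script (numpy as np, torch, torch.nn as nn,
# sklearn.metrics.log_loss, and the local layer/activation/loss modules).
if __name__ == '__main__':
    np.random.seed(0)      # fix the seeds so reruns produce identical deltas
    torch.manual_seed(0)
    test_softmax()
    test_cross_entropy_loss()
    test_fully_connected()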
    # (tail of __register_unary_math_op__(op_name, act))
    op = wrap_name_default(op_name)(op)
    op.__doc__ = type(act).__doc__
    globals()[op_name] = op
    __all__.append(op_name)


__register_unary_math_op__('exp', act.Exp())
__register_unary_math_op__('log', act.Log())
__register_unary_math_op__('abs', act.Abs())
__register_unary_math_op__('sigmoid', act.Sigmoid())
__register_unary_math_op__('tanh', act.Tanh())
__register_unary_math_op__('square', act.Square())
__register_unary_math_op__('relu', act.Relu())
__register_unary_math_op__('sqrt', act.Sqrt())
__register_unary_math_op__('reciprocal', act.Reciprocal())
__register_unary_math_op__('softmax', act.Softmax())


def __add__(layeroutput, other):
    if is_compatible_with(other, float):
        return layer.slope_intercept(input=layeroutput, intercept=other)
    if not isinstance(other, Layer):
        raise TypeError("Layer can only be added with"
                        " another Layer or a number")
    if layeroutput.size == other.size:
        return layer.mixed(input=[
            layer.identity_projection(input=layeroutput),
            layer.identity_projection(input=other)
        ])
    if other.size != 1 and layeroutput.size != 1:
        raise TypeError("Two Layer can be added only if they have equal size")