import pickle

import numpy as np

import neuralnet


def sanity_layers(data):
    """Check the forward and backward pass of every activation function."""
    # Set the seed to reproduce results.
    np.random.seed(42)

    # Pseudo-input.
    random_input = np.random.randn(1, 50)

    # Get the activations.
    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')
    act_leakyReLU = neuralnet.Activation('leakyReLU')

    # Get the outputs of the forward pass.
    out_sigmoid = act_sigmoid(random_input)
    out_tanh = act_tanh(random_input)
    out_ReLU = act_ReLU(random_input)
    out_leakyReLU = act_leakyReLU(random_input)

    # Compute the errors against the benchmark outputs.
    err_sigmoid = np.sum(np.abs(data['out_sigmoid'] - out_sigmoid))
    err_tanh = np.sum(np.abs(data['out_tanh'] - out_tanh))
    err_ReLU = np.sum(np.abs(data['out_ReLU'] - out_ReLU))
    err_leakyReLU = np.sum(np.abs(data['out_leakyReLU'] - out_leakyReLU))

    # Check the errors.
    check_error(err_sigmoid, "Sigmoid Forward Pass")
    check_error(err_tanh, "Tanh Forward Pass")
    check_error(err_ReLU, "ReLU Forward Pass")
    check_error(err_leakyReLU, "leakyReLU Forward Pass")
    print(20 * "-", "\n")

    # Compute the gradients of the backward pass.
    grad_sigmoid = act_sigmoid.backward(1.0, 1)
    grad_tanh = act_tanh.backward(1.0, 1)
    grad_ReLU = act_ReLU.backward(1.0, 1)
    grad_leakyReLU = act_leakyReLU.backward(1.0, 1)

    # Compute the errors against the benchmark gradients.
    err_sigmoid_grad = np.sum(np.abs(data['grad_sigmoid'] - grad_sigmoid))
    err_tanh_grad = np.sum(np.abs(data['grad_tanh'] - grad_tanh))
    err_ReLU_grad = np.sum(np.abs(data['grad_ReLU'] - grad_ReLU))
    err_leakyReLU_grad = np.sum(
        np.abs(data['grad_leakyReLU'] - grad_leakyReLU))

    # Check the errors.
    check_error(err_sigmoid_grad, "Sigmoid Gradient")
    check_error(err_tanh_grad, "Tanh Gradient")
    check_error(err_ReLU_grad, "ReLU Gradient")
    check_error(err_leakyReLU_grad, "leakyReLU Gradient")
    print(20 * "-", "\n")
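# NOTE: check_error is called throughout this file but is not defined in this
# section. A minimal sketch consistent with how it is used here; the tolerance
# of 1e-6 and the output format are assumptions, not the original helper.
def check_error(error, name, tolerance=1e-6):
    """Print whether an accumulated absolute error is within tolerance."""
    status = "PASSED" if error < tolerance else "FAILED"
    print("{}: {} (error = {:.2e})".format(name, status, error))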
def main():
    # make_pickle()
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'),
                                 encoding='latin1')

    config = {}
    # First element is the input size, last element is the output size, and
    # every element in between is the number of neurons in a hidden layer.
    config['layer_specs'] = [784, 100, 100, 10]
    # Activation for the hidden layers: 'sigmoid', 'tanh', or 'ReLU'.
    config['activation'] = 'sigmoid'
    # Number of training samples per batch passed to the network.
    config['batch_size'] = 1000
    # Number of epochs to train the model.
    config['epochs'] = 50
    # Whether to use early stopping.
    config['early_stop'] = True
    # Number of consecutive epochs of increasing validation loss that counts
    # as overfitting.
    config['early_stop_epoch'] = 5
    # L2 regularization constant.
    config['L2_penalty'] = 0
    # Whether momentum is to be applied.
    config['momentum'] = False
    # The constant 'gamma' in the momentum expression.
    config['momentum_gamma'] = 0.9

    np.random.seed(42)
    x = np.random.randn(1, 100)

    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')

    # Check the forward passes against the benchmark outputs.
    out_sigmoid = act_sigmoid.forward_pass(x)
    err_sigmoid = np.sum(np.abs(benchmark_data['out_sigmoid'] - out_sigmoid))
    check_error(err_sigmoid, "Sigmoid Forward Pass")

    out_tanh = act_tanh.forward_pass(x)
    err_tanh = np.sum(np.abs(benchmark_data['out_tanh'] - out_tanh))
    check_error(err_tanh, "Tanh Forward Pass")

    out_ReLU = act_ReLU.forward_pass(x)
    err_ReLU = np.sum(np.abs(benchmark_data['out_ReLU'] - out_ReLU))
    check_error(err_ReLU, "ReLU Forward Pass")
    print("**************")

    # Check the backward passes against the benchmark gradients.
    grad_sigmoid = act_sigmoid.backward_pass(1.0)
    err_sigmoid_grad = np.sum(
        np.abs(benchmark_data['grad_sigmoid'] - grad_sigmoid))
    check_error(err_sigmoid_grad, "Sigmoid Gradient")

    grad_tanh = act_tanh.backward_pass(1.0)
    err_tanh_grad = np.sum(np.abs(benchmark_data['grad_tanh'] - grad_tanh))
    check_error(err_tanh_grad, "Tanh Gradient")

    grad_ReLU = act_ReLU.backward_pass(1.0)
    err_ReLU_grad = np.sum(np.abs(benchmark_data['grad_ReLU'] - grad_ReLU))
    check_error(err_ReLU_grad, "ReLU Gradient")

    # Run one forward/backward pass through the full network and compare every
    # layer's input, parameters, and parameter gradients to the benchmark.
    np.random.seed(42)
    x_image = np.random.randn(1, 784)
    nnet = neuralnet.Neuralnetwork(config)
    nnet.forward_pass(x_image,
                      targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    nnet.backward_pass()

    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            benchmark_layer = benchmark_data['nnet'].layers[layer_idx]
            error_x = np.sum(np.abs(benchmark_layer.x - layer.x))
            error_w = np.sum(np.abs(benchmark_layer.w - layer.w))
            error_b = np.sum(np.abs(benchmark_layer.b - layer.b))
            error_d_w = np.sum(np.abs(benchmark_layer.d_w - layer.d_w))
            error_d_b = np.sum(np.abs(benchmark_layer.d_b - layer.d_b))
            check_error(error_x, "Layer{} Input".format(layer_no))
            check_error(error_w, "Layer{} Weights".format(layer_no))
            check_error(error_b, "Layer{} Biases".format(layer_no))
            check_error(error_d_w, "Layer{} Weight Gradient".format(layer_no))
            check_error(error_d_b, "Layer{} Bias Gradient".format(layer_no))
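# NOTE: make_pickle() is referenced above but not defined in this section. A
# sketch of how validate_data.pkl could be regenerated with the keys that
# main() reads; this is an assumption about the original helper, not its
# actual implementation. The seeds, inputs, and config must match main().
def make_pickle():
    data = {}
    np.random.seed(42)
    x = np.random.randn(1, 100)
    for name in ('sigmoid', 'tanh', 'ReLU'):
        act = neuralnet.Activation(name)
        data['out_' + name] = act.forward_pass(x)
        data['grad_' + name] = act.backward_pass(1.0)
    config = {'layer_specs': [784, 100, 100, 10], 'activation': 'sigmoid',
              'batch_size': 1000, 'epochs': 50, 'early_stop': True,
              'early_stop_epoch': 5, 'L2_penalty': 0, 'momentum': False,
              'momentum_gamma': 0.9}
    np.random.seed(42)
    x_image = np.random.randn(1, 784)
    nnet = neuralnet.Neuralnetwork(config)
    nnet.forward_pass(x_image,
                      targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    nnet.backward_pass()
    data['nnet'] = nnet
    with open('validate_data.pkl', 'wb') as f:
        pickle.dump(data, f)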
def grad_check():
    """Numerically verify one gradient via a central-difference approximation.

    Renamed from main() to avoid redefining the sanity-check entry point above.
    """
    # make_pickle()
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'),
                                 encoding='latin1')

    config = {}
    # First element is the input size, last element is the output size, and
    # the element in between is the number of neurons in the hidden layer.
    config['layer_specs'] = [784, 50, 10]
    # Activation for the hidden layers: 'sigmoid', 'tanh', or 'ReLU'.
    # Note: the manual forward pass below applies the sigmoid directly, so
    # this setting does not affect the computation in this function.
    config['activation'] = 'tanh'

    input_size = config['layer_specs'][0]
    hidden_size = config['layer_specs'][1]
    output_size = config['layer_specs'][2]

    np.random.seed(42)
    w1 = np.random.randn(input_size, hidden_size)  # weight matrix 1
    b1 = np.random.randn(1, 1).dot(
        np.ones((1, hidden_size)).astype(np.float32))  # bias vector 1
    w2 = np.random.randn(hidden_size, output_size)  # weight matrix 2
    b2 = np.random.randn(1, 1).dot(
        np.ones((1, output_size)).astype(np.float32))  # bias vector 2

    X_test, y_test = neuralnet.load_data('MNIST_test.pkl')  # test data
    y_test_onehot = [[1 if i == y else 0 for i in range(10)] for y in y_test]

    act = neuralnet.Activation('tanh')
    epsilon = 0.01  # perturbation size
    x_index, y_index = 0, 3
    # op selects the parameter to check:
    # 0: input-to-hidden weight; 1: hidden-to-output weight;
    # 2: hidden-layer bias; 3: output-layer bias.
    op = 0
    ops = [[epsilon, 0, 0, 0], [0, epsilon, 0, 0], [0, 0, epsilon, 0],
           [0, 0, 0, epsilon]]

    # Increase the chosen parameter by epsilon and compute the loss.
    w1[x_index][y_index] += ops[op][0]
    w2[x_index][y_index] += ops[op][1]
    b1 += ops[op][2]
    b2 += ops[op][3]
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)
    loss1 = loss_func(output_o, y_test_onehot[0])

    # Decrease the parameter by 2 * epsilon (i.e. epsilon below its original
    # value) and compute the loss again.
    w1[x_index][y_index] -= 2 * ops[op][0]
    w2[x_index][y_index] -= 2 * ops[op][1]
    b1 -= 2 * ops[op][2]
    b2 -= 2 * ops[op][3]
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)
    loss2 = loss_func(output_o, y_test_onehot[0])

    # Central-difference estimate of the gradient.
    diff1 = (loss1 - loss2) / (2 * epsilon)

    # Restore the parameter and rerun the forward pass so the analytic
    # gradient below is evaluated at the original weights.
    w1[x_index][y_index] += ops[op][0]
    w2[x_index][y_index] += ops[op][1]
    b1 += ops[op][2]
    b2 += ops[op][3]
    input_h = np.dot(X_test[0], w1) + b1
    output_h = act.sigmoid(input_h)
    input_o = np.dot(output_h, w2) + b2
    output_o = act.sigmoid(input_o)

    if op == 0:
        # Analytic gradient of an input-to-hidden weight.
        ans = 0
        for l in range(10):
            ans += (w2[y_index][l] * (output_o[0][l] - y_test_onehot[0][l]) *
                    output_o[0][l] * (1 - output_o[0][l]))
        ans *= (output_h[0][y_index] * (1 - output_h[0][y_index]) *
                X_test[0][x_index])
        diff2 = ans
    elif op == 1:
        # Analytic gradient of a hidden-to-output weight.
        diff2 = ((output_o[0][y_index] - y_test_onehot[0][y_index]) *
                 output_o[0][y_index] * (1 - output_o[0][y_index]) *
                 output_h[0][x_index])
    elif op == 2:
        # Analytic gradient of the hidden-layer bias.
        diff2 = 0
        for i in range(50):
            ans = 0
            for l in range(10):
                ans += (w2[i][l] * (output_o[0][l] - y_test_onehot[0][l]) *
                        output_o[0][l] * (1 - output_o[0][l]))
            ans *= output_h[0][i] * (1 - output_h[0][i])
            diff2 += ans
    else:
        # Analytic gradient of the output-layer bias.
        diff2 = 0
        for i in range(10):
            diff2 += ((output_o[0][i] - y_test_onehot[0][i]) *
                      output_o[0][i] * (1 - output_o[0][i]))

    print(abs(diff1), abs(diff2))
    print("difference :", abs(diff1 - diff2))
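# The check above is a central-difference approximation: for a scalar
# parameter w, dL/dw is approximately (L(w + eps) - L(w - eps)) / (2 * eps),
# with an O(eps^2) truncation error. A minimal standalone sketch of the same
# idea; loss_fn is a hypothetical zero-argument callable that evaluates the
# loss with the current parameters.
def numerical_gradient(loss_fn, w, i, j, epsilon=1e-2):
    """Estimate the gradient of loss_fn with respect to w[i][j]."""
    w[i][j] += epsilon
    loss_plus = loss_fn()
    w[i][j] -= 2 * epsilon
    loss_minus = loss_fn()
    w[i][j] += epsilon  # restore the original value
    return (loss_plus - loss_minus) / (2 * epsilon)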