def forward_propagation(X, parameters):
    '''
    Forward propagation for a three-layer network:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
    '''
    # Unpack the parameters
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    # Forward propagation
    Z1 = np.dot(W1, X) + b1
    A1 = AF.relu(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = AF.relu(Z2)
    Z3 = np.dot(W3, A2) + b3
    A3 = AF.sigmoid(Z3)

    cache = (A1, Z1, W1, b1, A2, Z2, W2, b2, A3, Z3, W3, b3)

    return A3, cache
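# A minimal usage sketch for forward_propagation, assuming parameters follow the
# W{l}/b{l} layout unpacked above and that the AF module provides elementwise relu
# and sigmoid; the layer sizes and data below are hypothetical.
import numpy as np

rng = np.random.default_rng(0)
layer_sizes = [4, 5, 3, 1]          # hypothetical: input dim 4, two hidden layers, one output unit
parameters = {}
for l in range(1, len(layer_sizes)):
    parameters['W' + str(l)] = rng.standard_normal((layer_sizes[l], layer_sizes[l - 1])) * 0.01
    parameters['b' + str(l)] = np.zeros((layer_sizes[l], 1))

X_demo = rng.standard_normal((4, 10))    # 10 examples stored as columns, matching np.dot(W1, X)
A3, cache = forward_propagation(X_demo, parameters)
print(A3.shape)                          # expected: (1, 10)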
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == 'relu':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    elif activation == 'sigmoid':
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
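# linear_activation_forward relies on linear_forward, relu and sigmoid helpers defined
# elsewhere in the project; if they follow the usual convention of returning the result
# together with a cache for the backward pass, they could look like this sketch.
import numpy as np

def linear_forward(A_prev, W, b):
    Z = np.dot(W, A_prev) + b
    cache = (A_prev, W, b)           # kept to compute dA_prev, dW, db later
    return Z, cache

def relu(Z):
    A = np.maximum(0, Z)
    cache = Z                        # kept to zero the gradient where Z <= 0
    return A, cache

def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache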
def convolution_layer0(self):
    # First convolution layer: slide each 3x3 filter over the input (valid padding,
    # stride 1), add the filter's bias and apply ReLU, giving one feature map per filter.
    for i in range(len(self.conv0_filter)):
        feature_map = np.zeros([self.input.shape[0] - 2, self.input.shape[1] - 2])
        for y in range(feature_map.shape[0]):
            for x in range(feature_map.shape[1]):
                conv_sum = np.sum(self.input[y:y + 3, x:x + 3] * self.conv0_filter[i])
                feature_map[y][x] = af.relu(conv_sum + self.conv0_bweight[i])
        self.conv0_feature_map.append(feature_map)
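# A standalone sketch of the same valid 3x3 convolution, useful for checking a single
# filter outside the class; the function name is hypothetical and plain np.maximum is
# used in place of af.relu.
import numpy as np

def conv2d_valid(image, kernel, bias=0.0):
    # valid padding, stride 1: an H x W input gives an (H - 2) x (W - 2) output
    h, w = image.shape[0] - 2, image.shape[1] - 2
    out = np.zeros((h, w))
    for y in range(h):
        for x in range(w):
            out[y, x] = np.sum(image[y:y + 3, x:x + 3] * kernel) + bias
    return np.maximum(out, 0)        # ReLU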
def predict(self, X):
    lin = linear()
    act = relu()
    classifier = softmax()

    # Forward pass through every layer, then softmax to get class probabilities
    A = X
    for l in range(len(self.layer_dims) - 1):
        Z = lin.forward(A, self.W[l], self.b[l])
        A = act.forward(Z)
    Y_hat = classifier.forward(A)
    return Y_hat
def convolution_layer1(self):
    # Second convolution layer: each output feature map is the sum of per-channel 3x3
    # convolutions over the pooled maps from layer 0, plus a bias, passed through ReLU.
    for i in range(len(self.conv1_filter)):
        v = []
        for j in range(len(self.conv1_filter[i])):
            v.append(np.zeros([self.pool0_out[j].shape[0] - 2, self.pool0_out[j].shape[1] - 2]))
            for y in range(v[j].shape[0]):
                for x in range(v[j].shape[1]):
                    v[j][y][x] = np.sum(self.pool0_out[j][y:y + 3, x:x + 3] * self.conv1_filter[i][j])
        feature_map = af.relu(sum(v) + self.conv1_bweight[i])
        self.conv1_feature_map.append(feature_map)
def forward_propagation_dropout(X, parameters, keep_prob=0.5):
    '''
    Forward propagation with (inverted) dropout added; no dropout is applied to the output layer.
    '''
    # Unpack the parameters
    np.random.seed(1)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    # Forward propagation
    Z1 = np.dot(W1, X) + b1
    A1 = AF.relu(Z1)
    # Apply dropout
    D1 = np.random.rand(A1.shape[0], A1.shape[1])  # step 1: initialize a matrix with the same shape as A1
    D1 = D1 < keep_prob                            # step 2: convert the entries of D1 to 0 or 1
    A1 = A1 * D1                                   # step 3: drop some units of A1 by setting their activations to 0
    A1 = A1 / keep_prob                            # step 4: scale up the values of the units that were kept

    Z2 = np.dot(W2, A1) + b2
    A2 = AF.relu(Z2)
    # Apply dropout
    D2 = np.random.rand(A2.shape[0], A2.shape[1])  # step 1: initialize a matrix with the same shape as A2
    D2 = D2 < keep_prob                            # step 2: convert the entries of D2 to 0 or 1
    A2 = A2 * D2                                   # step 3: drop some units of A2 by setting their activations to 0
    A2 = A2 / keep_prob                            # step 4: scale up the values of the units that were kept

    Z3 = np.dot(W3, A2) + b3
    A3 = AF.sigmoid(Z3)

    cache = (A1, D1, Z1, W1, b1, A2, D2, Z2, W2, b2, A3, Z3, W3, b3)

    return A3, cache
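# A small standalone illustration of the inverted-dropout steps 1-4 above on a
# hypothetical activation matrix; dividing by keep_prob keeps the expected activation
# unchanged, which is why no extra scaling is needed at test time.
import numpy as np

np.random.seed(1)
A = np.ones((3, 5))
keep_prob = 0.5
D = np.random.rand(A.shape[0], A.shape[1]) < keep_prob   # steps 1-2: random keep/drop mask
A_drop = (A * D) / keep_prob                              # steps 3-4: drop and rescale
print(A_drop.mean())    # equals A.mean() = 1.0 in expectation over the random mask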
print("---------------------------------------------") print("Unique fruits: ", len(paths)) print("Total fruits: ", tot_data) print("Image resolution: %ix%i" % (im_shape, im_shape)) print("Max data set to: ", max_data) print("Total runs: ", 20) print("Total fruit per run: ", lim_data) print("---------------------------------------------") ce = CE() acc_score = accuracy() scaler = StandardScaler() if color_scale: layer1 = DenseLayer(im_shape * im_shape, 3000, relu(), Glorot=True) else: layer1 = DenseLayer(im_shape * im_shape * 3, 3000, relu(), Glorot=True) layer2 = DenseLayer(3000, 1000, relu(), Glorot=True) layer3 = DenseLayer(1000, 200, relu(), Glorot=True) layer4 = DenseLayer(200, 10, relu(), Glorot=True) layer5 = DenseLayer(10, num_fruits, softmax()) layers = [layer1, layer2, layer3, layer4, layer5] network = NN(layers, ce) for i in trange(20): print("Run: ", i + 1) data = extract_data(paths, true_labels, lim_data=lim_data,
def train(self, X, Y, iteration, learning_rate, lambd=0, keep_prob=1,
          interrupt_threshold=0.1, print_loss=True):
    # import function
    init = initialization(self.layer_dims)
    lin = linear()
    act = relu()
    drop = dropout(keep_prob)
    classifier = softmax()
    regulator = l2(lambd)
    optimizer = adam(learning_rate)

    # initialization
    counter = 0
    train_X, validation_X, train_Y, validation_Y = train_test_split(
        X, Y, test_size=0.2, shuffle=True)
    self.W, self.b = init.he()

    # iteration
    for i in range(iteration):
        m = Y.shape[0]

        # forward: LINEAR -> RELU -> DROPOUT for every layer, caching Z, A and the dropout mask
        A = train_X
        cache = [[None, A, None]]
        for l in range(len(self.layer_dims) - 1):
            Z = lin.forward(A, self.W[l], self.b[l])
            A = act.forward(Z)
            A, D = drop.forward(A)
            cache.append([Z, A, D])

        # loss: cross-entropy plus L2 regularization
        prob = classifier.forward(A)
        loss_tmp1 = classifier.loss(train_Y, prob)
        loss_tmp2 = regulator.loss(self.W, m)
        loss_tmp = loss_tmp1 + loss_tmp2
        self.loss.append(loss_tmp)

        # validation accuracy
        pred = self.predict(validation_X)
        pred = one_hot_vector.decoder(pred)
        acc_tmp = np.mean(validation_Y == pred)
        self.accuracy.append(acc_tmp)

        # print
        if print_loss and i % 1000 == 0:
            print("Iteration %i, Loss: %f, Accuracy: %.f%%"
                  % (i, loss_tmp, acc_tmp * 100))
        if loss_tmp <= interrupt_threshold:
            print("Iteration %i, Loss: %f <= Threshold: %f"
                  % (i, loss_tmp, interrupt_threshold))
            return

        # backward: go all the way down to the first layer so W[0], b[0] are updated as well
        dA = classifier.backward(train_Y, prob)
        for l in range(len(self.layer_dims) - 1, 0, -1):
            dA = drop.backward(dA, cache[l][2])
            dZ = act.backward(dA, cache[l][0])
            dA, dW, db = lin.backward(dZ, cache[l - 1][1], self.W[l - 1],
                                      self.b[l - 1], m)
            dW += regulator.backward(self.W[l - 1], m)

            # update
            counter += 1
            self.W[l - 1], self.b[l - 1] = optimizer.update(
                [self.W[l - 1], self.b[l - 1]], [dW, db], counter)
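# A hedged usage sketch of train()/predict(); the class name, constructor signature and
# all dimensions and hyperparameters below are hypothetical, and labels are assumed to be
# one-hot encoded as the loss computation and one_hot_vector.decoder usage above suggest.
model = DeepNN(layer_dims=[784, 128, 64, 10])   # hypothetical constructor; the real one may differ
model.train(X, Y_one_hot, iteration=10000, learning_rate=0.001,
            lambd=0.01, keep_prob=0.8, interrupt_threshold=0.05)
pred_labels = one_hot_vector.decoder(model.predict(X_test))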
def test_relu(self):
    self.assertEqual(af.relu(-1), 0)
    self.assertEqual(af.relu(0), 0)
    self.assertEqual(af.relu(1), 1)
    self.assertEqual(af.relu(5), 5)
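# A possible companion test, assuming af.relu is vectorized over NumPy arrays (as the
# convolution and dense layers above require) and that the test module imports numpy as np.
def test_relu_array(self):
    z = np.array([-2.0, 0.0, 3.5])
    np.testing.assert_array_equal(af.relu(z), np.array([0.0, 0.0, 3.5]))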
heatmap.set_xlabel(r"$\eta$")
heatmap.set_ylabel(r"$\lambda$")
heatmap.invert_yaxis()
heatmap.set_title("MSE on Franke dataset with Keras")
fig = heatmap.get_figure()
plt.show()
fig.savefig("../figures/Franke_Keras.pdf",
            bbox_inches='tight',
            pad_inches=0.1)

if franke_relu == True:
    MSE_relu = np.zeros((len(etas), len(epochs)))  # 2 so we can use plot_acc.py

    for i, eta in enumerate(etas):
        # Setting up network
        layer1 = DenseLayer(2, n_neurons_layer1, relu())
        layer2 = DenseLayer(n_neurons_layer1, n_neurons_layer2, relu())
        layer3 = DenseLayer(n_neurons_layer2, 1, identity())
        layers = [layer1, layer2, layer3]
        network = NN(layers)

        k = 0
        for j in range(epochs[-1] + 1):
            random.shuffle(ind)
            x_train_shuffle = x_train[ind]
            y_train_shuffle = y_train[ind]
            network.SGD(mse, 100, x_train_shuffle, y_train_shuffle, eta,
                        penalty=0)