def predict(network, x):
    """Forward pass through a three-layer network: sigmoid hidden layers, softmax output."""
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = activation_function.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = activation_function.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    return activation_function.softmax(a3)
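# The activation_function module is not shown in this excerpt. Below is a
# minimal sketch of what predict() assumes it provides; the repository's
# actual implementation may differ in detail.
import numpy as np

def sigmoid(x):
    # element-wise logistic function, squashing activations into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))

def softmax(x):
    # subtract the max before exponentiating so large scores do not
    # overflow; the shift cancels out in the normalisation
    c = np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(x - c)
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)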
def epoch(eta=0.04, penalty=0.4, epochs=200, mini_batch_size=100,
          t0=5, t1=50, create_conf=False):
    layer1 = DenseLayer(features, 100, sigmoid())
    #layer2 = DenseLayer(100, 50, sigmoid())
    #layer3 = DenseLayer(100, 50, sigmoid())
    layer4 = DenseLayer(100, 10, softmax())
    layers = [layer1, layer4]
    network = NN(layers)
    cost_array = np.zeros((epochs, 2))

    def learning_schedule(t):
        return 0.04  #t0/(t+t1)

    for i in range(epochs):
        # reshuffle the training data before each epoch
        random.shuffle(batch)
        X_train_shuffle = X_train[batch]
        one_hot_shuffle = one_hot[batch]
        Y_train_shuffle = Y_train[batch]
        #eta = learning_schedule(i)
        network.SGD(ce, mini_batch_size, X_train_shuffle, one_hot_shuffle,
                    eta, penalty)
        Y_pred = np.argmax(network.feedforward(X_test), axis=1)
        Y_pred_train = np.argmax(network.feedforward(X_train_shuffle), axis=1)
        cost_array[i, 0] = accuracy()(Y_test.ravel(), Y_pred)
        cost_array[i, 1] = accuracy()(Y_train_shuffle.ravel(), Y_pred_train)

    print("accuracy on train data = %.3f" % cost_array[-1, 1])
    print("accuracy on test data = %.3f" % cost_array[-1, 0])

    if create_conf:  # create a confusion matrix of the final predictions
        numbers = np.arange(0, 10)
        conf_matrix = confusion_matrix(Y_pred, Y_test, normalize="true")
        heatmap = sb.heatmap(conf_matrix, cmap="viridis",
                             xticklabels=["%d" % i for i in numbers],
                             yticklabels=["%d" % i for i in numbers],
                             cbar_kws={'label': 'Accuracy'},
                             fmt=".2", edgecolor="none", annot=True)
        heatmap.set_xlabel("pred")
        heatmap.set_ylabel("true")
        heatmap.set_title(r"FFNN prediction accuracy with $\lambda$ = {:.1e} $\eta$ = {:.1e}"
                          .format(penalty, eta))
        fig = heatmap.get_figure()
        fig.savefig("../figures/MNIST_confusion_net.pdf",
                    bbox_inches='tight', pad_inches=0.1, dpi=1200)
        plt.show()

    return cost_array[-1]
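# The script treats accuracy as a callable class: accuracy()(y_true, y_pred).
# A minimal sketch consistent with that usage, though not necessarily the
# project's own definition:
import numpy as np

class accuracy:
    def __call__(self, y_true, y_pred):
        # fraction of predictions matching the true integer labels
        return np.mean(y_true == y_pred)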
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
def predict(network, x):
    W1, W2, W3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, W1) + b1
    z1 = af.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = af.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = af.softmax(a3)

    return y
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    # Two hidden layers; each layer computes a matrix product
    # (the data is treated as a 2-D array) plus a bias.
    a1 = np.dot(x, W1) + b1
    # The sigmoid activation function decides each node's activation.
    z1 = sigmoid(a1)
    # Repeat the same computation in the next layer.
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    # At the output, softmax converts the score of each node (digits 0-9)
    # into a probability relative to 1.0. The node with the largest raw
    # score already has the highest probability, and the exponentials are
    # comparatively expensive, so softmax is often skipped at inference
    # time and applied only during training.
    y = softmax(a3)

    return y
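# The comment above relies on softmax being monotonic: the argmax of the raw
# scores equals the argmax of the probabilities. A quick sketch verifying
# that claim on an example score vector:
import numpy as np

a3 = np.array([0.3, 2.9, 4.0])
probs = np.exp(a3 - np.max(a3))
probs /= probs.sum()
# same winning class whether or not softmax is applied
assert np.argmax(a3) == np.argmax(probs)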
print("Total runs: ", 20) print("Total fruit per run: ", lim_data) print("---------------------------------------------") ce = CE() acc_score = accuracy() scaler = StandardScaler() if color_scale: layer1 = DenseLayer(im_shape * im_shape, 3000, relu(), Glorot=True) else: layer1 = DenseLayer(im_shape * im_shape * 3, 3000, relu(), Glorot=True) layer2 = DenseLayer(3000, 1000, relu(), Glorot=True) layer3 = DenseLayer(1000, 200, relu(), Glorot=True) layer4 = DenseLayer(200, 10, relu(), Glorot=True) layer5 = DenseLayer(10, num_fruits, softmax()) layers = [layer1, layer2, layer3, layer4, layer5] network = NN(layers, ce) for i in trange(20): print("Run: ", i + 1) data = extract_data(paths, true_labels, lim_data=lim_data, from_data=i * lim_data) data.reshape(im_shape) # making all data the same shape if color_scale: data.gray() data.flatten()
def forward(self, x, t):
    self.t = t
    self.y = softmax(x)
    self.loss = cross_entropy_error(self.y, self.t)

    return self.loss
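# cross_entropy_error is not defined in this excerpt. A common batch-aware
# definition consistent with the usage above, assuming one-hot targets:
import numpy as np

def cross_entropy_error(y, t):
    # reshape a single sample into a batch of one
    if y.ndim == 1:
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)
    batch_size = y.shape[0]
    # the small constant 1e-7 guards against log(0) when a predicted
    # probability is exactly zero
    return -np.sum(t * np.log(y + 1e-7)) / batch_size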
import numpy as np
import activation_function


def layer(X, W1, B1):
    A1 = np.dot(X, W1) + B1
    return activation_function.sigmoid(A1)


def matrix_test():
    X = np.array([1.0, 0.5])
    W1 = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    B1 = np.array([0.1, 0.2, 0.3])
    W2 = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    B2 = np.array([0.1, 0.2])

    Z1 = layer(X, W1, B1)
    Z2 = layer(Z1, W2, B2)
    print(Z2)


if __name__ == "__main__":
    a = activation_function.softmax(np.array([1010, 1000, 990]))
    b = activation_function.softmax(np.array([0.3, 2.9, 7.0]))
    print(a)
    print(b)
    print(np.sum(b))
    print(np.sum(a))
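# Note: the first softmax call above is a deliberate stress test. A naive
# exp(1010) overflows float64 (exp returns inf and the result becomes nan),
# so this only works if softmax subtracts the maximum element before
# exponentiating. Softmax is invariant under that shift, since the constant
# cancels in the ratio of exponentials; here the output is roughly
# [1.0, 4.5e-5, 2.1e-9], still summing to one.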
def test_softmax(self):
    a = np.array([0.3, 2.9, 4.0])
    np.testing.assert_allclose(af.softmax(a),
                               [0.01821127, 0.24519181, 0.73659691],
                               rtol=1e-6, atol=0)
def loss(self, x, t):
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy_error(y, t)

    return loss
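# Pairing softmax with cross-entropy also pays off during training: for
# one-hot targets, the gradient of the loss with respect to the pre-softmax
# scores simplifies to (y - t) / batch_size. A sketch of a backward pass
# built on that identity, assuming self.y and self.t were stored by a
# forward call like the one in the SoftmaxWithLoss layer above:
def backward(self, dout=1):
    # gradient of cross-entropy-with-softmax w.r.t. the input scores,
    # averaged over the batch (one-hot targets assumed)
    batch_size = self.t.shape[0]
    return dout * (self.y - self.t) / batch_size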
import numpy as np
from activation_function import softmax

a = np.array([0.3, 2.9, 4.0])
y = softmax(a)
print(y)          # [0.01821127 0.24519181 0.73659691]
print(np.sum(y))  # 1.0 -- softmax outputs always sum to one
# Make a neural network with zero hidden layers
cost_func = CE()  # using cross-entropy as cost function
epochs = 200
mini_batch_size = 100
m = X_train.shape[0]
ind = np.arange(0, X_train.shape[0])

cost_array = np.zeros((len(eta), len(penalty)))
cost_best = 0.5
start = time.time()

for i in range(len(eta)):
    for j in range(len(penalty)):
        output_layer = DenseLayer(features, 10, softmax())
        layers = [output_layer]
        log_net = NN(layers)

        for k in range(epochs):  # looping over epochs
            random.shuffle(ind)
            X_train = X_train[ind]
            one_hot = one_hot[ind]
            for l in range(0, m, mini_batch_size):
                log_net.backprop2layer(cost_func,
                                       X_train[l:l + mini_batch_size],
                                       one_hot[l:l + mini_batch_size],
                                       eta[i], penalty[j])

        Y_pred = np.argmax(log_net.feedforward(X_test), axis=1)
        cost_array[i][j] = accuracy()(Y_test, Y_pred)
        if cost_array[i][j] > cost_best:
            # assumed completion: record the new best accuracy
            # (the original snippet is truncated at this point)
            cost_best = cost_array[i][j]
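# A natural follow-up to this grid search (not part of the original snippet)
# is to visualise cost_array as a heatmap over eta and penalty, in the same
# seaborn style as the confusion-matrix plot earlier. A sketch, assuming
# eta and penalty are 1-D arrays of the values searched:
import seaborn as sb
import matplotlib.pyplot as plt

heatmap = sb.heatmap(cost_array, cmap="viridis", annot=True, fmt=".2f",
                     xticklabels=["%.0e" % p for p in penalty],
                     yticklabels=["%.0e" % e for e in eta],
                     cbar_kws={'label': 'Accuracy'})
heatmap.set_xlabel(r"$\lambda$")
heatmap.set_ylabel(r"$\eta$")
plt.show()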