# compute test values for visualization
acc_tst, loss_tst = sess.run([accuracy, cross_entropy],
                             feed_dict={X: mnist.test.images,
                                        Y_: mnist.test.labels})
print("#{} Trn acc={} , Trn loss={} Tst acc={} , Tst loss={}".format(
    i, acc_trn, loss_trn, acc_tst, loss_tst))

train_losses.append(loss_trn)
train_acc.append(acc_trn)
test_losses.append(loss_tst)
test_acc.append(acc_tst)

# the backpropagation training step
sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})

title = "MNIST 2.1 5 layers relu adam"
vis.losses_accuracies_plots(train_losses, train_acc, test_losses, test_acc,
                            title, DISPLAY_STEP)

# Results
# mnist_single_layer_nn.py       acc     = 0.9237
# mnist__layer_nn.py             TST acc = 0.9534
# mnist__layer_nn_relu_adam.py   TST acc = 0.9771

# sample output for 5k iterations
#0    Trn acc=0.10000000149011612 , Trn loss=229.3443603515625  Tst acc=0.11999999731779099 , Tst loss=230.12518310546875
#100  Trn acc=0.9300000071525574 , Trn loss=30.25579071044922   Tst acc=0.8877000212669373 , Tst loss=35.22196578979492
#200  Trn acc=0.8799999952316284 , Trn loss=33.183040618896484  Tst acc=0.9417999982833862 , Tst loss=19.18865966796875
#300  Trn acc=0.9399999976158142 , Trn loss=21.5306396484375    Tst acc=0.9406999945640564 , Tst loss=19.576183319091797
# ...
#4800 Trn acc=0.949999988079071 , Trn loss=16.546607971191406   Tst acc=0.9739999771118164 , Tst loss=10.48233699798584
#4900 Trn acc=1.0 , Trn loss=0.8173556327819824                 Tst acc=0.9768000245094299 , Tst loss=11.440749168395996
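# ---------------------------------------------------------------------------
# The `vis` module is not shown in these excerpts. Below is a hypothetical,
# minimal sketch of what a losses_accuracies_plots helper with the signature
# used above might look like, assuming matplotlib; the repository's actual
# implementation may differ.
# ---------------------------------------------------------------------------
import matplotlib.pyplot as plt


def losses_accuracies_plots(train_losses, train_acc, test_losses, test_acc,
                            title, display_step):
    # metrics were recorded every `display_step` iterations
    steps = [i * display_step for i in range(len(train_losses))]

    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
    fig.suptitle(title)

    ax_loss.plot(steps, train_losses, label='train loss')
    ax_loss.plot(steps, test_losses, label='test loss')
    ax_loss.set_xlabel('iteration')
    ax_loss.set_ylabel('loss')
    ax_loss.legend()

    ax_acc.plot(steps, train_acc, label='train accuracy')
    ax_acc.plot(steps, test_acc, label='test accuracy')
    ax_acc.set_xlabel('iteration')
    ax_acc.set_ylabel('accuracy')
    ax_acc.legend()

    plt.show()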
import numpy as np
import tensorflow as tf


def cnn_model(learning_rate=0.001, n_epochs=50, batch_size=100, drop_out=0.75):
    # Architecture:
    # input layer   - X[batch, 28, 28]
    # 1 conv. layer - W1[5, 5, 1, C1] + b1[C1]  (SAME padding: pad = 2 for a 5x5 filter)
    #                 Y1[batch, 28, 28, C1]
    # 2 conv. layer - W2[3, 3, C1, C2] + b2[C2]
    # 2.1 max pooling, 2x2 filter, stride 2 - downsamples the input by 2: 28x28 -> 14x14
    #                 Y2[batch, 14, 14, C2]
    # 3 conv. layer - W3[3, 3, C2, C3] + b3[C3]
    # 3.1 max pooling, 2x2 filter, stride 2 - downsamples the input by 2: 14x14 -> 7x7
    #                 Y3[batch, 7, 7, C3]
    # 4 fully connected layer - W4[7*7*C3, FC4] + b4[FC4]
    #                 Y4[batch, FC4]
    # 5 output layer - W5[FC4, 10] + b5[10]
    #                 logits Y5[batch, 10], one per digit class

    # input
    X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name='y')
    # probability of keeping a node during dropout:
    # 1.0 at test time (no dropout), 0.75 at training time
    pkeep = tf.placeholder(tf.float32)

    # layer sizes; for the conv layers this is the depth
    # (the number of feature detectors)
    C1 = 4
    C2 = 8
    C3 = 16
    FC4 = 256  # fully connected layer

    # convolution stride; padding "SAME" keeps the output at the input's
    # spatial dimensions
    stride = 1
    k = 2  # max-pooling filter size and stride

    # conv 1
    W1 = tf.Variable(tf.truncated_normal((5, 5, 1, C1), stddev=0.01), name='conv_1')
    b1 = tf.Variable(tf.truncated_normal([C1], stddev=0.01))
    Y1 = tf.nn.relu(
        tf.nn.conv2d(X, W1, strides=[1, stride, stride, 1], padding="SAME") + b1)

    # conv 2 + max pooling
    W2 = tf.Variable(tf.truncated_normal((3, 3, C1, C2), stddev=0.01), name='conv_2')
    b2 = tf.Variable(tf.truncated_normal([C2], stddev=0.01))
    Y2 = tf.nn.relu(
        tf.nn.conv2d(Y1, W2, strides=[1, stride, stride, 1], padding="SAME") + b2)
    Y2 = tf.nn.max_pool(Y2, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding="SAME")

    # conv 3 + max pooling
    W3 = tf.Variable(tf.truncated_normal((3, 3, C2, C3), stddev=0.01), name='conv_3')
    b3 = tf.Variable(tf.truncated_normal([C3], stddev=0.01))
    Y3 = tf.nn.relu(
        tf.nn.conv2d(Y2, W3, strides=[1, stride, stride, 1], padding="SAME") + b3)
    Y3 = tf.nn.max_pool(Y3, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding="SAME")

    # fully connected layer, with dropout (pkeep = 1.0 disables it)
    YY = tf.reshape(Y3, shape=[-1, 7 * 7 * C3])
    W4 = tf.Variable(tf.truncated_normal([7 * 7 * C3, FC4], stddev=0.01),
                     name='full_connected')
    b4 = tf.Variable(tf.truncated_normal([FC4], stddev=0.01))
    Y4 = tf.nn.relu(tf.matmul(YY, W4) + b4)
    Y4 = tf.nn.dropout(Y4, pkeep)

    # output layer: raw logits Y5, softmax Y over the 10 classes
    W5 = tf.Variable(tf.truncated_normal([FC4, 10], stddev=0.01))
    b5 = tf.Variable(tf.truncated_normal([10], stddev=0.01))
    Y5 = tf.matmul(Y4, W5) + b5
    Y = tf.nn.softmax(Y5)

    # loss function; sparse_softmax_cross_entropy_with_logits expects the
    # raw logits Y5, not the softmax output Y
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Y5, labels=y)
    loss = tf.reduce_mean(xentropy) * 100

    # optimizer
    # training_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # accuracy
    correct = tf.nn.in_top_k(Y, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # matplotlib visualization
    allweights = tf.concat([
        tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1]),
        tf.reshape(W4, [-1]), tf.reshape(W5, [-1])
    ], 0)
    allbiases = tf.concat([
        tf.reshape(b1, [-1]), tf.reshape(b2, [-1]), tf.reshape(b3, [-1]),
        tf.reshape(b4, [-1]), tf.reshape(b5, [-1])
    ], 0)

    # init
    init = tf.global_variables_initializer()

    train_losses = list()
    train_acc = list()
    test_losses = list()
    test_acc = list()

    saver = tf.train.Saver()

    # run session
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            # train_data, train_label, all_train, all_label, validate_data,
            # validate_label and mnist_test are expected to be defined at
            # module level before cnn_model() is called
            for iteration in range(len(train_data) // batch_size):
                X_batch = np.array(
                    all_train[iteration * batch_size:
                              min((iteration + 1) * batch_size, len(train_data))])
                y_batch = np.array(
                    all_label[iteration * batch_size:
                              min((iteration + 1) * batch_size, len(train_label))])
                sess.run(training_op,
                         feed_dict={
                             X: np.reshape(X_batch, (len(X_batch), 28, 28, 1)),
                             y: y_batch,
                             pkeep: drop_out
                         })

            # per-epoch training metrics on the last batch (dropout disabled)
            acc_trn, loss_trn, w, b = sess.run(
                [accuracy, loss, allweights, allbiases],
                feed_dict={
                    X: np.reshape(X_batch, (len(X_batch), 28, 28, 1)),
                    y: y_batch,
                    pkeep: 1.0
                })
            # per-epoch validation metrics (dropout disabled)
            acc_tst, loss_tst = sess.run(
                [accuracy, loss],
                feed_dict={
                    X: np.reshape(np.array(validate_data),
                                  (len(validate_data), 28, 28, 1)),
                    y: np.array(validate_label),
                    pkeep: 1.0
                })
            print("#{} Trn acc={} , Trn loss={} Tst acc={} , Tst loss={}".format(
                epoch, acc_trn, loss_trn, acc_tst, loss_tst))

            train_losses.append(loss_trn)
            train_acc.append(acc_trn)
            test_losses.append(loss_tst)
            test_acc.append(acc_tst)

        title = ("MNIST_3.0 5 layers 3 conv. epoch={},batch_size={},"
                 "learning_rate={},drop_out={}".format(
                     n_epochs, batch_size, learning_rate, drop_out))
        vis.losses_accuracies_plots(train_losses, train_acc, test_losses,
                                    test_acc, title, n_epochs)

        # predict labels for the test set (dropout disabled)
        predict_output = sess.run(
            Y,
            feed_dict={
                X: np.reshape(np.array(mnist_test),
                              (len(mnist_test), 28, 28, 1)),
                pkeep: 1.0
            })
        return np.argmax(predict_output, axis=1)
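# ---------------------------------------------------------------------------
# Hypothetical usage sketch for cnn_model(). The function reads the
# module-level arrays train_data, train_label, all_train, all_label,
# validate_data, validate_label and mnist_test, so they must be prepared
# first. The loading below assumes the Kaggle MNIST CSV format (one 'label'
# column plus 784 pixel columns); the paths and the 38000-row split are
# illustrative assumptions, not part of the original scripts.
# ---------------------------------------------------------------------------
import pandas as pd

train_csv = pd.read_csv('train.csv')  # hypothetical path
test_csv = pd.read_csv('test.csv')    # hypothetical path

all_label = train_csv['label'].values                       # class indices 0..9
all_train = train_csv.drop('label', axis=1).values / 255.0  # normalize pixels

# hold out the tail of the training set for validation
split = 38000
train_data, train_label = all_train[:split], all_label[:split]
validate_data, validate_label = all_train[split:], all_label[split:]

mnist_test = test_csv.values / 255.0

predictions = cnn_model(learning_rate=0.001, n_epochs=50,
                        batch_size=100, drop_out=0.75)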
        Y_: batch_Y
    })

# compute testing values for visualization
acc_tst, loss_tst = sess.run([accuracy, cross_entropy],
                             feed_dict={
                                 X: mnist.test.images,
                                 Y_: mnist.test.labels
                             })
print("#{} Trn acc={} , Trn loss={} Tst acc={} , Tst loss={}".format(
    i, acc_trn, loss_trn, acc_tst, loss_tst))

train_losses.append(loss_trn)
train_acc.append(acc_trn)
test_losses.append(loss_tst)
test_acc.append(acc_tst)

# the back-propagation training step
sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y})

# calculate the confusion matrix on the test set
conf_mat_heatmap = sess.run(
    tf.confusion_matrix(
        labels=labels,
        predictions=prediction.eval(feed_dict={X: mnist.test.images})))
print(conf_mat_heatmap)

title = "MNIST_Digit recognition in CNN"
vis.losses_accuracies_plots(conf_mat_heatmap, train_losses, train_acc,
                            test_losses, test_acc, title, DISPLAY_STEP)
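# ---------------------------------------------------------------------------
# `prediction` and `labels` are defined earlier in that script and are not
# part of this excerpt. A minimal sketch of how they might be set up,
# assuming `Y` is the network's softmax output and mnist.test.labels is
# one-hot encoded:
# ---------------------------------------------------------------------------
# predicted class index for each test image
prediction = tf.argmax(Y, 1)
# true class index for each test image (one-hot -> class index)
labels = tf.argmax(mnist.test.labels, 1)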