def main_mlp():
    """Train a multi-layer perceptron on reduced MNIST and evaluate it.

    Saves the learned weights to 'mlp.mat'.

    Returns:
        (confusion, accuracy): column-normalized 10x10 confusion matrix
        (rows = predicted digit, columns = true digit) and overall accuracy.
    """
    # Data is pre-split; images are stored one flattened example per column.
    train_data = sio.loadmat('./ReducedMNIST/mnist_train.mat')
    test_data = sio.loadmat('./ReducedMNIST/mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)
    w1, b1, w2, b2 = train_mlp(mini_batch_x, mini_batch_y)
    sio.savemat('mlp.mat', mdict={'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        sample = im_test[:, [idx]]
        # Forward pass: fc -> relu -> fc, then take the highest-scoring class.
        hidden = relu(fc(sample, w1, b1))
        scores = fc(hidden, w2, b2)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Normalize each column so it sums to one (distribution over predictions
    # for each true class).
    confusion /= confusion.sum(axis=0)
    return confusion, accuracy
def main_mlp():
    """Train a multi-layer perceptron on MNIST and plot its confusion matrix.

    Saves the learned weights to 'mlp.mat' and displays the resulting
    confusion-matrix figure (no return value).
    """
    train_data = sio.loadmat('./mnist_train.mat')
    test_data = sio.loadmat('./mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)
    w1, b1, w2, b2 = train_mlp(mini_batch_x, mini_batch_y)
    sio.savemat('mlp.mat', mdict={'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        # Forward pass: fc -> relu -> fc, then take the highest-scoring class.
        hidden = relu(fc(im_test[:, [idx]], w1, b1))
        scores = fc(hidden, w2, b2)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Column-normalize: each column becomes the distribution over predictions
    # for one true digit class.
    confusion /= confusion.sum(axis=0)
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes,
                               'Multi-layer Perceptron Confusion Matrix')
def main_slp(retrain_tag):
    """Evaluate a single-layer perceptron on MNIST.

    Args:
        retrain_tag: when truthy, train the SLP from scratch and save the
            weights to 'slp.mat'; otherwise load previously saved weights.

    Displays the resulting confusion-matrix figure (no return value).
    """
    train_data = sio.loadmat('./mnist_train.mat')
    test_data = sio.loadmat('./mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0
    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)

    if retrain_tag:
        w, b = train_slp(mini_batch_x, mini_batch_y)
        sio.savemat('slp.mat', mdict={'w': w, 'b': b})
    else:
        saved = sio.loadmat('slp.mat')
        w, b = saved['w'], saved['b']

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        scores = fc(im_test[:, [idx]], w, b)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Column-normalize: each column becomes the distribution over predictions
    # for one true digit class.
    confusion /= confusion.sum(axis=0)
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes,
                               'Single-layer Perceptron Confusion Matrix')
def main_slp_linear():
    """Train a linear single-layer perceptron on reduced MNIST.

    Saves the learned weights to 'slp_linear.mat'.

    Returns:
        (confusion, accuracy): column-normalized 10x10 confusion matrix
        (rows = predicted digit, columns = true digit) and overall accuracy.
    """
    train_data = sio.loadmat('./ReducedMNIST/mnist_train.mat')
    test_data = sio.loadmat('./ReducedMNIST/mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)
    w, b = train_slp_linear(mini_batch_x, mini_batch_y)
    sio.savemat('slp_linear.mat', mdict={'w': w, 'b': b})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        scores = fc(im_test[:, [idx]], w, b)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Normalize each column so it sums to one (distribution over predictions
    # for each true class).
    confusion /= confusion.sum(axis=0)
    return confusion, accuracy
def main_cnn():
    """Train the CNN on MNIST and plot its test confusion matrix.

    Pipeline per test image: conv -> relu -> 2x2 max-pool -> flatten -> fc.
    Saves the trained weights to 'cnn.mat' (no return value).
    """
    train_data = sio.loadmat('./mnist_train.mat')
    test_data = sio.loadmat('./mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = cnn.get_mini_batch(im_train, label_train, 32)
    w_conv, b_conv, w_fc, b_fc = cnn.train_cnn(mini_batch_x, mini_batch_y)
    # To skip retraining, these weights can instead be restored from
    # 'cnn.mat' with sio.loadmat.
    sio.savemat('cnn.mat', mdict={'w_conv': w_conv, 'b_conv': b_conv,
                                  'w_fc': w_fc, 'b_fc': b_fc})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        # Rebuild the 14x14 single-channel image from its flattened column.
        image = im_test[:, [idx]].reshape((14, 14, 1), order='F')
        activ = cnn.relu(cnn.conv(image, w_conv, b_conv))   # (14, 14, 3)
        pooled, _ = cnn.pool2x2(activ)                      # (7, 7, 3); max indices unused
        scores = cnn.fc(cnn.flattening(pooled), w_fc, b_fc)  # (10, 1)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Column-normalize: each column becomes the distribution over predictions
    # for one true digit class.
    confusion /= confusion.sum(axis=0)
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes,
                               'CNN Confusion Matrix')
def main_cnn(retrain_tag):
    """Evaluate the CNN on MNIST, retraining it if requested.

    Args:
        retrain_tag: when truthy, train the CNN from scratch and save the
            weights to 'cnn.mat'; otherwise load previously saved weights.

    Pipeline per test image: conv -> relu -> 2x2 max-pool -> flatten -> fc.
    Displays the resulting confusion-matrix figure (no return value).
    """
    train_data = sio.loadmat('./mnist_train.mat')
    test_data = sio.loadmat('./mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0
    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)

    if retrain_tag:
        w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batch_x, mini_batch_y)
        sio.savemat('cnn.mat', mdict={'w_conv': w_conv, 'b_conv': b_conv,
                                      'w_fc': w_fc, 'b_fc': b_fc})
    else:
        saved = sio.loadmat('cnn.mat')
        w_conv, b_conv = saved['w_conv'], saved['b_conv']
        w_fc, b_fc = saved['w_fc'], saved['b_fc']

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        # Rebuild the 14x14 single-channel image from its flattened column.
        image = im_test[:, [idx]].reshape((14, 14, 1), order='F')
        activ = relu(conv(image, w_conv, b_conv))       # (14, 14, 3)
        pooled = pool2x2(activ)                         # (7, 7, 3)
        scores = fc(flattening(pooled), w_fc, b_fc)     # (10, 1)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Column-normalize: each column becomes the distribution over predictions
    # for one true digit class.
    confusion /= confusion.sum(axis=0)
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes,
                               'CNN Confusion Matrix')
def vs_multilayer(input_batch, name, middle_layer_dim=1000, reuse=False, test=False):
    """Two-layer scoring head: fc+relu -> dropout -> fc(output_dim=4).

    Args:
        input_batch: input tensor fed to the first fully-connected layer.
        name: variable-scope name that owns the layer weights.
        middle_layer_dim: width of the hidden layer.
        reuse: when True, reuse existing variables in the scope (e.g. when
            building the test network after the training network).
        test: when True, disable dropout (keep probability 1.0); otherwise
            apply dropout with keep probability 0.5.

    Returns:
        Output tensor of the second fully-connected layer (4 units).
    """
    with tf.variable_scope(name):
        # Idiomatic truthiness check; the original `reuse == True` with a
        # dead `else: pass` branch did the same thing.
        if reuse:
            tf.get_variable_scope().reuse_variables()
        layer1 = fc_relu('layer1', input_batch, output_dim=middle_layer_dim)
        # Dropout is a no-op at test time (keep probability 1.0).
        layer1 = drop(layer1, 1 if test else 0.5)
        outputs = fc('layer2', layer1, output_dim=4)
    return outputs
def main_cnn():
    """Train the CNN on reduced MNIST and evaluate it.

    Pipeline per test image: conv -> relu -> 2x2 max-pool -> flatten -> fc.
    Saves the trained weights to 'cnn.mat'.

    Returns:
        (confusion, accuracy): column-normalized 10x10 confusion matrix
        (rows = predicted digit, columns = true digit) and overall accuracy.
    """
    train_data = sio.loadmat('./ReducedMNIST/mnist_train.mat')
    test_data = sio.loadmat('./ReducedMNIST/mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batch_x, mini_batch_y = get_mini_batch(im_train, label_train, 32)
    w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batch_x, mini_batch_y)
    # To skip retraining, these weights can instead be restored from
    # 'cnn.mat' with sio.loadmat.
    sio.savemat('cnn.mat', mdict={'w_conv': w_conv, 'b_conv': b_conv,
                                  'w_fc': w_fc, 'b_fc': b_fc})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        # Rebuild the 14x14 single-channel image from its flattened column.
        image = im_test[:, [idx]].reshape((14, 14, 1), order='F')
        activ = relu(conv(image, w_conv, b_conv))       # (14, 14, 3)
        pooled = pool2x2(activ)                         # (7, 7, 3)
        scores = fc(flattening(pooled), w_fc, b_fc)     # (10, 1)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Normalize each column so it sums to one (distribution over predictions
    # for each true class).
    confusion /= confusion.sum(axis=0)
    return confusion, accuracy
def main_slp_linear():
    """Train a linear single-layer perceptron on MNIST and plot results.

    Saves the learned weights to 'slp_linear.mat' and displays the
    resulting confusion-matrix figure (no return value).
    """
    train_data = sio.loadmat('./mnist_train.mat')
    test_data = sio.loadmat('./mnist_test.mat')
    im_train, label_train = train_data['im_train'], train_data['label_train']
    im_test, label_test = test_data['im_test'], test_data['label_test']

    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0

    mini_batches_x, mini_batches_y = get_mini_batch(im_train, label_train, 32)
    w, b = train_slp_linear(mini_batches_x, mini_batches_y)
    sio.savemat('slp_linear.mat', mdict={'w': w, 'b': b})

    num_test = im_test.shape[1]
    hits = 0
    confusion = np.zeros((10, 10))
    for idx in range(num_test):
        scores = fc(im_test[:, [idx]], w, b)
        guess = np.argmax(scores)
        truth = label_test[0, idx]
        confusion[guess, truth] += 1
        hits += int(guess == truth)
    accuracy = hits / num_test

    # Column-normalize: each column becomes the distribution over predictions
    # for one true digit class.
    confusion /= confusion.sum(axis=0)
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(
        confusion, accuracy, label_classes,
        'Single-layer Linear Perceptron Confusion Matrix')
def main_cnn():
    """Train the CNN on MNIST, save its weights, and plot the test
    confusion matrix.

    Pipeline per test image: conv -> relu -> 2x2 max-pool -> flatten -> fc.
    Side effects: writes 'cnn.mat', prints a progress counter and the final
    accuracy, and displays a confusion-matrix figure.
    """
    mnist_train = sio.loadmat('./mnist_train.mat')
    mnist_test = sio.loadmat('./mnist_test.mat')
    # Images are one flattened example per column; labels are a 1xN row.
    im_train, label_train = mnist_train['im_train'], mnist_train['label_train']
    im_test, label_test = mnist_test['im_test'], mnist_test['label_test']
    batch_size = 32
    # Scale raw pixel intensities from [0, 255] into [0, 1].
    im_train, im_test = im_train / 255.0, im_test / 255.0
    # --- debugging snippets kept for reference: visualize pool2x2 /
    # pool2x2_backward and sanity-check conv / conv_backward on tiny arrays ---
    # plt.imshow(mnist_train['im_train'][:, 0].reshape((14, 14), order='F'), cmap='gray')
    # plt.show()
    # x = im_train[:, 0].reshape((14, 14, 1), order='F')
    # y = pool2x2(x)
    # dl_dy = np.random.rand(7, 7, 1)
    # dl_dx = pool2x2_backward(dl_dy, x, y)
    # plt.imshow(x[:, :, 0], cmap='gray')
    # plt.show()
    # plt.imshow(y[:, :, 0], cmap='gray')
    # plt.show()
    # plt.imshow(dl_dy[:, :, 0], cmap='gray')
    # plt.show()
    # plt.imshow(dl_dx[:, :, 0], cmap='gray')
    # plt.show()
    # x = np.arange(25).reshape((5, 5, 1))
    # w_conv = np.arange(27).reshape((3, 3, 1, 3))
    # b_conv = np.arange(3).reshape((3, 1))
    # y = conv(x, w_conv, b_conv)
    # dl_dy = np.random.random((5, 5, 3))
    # dl_dw, dl_db = conv_backward(dl_dy, x, w_conv, b_conv, y)
    # print(x)
    # print(w_conv)
    # print(b_conv)
    # print(y)
    # print(dl_dw.shape)
    # print(dl_db)
    # exit(-1)
    mini_batches_x, mini_batches_y = get_mini_batch(im_train, label_train,
                                                    batch_size)
    w_conv, b_conv, w_fc, b_fc = train_cnn(mini_batches_x, mini_batches_y
                                           # , im_test, label_test
                                           )
    sio.savemat('cnn.mat',
                mdict={
                    'w_conv': w_conv,
                    'b_conv': b_conv,
                    'w_fc': w_fc,
                    'b_fc': b_fc
                })
    # could use following two lines to replace above two lines if only want to check results
    # data = sio.loadmat('cnn.mat')
    # w_conv, b_conv, w_fc, b_fc = data['w_conv'], data['b_conv'], data['w_fc'], data['b_fc']
    acc = 0
    confusion = np.zeros((10, 10))
    num_test = im_test.shape[1]
    for i in range(num_test):
        # '\r' with end='' overwrites the same console line as a progress bar.
        print('Test # {}/{}: \r'.format(i + 1, num_test), end='')
        # Rebuild the 14x14 single-channel image from its flattened column.
        x = im_test[:, [i]].reshape((14, 14, 1), order='F')
        pred1 = conv(x, w_conv, b_conv)  # (14, 14, 3)
        pred2 = relu(pred1)  # (14, 14, 3)
        pred3 = pool2x2(pred2)  # (7, 7, 3)
        pred4 = flattening(pred3)  # (147, 1)
        y = fc(pred4, w_fc, b_fc)  # (10, 1)
        l_pred = np.argmax(y)
        # Rows index the predicted digit, columns the true digit.
        confusion[l_pred, label_test[0, i]] = confusion[l_pred, label_test[0, i]] + 1
        if l_pred == label_test[0, i]:
            acc = acc + 1
    accuracy = acc / num_test
    print(accuracy)
    # Normalize each column so it sums to one (per-true-class distribution).
    for i in range(10):
        confusion[:, i] = confusion[:, i] / np.sum(confusion[:, i])
    label_classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    visualize_confusion_matrix(confusion, accuracy, label_classes,
                               'CNN Confusion Matrix')
def visual_semantic_infer(self, visual_feature_train, sentence_embed_train,
                          visual_feature_test, sentence_embed_test,
                          softmax_train, softmax_test, VP_embed_train,
                          VP_embed_test):
    """Build the train and test scoring graphs of the CTRL model.

    Both graphs share weights: after the training branch is built,
    ``reuse_variables()`` lets the test branch reuse the same fc layers.
    Each branch projects clip/sentence features and softmax/VP features
    into common embedding spaces, combines each pair cross-modally,
    concatenates the two combined tensors, and scores them with
    ``mpu.vs_multilayer`` (3 outputs per clip-sentence pair).

    Returns:
        (sim_score_mat_train, sim_score_mat_test): training scores reshaped
        to [batch_size, batch_size, 3]; test scores reshaped to [3]
        (NOTE(review): this assumes the test branch scores exactly one
        clip-sentence pair — confirm test_batch_size semantics).
    """
    name = "CTRL_Model"
    with tf.variable_scope(name):
        print "Building training network...............................\n"
        # Project visual clip features into the joint semantic space and
        # L2-normalize along the feature axis.
        transformed_clip_train = fc('v2s_lt',
                                    visual_feature_train,
                                    output_dim=self.semantic_size)
        transformed_clip_train_norm = tf.nn.l2_normalize(
            transformed_clip_train, axis=1)
        # Same projection for sentence embeddings.
        transformed_sentence_train = fc('s2s_lt',
                                        sentence_embed_train,
                                        output_dim=self.semantic_size)
        transformed_sentence_train_norm = tf.nn.l2_normalize(
            transformed_sentence_train, axis=1)
        # Cross-modal combination of every clip with every sentence.
        cross_modal_vis_sent_train = self.cross_modal_comb(
            transformed_clip_train_norm, transformed_sentence_train_norm,
            self.batch_size, self.semantic_size)
        # the action semantic: project action softmax scores and verb-phrase
        # embeddings into a second (action) semantic space.
        transformed_softmax_train = fc(
            'soft2s_lt', softmax_train,
            output_dim=self.action_semantic_size)
        transformed_softmax_train_norm = tf.nn.l2_normalize(
            transformed_softmax_train, axis=1)
        transformed_VP_train = fc('VP2s_lt',
                                  VP_embed_train,
                                  output_dim=self.action_semantic_size)
        transformed_VP_train_norm = tf.nn.l2_normalize(
            transformed_VP_train, axis=1)
        cross_modal_action_train = self.cross_modal_comb(
            transformed_softmax_train_norm, transformed_VP_train_norm,
            self.batch_size, self.action_semantic_size)
        # may not need normalization
        cross_modal_vis_sent_train = tf.nn.l2_normalize(
            cross_modal_vis_sent_train, axis=3)
        cross_modal_action_train = tf.nn.l2_normalize(
            cross_modal_action_train, axis=3)
        # concatenate two cross-modal tensors along the feature axis.
        cross_modal_train = tf.concat(
            [cross_modal_vis_sent_train, cross_modal_action_train], axis=3)
        # Score each pair; 3 outputs per pair (alignment + regression terms,
        # presumably — confirm against vs_multilayer's output_dim usage).
        sim_score_mat_train = mpu.vs_multilayer(cross_modal_train,
                                                "vs_multilayer_lt",
                                                middle_layer_dim=1000)
        sim_score_mat_train = tf.reshape(
            sim_score_mat_train, [self.batch_size, self.batch_size, 3])
        # Share all weights above with the test branch built below.
        tf.get_variable_scope().reuse_variables()
        print "Building test network...............................\n"
        # Test branch mirrors the training branch with test inputs and
        # test_batch_size.
        transformed_clip_test = fc('v2s_lt',
                                   visual_feature_test,
                                   output_dim=self.semantic_size)
        transformed_clip_test_norm = tf.nn.l2_normalize(
            transformed_clip_test, axis=1)
        transformed_sentence_test = fc('s2s_lt',
                                       sentence_embed_test,
                                       output_dim=self.semantic_size)
        transformed_sentence_test_norm = tf.nn.l2_normalize(
            transformed_sentence_test, axis=1)
        cross_modal_vis_sent_test = self.cross_modal_comb(
            transformed_clip_test_norm, transformed_sentence_test_norm,
            self.test_batch_size, self.semantic_size)
        # the action semantic
        transformed_softmax_test = fc('soft2s_lt',
                                      softmax_test,
                                      output_dim=self.action_semantic_size)
        transformed_softmax_test_norm = tf.nn.l2_normalize(
            transformed_softmax_test, axis=1)
        transformed_VP_test = fc('VP2s_lt',
                                 VP_embed_test,
                                 output_dim=self.action_semantic_size)
        transformed_VP_test_norm = tf.nn.l2_normalize(transformed_VP_test,
                                                      axis=1)
        cross_modal_action_test = self.cross_modal_comb(
            transformed_softmax_test_norm, transformed_VP_test_norm,
            self.test_batch_size, self.action_semantic_size)
        # may not need normalization
        cross_modal_vis_sent_test = tf.nn.l2_normalize(
            cross_modal_vis_sent_test, axis=3)
        cross_modal_action_test = tf.nn.l2_normalize(
            cross_modal_action_test, axis=3)
        # concatenate two
        cross_modal_test = tf.concat(
            [cross_modal_vis_sent_test, cross_modal_action_test], axis=3)
        sim_score_mat_test = mpu.vs_multilayer(cross_modal_test,
                                               "vs_multilayer_lt",
                                               reuse=True,
                                               middle_layer_dim=1000)
        sim_score_mat_test = tf.reshape(sim_score_mat_test, [3])
        return sim_score_mat_train, sim_score_mat_test
# First conv layer num_filters = 2 depth = img.shape[-1] stride = 2.0 l1_filter = numpy.random.rand(num_filters, 3, 3, img.shape[-1]) print("\n**Working with conv layer 1**") l1_feature_map = cnn.conv(img, l1_filter) print("\n**ReLU**") l1_feature_map_relu = cnn.relu(l1_feature_map) print("\n**Pooling**") l1_feature_map_relu_pool = cnn.pooling(l1_feature_map_relu, 2, 2) print("\n**Fully connected**") l1_fc1_weights = numpy.random.rand(10, numpy.prod(l1_feature_map.shape)) l1_feature_map_fc = cnn.fc(l1_feature_map, l1_fc1_weights) print("**End of conv layer 1**\n") # Graphing results fig0, ax0 = matplotlib.pyplot.subplots(nrows=1, ncols=1) ax0.imshow(img).set_cmap("gray") ax0.set_title("Input Image") ax0.get_xaxis().set_ticks([]) ax0.get_yaxis().set_ticks([]) matplotlib.pyplot.savefig("in_img.png", bbox_inches="tight") matplotlib.pyplot.show() matplotlib.pyplot.close(fig0) # Layer 1 fig1, ax1 = matplotlib.pyplot.subplots(nrows=3, ncols=2) ax1[0, 0].imshow(l1_feature_map[:, :, 0]).set_cmap("gray")