def softTargets(self, T, mnist):
    """Compute and save soft targets of the restored model for each temperature.

    Opens a session via ``Model.Session()``, calls the parent class's
    ``restore`` on it, prints ``self.accuracy`` on the test set, and then,
    for every temperature in ``T``, evaluates ``self.y_soft_target`` over
    the training images in batches. The per-batch outputs are stacked
    row-wise and written to ``soft-targets-<t>.npy`` in the current
    working directory.

    Args:
        T: Iterable of temperatures. Each value is formatted with ``%d``
            in the log line and the file name, so a non-integer value is
            truncated there (the value fed to the graph is not).
        mnist: Dataset object exposing ``train.images``, ``test.images``
            and ``test.labels``.
    """
    batch_size = 50
    train_images = mnist.train.images
    n_samples = len(train_images)

    with Model.Session() as sess:
        super().restore(sess)

        print("Accuracy on the test set")
        print(
            sess.run(self.accuracy, feed_dict={
                self.x: mnist.test.images,
                self.y_: mnist.test.labels,
                self.keep_prob: 1.0
            }))

        for t in T:
            print("Generating soft targets at T = %d" % t)
            # Step through every sample (including a trailing partial
            # batch) so the result has one row per training image.
            batches = [
                sess.run(self.y_soft_target, feed_dict={
                    self.x: train_images[start:start + batch_size],
                    self.keep_prob: 1.0,
                    self.temp: t
                })
                for start in range(0, n_samples, batch_size)
            ]
            # Row-wise stacking derives the shape from the data instead of
            # relying on a hard-coded (55000, 10).
            soft_targets = np.concatenate(batches, axis=0)
            np.save("soft-targets-%d.npy" % t, soft_targets)
def train(self, mnist):
    """Train on the hard labels and record per-epoch metrics.

    Initializes all global variables, then for each of 50 epochs shuffles
    the training set, runs ``self.train_step`` on consecutive batches of
    50 samples, and evaluates on a freshly shuffled 250-sample training
    subset and on the full test set. The three metrics are printed and
    appended to CSV through the parent class's ``append_to_csv``.

    Args:
        mnist: Dataset object exposing ``train.images``, ``train.labels``,
            ``test.images`` and ``test.labels``.

    Returns:
        Tuple ``(losses, accs, test_accs)`` of per-epoch lists: training
        loss, training accuracy and test accuracy.
    """
    print("Student2::train")
    n_epochs = 50
    batch_size = 50
    n_batches = len(mnist.train.images) // batch_size

    losses = []
    accs = []
    test_accs = []

    with Model.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(n_epochs):
            x_shuffle, y_shuffle = shuffle(
                mnist.train.images, mnist.train.labels)
            print("Starting training opoch %d" % epoch)

            for i in range(n_batches):
                start = i * batch_size
                end = start + batch_size
                sess.run(self.train_step, feed_dict={
                    self.x: x_shuffle[start:end],
                    self.y_: y_shuffle[start:end]
                })

            # Re-shuffle so the training metrics come from a random
            # 250-sample subset rather than the first batches just seen.
            x_shuffle, y_shuffle = shuffle(
                mnist.train.images, mnist.train.labels)
            batch_x, batch_y = x_shuffle[0:250], y_shuffle[0:250]

            # Fetch loss and accuracy together: one forward pass over the
            # same feed instead of two.
            train_loss, train_accuracy = sess.run(
                [self.cross_entropy, self.accuracy],
                feed_dict={
                    self.x: batch_x,
                    self.y_: batch_y
                })
            test_accuracy = sess.run(self.accuracy, feed_dict={
                self.x: mnist.test.images,
                self.y_: mnist.test.labels
            })

            print(
                "Epoch : %i, Loss : %f, Accuracy: %f, Test accuracy: %f" %
                (epoch + 1, train_loss, train_accuracy, test_accuracy))

            losses.append(train_loss)
            accs.append(train_accuracy)
            test_accs.append(test_accuracy)

            # The CSV rows are keyed by the 0-based epoch index, while the
            # printed line above is 1-based.
            super().append_to_csv("train_loss", epoch, train_loss)
            super().append_to_csv("train_accuracy", epoch, train_accuracy)
            super().append_to_csv("test_accuracy", epoch, test_accuracy)

    return (losses, accs, test_accs)
def test(self, mnist):
    """Evaluate the restored model and build its confusion matrix.

    Opens a session via ``Model.Session()``, calls the parent class's
    ``restore`` on it, prints ``self.accuracy`` on the full test set, and
    then walks the test set in batches of 50 to count
    (prediction, label) pairs.

    Args:
        mnist: Dataset object exposing ``test.images`` and ``test.labels``.

    Returns:
        A 10x10 float ndarray ``C`` where ``C[p][a]`` is the number of
        test samples whose argmax prediction is ``p`` and whose argmax
        label is ``a`` (rows index predictions, columns index labels).
    """
    batch_size = 50
    test_images = mnist.test.images
    test_labels = mnist.test.labels

    C = np.zeros([10, 10])

    prediction = tf.argmax(self.y_conv, 1)
    correct_answer = tf.argmax(self.y_, 1)

    with Model.Session() as sess:
        super().restore(sess)

        print("Accuracy on the test set")
        print(
            sess.run(self.accuracy, feed_dict={
                self.x: test_images,
                self.y_: test_labels,
                self.keep_prob: 1.0
            }))

        print("Generating confusion matrix for %s" % self.name)
        # Step by sample offset so a trailing partial batch is counted too.
        for start in range(0, len(test_images), batch_size):
            end = start + batch_size
            # One run fetches both tensors; the feed is identical for each.
            predict, answer = sess.run(
                [prediction, correct_answer],
                feed_dict={
                    self.x: test_images[start:end],
                    self.y_: test_labels[start:end],
                    self.keep_prob: 1.0
                })
            # Unbuffered add: repeated (prediction, label) pairs within a
            # batch each contribute 1, unlike plain fancy-index ``+=``.
            np.add.at(C, (predict, answer), 1)

    return C
def distillate(self, mnist, soft_targets, TEMP):
    """Train the model against precomputed soft targets at temperature TEMP.

    Builds a cross-entropy loss between the supplied soft targets and
    ``Model.softmax_with_temperature(self.y_conv, temp=T)``, minimizes it
    with Adam at ``self.learning_rate`` for 50 epochs in batches of 50,
    and after each epoch evaluates on a freshly shuffled 1000-sample
    training subset and on the full test set. Metrics are printed and
    appended to CSV via the parent class's ``append_to_csv``; the parent
    class's ``save`` is called on the session when training finishes.

    Only the soft-target loss is optimized. The hard labels are fed
    alongside it but contribute only to ``self.accuracy``.

    Args:
        mnist: Dataset object exposing ``train.images``, ``train.labels``,
            ``test.images`` and ``test.labels``.
        soft_targets: Array of soft targets, shuffled jointly with the
            training images and labels and fed to a ``[None, 10]``
            placeholder in batches.
        TEMP: Temperature fed to the softmax. It is formatted with ``%d``
            in the CSV names, so a non-integer value is truncated there.

    Returns:
        List ``[losses, accs, test_accs]`` of per-epoch lists: soft-target
        training loss, training accuracy and test accuracy.
    """
    n_epochs = 50
    batch_size = 50
    n_batches = len(mnist.train.images) // batch_size

    soft_target_ = tf.placeholder(tf.float32, shape=[None, 10])
    T = tf.placeholder(tf.float32)

    # Student output softened at temperature T, matched against the
    # supplied soft targets.
    y_soft_target = Model.softmax_with_temperature(self.y_conv, temp=T)
    loss = tf.reduce_mean(-tf.reduce_sum(
        soft_target_ * tf.log(y_soft_target), reduction_indices=[1]))

    train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)

    losses = []
    accs = []
    test_accs = []

    with Model.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(n_epochs):
            # Shuffle all three arrays together so each image keeps its
            # label and soft target.
            x_shuffle, y_shuffle, soft_targets_shuffle = shuffle(
                mnist.train.images, mnist.train.labels, soft_targets)

            for i in range(n_batches):
                start = i * batch_size
                end = start + batch_size
                sess.run(train_step, feed_dict={
                    self.x: x_shuffle[start:end],
                    self.y_: y_shuffle[start:end],
                    soft_target_: soft_targets_shuffle[start:end],
                    T: TEMP
                })

            # Evaluate on a random 1000-sample subset of the training set.
            x_shuffle, y_shuffle, soft_targets_shuffle = shuffle(
                mnist.train.images, mnist.train.labels, soft_targets)
            batch_x = x_shuffle[0:1000]
            batch_y = y_shuffle[0:1000]
            batch_soft_targets = soft_targets_shuffle[0:1000]

            # Fetch loss and accuracy in a single run over the same batch.
            train_loss, train_accuracy = sess.run(
                [loss, self.accuracy],
                feed_dict={
                    self.x: batch_x,
                    self.y_: batch_y,
                    soft_target_: batch_soft_targets,
                    T: TEMP
                })
            test_accuracy = sess.run(self.accuracy, feed_dict={
                self.x: mnist.test.images,
                self.y_: mnist.test.labels
            })

            print(
                "Distillation: Epoch : %i, Loss : %f, Accuracy: %f, Test accuracy: %f"
                % (epoch + 1, train_loss, train_accuracy, test_accuracy))

            losses.append(train_loss)
            accs.append(train_accuracy)
            test_accs.append(test_accuracy)

            super().append_to_csv("distillation_%d_train_loss" % TEMP,
                                  epoch, train_loss)
            super().append_to_csv("distillation_%d_train_accuracy" % TEMP,
                                  epoch, train_accuracy)
            super().append_to_csv("distillation_%d_test_accuracy" % TEMP,
                                  epoch, test_accuracy)

        # NOTE(review): the original indentation was lost; saving once
        # after the final epoch is assumed here - confirm against history.
        super().save(sess)

    return [losses, accs, test_accs]