def validate(self, sess):
    if self._acc_ops is None:
        self._acc_ops = self._init_acc_ops()
    if self._acc_orient_ops is None:
        self._acc_orient_ops = self._init_acc_orient_ops()
    sess.run(self._acc_ops.reset)
    sess.run(self._acc_orient_ops.reset)
    num_batches_val = int(self.data.validation.num_examples / self.batch_size_val)
    if self.tf_record_prefix is not None:
        # tmp = sum(1 for _ in tf.python_io.tf_record_iterator(self.data.validation.path))
        # assert(num_batches_val == tmp)
        img, label, label_orient = MNIST.read_and_decode_ops(
            self.data.validation.path,
            one_hot=self.data.validation.one_hot,
            num_orientations=len(self.data.validation.orientations))
        batch_xs_op, batch_ys_op, batch_os_op = tf.train.batch(
            [img, label, label_orient],
            batch_size=self.batch_size_val,
            capacity=2000,
            num_threads=8)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in xrange(num_batches_val):
        if self.tf_record_prefix is None:
            batch_xs, batch_ys = self.data.validation.next_batch(self.batch_size_val, shuffle=False)
        else:
            batch_xs, batch_ys, batch_os = sess.run([batch_xs_op, batch_ys_op, batch_os_op])
        batch_xs_in = batch_xs
        if self.do_augment_rot:
            augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
            # TODO: remove hardcoded init
            rots = rotation_rad(-60, 60, 15)
            num_orients = len(rots)
            orients_dense = np.array([rots.index(o) for o in batch_os2])
            batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
            batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
        else:
            # TODO: remove hardcoded init
            rots = rotation_rad(-60, 60, 15)
            batch_os_one_hot = dense_to_one_hot(
                np.zeros((self.batch_size_val,), dtype=int) + (len(rots) / 2), len(rots))
        _, _, _, _ = sess.run(
            [self._acc_ops.metric, self._acc_ops.update,
             self._acc_orient_ops.metric, self._acc_orient_ops.update,
             # tf.argmax(self.model.p, 1), tf.argmax(self.y_, 1),
             ],
            feed_dict={self.x: batch_xs_in,
                       self.y_: batch_ys,
                       self.orient_: batch_os_one_hot,
                       })
    if self.tf_record_prefix is not None:
        coord.request_stop()
        coord.join(threads)
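# For reference: every snippet in this section relies on a dense_to_one_hot helper.
# The block below is a minimal sketch of the behavior assumed here (it mirrors the
# helper shipped with the TensorFlow MNIST tutorial code): an integer label vector
# becomes a one-hot matrix with one row per example.
import numpy as np

def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

# Example: dense_to_one_hot(np.array([2, 0]), 3) -> [[0, 0, 1], [1, 0, 0]]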
def read_data_sets(validation_size=5000, one_hot=True):
    # `cifar_dir` and `batches` are assumed to be module-level constants
    # (the extracted batch directory and the list of training batch file names).
    cifar_filename = "datasets/" + "cifar-10-python.tar.gz"
    try:
        os.makedirs("datasets")
    except OSError:
        pass
    if not os.path.isfile(cifar_dir + batches[0]):
        # Download data
        print("Downloading CIFAR-10 dataset")
        urllib.urlretrieve(
            "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
            cifar_filename)
        tar = tarfile.open(cifar_filename)
        tar.extractall(path="datasets")
        tar.close()
        os.remove(cifar_filename)

    # Process batches
    all_batch_images = []
    all_batch_labels = []
    for batch_name in batches:
        batch = np.load(cifar_dir + batch_name)
        batch_images = batch['data']
        all_batch_images.append(batch_images)
        batch_labels = batch['labels']
        all_batch_labels.extend(batch_labels)

    all_batch_images = np.vstack(all_batch_images).reshape(-1, 3, 32, 32)
    all_batch_images = all_batch_images.transpose([0, 2, 3, 1])
    all_batch_labels = np.array(all_batch_labels)

    train_images, validation_images, train_labels, validation_labels = train_test_split(
        all_batch_images, all_batch_labels,
        test_size=validation_size, random_state=0)

    test_batch = np.load(cifar_dir + "test_batch")
    test_images = test_batch['data'].reshape(-1, 3, 32, 32)
    test_images = test_images.transpose([0, 2, 3, 1])
    test_labels = np.array(test_batch['labels'])

    if one_hot:
        train_labels = dense_to_one_hot(train_labels, NUM_CLASSES)
        validation_labels = dense_to_one_hot(validation_labels, NUM_CLASSES)
        test_labels = dense_to_one_hot(test_labels, NUM_CLASSES)

    train = DataSet(train_images, train_labels, reshape=False)
    validation = DataSet(validation_images, validation_labels, reshape=False)
    test = DataSet(test_images, test_labels, reshape=False)

    return Datasets(train=train, validation=validation, test=test)
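# Hedged usage sketch for the CIFAR-10 reader above (the 5000-example validation
# split and one-hot labels are simply its defaults); shapes are the expected
# outcome given reshape=False, not verified output.
cifar = read_data_sets(validation_size=5000, one_hot=True)
batch_xs, batch_ys = cifar.train.next_batch(64)
print(batch_xs.shape, batch_ys.shape)  # expected: (64, 32, 32, 3) (64, 10)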
def __init__(self, dirname, one_hot=True):
    self._epochs_completed = 0
    self._index_in_epoch = 0
    self._one_hot = one_hot
    self._num_classes = 10
    self.maybe_download_and_extract(dirname)
    dirname = os.path.join(dirname, 'cifar-10-batches-py/')

    images = []
    labels = []
    for i in range(1, 6):
        fpath = os.path.join(dirname, 'data_batch_' + str(i))
        image, label = self.load_batch(fpath)
        if i == 1:
            images = np.array(image)
            labels = np.array(label)
        else:
            images = np.concatenate([images, image], axis=0)
            labels = np.concatenate([labels, label], axis=0)

    images = np.dstack(
        (images[:, :1024], images[:, 1024:2048], images[:, 2048:]))
    images = np.reshape(images, [-1, 32, 32, 3])

    if self._one_hot:
        labels = dense_to_one_hot(labels, self._num_classes)

    print 'Cifar images size:', images.shape
    print 'Cifar labels size:', labels.shape

    self._images = images / 255.0 - 0.5
    self._labels = labels
    self._num_examples = images.shape[0]
def validate(self, sess):
    if self._acc_ops is None:
        self._acc_ops = self._init_acc_ops()
    if self._acc_orient_ops is None:
        self._acc_orient_ops = self._init_acc_orient_ops()
    sess.run(self._acc_ops.reset)
    sess.run(self._acc_orient_ops.reset)
    num_batches_val = int(self.data.validation.num_examples / self.batch_size_val)
    for _ in xrange(num_batches_val):
        if self.tf_record_prefix is None:
            batch_xs, batch_ys = self.data.validation.next_batch(
                self.batch_size_val, shuffle=False)
        else:
            batch_xs, batch_ys, batch_os = sess.run(
                [batch_xs_op, batch_ys_op, batch_os_op])
        batch_xs_in = batch_xs
        if self.do_augment_rot:
            augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
            rots = rotation_rad(-60, 60, 15)
            num_orients = len(rots)
            orients_dense = np.array([rots.index(o) for o in batch_os2])
            batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
            batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
        _, _, _, _ = sess.run(
            [self._acc_ops.metric, self._acc_ops.update,
             self._acc_orient_ops.metric, self._acc_orient_ops.update,
             # tf.argmax(self.model.p, 1), tf.argmax(self.y_, 1),
             ],
            feed_dict={self.x: batch_xs_in,
                       self.y_: batch_ys,
                       self.orient_: batch_os_one_hot,
                       })
def cifar_datasets(dirname, one_hot=True, dtype=dtypes.float32, reshape=False, seed=None):
    maybe_download_and_extract(dirname)
    dirname = os.path.join(dirname, 'cifar-10-batches-py/')

    train_images = []
    train_labels = []
    for i in range(1, 6):
        fpath = os.path.join(dirname, 'data_batch_' + str(i))
        image, label = load_batch(fpath)
        if i == 1:
            train_images = np.array(image)
            train_labels = np.array(label)
        else:
            train_images = np.concatenate([train_images, image], axis=0)
            train_labels = np.concatenate([train_labels, label], axis=0)

    train_images = np.dstack((train_images[:, :1024],
                              train_images[:, 1024:2048],
                              train_images[:, 2048:]))
    train_images = np.reshape(train_images, [-1, 32, 32, 3])

    if one_hot:
        train_labels = dense_to_one_hot(train_labels, 10)

    print 'Cifar train_images size:', train_images.shape
    print 'Cifar train_labels size:', train_labels.shape
    train_images = train_images / 255.0 - 0.5

    fpath = os.path.join(dirname, "test_batch")
    image, label = load_batch(fpath)
    test_images = np.array(image)
    test_labels = np.array(label)
    test_images = np.dstack((test_images[:, :1024],
                             test_images[:, 1024:2048],
                             test_images[:, 2048:]))
    test_images = np.reshape(test_images, [-1, 32, 32, 3])

    if one_hot:
        test_labels = dense_to_one_hot(test_labels, 10)

    print "Cifar test_images size:", test_images.shape
    print "Cifar test_labels size:", test_labels.shape
    test_images = test_images / 255.0 - 0.5

    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    train = DataSet(train_images, train_labels, **options)
    test = DataSet(test_images, test_labels, **options)
    return Datasets(train=train, test=test)
def read_datafile(data_file, one_hot=False, num_classes=10):
    mat = np.loadtxt(data_file, np.float32)
    images = mat[:, :-1]
    labels = mat[:, -1]
    labels = labels.astype(np.uint8)
    num_images = images.shape[0]
    rows = 28
    cols = 28
    assert rows * cols == images.shape[1], \
        "loaded images are not 28*28"
    images = images.reshape(num_images, rows, cols, 1)
    if one_hot:
        labels = mnist.dense_to_one_hot(labels, num_classes)
    return images, labels
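# Hypothetical usage of read_datafile: 'mnist_train.txt' is a placeholder path
# to a whitespace-separated matrix whose rows are 784 pixel values followed by
# a trailing integer label, as the function expects.
images, labels = read_datafile('mnist_train.txt', one_hot=True, num_classes=10)
print(images.shape)  # (num_images, 28, 28, 1)
print(labels.shape)  # (num_images, 10)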
def loadLabelSet(filename):
    print("load label set", filename)
    binfile = open(filename, 'rb')
    buffers = binfile.read()
    head = struct.unpack_from('>II', buffers, 0)
    print("head,", head)
    offset = struct.calcsize('>II')
    print("offset,", offset)
    labels = np.frombuffer(buffers, dtype=np.uint8, offset=offset)
    binfile.close()
    labels = dense_to_one_hot(labels, 10)
    print("load label finished")
    return labels
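# Hypothetical usage of loadLabelSet with a standard MNIST IDX label file
# ('train-labels-idx1-ubyte' is a placeholder path):
labels = loadLabelSet('train-labels-idx1-ubyte')
print(labels.shape)  # (60000, 10) for the MNIST training labels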
def read_user_data(path, ratio):
    """
    :param path: directory where the data is located (expects a tag.json label map)
    :param ratio: ratio between the training and test splits
    :return: base.Datasets(train, validation, test)
    """
    images = []
    labels = []
    lablejson = {}
    dtype = dtypes.float32
    reshape = True
    seed = None
    with open(os.path.join(path, 'tag.json')) as file:
        lablejson = json.load(file)
    # NOTE: the original snippet walks a hard-coded 'data' directory for the images.
    path = 'data'
    for (root, dirs, files) in os.walk(path):
        for filename in files:
            if filename.endswith('.jpg'):
                name = os.path.join(root, filename)
                img = Image.open(name)
                img = numpy.array(img)
                img = img.reshape([img.shape[0], img.shape[1], 1])
                labels.append(int(lablejson[filename]))
                images.append(img)
    images = numpy.array(images)
    labels = dense_to_one_hot(numpy.array(labels), 10)
    number = labels.shape[0]
    middle = int(number * ratio)
    train_image = images[:middle]
    train_label = labels[:middle]
    test_image = images[middle:]
    test_label = labels[middle:]
    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    train = DataSet(train_image, train_label, **options)
    test = DataSet(test_image, test_label, **options)
    return base.Datasets(train=train, validation=None, test=test)
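# Hypothetical usage of read_user_data: 'user_digits/' is a placeholder directory
# containing tag.json; note that the images themselves are read from the
# hard-coded 'data' directory in the snippet above. 0.8 of the examples go to
# the training split.
datasets = read_user_data('user_digits/', 0.8)
batch_xs, batch_ys = datasets.train.next_batch(32)
print(batch_xs.shape, batch_ys.shape)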
def initDataSetsClasses():
    global dataSetTrain
    global dataSetTest
    print(FLAGS.train_classes, FLAGS.test_classes)

    # Variable to read out the labels & data of the DataSet Object.
    mnistData = read_data_sets('./', one_hot=True)

    # MNIST labels & data for training.
    mnistLabelsTrain = mnistData.train.labels
    mnistDataTrain = mnistData.train.images

    # MNIST labels & data for testing.
    mnistLabelsTest = mnistData.test.labels
    mnistDataTest = mnistData.test.images

    print("LABELS", mnistLabelsTest.shape, mnistLabelsTrain.shape)

    if FLAGS.permuteTrain != -1:
        # training dataset
        np.random.seed(FLAGS.permuteTrain)
        permTr = np.random.permutation(mnistDataTrain.shape[1])
        mnistDataTrainPerm = mnistDataTrain[:, permTr]
        mnistDataTrain = mnistDataTrainPerm
        # dataSetTrain = DataSet(255. * dataSetTrainPerm,
        #                        mnistLabelsTrain, reshape=False)

    if FLAGS.permuteTest != -1:
        # testing dataset
        np.random.seed(FLAGS.permuteTest)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        # dataSetTest = DataSet(255. * dataSetTestPerm,
        #                       mnistLabelsTest, reshape=False)
        mnistDataTest = mnistDataTestPerm

    if True:
        # args = parser.parse_args()
        print(FLAGS.train_classes, FLAGS.test_classes)
        if FLAGS.train_classes[0:]:
            labels_to_train = [int(i) for i in FLAGS.train_classes[0:]]
        if FLAGS.test_classes[0:]:
            labels_to_test = [int(i) for i in FLAGS.test_classes[0:]]

        # Filtered labels & data for training and testing.
        labels_train_classes = np.array([
            mnistLabelsTrain[i].argmax()
            for i in xrange(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ], dtype=np.uint8)
        data_train_classes = np.array([
            mnistDataTrain[i, :]
            for i in xrange(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ], dtype=np.float32)
        labels_test_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in xrange(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ], dtype=np.uint8)
        data_test_classes = np.array([
            mnistDataTest[i, :]
            for i in xrange(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ], dtype=np.float32)

        labelsTrainOnehot = dense_to_one_hot(labels_train_classes, 10)
        labelsTestOnehot = dense_to_one_hot(labels_test_classes, 10)

        dataSetTrain = DataSet(255. * data_train_classes,
                               labelsTrainOnehot, reshape=False)
        dataSetTest = DataSet(255. * data_test_classes,
                              labelsTestOnehot, reshape=False)
def learn(self, sess):
    if self.y_ is None:
        self.logger.info("Define placeholder for ground truth. Dims: %d" % self.model.n_nodes[-1])
        self.y_ = tf.placeholder("float", [None, self.model.n_nodes[-1]])
    suffix = ''
    if self.do_finetune:
        self.logger.info("Finetuning!")
        suffix += 'finetune'
    dir_train = self.dirpath('train', suffix=suffix)
    dir_val = self.dirpath('validation', suffix=suffix)
    summary_writer_train = tf.summary.FileWriter(dir_train, sess.graph)
    summary_writer_val = tf.summary.FileWriter(dir_val)
    cost, loss = self._cost_loss(self.dirname('train', suffix=suffix))
    vars_new = None
    if not self.do_finetune:
        vars_new = self.model.vars_new()  # limit optimizer vars if not finetuning
    optimizer = setup_optimizer(cost, self.learning_rate, var_list=vars_new)
    if not self.do_finetune:
        vars_new = self.model.vars_new()
    self.init_vars(sess, vars_new)
    summaries_merged_train = self._merge_summaries_scalars([cost, loss])
    if self._acc_ops is None:
        self._acc_ops = self._init_acc_ops()
    sess.run(self._acc_ops.reset)
    if self._acc_orient_ops is None:
        self._acc_orient_ops = self._init_acc_orient_ops()
    sess.run(self._acc_orient_ops.reset)
    summaries_merged_val = self._merge_summaries_scalars([self._acc_ops.metric,
                                                          self._acc_orient_ops.metric])
    #
    # in_ = self.model.x
    # xx = tf.placeholder("float", [None, 784])
    # augment_op = augment_rotation(xx,
    #                               -90, 90, 15,
    #                               self.batch_size_train)
    # self.model.x = augment_op
    if self.tf_record_prefix is not None:
        img, label, label_orient = MNIST.read_and_decode_ops(
            self.data.train.path,
            one_hot=self.data.train.one_hot,
            num_orientations=len(self.data.train.orientations))
        batch_xs_op, batch_ys_op, batch_os_op = tf.train.shuffle_batch(
            [img, label, label_orient],
            batch_size=self.batch_size_train,
            capacity=2000,
            min_after_dequeue=1000,
            num_threads=8)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    if self.do_augment_rot:
        # TODO: remove hardcoded init
        rots = rotation_rad(-60, 60, 15)
    self._init_saver()
    itr_exp = 0
    result = collections.namedtuple('Result', ['max', 'last', 'name', 'history', 'epoch_last'])
    result_orient = collections.namedtuple('Result', ['max', 'last', 'name', 'history', 'epoch_last'])
    result.name = self._acc_ops.metric.name
    result.max = 0
    result.history = collections.deque(maxlen=3)
    result_orient.name = self._acc_orient_ops.metric.name
    result_orient.max = 0
    result_orient.history = collections.deque(maxlen=3)
    for epoch in xrange(self.training_epochs):
        self.logger.info("Start %s epoch %d, step %d" % (suffix, epoch, itr_exp))
        # Loop over all batches
        for itr_epoch in xrange(self.num_batches_train):
            if self.tf_record_prefix is None:
                batch_xs, batch_ys = self.data.train.next_batch(self.batch_size_train)
            else:
                batch_xs, batch_ys, batch_os = sess.run([batch_xs_op, batch_ys_op, batch_os_op])
            # f = sess.run([augment_op], feed_dict={xx: batch_xs})
            batch_xs_in = batch_xs
            if self.do_augment_rot:
                augment_op, batch_os2 = self.rotation_ops_multiset_train(3)
                batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
                orients_dense = np.array([rots.index(o) for o in batch_os2])
                batch_os_one_hot = dense_to_one_hot(orients_dense, len(rots))
            else:
                # TODO: remove hardcoded init
                rots = rotation_rad(-60, 60, 15)
                batch_os_one_hot = dense_to_one_hot(
                    np.zeros((self.batch_size_train,), dtype=int) + (len(rots) / 2), len(rots))
            _, _, sess_summary = sess.run(
                [optimizer, cost, summaries_merged_train],
                feed_dict={self.x: batch_xs_in,
                           self.y_: batch_ys,
                           self.orient_: batch_os_one_hot})
            if self.is_time_to_track_train(itr_exp):
                summary_writer_train.add_summary(sess_summary, itr_exp)
            # self.logger.debug("training batch loss after step %d: %f" % (itr_exp, loss_batch))
            itr_exp += 1
        self.validate(sess)
        # run metric op one more time; data in feed dict is dummy data, does not influence metric
        acc, acc_orient, sess_summary = sess.run(
            [self._acc_ops.metric,
             self._acc_orient_ops.metric,
             summaries_merged_val],
            feed_dict={self.x: batch_xs,
                       self.y_: batch_ys,
                       self.orient_: batch_os_one_hot})
        if self.is_time_to_track_val(itr_exp):
            summary_writer_val.add_summary(sess_summary, itr_exp)
        self.logger.debug("validation accuracy after %s step %d: %f" % (suffix, itr_exp, acc))
        self.logger.debug("validation orientation accuracy after %s step %d: %f" % (suffix, itr_exp, acc_orient))
        fpath_save = os.path.join(dir_train, self._get_save_name())
        self.logger.debug("Save model at %s step %d to '%s'" % (suffix, itr_exp, fpath_save))
        self.saver.save(sess, fpath_save, global_step=itr_exp)
        result.last = acc
        result.epoch_last = epoch
        result.max = max(result.max, result.last)
        result.history.append(result.last)
        result_orient.last = acc_orient
        result_orient.max = max(result_orient.max, result_orient.last)
        result_orient.history.append(result_orient.last)
        result_orient.epoch_last = epoch
        if self.do_task_recognition:
            if len(result.history) == result.history.maxlen and \
                    np.absolute(np.mean(result.history) - result.last) < 1e-5:
                self.logger.debug("Validation accuracy not changing anymore. Stop iterating.")
                break
        elif self.do_task_orientation:
            if len(result_orient.history) == result_orient.history.maxlen and \
                    np.absolute(np.mean(result_orient.history) - result_orient.last) < 1e-5:
                self.logger.debug("Validation orientation accuracy not changing anymore. Stop iterating.")
                break
    if self.tf_record_prefix is not None:
        coord.request_stop()
        coord.join(threads)
    self.logger.info("Classification %s Optimization Finished!" % suffix)
    return result, result_orient
def initDataSetsClasses(FLAGS):
    """
    global dataSetTrain
    global dataSetTest
    global dataSetTest2
    global dataSetTest3
    """
    print("FLAGS", FLAGS.train_classes, FLAGS.test_classes)

    # Variable to read out the labels & data of the DataSet Object.
    mnistData = read_data_sets('./', one_hot=True)

    # MNIST labels & data for training.
    mnistLabelsTrain = mnistData.train.labels
    mnistDataTrain = mnistData.train.images

    # MNIST labels & data for testing.
    mnistLabelsTest = mnistData.test.labels
    mnistDataTest = mnistData.test.images

    print("LABELS", mnistLabelsTest.shape, mnistLabelsTrain.shape)

    ## starting point:
    # TRAINSET: mnistDataTrain, mnistLabelsTrain
    # TESTSET: mnistDataTest, mnistLabelsTest

    # make a copy
    mnistDataTest2 = mnistDataTest + 0.0
    mnistLabelsTest2 = mnistLabelsTest + 0.0

    # make a copy
    mnistDataTest3 = mnistDataTest + 0.0
    mnistLabelsTest3 = mnistLabelsTest + 0.0

    if FLAGS.permuteTrain != -1:
        # training dataset
        np.random.seed(FLAGS.permuteTrain)
        permTr = np.random.permutation(mnistDataTrain.shape[1])
        mnistDataTrainPerm = mnistDataTrain[:, permTr]
        if FLAGS.mergeTrainWithPermutation == True:
            mnistDataTrain = np.concatenate(
                (mnistDataTrain, mnistDataTrainPerm), axis=0)
        else:
            mnistDataTrain = mnistDataTrainPerm
        # dataSetTrain = DataSet(255. * dataSetTrainPerm,
        #                        mnistLabelsTrain, reshape=False)

    if FLAGS.permuteTest != -1:
        print("Permute")
        # testing dataset
        np.random.seed(FLAGS.permuteTest)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        # dataSetTest = DataSet(255. * dataSetTestPerm,
        #                       mnistLabelsTest, reshape=False)
        mnistDataTest = mnistDataTestPerm

    if FLAGS.permuteTest2 != -1:
        # testing dataset
        print("Permute2")
        np.random.seed(FLAGS.permuteTest2)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        mnistDataTest2 = mnistDataTestPerm

    if FLAGS.permuteTest3 != -1:
        print("Permute3")
        # testing dataset
        np.random.seed(FLAGS.permuteTest3)
        permTs = np.random.permutation(mnistDataTest.shape[1])
        mnistDataTestPerm = mnistDataTest[:, permTs]
        mnistDataTest3 = mnistDataTestPerm

    print "SHAPE", mnistDataTrain.shape

    if True:
        # args = parser.parse_args()
        if FLAGS.train_classes[0:]:
            labels_to_train = [int(i) for i in FLAGS.train_classes[0:]]
        if FLAGS.test_classes[0:]:
            labels_to_test = [int(i) for i in FLAGS.test_classes[0:]]
        if FLAGS.test2_classes != None:
            labels_to_test2 = [int(i) for i in FLAGS.test2_classes[0:]]
        else:
            labels_to_test2 = []
        if FLAGS.test3_classes != None:
            labels_to_test3 = [int(i) for i in FLAGS.test3_classes[0:]]
        else:
            labels_to_test3 = []

        # Filtered labels & data for training and testing.
        labels_train_classes = np.array([
            mnistLabelsTrain[i].argmax()
            for i in range(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ], dtype=np.uint8)
        data_train_classes = np.array([
            mnistDataTrain[i, :]
            for i in range(0, mnistLabelsTrain.shape[0])
            if mnistLabelsTrain[i].argmax() in labels_to_train
        ], dtype=np.float32)
        labels_test_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in range(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ], dtype=np.uint8)
        labels_test2_classes = np.array([
            mnistLabelsTest[i].argmax()
            for i in range(0, mnistLabelsTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test2
        ], dtype=np.uint8)
        if FLAGS.mergeTest12 == False:
            labels_test3_classes = np.array([
                mnistLabelsTest[i].argmax()
                for i in range(0, mnistLabelsTest.shape[0])
                if mnistLabelsTest[i].argmax() in labels_to_test3
            ], dtype=np.uint8)
        data_test_classes = np.array([
            mnistDataTest[i, :]
            for i in range(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test
        ], dtype=np.float32)
        data_test2_classes = np.array([
            mnistDataTest[i, :]
            for i in range(0, mnistDataTest.shape[0])
            if mnistLabelsTest[i].argmax() in labels_to_test2
        ], dtype=np.float32)
        if FLAGS.mergeTest12 == False:
            data_test3_classes = np.array([
                mnistDataTest[i, :]
                for i in range(0, mnistDataTest.shape[0])
                if mnistLabelsTest[i].argmax() in labels_to_test3
            ], dtype=np.float32)

        if FLAGS.mergeTest12 == True:
            data_test3_classes = np.concatenate(
                (data_test_classes, data_test2_classes), axis=0)
            labels_test3_classes = np.concatenate(
                (labels_test_classes, labels_test2_classes), axis=0)
            print "CONCATMERGE", data_test_classes.shape, data_test2_classes.shape, data_test3_classes.shape

        labelsTrainOnehot = dense_to_one_hot(labels_train_classes, 10)
        labelsTestOnehot = dense_to_one_hot(labels_test_classes, 10)
        labelsTest2Onehot = dense_to_one_hot(labels_test2_classes, 10)
        labelsTest3Onehot = dense_to_one_hot(labels_test3_classes, 10)

        dataSetTrain = DataSet(255. * data_train_classes,
                               labelsTrainOnehot, reshape=False)
        dataSetTest = DataSet(255. * data_test_classes,
                              labelsTestOnehot, reshape=False)
        dataSetTest2 = DataSet(255. * data_test2_classes,
                               labelsTest2Onehot, reshape=False)
        dataSetTest3 = DataSet(255. * data_test3_classes,
                               labelsTest3Onehot, reshape=False)

        # print ("EQUAL?", np.mean((data_test3_classes == data_test_classes)).astype("float32")) ; print(data_test3_classes.shape, data_test2_classes.shape)
        print(FLAGS.test_classes, FLAGS.test2_classes, FLAGS.test3_classes)
        print(labels_to_test3, labels_to_test2)

    return dataSetTrain, dataSetTest, dataSetTest2, dataSetTest3
def read_data_sets(split=True, num_train_folders=90, num_test_folders=24,
                   one_hot=True, frames=3):
    all_folders = os.listdir(emotions_path)
    random.Random(0).shuffle(all_folders)

    if split:
        train_folders = all_folders[:num_train_folders]
        test_folders = all_folders[num_train_folders:num_train_folders + num_test_folders]
        validation_folders = all_folders[num_train_folders + num_test_folders:]

        train_df = pd.DataFrame(read_from_folders(train_folders, frames))
        validation_df = pd.DataFrame(
            read_from_folders(validation_folders, frames))
        test_df = pd.DataFrame(read_from_folders(test_folders, frames))

        print("{} CK+ TRAIN datapoints loaded".format(len(train_df)))
        print("{} CK+ VALIDATION datapoints loaded".format(len(validation_df)))
        print("{} CK+ TEST datapoints loaded".format(len(test_df)))
    else:
        train_df = pd.DataFrame(read_from_folders(all_folders, frames))
        validation_df = train_df.copy()
        test_df = train_df.copy()
        print("{} CK+ TRAIN datapoints loaded".format(len(train_df)))

    if one_hot:
        train_labels = dense_to_one_hot(train_df['emotion'].values, NUM_CLASSES)
        validation_labels = dense_to_one_hot(validation_df['emotion'].values, NUM_CLASSES)
        test_labels = dense_to_one_hot(test_df['emotion'].values, NUM_CLASSES)
    else:
        train_labels = train_df['emotion']
        validation_labels = validation_df['emotion']
        test_labels = test_df['emotion']

    del train_df['emotion']
    del validation_df['emotion']
    del test_df['emotion']

    train_idx = np.arange(len(train_labels))
    validation_idx = np.arange(len(validation_labels))
    test_idx = np.arange(len(test_labels))
    np.random.shuffle(train_idx)
    np.random.shuffle(validation_idx)
    np.random.shuffle(test_idx)

    train_images = train_df.as_matrix()
    validation_images = validation_df.as_matrix()
    test_images = test_df.as_matrix()

    train = DataSet(train_images[train_idx, :], train_labels[train_idx], reshape=False)
    validation = DataSet(validation_images[validation_idx, :],
                         validation_labels[validation_idx], reshape=False)
    test = DataSet(test_images[test_idx, :], test_labels[test_idx], reshape=False)

    return Datasets(train=train, validation=validation, test=test)
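# Hedged usage sketch for the CK+ reader above (90 training folders, 24 test
# folders and 3 frames per sequence are simply its defaults; the label width
# follows the module-level NUM_CLASSES):
ckplus = read_data_sets(split=True, num_train_folders=90, num_test_folders=24,
                        one_hot=True, frames=3)
batch_xs, batch_ys = ckplus.train.next_batch(16)
print(batch_ys.shape)  # expected: (16, NUM_CLASSES)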
with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)

X_train, y_train = train['features'], train['labels']
X_test, y_test = test['features'], test['labels']

# number of training examples
n_train = len(X_train)

# number of testing examples
n_test = len(X_test)

y_train = mnist.dense_to_one_hot(y_train, nb_classes)
y_test = mnist.dense_to_one_hot(y_test, nb_classes)
print("done loading")

# TODO: Split data into training and validation sets.

# TODO: Define placeholders and resize operation.
x = tf.placeholder(tf.float32, (None, 32, 32, 3))

# TODO: Resize the images so they can be fed into AlexNet.
# HINT: Use `tf.image.resize_images` to resize the images
resized = tf.image.resize_images(x, (227, 227))

# TODO: pass placeholder as first argument to `AlexNet`.
fc7 = AlexNet(resized, feature_extract=True)
# NOTE: `tf.stop_gradient` prevents the gradient from flowing backwards
def split_data_set(wanted_digits, fraction_train, fraction_test):
    """
    Splits the MNIST dataset into two parts: one containing only the digits in
    wanted_digits and a second containing all remaining digits. Labels in the
    dataset with wanted_digits are mapped to the range 0:num_labels, where
    num_labels is the number of wanted digits. Each of the two datasets is in
    turn split into train, validation and test sets with the number of samples
    specified by fraction_train and fraction_test.

    Output: (extracted_label_map, remaining_label_map, extracted,
             extracted_remapped, remaining, remaining_remapped, all_digits) where
        extracted_label_map, remaining_label_map - dictionaries mapping original
            labels to new ones;
        extracted, extracted_remapped - base.Datasets with wanted digits
            (original and remapped labels, respectively);
        remaining, remaining_remapped - base.Datasets with remaining digits
            (original and remapped labels, respectively);
        all_digits - base.Datasets over all digits.
    """
    # read data set
    mnist = input_data.read_data_sets("MNIST_data/")

    # concatenate train, validation and test data
    concatenated_images = np.concatenate((mnist.test.images,
                                          mnist.validation.images,
                                          mnist.train.images), axis=0)
    concatenated_labels = np.concatenate((mnist.test.labels,
                                          mnist.validation.labels,
                                          mnist.train.labels), axis=0)

    # estimate class frequencies
    unique, counts = np.unique(concatenated_labels, return_counts=True)
    digit_frequency = dict(zip(unique, counts))
    print(digit_frequency)

    # rows to extract from dataset
    rows_to_extract = np.logical_or.reduce([concatenated_labels == x for x in wanted_digits])

    # extract samples corresponding to desired digits
    extracted_labels = concatenated_labels[rows_to_extract]
    extracted_images = concatenated_images[rows_to_extract]

    # remaining samples
    remaining_labels = concatenated_labels[np.logical_not(rows_to_extract)]
    remaining_images = concatenated_images[np.logical_not(rows_to_extract)]

    num_train_extracted = int(extracted_labels.shape[0] * fraction_train)
    num_test_extracted = int(extracted_labels.shape[0] * fraction_test)
    num_train_remaining = int(remaining_labels.shape[0] * fraction_train)
    num_test_remaining = int(remaining_labels.shape[0] * fraction_test)
    num_train_all = int(concatenated_labels.shape[0] * fraction_train)
    num_test_all = int(concatenated_labels.shape[0] * fraction_test)

    # map desired labels to range (0:num_labels)
    extracted_remapped_labels = np.empty(shape=extracted_labels.shape, dtype=np.int32)
    remaining_remapped_labels = np.empty(shape=remaining_labels.shape, dtype=np.int32)
    extracted_label_map = {}  # dictionary containing label map
    remaining_label_map = {}

    # create map dictionary for extracted digits
    for i in range(len(wanted_digits)):
        extracted_label_map[wanted_digits[i]] = i

    # create map dictionary for remaining digits
    counter = 0
    for i in range(num_classes):
        if i not in wanted_digits:
            remaining_label_map[i] = counter
            counter += 1

    print("extracted_label map:", extracted_label_map)
    for i in range(extracted_labels.shape[0]):
        extracted_remapped_labels[i] = extracted_label_map[extracted_labels[i]]
    print("extracted_remapped_labels:", extracted_remapped_labels)

    print("remaining_label map:", remaining_label_map)
    for i in range(remaining_labels.shape[0]):
        remaining_remapped_labels[i] = remaining_label_map[remaining_labels[i]]
    print("remaining_remapped_labels:", remaining_remapped_labels)

    num_extracted_digits = len(wanted_digits)  # number of classes in dataset with wanted_digits only

    # convert labels to one hot
    extracted_labels_one_hot = dense_to_one_hot(extracted_labels, num_classes)
    extracted_remapped_labels_one_hot = dense_to_one_hot(extracted_remapped_labels,
                                                         num_extracted_digits)
    remaining_labels_one_hot = dense_to_one_hot(remaining_labels, num_classes)
    remaining_remapped_labels_one_hot = dense_to_one_hot(remaining_remapped_labels,
                                                         num_classes - num_extracted_digits)
    concatenated_labels_one_hot = dense_to_one_hot(concatenated_labels, num_classes)

    print("extracted labels one hot: ", extracted_labels_one_hot)
    print("remaining labels one hot: ", remaining_labels_one_hot)
    print("extracted remapped labels one hot: ", extracted_remapped_labels_one_hot)
    print("remaining remapped labels one hot: ", remaining_remapped_labels_one_hot)

    # split datasets
    extracted_train = DataSet(extracted_images[:num_train_extracted] * MAX_INTENSITY,
                              extracted_labels_one_hot[:num_train_extracted],
                              dtype=dtypes.float32, reshape=False, one_hot=True)
    extracted_test = DataSet(extracted_images[num_train_extracted:(num_train_extracted + num_test_extracted)] * MAX_INTENSITY,
                             extracted_labels_one_hot[num_train_extracted:(num_train_extracted + num_test_extracted)],
                             dtype=dtypes.float32, reshape=False, one_hot=True)
    extracted_validation = DataSet(extracted_images[(num_train_extracted + num_test_extracted):] * MAX_INTENSITY,
                                   extracted_labels_one_hot[(num_train_extracted + num_test_extracted):],
                                   dtype=dtypes.float32, reshape=False, one_hot=True)

    extracted_remapped_train = DataSet(extracted_images[:num_train_extracted] * MAX_INTENSITY,
                                       extracted_remapped_labels_one_hot[:num_train_extracted],
                                       dtype=dtypes.float32, reshape=False, one_hot=True)
    extracted_remapped_test = DataSet(extracted_images[num_train_extracted:(num_train_extracted + num_test_extracted)] * MAX_INTENSITY,
                                      extracted_remapped_labels_one_hot[num_train_extracted:(num_train_extracted + num_test_extracted)],
                                      dtype=dtypes.float32, reshape=False, one_hot=True)
    extracted_remapped_validation = DataSet(extracted_images[(num_train_extracted + num_test_extracted):] * MAX_INTENSITY,
                                            extracted_remapped_labels_one_hot[(num_train_extracted + num_test_extracted):],
                                            dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_train = DataSet(remaining_images[:num_train_remaining] * MAX_INTENSITY,
                              remaining_labels_one_hot[:num_train_remaining],
                              dtype=dtypes.float32, reshape=False, one_hot=True)
    remaining_test = DataSet(remaining_images[num_train_remaining:(num_train_remaining + num_test_remaining)] * MAX_INTENSITY,
                             remaining_labels_one_hot[num_train_remaining:(num_train_remaining + num_test_remaining)],
                             dtype=dtypes.float32, reshape=False, one_hot=True)
    remaining_validation = DataSet(remaining_images[(num_train_remaining + num_test_remaining):] * MAX_INTENSITY,
                                   remaining_labels_one_hot[(num_train_remaining + num_test_remaining):],
                                   dtype=dtypes.float32, reshape=False, one_hot=True)

    remaining_remapped_train = DataSet(remaining_images[:num_train_remaining] * MAX_INTENSITY,
                                       remaining_remapped_labels_one_hot[:num_train_remaining],
                                       dtype=dtypes.float32, reshape=False, one_hot=True)
    remaining_remapped_test = DataSet(remaining_images[num_train_remaining:(num_train_remaining + num_test_remaining)] * MAX_INTENSITY,
                                      remaining_remapped_labels_one_hot[num_train_remaining:(num_train_remaining + num_test_remaining)],
                                      dtype=dtypes.float32, reshape=False, one_hot=True)
    remaining_remapped_validation = DataSet(remaining_images[(num_train_remaining + num_test_remaining):] * MAX_INTENSITY,
                                            remaining_remapped_labels_one_hot[(num_train_remaining + num_test_remaining):],
                                            dtype=dtypes.float32, reshape=False, one_hot=True)

    # data sets comprised of all digits
    all_train = DataSet(concatenated_images[:num_train_all] * MAX_INTENSITY,
                        concatenated_labels_one_hot[:num_train_all],
                        dtype=dtypes.float32, reshape=False, one_hot=True)
    all_test = DataSet(concatenated_images[num_train_all:(num_train_all + num_test_all)] * MAX_INTENSITY,
                       concatenated_labels_one_hot[num_train_all:(num_train_all + num_test_all)],
                       dtype=dtypes.float32, reshape=False, one_hot=True)
    all_validation = DataSet(concatenated_images[(num_train_all + num_test_all):] * MAX_INTENSITY,
                             concatenated_labels_one_hot[(num_train_all + num_test_all):],
                             dtype=dtypes.float32, reshape=False, one_hot=True)

    # combine data sets into single base.Datasets class
    extracted = base.Datasets(train=extracted_train,
                              validation=extracted_validation,
                              test=extracted_test)
    extracted_remapped = base.Datasets(train=extracted_remapped_train,
                                       validation=extracted_remapped_validation,
                                       test=extracted_remapped_test)
    remaining = base.Datasets(train=remaining_train,
                              validation=remaining_validation,
                              test=remaining_test)
    remaining_remapped = base.Datasets(train=remaining_remapped_train,
                                       validation=remaining_remapped_validation,
                                       test=remaining_remapped_test)
    all_digits = base.Datasets(train=all_train,
                               validation=all_validation,
                               test=all_test)

    return (extracted_label_map, remaining_label_map, extracted, extracted_remapped,
            remaining, remaining_remapped, all_digits)
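# Hypothetical call to split_data_set: keep digits 0-4 as the "wanted" subset,
# using 70% of each subset for training and 20% for testing (the remaining 10%
# becomes validation). The printed label maps follow directly from the mapping
# loops in the function above.
(extracted_label_map, remaining_label_map,
 extracted, extracted_remapped,
 remaining, remaining_remapped, all_digits) = split_data_set([0, 1, 2, 3, 4], 0.7, 0.2)
print(extracted_label_map)   # {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}
print(remaining_label_map)   # {5: 0, 6: 1, 7: 2, 8: 3, 9: 4}
print(extracted_remapped.train.num_examples)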