def convert_to(tf_dataset: Dataset, directory: str, name: str):
    """Serialize every (image, label) pair in *tf_dataset* to a TFRecords file.

    Args:
        tf_dataset: TF 1.x dataset yielding (image, label) pairs.
        directory: output directory; created if it does not exist.
        name: base name of the output file ("<name>.tfrecords").
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Batch size 1 so each sess.run() yields exactly one sample.
    tf_dataset = tf_dataset.batch(batch_size=1)
    batches = tf_dataset.make_one_shot_iterator().get_next()
    filename = os.path.join(directory, name + ".tfrecords")
    # Bug fix: the log message previously interpolated nothing — restore
    # the target filename so the log line is actually informative.
    tf.logging.info(f"Write tfrecords into {filename}")
    total_samples = 0
    # Bug fix: the writer was never closed; the context manager guarantees
    # the TFRecordWriter is flushed and closed even if conversion fails.
    with tf.python_io.TFRecordWriter(filename) as writer:
        with tf.Session() as sess:
            while True:
                try:
                    image, label = sess.run(batches)
                    example = tf.train.Example(features=tf.train.Features(
                        feature={
                            "label": _int64_feature(label),
                            # tobytes() is the non-deprecated spelling of
                            # tostring(); identical output bytes.
                            "image_raw": _bytes_feature(image.tobytes()),
                        }))
                    writer.write(example.SerializeToString())
                    total_samples += 1
                except tf.errors.OutOfRangeError:
                    # One-shot iterator exhausted: normal end of the dataset.
                    tf.logging.info(
                        f"Finished conversion. Total samples: {total_samples}")
                    break
def write_to_txt(dataset: Dataset, output_dir: str, name: str):
    """Dump every (image, label) pair in *dataset* to a gzipped text file.

    Each output line has the form "<label>:<v0>,<v1>,..." with pixel values
    rendered as strings.

    Args:
        dataset: TF 1.x dataset yielding (image, label) pairs.
        output_dir: output directory; created if it does not exist.
        name: split name used in the output file "mnist.<name>.txt.gz".
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Batch size 1 so each sess.run() yields exactly one sample.
    dataset = dataset.batch(1)
    iterator = dataset.make_one_shot_iterator()
    batch = iterator.get_next()
    filename = f"{output_dir}/mnist.{name}.txt.gz"
    # Bug fix: the log message previously interpolated nothing — restore
    # the target filename so the log line is actually informative.
    tf.logging.info(f"Write txt into {filename}")
    total_samples = 0
    with gzip.open(filename, "wt") as f:
        with tf.Session() as sess:
            while True:
                try:
                    image, label = sess.run(batch)
                    label = label[0]  # drop the size-1 batch dimension
                    # Render pixel values as fixed-width strings so they
                    # can be joined directly.
                    # NOTE(review): assumes image[0] is 1-D — a 2-D image
                    # would make ",".join fail; confirm upstream flattening.
                    image = image[0].astype("<U16")
                    contents = str(label) + ":" + ",".join(list(image))
                    f.write(f"{contents}\n")
                    total_samples += 1
                except tf.errors.OutOfRangeError:
                    # One-shot iterator exhausted: normal end of the dataset.
                    tf.logging.info(
                        f"Finished conversion. Total samples: {total_samples}")
                    break