def main(_):
  for mnist in ["mnist", "fashion_mnist", "kmnist", "emnist"]:
    output_dir = mnist_dir(mnist)
    test_utils.remake_dir(output_dir)
    write_image_file(os.path.join(output_dir, _TRAIN_DATA_FILENAME), 10)
    write_label_file(os.path.join(output_dir, _TRAIN_LABELS_FILENAME), 10)
    write_image_file(os.path.join(output_dir, _TEST_DATA_FILENAME), 2)
    write_label_file(os.path.join(output_dir, _TEST_LABELS_FILENAME), 2)
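
# Hypothetical sanity check (an assumption, not part of the original script):
# if write_image_file emits standard IDX headers rather than placeholder
# bytes, the item count can be read back to confirm the split sizes above.
import struct

def _read_idx_count(path):
  # IDX files start with a 4-byte magic number followed by a 4-byte
  # big-endian item count.
  with open(path, "rb") as f:
    _, count = struct.unpack(">II", f.read(8))
  return count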
def _generate(): """Generates a fake data set and writes it to the fake_examples directory.""" output_dir = os.path.join(FLAGS.tfds_dir, "testing", "test_data", "fake_examples", "abstract_reasoning") test_utils.remake_dir(output_dir) random_state = np.random.RandomState(0) for split_type in SPLIT_TYPES: _create_fake_file(output_dir, split_type, random_state)
def _generate(): """Generates a fake data set and writes it to the fake_examples directory.""" output_dir = os.path.join(FLAGS.tfds_dir, "testing", "test_data", "fake_examples", "smallnorb") test_utils.remake_dir(output_dir) random_state = np.random.RandomState(0) _create_chunk(os.path.join(output_dir, TRAINING_OUTPUT_NAME), random_state) _create_chunk(os.path.join(output_dir, TESTING_OUTPUT_NAME), random_state)

def _generate_cifar10_data():
  """Generates .bin and label .txt files for cifar10."""
  output_dir = cifar10_output_dir()
  test_utils.remake_dir(output_dir)
  for batch_number in range(1, NUMBER_BATCHES + 1):
    generate_cifar10_batch("data_batch_%s.bin" % batch_number)
  # The test batch is written once, outside the per-batch loop.
  generate_cifar10_batch("test_batch.bin")
  label_names = tfds.builder("cifar10").info.features["label"].names
  print(label_names)
  with open(os.path.join(output_dir, "batches.meta.txt"), "w") as f:
    f.write("\n".join(label_names))

def main(_):
  # Task ids are 8 hex digits, mimicking the naming scheme of real ARC tasks.
  task_index = np.random.randint(2**31)
  for subset in ["training", "evaluation"]:
    output_dir = arc_dir(subset)
    test_utils.remake_dir(output_dir)
    num_tasks = NUM_TASKS[subset]
    for _ in range(num_tasks):
      task_index += 1
      task_id = "{:08x}".format(task_index)
      task = make_task()
      write_task(output_dir, task_id, task)

def _generate_cifar100_data():
  """Generates .bin and label .txt files for cifar100."""
  output_dir = cifar100_output_dir()
  test_utils.remake_dir(output_dir)
  generate_cifar100_batch("train.bin", 10)
  generate_cifar100_batch("test.bin", 2)
  fine_names = tfds.builder("cifar100").info.features["label"].names
  coarse_names = tfds.builder("cifar100").info.features["coarse_label"].names
  with open(os.path.join(output_dir, "fine_label_names.txt"), "w") as f:
    f.write("\n".join(fine_names))
  with open(os.path.join(output_dir, "coarse_label_names.txt"), "w") as f:
    f.write("\n".join(coarse_names))
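
# Hypothetical round-trip check (an assumption, not part of the original
# script): because the label files are written as newline-joined names,
# reading one back and splitting on "\n" should recover the builder's list,
# e.g. _read_label_names of "fine_label_names.txt" should equal
# tfds.builder("cifar100").info.features["label"].names.
def _read_label_names(path):
  with open(path) as f:
    return f.read().split("\n")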
def _generate(): """Generates a fake data set and writes it to the fake_examples directory.""" output_dir = os.path.join(FLAGS.tfds_dir, "testing", "test_data", "fake_examples", "shapes3d") test_utils.remake_dir(output_dir) images, values = _create_fake_samples() with h5py.File(os.path.join(output_dir, OUTPUT_NAME), "w") as f: img_dataset = f.create_dataset("images", images.shape, "|u1") img_dataset.write_direct(images) values_dataset = f.create_dataset("labels", values.shape, "<f8") values_dataset.write_direct(np.ascontiguousarray(values))

def _generate_stl10_data():
  """Generates .bin files for stl10."""
  output_dir = stl_output_dir()
  test_utils.remake_dir(output_dir)
  for fname in ["train_y.bin", "test_y.bin"]:
    labels = np.random.randint(NUMBER_LABELS, size=(1,), dtype=np.uint8)
    dump(output_dir, fname, labels)
  for fname in ["train_X.bin", "test_X.bin", "unlabeled_X.bin"]:
    images = np.random.randint(
        256, size=(1, HEIGHT * WIDTH * 3), dtype=np.uint8)
    dump(output_dir, fname, images)
  label_names = [
      "airplane", "bird", "car", "cat", "deer", "dog", "horse", "monkey",
      "ship", "truck"
  ]
  with open(os.path.join(output_dir, "class_names.txt"), "w") as f:
    f.write("\n".join(label_names))

def make_part_data():
  base_dir = os.path.join(
      fake_examples_dir,
      "shapenet_part2017",
      "shapenetcore_partanno_segmentation_benchmark_v0_normal",
  )
  test_utils.remake_dir(base_dir)
  split_dir = os.path.join(base_dir, "train_test_split")
  tf.io.gfile.makedirs(split_dir)
  j = 0
  for split, num_examples in part_test.splits.items():
    if split == "validation":
      split = "val"
    paths = []
    synset_ids = random.sample(PART_SYNSET_IDS, num_examples)
    for synset_id in synset_ids:
      filename = "example%d.txt" % j
      j += 1
      subdir = os.path.join(base_dir, synset_id)
      if not tf.io.gfile.isdir(subdir):
        tf.io.gfile.makedirs(subdir)
      path = os.path.join(subdir, filename)
      # Each example gets between 2 and 11 random points with unit normals
      # and integer part labels, packed as float32 columns.
      n_points = np.random.randint(10) + 2
      points = np.random.normal(size=n_points * 3).reshape((n_points, 3))
      normals = np.random.normal(size=n_points * 3).reshape((n_points, 3))
      normals /= np.linalg.norm(normals, axis=-1, keepdims=True)
      point_labels = np.random.randint(NUM_PART_CLASSES, size=n_points)
      data = np.empty((n_points, 7), dtype=np.float32)
      data[:, :3] = points.astype(np.float32)
      data[:, 3:6] = normals.astype(np.float32)
      data[:, 6] = point_labels.astype(np.float32)
      with tf.io.gfile.GFile(path, "wb") as fp:
        np.savetxt(fp, data)
      paths.append(os.path.join("shape_data", synset_id, filename[:-4]))
    # json.dump needs a text-mode handle in Python 3, so open with "w",
    # not "wb".
    with tf.io.gfile.GFile(
        os.path.join(split_dir, "shuffled_%s_file_list.json" % split), "w"
    ) as fp:
      json.dump(paths, fp)
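
# Hypothetical loader for one generated example (an assumption, not part of
# the original script): each row written above is
# [x, y, z, nx, ny, nz, part_label], so np.loadtxt can split it back apart.
def _load_part_example(path):
  with tf.io.gfile.GFile(path, "r") as fp:
    data = np.loadtxt(fp, dtype=np.float32)
  points = data[:, :3]
  normals = data[:, 3:6]
  point_labels = data[:, 6].astype(np.int64)
  return points, normals, point_labels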

def main(_):
  output_dir = mnist_dir("binarized_mnist")
  test_utils.remake_dir(output_dir)
  write_image_file(os.path.join(output_dir, _TRAIN_DATA_FILENAME), 10)
  write_image_file(os.path.join(output_dir, _VALID_DATA_FILENAME), 2)
  write_image_file(os.path.join(output_dir, _TEST_DATA_FILENAME), 2)