def prepare(self, ds: Dataset, ds_len: int, shuffle: bool = False, augment: bool = False) -> Dataset:
    """Assemble the tf.data input pipeline for this dataset.

    Applies resize/rescale preprocessing, caches the result, optionally
    shuffles, batches, optionally augments, and adds buffered prefetching.

    Args:
        ds: dataset whose elements are bare images or (image, label) pairs.
        ds_len: number of elements in ``ds`` (informational, printed only).
        shuffle: if True, shuffle with a fixed 1000-element buffer.
        augment: if True, apply data augmentation (training set only).

    Returns:
        The fully prepared dataset.
    """
    # TODO: move the generic calls (shuffle, batch, prefetch, ...) into the parent class.
    print(f"Preparing dataset of {ds_len} elements...")

    def _rescale(x, y=None):
        # Elements may be bare images or (image, label) pairs.
        return (self._resize_and_rescale(x), y) if y is not None else self._resize_and_rescale(x)

    def _augment(x, y=None):
        if y is not None:
            return self._data_augmentation(x, training=True), y
        return self._data_augmentation(x, training=True)

    ds = ds.map(_rescale, num_parallel_calls=self._workers)
    # Cache after the (deterministic) preprocessing step.
    ds = ds.cache()
    if shuffle:
        # NOTE(review): buffer was once ds_len; currently capped at 1000.
        ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(self._batch_size)
    if augment:
        ds = ds.map(_augment, num_parallel_calls=self._workers)
    # Overlap preprocessing with model execution.
    return ds.prefetch(buffer_size=self._workers)
def convert_to(tf_dataset: Dataset, directory: str, name: str):
    """Serialize a dataset of (image, label) pairs into a .tfrecords file.

    Args:
        tf_dataset: dataset yielding (image, label) pairs.
        directory: output directory; created if it does not exist.
        name: base name of the output file (".tfrecords" is appended).
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    tf_dataset = tf_dataset.batch(batch_size=1)
    batches = tf_dataset.make_one_shot_iterator().get_next()
    filename = os.path.join(directory, name + ".tfrecords")
    # Fixed: the log message previously contained no placeholder and never
    # reported the actual output path.
    tf.logging.info(f"Write tfrecords into {filename}")
    total_samples = 0
    # Context-manage the writer so the file is flushed and closed even on
    # error (the original never closed the writer handle).
    with tf.python_io.TFRecordWriter(filename) as writer, tf.Session() as sess:
        while True:
            # Keep the try body minimal: only sess.run raises OutOfRangeError.
            try:
                image, label = sess.run(batches)
            except tf.errors.OutOfRangeError:
                tf.logging.info(
                    f"Finished conversion. Total samples: {total_samples}")
                break
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    "label": _int64_feature(label),
                    "image_raw": _bytes_feature(image.tostring()),
                }))
            writer.write(example.SerializeToString())
            total_samples += 1
def write_to_txt(dataset: Dataset, output_dir: str, name: str):
    """Dump a dataset of (image, label) pairs to a gzipped text file.

    Each output line has the form "<label>:<v1>,<v2>,..." where the values
    are the image entries rendered as strings.

    Args:
        dataset: dataset yielding (image, label) pairs.
        output_dir: output directory; created if it does not exist.
        name: split name embedded in the file name ("mnist.<name>.txt.gz").
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    dataset = dataset.batch(1)
    iterator = dataset.make_one_shot_iterator()
    batch = iterator.get_next()
    filename = f"{output_dir}/mnist.{name}.txt.gz"
    # Fixed: the log message previously contained no placeholder and never
    # reported the actual output path.
    tf.logging.info(f"Write txt into {filename}")
    total_samples = 0
    with gzip.open(filename, "wt") as f:
        with tf.Session() as sess:
            while True:
                # Keep the try body minimal: only sess.run raises OutOfRangeError.
                try:
                    image, label = sess.run(batch)
                except tf.errors.OutOfRangeError:
                    tf.logging.info(
                        f"Finished conversion. Total samples: {total_samples}")
                    break
                label = label[0]
                # Render numeric entries as fixed-width strings for joining.
                image = image[0].astype("<U16")
                contents = str(label) + ":" + ",".join(list(image))
                f.write(f"{contents}\n")
                total_samples += 1
def evaluate(self, test_set: Dataset) -> float:
    """Return classification accuracy of the memorizing classifier.

    Each test feature row is looked up among the memorized feature rows;
    an exact match predicts the stored label, a miss falls back to label 0.

    Args:
        test_set: dataset of (features, labels); the first 1000 elements
            are evaluated.

    Returns:
        Fraction of correctly predicted labels.
    """
    features, labels = next(tfds.as_numpy(test_set.batch(1000)))
    ntotal: int = labels.shape[0]
    # Nothing memorized yet: the classifier always predicts 0.
    if self.memory_features is None:
        return np.sum(labels == 0) / ntotal
    prediction = np.empty(features.shape[0])
    for idx, feature in enumerate(features):
        matches = np.all(self.memory_features == feature, axis=1).nonzero()[0]
        # First exact match wins; otherwise use the default label 0.
        prediction[idx] = self.memory_labels[matches[0]] if matches.size else 0
    return np.sum(prediction == labels) / ntotal
def train(self, training_set: Dataset, validation_set: Dataset, labels: Iterable[int]) -> None:
    """Memorize up to 1000 (feature, label) pairs from the training set.

    The validation set and label list are unused by this memorizing learner.

    Args:
        training_set: dataset of (features, labels) to memorize.
        validation_set: ignored.
        labels: ignored.
    """
    del validation_set  # unused
    del labels  # unused
    new_features, new_labels = next(tfds.as_numpy(training_set.batch(1000)))
    if self.memory_features is not None:
        # Subsequent calls: prepend the new batch to the existing memory.
        self.memory_features = np.vstack([new_features, self.memory_features])
        self.memory_labels = np.hstack([new_labels, self.memory_labels])
    else:
        # First call: seed the memory directly.
        self.memory_features, self.memory_labels = new_features, new_labels