def __init__(self, output_path,
             train_dataset: tf.data.Dataset,
             valid_dataset: tf.data.Dataset):
    """Create the summary writer and the train/validation input tensors.

    Args:
        output_path: Location handed to the summary file writer.
        train_dataset: Dataset whose elements unpack into an ``(x, y)`` pair.
        valid_dataset: Dataset whose elements unpack into an ``(x, y)`` pair.
    """
    self.writer = tf.contrib.summary.create_file_writer(output_path)
    self.global_batch = 0

    # One-shot iterators: each dataset can be consumed exactly once.
    train_iterator = train_dataset.make_one_shot_iterator()
    self.train_x, self.train_y = train_iterator.get_next()

    valid_iterator = valid_dataset.make_one_shot_iterator()
    self.valid_x, self.valid_y = valid_iterator.get_next()
def compute_mean_std(data: tf.data.Dataset):
    """Compute the mean and standard deviation of the ``'image'`` entries.

    The dataset is consumed in batches of 1024. Each batch contributes its
    mean and mean-of-squares (reduced over axes 0, 1 and 2), weighted by the
    fraction of a full batch it represents, so a short final batch is
    weighted correctly.

    Args:
        data: Finite dataset of dicts that contain an ``'image'`` entry.

    Returns:
        Tuple ``(mean, std)`` of numpy values.
    """
    images = data.map(lambda x: x['image']).batch(1024).prefetch(1)
    next_batch = images.make_one_shot_iterator().get_next()

    count = 0
    weighted_mean_sum = 0
    weighted_square_sum = 0
    with tf.Session(config=utils.get_config()) as sess:

        def batches():
            # Yield evaluated batches until the iterator is exhausted.
            try:
                while True:
                    yield sess.run(next_batch)
            except tf.errors.OutOfRangeError:
                return

        for batch in tqdm(batches(), unit='kimg',
                          desc='Computing dataset mean and std'):
            ratio = batch.shape[0] / 1024.
            count += ratio
            weighted_mean_sum = weighted_mean_sum + batch.mean((0, 1, 2)) * ratio
            weighted_square_sum = (
                weighted_square_sum + (batch**2).mean((0, 1, 2)) * ratio)

    mean = weighted_mean_sum / count
    # Var[x] = E[x^2] - E[x]^2
    sigma = weighted_square_sum / count - mean**2
    std = np.sqrt(sigma)
    print('Mean %s Std: %s' % (mean, std))
    return mean, std
def _get_data_results(data: tf.data.Dataset, session_manager,
                      max_iteration=None) -> dict:
    """Evaluate dataset samples and collect them per flattened key.

    Args:
        data: Dataset to read samples from.
        session_manager: Context manager that yields the session used to
            evaluate the samples.
        max_iteration: Optional cap on the number of samples to read;
            ``None`` reads until the dataset is exhausted.

    Returns:
        Nested structure rebuilt from the flattened keys, where every leaf
        is the list of values collected for that key (bytes values are
        decoded to ``str``).
    """
    next_sample = data.make_one_shot_iterator().get_next()
    collected = {}
    completed = 0
    limited = max_iteration is not None

    with session_manager as sess:
        while True:
            try:
                flat_sample = nest_utils.flatten_nested_struct(
                    sess.run(next_sample))
                for key, value in flat_sample.items():
                    if isinstance(value, bytes):
                        value = value.decode()
                    collected.setdefault(key, []).append(value)
                completed += 1
            except tf.errors.OutOfRangeError:
                break
            # The cap is checked after the sample has been stored.
            if limited and completed >= max_iteration:
                break

    return nest_utils.unflatten_dict_to_nested(collected)
def memoize(dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Load a finite dataset into memory and serve it from numpy arrays.

    The input is fully evaluated in a temporary graph and session. The
    ``'image'`` and ``'label'`` entries are stacked into numpy arrays, and
    the returned dataset draws shuffled indices forever, looking each one up
    in those arrays through ``tf.py_func``.

    Args:
        dataset: Finite dataset of dicts with ``'image'`` and ``'label'``.

    Returns:
        Endlessly repeating, shuffled dataset of ``dict(image=..., label=...)``.
    """
    samples = []
    with tf.Graph().as_default(), tf.Session(
            config=utils.get_config()) as session:
        next_sample = dataset.prefetch(16).make_one_shot_iterator().get_next()
        while True:
            try:
                samples.append(session.run(next_sample))
            except tf.errors.OutOfRangeError:
                break

    images = np.stack([sample['image'] for sample in samples])
    labels = np.stack([sample['label'] for sample in samples])
    size = len(samples)

    def lookup(index):
        # Runs as plain Python on the in-memory arrays.
        return images[index], labels[index]

    def tf_get(index):
        image, label = tf.py_func(lookup, [index], [tf.float32, tf.int64])
        return dict(image=image, label=label)

    indices = tf.data.Dataset.range(size).repeat()
    # The shuffle buffer never exceeds the number of memoized samples.
    indices = indices.shuffle(min(size, FLAGS.shuffle))
    return indices.map(tf_get)
def train_fn(ds: tf.data.Dataset, batch_size=1, shuffle=10000,
             repeat: int = None):
    """Create input function for training, prediction, evaluation.

    Args:
        ds: Source dataset.
        batch_size: Number of elements per batch.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        repeat: Passed to ``Dataset.repeat``; the repeat step is skipped
            only when this is exactly 1.

    Returns:
        Zero-argument callable that builds a one-shot iterator over the
        prepared dataset and returns its ``get_next()`` result.
    """
    pipeline = ds
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)
    pipeline = pipeline.batch(batch_size)
    if repeat != 1:
        pipeline = pipeline.repeat(repeat)

    def input_fn():
        return pipeline.make_one_shot_iterator().get_next()

    return input_fn
def compare_datasets_graph_mode(original_dataset: tf.data.Dataset,
                                dataset_from_stream: tf.data.Dataset) -> int:
    """Check two datasets element by element and count the matching pairs.

    Both datasets are read in lockstep; every pair must agree on ``"label"``
    and ``"image"``. Reading stops as soon as either dataset is exhausted.

    Args:
        original_dataset: Reference dataset.
        dataset_from_stream: Dataset expected to yield the same elements.

    Returns:
        Number of element pairs that were compared.

    Raises:
        AssertionError: If a pair of elements differs.
    """
    stream_next = dataset_from_stream.make_one_shot_iterator().get_next()
    original_next = original_dataset.make_one_shot_iterator().get_next()

    compared = 0
    with tf.Session() as sess:
        try:
            while True:
                from_stream = sess.run(stream_next)
                from_original = sess.run(original_next)
                assert from_stream["label"] == from_original["label"]
                assert np.array_equal(from_stream["image"],
                                      from_original["image"])
                compared += 1
        except tf.errors.OutOfRangeError:
            pass
    return compared
def create_dataset_iter(dataset: tf.data.Dataset):
    """
    Create the "next element" op of a one-shot iterator over a dataset.

    Parameters
    ----------
    dataset : tf.data.Dataset

    Returns
    -------
    The result of ``get_next()`` on a fresh one-shot iterator.
    """
    # Define the operation that fetches the next group of data.
    return dataset.make_one_shot_iterator().get_next()
def print_dataset(dataset: tf.data.Dataset):
    """Print the first ten examples of a dataset and the total example count.

    Args:
        dataset: Dataset of serialized ``tf.train.Example`` records.
    """
    record_op = dataset.make_one_shot_iterator().get_next()
    total = 0
    with tf.Session() as sess:
        try:
            while True:
                serialized = sess.run(record_op)
                example = tf.train.Example.FromString(serialized)
                # Every record is parsed and counted; only the first ten
                # are printed.
                if total < 10:
                    print(example)
                total += 1
        except tf.errors.OutOfRangeError:
            pass
    print("total examples: " + str(total))
def bias_ops(ds: tf.data.Dataset, V):
    """Build ops that accumulate token statistics over a dataset.

    Four zero-initialised local variables are created and an ``update`` op
    that folds one batch into them per run:

    * ``vocab_tally``: per-token-id occurrence counts, shape ``(V,)``.
    * ``sentence_count``: number of rows seen so far.
    * ``word_count``: sum of all sentence lengths seen so far.
    * ``max_length``: largest sentence length seen so far.

    Args:
        ds: Dataset yielding ``(features, labels)`` where ``features`` holds
            the ``TEXT`` token ids and the ``SENTENCE_LENGTH`` lengths.
        V: Size of the tally vector.

    Returns:
        Tuple ``(vocab_tally, sentence_count, word_count, max_length, update)``.
    """
    features, _ = ds.make_one_shot_iterator().get_next()
    tokens = features[TEXT]  # (N, L)
    token_lengths = features[SENTENCE_LENGTH]  # (N,)
    length_dtype = token_lengths.dtype

    def zeros_local(name, dtype, shape):
        # Local variable initialised to zeros.
        return tf.get_local_variable(
            name=name,
            dtype=dtype,
            initializer=tf.initializers.zeros,
            shape=shape
        )

    vocab_tally = zeros_local('vocab_tally', tf.int64, (V,))  # (V,)
    word_count = zeros_local('word_count', length_dtype, [])
    max_length = zeros_local('max_length', length_dtype, [])
    sentence_count = zeros_local('sentence_count', tf.int32, [])

    batch_shape = tf.shape(tokens)

    # Keep only the tokens that lie inside each sentence's length.
    mask = tf.sequence_mask(
        maxlen=batch_shape[1],
        lengths=token_lengths
    )  # (N, L)
    valid_tokens = tf.boolean_mask(tensor=tokens, mask=mask)  # (Z,)

    # Add one to the tally slot of every valid token.
    ones = tf.ones(shape=tf.shape(valid_tokens), dtype=vocab_tally.dtype)
    update_tally = tf.scatter_nd_add(
        ref=vocab_tally,
        indices=tf.expand_dims(valid_tokens, 1),
        updates=ones
    )
    update_sentence_count = tf.assign_add(
        ref=sentence_count, value=batch_shape[0])
    update_word_count = tf.assign_add(
        ref=word_count, value=tf.reduce_sum(token_lengths))
    longest_so_far = tf.maximum(max_length, tf.reduce_max(token_lengths))
    update_max_length = tf.assign(ref=max_length, value=longest_so_far)

    update = tf.group(update_tally, update_sentence_count,
                      update_word_count, update_max_length)
    return vocab_tally, sentence_count, word_count, max_length, update
def load_dataset(dataset: tf.data.Dataset) -> Dict[str, np.ndarray]:
    """Given a TensorFlow dataset, load it into memory as numpy arrays.

    Every tensor of the dataset element is wrapped in a streaming-concat
    metric, and the metrics are then evaluated in a dedicated session.

    Args:
        dataset: input dataset with some finite size.

    Returns:
        Dict of numpy arrays with concatenated data from the full input
        dataset.
    """
    tensors = dataset.make_one_shot_iterator().get_next()

    metrics = {}
    for name, tensor in tensors.items():
        metrics[name] = tf.contrib.metrics.streaming_concat(tensor)

    # Created after the metrics so it covers their local variables.
    initializer = tf.local_variables_initializer()
    session_config = _disable_rewrite_config()
    with tf.Session(config=session_config) as sess:
        return evaluate_metrics(sess, initializer, metrics)
def show_dataset(
        ds: tf.data.Dataset,
        n_batch: int = 1,
        n_img: int = 10,
        converter: Callable = vision.transform.reverse_imagenet_normalize_tf
) -> FuncAnimation:
    """Build an animation from the first images of a dataset.

    Args:
        ds: Dataset yielding ``(images, <ignored>)`` batches.
        n_batch: Maximum number of batches to read.
        n_img: Maximum number of images to collect.
        converter: Applied to each (possibly truncated) batch of images
            before they are collected.

    Returns:
        Result of ``images_anim`` on the collected images, clipped to
        ``[0.0, 1.0]``.
    """
    frames = []
    remaining = n_img
    batch_op = ds.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for _ in range(n_batch):
            images, _unused = sess.run(batch_op)
            if len(images) < remaining:
                # Whole batch fits; keep reading.
                frames.extend(converter(images))
                remaining -= len(images)
                continue
            # This batch completes the quota.
            frames.extend(converter(images[:remaining]))
            break

    clipped = np.clip(np.array(frames), 0.0, 1.0)
    return images_anim(clipped)
def iter_first_x(dataset: tf.data.Dataset, num_batches: int):
    """
    Return a generator for the first num_batches batches in a given dataset

    Iteration ends early, with an info log, if the dataset runs out first.

    :param dataset: tf.data.Dataset object
    :param num_batches: number of batches
    :return: generator yielding evaluated batches
    """
    # pylint: disable=protected-access
    graph = dataset._graph

    # The iterator ops must live in the same graph as the dataset ops.
    # TODO: every call adds new iterator ops to that graph; a
    # reinitializable iterator would avoid this.
    with graph.as_default():
        # get_next() only creates a tensor; data is fetched when it is
        # evaluated in the session below.
        next_element = dataset.make_one_shot_iterator().get_next()

    yielded = 0
    with tf.compat.v1.Session(graph=graph) as sess:
        while yielded < num_batches:
            try:
                batch_data = sess.run(next_element)
            except tf.errors.OutOfRangeError:
                # Running out of data is treated as the normal end of
                # iteration, not as an error.
                logger.info(
                    'dataset ran out of elements, stopping after %s batches',
                    yielded)
                break
            else:
                yield batch_data
                yielded += 1

    # Explicit close retained from the original implementation.
    sess.close()
def fit_dataset(self, dataset: tf.data.Dataset):
    """Fit the model on a dataset, one batch at a time.

    The dataset is batched with ``self.batch_size``. Every full batch is
    trained on for ``self.epochs`` steps, and every tenth step summaries are
    written, a checkpoint is saved and progress is logged. Training stops
    when the dataset is exhausted or a batch smaller than
    ``self.batch_size`` is encountered.

    Args:
        dataset: Dataset whose batched elements can be stacked into the
            tensor fed to ``self._x_batch``.
    """
    with self._graph.as_default():
        dataset = dataset.batch(self.batch_size).prefetch(self.batch_size)
        iterator = dataset.make_one_shot_iterator()
        # Build the fetch ops once. Calling get_next()/tf.stack inside the
        # loop would add new nodes to the graph on every batch.
        stack_batch_op = tf.stack(iterator.get_next())

        with tf.Session() as self._sess:
            batch_n = 0
            while True:
                try:
                    self._load_or_init_session()
                    stacked_batch = self._sess.run(stack_batch_op)

                    # if batch size is different from the specified batch size the fitting network won't work due to
                    # mismatching shapes
                    if len(stacked_batch) != self.batch_size:
                        logging.warning(
                            "Ignored last batch because it was smaller than the specified batch size. To avoid this choose "
                            "a batch size that is a factor of the dataset size."
                        )
                        break

                    for step in range(self.epochs):
                        self._sess.run(
                            self.train_step,
                            feed_dict={self._x_batch: stacked_batch})
                        # Parenthesised on purpose: ``step + 1 % 10`` parses
                        # as ``step + (1 % 10)`` and is never zero.
                        # NOTE(review): save and progress logging are assumed
                        # to belong to this periodic branch - confirm.
                        if (step + 1) % 10 == 0:
                            self._write_summaries(stacked_batch)
                            self._saver.save(self._sess, self.save_file,
                                             global_step=self.global_step)
                            self._log_progress(batch_n, step, stacked_batch)
                    batch_n += 1
                except tf.errors.OutOfRangeError:
                    break
def read_dataset(dataset: tf.data.Dataset) -> Tuple[float, int]:
    """Read a dataset to exhaustion and time it.

    Elements are parsed with ``dataset_parser`` in batches of one.

    Args:
        dataset: Dataset of records accepted by ``dataset_parser``.

    Returns:
        Tuple ``(elapsed_seconds, number_of_samples_read)``.
    """
    parse_and_batch = tf.data.experimental.map_and_batch(
        dataset_parser,
        batch_size=1,
        num_parallel_batches=2,
        drop_remainder=True)
    parsed = dataset.apply(parse_and_batch)
    parsed = parsed.prefetch(tf.data.experimental.AUTOTUNE)
    next_op = parsed.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        data_samples = 0
        started_at = time.time()
        try:
            while True:
                sess.run(next_op)
                data_samples += 1
        except tf.errors.OutOfRangeError:
            pass
        elapsed = time.time() - started_at
    return elapsed, data_samples
def sample_generator(dataset: tf.data.Dataset, n_workers: int = 4):
    """TF dataset -> python generator

    Samples are evaluated in a session that stays open for as long as the
    generator is being consumed.

    Args:
        dataset: dataset to read samples from
        n_workers: argument passed to the ``background`` decorator
            (presumably the number of background workers - confirm)

    Thanks to @velikodniy
    """
    next_sample = dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:

        @background(n_workers)
        def load():
            # Yield evaluated samples until the dataset is exhausted.
            while True:
                try:
                    yield sess.run(next_sample)
                except tf.errors.OutOfRangeError:
                    return

        yield from load()
def draw_images(self, ds: tf.data.Dataset, n=9):
    """Draw images from dataset.

    Images are laid out on a grid with three columns. ``n`` is rounded down
    to a multiple of three so that the grid is completely filled.

    Args:
        ds: dataset yielding ``(image, label)``; ``label[0]`` is scanned for
            the first entry equal to 1 to pick the class name, and each image
            must be reshapeable to ``(224, 224, 3)``
        n: first most n images to draw
    """
    import matplotlib.pyplot as plt

    cols = 3
    rows = n // cols
    n = rows * cols
    # squeeze=False keeps ``ax`` two-dimensional even for a single row;
    # otherwise the 2-D indexing below fails for 3 <= n < 6.
    fig, ax = plt.subplots(ncols=cols, nrows=rows, squeeze=False)
    next_item = ds.make_one_shot_iterator().get_next()

    with tf.Session() as s:
        for i in range(n):
            try:
                image, label = s.run(next_item)
            except tf.errors.OutOfRangeError:
                # Fewer than n images available: draw what there is.
                break
            # Index of the first entry equal to 1 in the label row.
            class_idx = next(
                idx for idx, flag in enumerate(label[0]) if flag == 1)
            class_name = self.image_classes[class_idx]
            image_data = np.asarray(image).astype(np.uint8)
            image_data = np.reshape(image_data, (224, 224, 3))
            image_fig = ax[i // cols, i % cols]
            image_fig.imshow(image_data)
            image_fig.set_title(class_name)
    fig.tight_layout()
def draw_images(self, ds: tf.data.Dataset, n=9):
    """Draw images from dataset.

    Images are laid out on a grid with three columns. ``n`` is rounded down
    to a multiple of three so that the grid is completely filled.

    Args:
        ds: dataset yielding ``(image, label)``; ``label[0]`` is scanned for
            the first entry equal to 1 to pick the class name, and each image
            must be reshapeable to ``(224, 224, 3)``
        n: first most n images to draw
    """
    import matplotlib.pyplot as plt

    cols = 3
    rows = n // cols
    n = rows * cols
    # squeeze=False keeps ``ax`` two-dimensional even for a single row;
    # otherwise the 2-D indexing below fails for 3 <= n < 6.
    fig, ax = plt.subplots(ncols=cols, nrows=rows, squeeze=False)
    next_item = ds.make_one_shot_iterator().get_next()

    with tf.Session() as s:
        for i in range(n):
            try:
                image, label = s.run(next_item)
            except tf.errors.OutOfRangeError:
                # Fewer than n images available: draw what there is.
                break
            # Index of the first entry equal to 1 in the label row.
            class_idx = next(
                idx for idx, flag in enumerate(label[0]) if flag == 1)
            class_name = self.image_classes[class_idx]
            image_data = np.asarray(image).astype(np.uint8)
            image_data = np.reshape(image_data, (224, 224, 3))
            image_fig = ax[i // cols, i % cols]
            image_fig.imshow(image_data)
            image_fig.set_title(class_name)
    fig.tight_layout()
def get_first_batch(dataset: tf.data.Dataset):
    """Return the ``get_next()`` result of a new one-shot iterator.

    Args:
        dataset: Dataset to iterate over.

    Returns:
        The element produced by ``get_next()`` on the fresh iterator.
    """
    return dataset.make_one_shot_iterator().get_next()
def get_sub_sampled_data(
        cls, orig_layer: Layer, pruned_layer: Layer, inp_op_names: List,
        orig_layer_db: LayerDatabase, comp_layer_db: LayerDatabase,
        data_set: tf.data.Dataset, batch_size: int,
        num_reconstruction_samples: int) -> (np.ndarray, np.ndarray):
    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-locals
    """
    Get all the input data from pruned model and output data from original model

    :param orig_layer: layer in original model database
    :param pruned_layer: layer in pruned model database
    :param inp_op_names : input Op names, should be same in both models
    :param orig_layer_db: original model database, un-pruned, used to provide the actual outputs
    :param comp_layer_db: comp. model database, this is potentially already pruned in the upstreams layers of given
     layer name
    :param data_set: tf.data.Dataset object
    :param batch_size : batch size
    :param num_reconstruction_samples: The number of reconstruction samples
    :return: input_data, output_data
    :raises StopIteration: if the dataset runs out of batches before enough samples were collected
    """
    # Grow GPU memory as needed at the cost of fragmentation.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=no-member

    # hard coded value
    samples_per_image = 10
    total_num_of_images = int(num_reconstruction_samples / samples_per_image)
    # number of possible batches - round up
    num_of_batches = math.ceil(total_num_of_images / batch_size)

    all_sub_sampled_inp_data = []
    all_sub_sampled_out_data = []

    # create an iterator and iterator.get_next() Op in the same graph as dataset
    # TODO: currently dataset (user provided) and iterator are in the same graph, and the iterator is
    #  being created every time this function is called. Use re-initialize iterator
    sess = tf.compat.v1.Session(graph=data_set._graph, config=config)  # pylint: disable=protected-access

    # try/finally guarantees the session is released even when the dataset
    # runs out of data (or anything else fails) part-way through.
    try:
        with sess.graph.as_default():
            iterator = data_set.make_one_shot_iterator()
            next_element = iterator.get_next()

        for _ in range(num_of_batches):
            try:
                # get the data
                batch_data = sess.run(next_element)

                # output data from original model
                feed_dict = aimet_tensorflow.utils.common.create_input_feed_dict(
                    orig_layer_db.model.graph, inp_op_names, batch_data)
                output_data = orig_layer_db.model.run(
                    orig_layer.module.outputs[0], feed_dict=feed_dict)

                # input data from compressed model
                feed_dict = aimet_tensorflow.utils.common.create_input_feed_dict(
                    comp_layer_db.model.graph, inp_op_names, batch_data)
                input_data = comp_layer_db.model.run(
                    pruned_layer.module.inputs[0], feed_dict=feed_dict)

                # get the layer attributes (kernel_size, stride, padding)
                layer_attributes = aimet_tensorflow.utils.op.conv.get_layer_attributes(
                    sess=orig_layer_db.model,
                    op=orig_layer.module,
                    input_op_names=orig_layer_db.starting_ops,
                    input_shape=orig_layer_db.input_shape)

                # channels_last (NHWC) to channels_first data format (NCHW - Common format)
                input_data = np.transpose(input_data, (0, 3, 1, 2))
                output_data = np.transpose(output_data, (0, 3, 1, 2))

                # get the sub sampled input and output data
                sub_sampled_inp_data, sub_sampled_out_data = InputMatchSearch.subsample_data(
                    layer_attributes, input_data, output_data, samples_per_image)

                all_sub_sampled_inp_data.append(sub_sampled_inp_data)
                all_sub_sampled_out_data.append(sub_sampled_out_data)

            except tf.errors.OutOfRangeError:
                raise StopIteration(
                    "There are insufficient batches of data in the provided dataset for the purpose of"
                    " weight reconstruction! Either reduce number of reconstruction samples or increase"
                    " data in dataset")
    finally:
        # close the session
        sess.close()

    # accumulate total sub sampled input and output data
    return np.vstack(all_sub_sampled_inp_data), np.vstack(
        all_sub_sampled_out_data)
def __input_fn(dataset: tf.data.Dataset):
    """Return the ``get_next()`` result of a one-shot iterator over ``dataset``."""
    one_shot_iterator = dataset.make_one_shot_iterator()
    next_element = one_shot_iterator.get_next()
    return next_element
def dataset_to_tensor(x: tf.data.Dataset):
    """Turn a dataset into the ``get_next()`` result of a one-shot iterator.

    Args:
        x: Dataset to iterate over.

    Returns:
        The element produced by ``get_next()`` on a fresh one-shot iterator.
    """
    one_shot_iterator = x.make_one_shot_iterator()
    return one_shot_iterator.get_next()